Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tests for particle diffusion II #2245

Draft
wants to merge 27 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
85486cf
Tests for particle diffusion added (for different morphologies; also …
jlubo Aug 15, 2023
6062daf
Black style formatting applied
jlubo Aug 15, 2023
890f315
Black style formatting (v23.7.0) applied
jlubo Aug 16, 2023
1bd43cc
Cleanup
jlubo Aug 16, 2023
72b0d61
Black reformatting
jlubo Aug 16, 2023
5b9ecd3
Revision: list of dictionaries for stimulation added; small improvements
jlubo Aug 24, 2023
e6b48b9
Split function to reduce complexity (following flake8 complaint)
jlubo Aug 24, 2023
2c1e802
Black style applied
jlubo Aug 24, 2023
7f77d60
Revision: discretization policy simplified
jlubo Aug 25, 2023
0bcc339
tests for different segment length added (also fail as of v0.9.0); co…
jlubo Sep 27, 2023
b78d1cb
Merge branch 'master' into tests_for_particle_diffusion
jlubo Sep 27, 2023
8fe2207
fix: apply 'estimated' tests only if radius is fixed
jlubo Oct 7, 2023
51d3990
disabled 'test_diffusion_different_length' and 'test_diffusion_differ…
jlubo Oct 10, 2023
c3b5bf7
generalized tests for 'estimated' equilibrium values; annotations added
jlubo Oct 12, 2023
b334f29
Merge branch 'master' into tests_for_particle_diffusion
jlubo Oct 12, 2023
da87dea
fixed estimated values in two segment-case
jlubo Oct 14, 2023
e72bace
split function 'get_morph_and_decor()' into special cases for 1,2,3 s…
jlubo Oct 14, 2023
455b242
scratch field for diffusion
boeschf Jan 9, 2024
48ea3c8
Merge remote-tracking branch 'upstream/master' into fix/diffusive_con…
boeschf Jan 9, 2024
0c52db8
gpu fix: backend, shfl_down signature, gpu printer
boeschf Jan 10, 2024
fcf48f4
Merge remote-tracking branch 'jlubo/tests_for_particle_diffusion' int…
boeschf Jan 10, 2024
979605f
hack to remove wrong ballot
boeschf Jan 10, 2024
da48a69
Merge branch 'fix/diffusive_concentrations' into fix/diffusive_concen…
boeschf Jan 10, 2024
5fe327a
probe labels
boeschf Jan 10, 2024
6d284bc
test diffusion different length
boeschf Jan 10, 2024
28c642f
formatting
boeschf Jan 10, 2024
b73ea1a
Merge branch 'fix/diffusive_concentrations_jlubo' of github.com:boesc…
boeschf Jan 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 51 additions & 12 deletions arbor/backends/gpu/shared_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
#include "util/range.hpp"
#include "util/strprintf.hpp"

#include <iostream>

using arb::memory::make_const_view;

namespace arb {
Expand All @@ -37,6 +35,8 @@ void take_samples_impl(

void add_scalar(std::size_t n, arb_value_type* data, arb_value_type v);

void apply_diffusive_concentration_delta_impl(std::size_t, arb_value_type*, arb_value_type* const *, arb_index_type const *);

// GPU-side minmax: consider CUDA kernel replacement.
std::pair<arb_value_type, arb_value_type> minmax_value_impl(arb_size_type n, const arb_value_type* v) {
auto v_copy = memory::on_host(memory::const_device_view<arb_value_type>(v, n));
Expand Down Expand Up @@ -201,6 +201,12 @@ shared_state::shared_state(task_system_handle tp,
add_scalar(temperature_degC.size(), temperature_degC.data(), -273.15);
}

// Fold the per-mechanism diffusive-concentration deltas accumulated on the
// device back into each ion's Xd field (the kernel also resets the deltas
// to zero for the next integration step).
void shared_state::apply_diffusive_concentration_delta() {
    for (auto& [ion_name, ion]: ion_data) {
        const auto n_contrib = ion.Xd_contribution.size();
        apply_diffusive_concentration_delta_impl(n_contrib,
                                                 ion.Xd_.data(),
                                                 ion.Xd_contribution_d.data(),
                                                 ion.Xd_index_d.data());
    }
}

void shared_state::update_prng_state(mechanism& m) {
if (!m.mech_.n_random_variables) return;
auto const mech_id = m.mechanism_id();
Expand Down Expand Up @@ -250,20 +256,23 @@ void shared_state::instantiate(mechanism& m,
store.globals_ = std::vector<arb_value_type>(m.mech_.n_globals);

// Set ion views
arb_size_type n_ions_with_written_xd = 0;
for (auto idx: make_span(m.mech_.n_ions)) {
auto ion = m.mech_.ions[idx].name;
auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
ion_state* oion = ptr_by_key(ion_data, ion_binding);
if (!oion) throw arbor_internal_error("gpu/mechanism: mechanism holds ion with no corresponding shared state");
auto& ion_state = store.ion_states_[idx];
ion_state = {0};
ion_state.current_density = oion->iX_.data();
ion_state.reversal_potential = oion->eX_.data();
ion_state.internal_concentration = oion->Xi_.data();
ion_state.external_concentration = oion->Xo_.data();
ion_state.diffusive_concentration = oion->Xd_.data();
ion_state.ionic_charge = oion->charge.data();
ion_state.conductivity = oion->gX_.data();
auto& ion_state_ = store.ion_states_[idx]; // arb_ion_state
ion_state_ = {0};
ion_state_.current_density = oion->iX_.data();
ion_state_.reversal_potential = oion->eX_.data();
ion_state_.internal_concentration = oion->Xi_.data();
ion_state_.external_concentration = oion->Xo_.data();
ion_state_.diffusive_concentration = oion->Xd_.data();
ion_state_.ionic_charge = oion->charge.data();
ion_state_.conductivity = oion->gX_.data();

n_ions_with_written_xd += m.mech_.ions[idx].write_diff_concentration;
}

// If there are no sites (is this ever meaningful?) there is nothing more to do.
Expand All @@ -272,7 +281,7 @@ void shared_state::instantiate(mechanism& m,
// Allocate and initialize state and parameter vectors with default values.
{
// Allocate bulk storage
std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1)*width_padded + m.mech_.n_globals;
std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 + n_ions_with_written_xd)*width_padded + m.mech_.n_globals;
store.data_ = array(count, NAN);
chunk_writer writer(store.data_.data(), width_padded);

Expand All @@ -295,6 +304,11 @@ void shared_state::instantiate(mechanism& m,
for (auto idx: make_span(m.mech_.n_state_vars)) {
store.state_vars_[idx] = writer.fill(m.mech_.state_vars[idx].default_value);
}
// Set diffusive concentration deltas if needed
for (auto idx: make_span(m.mech_.n_ions)) {
store.ion_states_[idx].diffusive_concentration_delta =
m.mech_.ions[idx].write_diff_concentration ? writer.fill(0) : nullptr;
}
// Assign global scalar parameters. NB: Last chunk, since it breaks the width striding.
for (auto idx: make_span(m.mech_.n_globals)) store.globals_[idx] = m.mech_.globals[idx].default_value;
for (auto& [k, v]: overrides.globals) {
Expand Down Expand Up @@ -332,6 +346,31 @@ void shared_state::instantiate(mechanism& m,
auto indices = util::index_into(pos_data.cv, ni);
std::vector<arb_index_type> mech_ion_index(indices.begin(), indices.end());
store.ion_states_[idx].index = writer.append_with_padding(mech_ion_index, 0);

if (m.mech_.ions[idx].write_diff_concentration) {
auto& Xd_contribution_map = oion->Xd_contribution_map;
auto& Xd_contribution = oion->Xd_contribution;
auto& Xd_index = oion->Xd_index;
auto& Xd_contribution_d = oion->Xd_contribution_d;
auto& Xd_index_d = oion->Xd_index_d;
util::append(
Xd_contribution_map,
util::transform_view(
util::make_span(width),
[mech_ion_index, d = store.ion_states_[idx].diffusive_concentration_delta](const auto& i) {
return std::make_pair(d+i, mech_ion_index[i]);
}));
// sort contribution map according to index and transpose from AoS to SoA
util::stable_sort_by(Xd_contribution_map, [](const auto& p) { return p.second; });
Xd_contribution.clear(); Xd_contribution.reserve(Xd_contribution_map.size());
Xd_index.clear(); Xd_index.reserve(Xd_contribution_map.size());
for (auto [ptr, idx_] : Xd_contribution_map) {
Xd_contribution.push_back(ptr);
Xd_index.push_back(idx_);
}
Xd_contribution_d = memory::on_gpu(Xd_contribution);
Xd_index_d = memory::on_gpu(Xd_index);
}
}

m.ppack_.multiplicity = mult_in_place? writer.append_with_padding(pos_data.multiplicity, 0): nullptr;
Expand Down
23 changes: 23 additions & 0 deletions arbor/backends/gpu/shared_state.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <arbor/gpu/gpu_api.hpp>
#include <arbor/gpu/gpu_common.hpp>
#include <arbor/gpu/reduce_by_key.hpp>

namespace arb {
namespace gpu {
Expand Down Expand Up @@ -40,6 +41,20 @@ __global__ void take_samples_impl(
}
}

// Scatter-add the per-mechanism diffusive concentration deltas into the
// shared per-CV field Xd, then reset each delta to zero.
//
// n               number of (delta pointer, CV index) pairs to process
// Xd              per-CV diffusive concentration values, updated in place
// Xd_contribution pointers to the accumulated delta values (read, then zeroed)
// Xd_index        target index in Xd for each contribution; multiple entries
//                 may target the same index — TODO confirm host side keeps
//                 them sorted so the keyed reduction can coalesce them
__global__ void apply_diffusive_concentration_delta_impl(
    std::size_t n,
    arb_value_type * __restrict__ const Xd,
    arb_value_type * const * __restrict__ const Xd_contribution,
    arb_index_type const * __restrict__ const Xd_index)
{
    const unsigned i = threadIdx.x+blockIdx.x*blockDim.x;
    // Active-lane mask for this warp: lanes with i >= n do not participate.
    // reduce_by_key needs the mask to safely combine contributions from
    // lanes that share the same Xd_index without conflicting writes.
    const unsigned mask = gpu::ballot(0xffffffff, i<n);
    if (i < n) {
        reduce_by_key(*Xd_contribution[i], Xd, Xd_index[i], mask);
        // Consume the delta so the next step starts from a clean slate.
        *Xd_contribution[i] = 0;
    }
}

} // namespace kernel

void add_scalar(std::size_t n, arb_value_type* data, arb_value_type v) {
Expand All @@ -53,5 +68,13 @@ void take_samples_impl(
launch_1d(s.size(), 128, kernel::take_samples_impl, s.begin_marked, s.end_marked, time, sample_time, sample_value);
}

// Host-side entry point: launch the kernel that folds the accumulated
// diffusive-concentration deltas into Xd and clears them afterwards.
void apply_diffusive_concentration_delta_impl(
    std::size_t n,
    arb_value_type* Xd,
    arb_value_type* const* Xd_contribution,
    arb_index_type const* Xd_index) {
    // Same block size as the other shared-state kernels in this file.
    constexpr unsigned block_dim = 128;
    launch_1d(n, block_dim, kernel::apply_diffusive_concentration_delta_impl,
              n, Xd, Xd_contribution, Xd_index);
}

} // namespace gpu
} // namespace arb
8 changes: 8 additions & 0 deletions arbor/backends/gpu/shared_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,12 @@ struct ARB_ARBOR_API ion_state {

array charge; // charge of ionic species (global, length 1)

std::vector<std::pair<arb_value_type*,arb_index_type>> Xd_contribution_map;
std::vector<arb_value_type*> Xd_contribution;
std::vector<arb_index_type> Xd_index;
memory::device_vector<arb_value_type*> Xd_contribution_d;
memory::device_vector<arb_index_type> Xd_index_d;

solver_ptr solver = nullptr;

ion_state() = default;
Expand Down Expand Up @@ -224,6 +230,8 @@ struct ARB_ARBOR_API shared_state: shared_state_base<shared_state, array, ion_st
const mechanism_layout&,
const std::vector<std::pair<std::string, std::vector<arb_value_type>>>&);

void apply_diffusive_concentration_delta();

void update_prng_state(mechanism&);

void zero_currents();
Expand Down
44 changes: 43 additions & 1 deletion arbor/backends/multicore/shared_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,17 @@ std::size_t extend_width(const arb::mechanism& mech, std::size_t width) {
}
} // anonymous namespace

// Fold the per-mechanism diffusive-concentration deltas back into each ion's
// Xd field, zeroing every delta as it is consumed so the next integration
// step starts from a clean accumulator.
void shared_state::apply_diffusive_concentration_delta() {
    for (auto& [ion_name, ion]: ion_data) {
        auto* xd = ion.Xd_.data();
        const auto& contrib = ion.Xd_contribution;
        const auto& index = ion.Xd_index;
        for (std::size_t k = 0; k < contrib.size(); ++k) {
            // std::exchange reads the delta and resets it to 0 in one step.
            xd[index[k]] += std::exchange(*contrib[k], 0);
        }
    }
}

void shared_state::update_prng_state(mechanism& m) {
if (!m.mech_.n_random_variables) return;
const auto mech_id = m.mechanism_id();
Expand Down Expand Up @@ -418,6 +429,7 @@ void shared_state::instantiate(arb::mechanism& m,
store.ion_states_.resize(m.mech_.n_ions); m.ppack_.ion_states = store.ion_states_.data();

// Set ion views
arb_size_type n_ions_with_written_xd = 0;
for (auto idx: make_span(m.mech_.n_ions)) {
auto ion = m.mech_.ions[idx].name;
auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
Expand All @@ -433,6 +445,8 @@ void shared_state::instantiate(arb::mechanism& m,
ion_state.diffusive_concentration = oion->Xd_.data();
ion_state.ionic_charge = oion->charge.data();
ion_state.conductivity = oion->gX_.data();

n_ions_with_written_xd += m.mech_.ions[idx].write_diff_concentration;
}

// Initialize state and parameter vectors with default values.
Expand All @@ -446,7 +460,7 @@ void shared_state::instantiate(arb::mechanism& m,
// Allocate bulk storage
std::size_t value_width_padded = extend_width<arb_value_type>(m, pos_data.cv.size());
store.value_width_padded = value_width_padded;
std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 +
std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 + n_ions_with_written_xd +
random_number_storage)*value_width_padded + m.mech_.n_globals;
store.data_ = array(count, NAN, pad);
chunk_writer writer(store.data_.data(), value_width_padded);
Expand All @@ -470,6 +484,11 @@ void shared_state::instantiate(arb::mechanism& m,
for (auto idx: make_span(m.mech_.n_state_vars)) {
m.ppack_.state_vars[idx] = writer.fill(m.mech_.state_vars[idx].default_value);
}
// Set diffusive concentration deltas if needed
for (auto idx: make_span(m.mech_.n_ions)) {
m.ppack_.ion_states[idx].diffusive_concentration_delta =
m.mech_.ions[idx].write_diff_concentration ? writer.fill(0) : nullptr;
}
// Set random numbers
for (auto idx_v: make_span(num_random_numbers_per_cv))
for (auto idx_c: make_span(cbprng::cache_size()))
Expand Down Expand Up @@ -530,7 +549,30 @@ void shared_state::instantiate(arb::mechanism& m,
m.ppack_.ion_states[idx].index = writer.append(indices, util::back(indices));
// Check SIMD constraints
arb_assert(compatible_index_constraints(node_index, util::range_n(m.ppack_.ion_states[idx].index, index_width_padded), m.iface_.partition_width));

if (m.mech_.ions[idx].write_diff_concentration) {
auto mech_ion_index = m.ppack_.ion_states[idx].index;
auto& Xd_contribution_map = oion->Xd_contribution_map;
auto& Xd_contribution = oion->Xd_contribution;
auto& Xd_index = oion->Xd_index;
util::append(
Xd_contribution_map,
util::transform_view(
util::make_span(width),
[mech_ion_index, d = store.ion_states_[idx].diffusive_concentration_delta](const auto& i) {
return std::make_pair(d+i, mech_ion_index[i]);
}));
// sort contribution map according to index and transpose from AoS to SoA
util::stable_sort_by(Xd_contribution_map, [](const auto& p) { return p.second; });
Xd_contribution.clear(); Xd_contribution.reserve(Xd_contribution_map.size());
Xd_index.clear(); Xd_index.reserve(Xd_contribution_map.size());
for (auto [ptr, idx] : Xd_contribution_map) {
Xd_contribution.push_back(ptr);
Xd_index.push_back(idx);
}
}
}

if (mult_in_place) m.ppack_.multiplicity = writer.append(pos_data.multiplicity, 0);
// `peer_index` holds the peer CV of each CV in node_index.
// Peer CVs are only filled for gap junction mechanisms. They are used
Expand Down
6 changes: 6 additions & 0 deletions arbor/backends/multicore/shared_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ struct ARB_ARBOR_API ion_state {

array charge; // charge of ionic species (global value, length 1)

std::vector<std::pair<arb_value_type*,arb_index_type>> Xd_contribution_map;
std::vector<arb_value_type*> Xd_contribution;
std::vector<arb_index_type> Xd_index;

solver_ptr solver = nullptr;

ion_state() = default;
Expand Down Expand Up @@ -231,6 +235,8 @@ struct ARB_ARBOR_API shared_state:
const mechanism_layout&,
const std::vector<std::pair<std::string, std::vector<arb_value_type>>>&);

void apply_diffusive_concentration_delta();

void update_prng_state(mechanism&);

void zero_currents();
Expand Down
6 changes: 6 additions & 0 deletions arbor/fvm_lowered_cell_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
m->update_current();
}

state_->apply_diffusive_concentration_delta();

// Add stimulus current contributions.
// NOTE: performed after dt, time_to calculation, in case we want to
// use mean current contributions as opposed to point sample.
Expand All @@ -236,11 +238,15 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
m->update_state();
}

state_->apply_diffusive_concentration_delta();

// Update ion concentrations.
PE(advance:integrate:ionupdate);
update_ion_state();
PL();

// TODO: can `write_ions` affect xd?

// voltage mechs run now; after the cable_solver, but before the
// threshold test
for (auto& m: voltage_mechanisms_) {
Expand Down
32 changes: 6 additions & 26 deletions arbor/include/arbor/gpu/cuda_api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,17 +139,6 @@ inline float gpu_atomic_sub(float* address, float val) {

/// Warp-Level Primitives

__device__ __inline__ double shfl(unsigned mask, double x, int lane)
{
auto tmp = static_cast<uint64_t>(x);
auto lo = static_cast<unsigned>(tmp);
auto hi = static_cast<unsigned>(tmp >> 32);
hi = __shfl_sync(mask, static_cast<int>(hi), lane, warpSize);
lo = __shfl_sync(mask, static_cast<int>(lo), lane, warpSize);
return static_cast<double>(static_cast<uint64_t>(hi) << 32 |
static_cast<uint64_t>(lo));
}

__device__ __inline__ unsigned ballot(unsigned mask, unsigned is_root) {
return __ballot_sync(mask, is_root);
}
Expand All @@ -158,24 +147,15 @@ __device__ __inline__ unsigned any(unsigned mask, unsigned width) {
return __any_sync(mask, width);
}

#ifdef __NVCC__
__device__ __inline__ double shfl_up(unsigned mask, int idx, unsigned lane_id, unsigned shift) {
return __shfl_up_sync(mask, idx, shift);
}

__device__ __inline__ double shfl_down(unsigned mask, int idx, unsigned lane_id, unsigned shift) {
return __shfl_down_sync(mask, idx, shift);
template<typename T>
__device__ __inline__ T shfl_up(unsigned mask, T var, unsigned lane_id, unsigned shift) {
return __shfl_up_sync(mask, var, shift);
}

#else
__device__ __inline__ double shfl_up(unsigned mask, int idx, unsigned lane_id, unsigned shift) {
return shfl(mask, idx, lane_id - shift);
template<typename T>
__device__ __inline__ T shfl_down(unsigned mask, T var, unsigned lane_id, unsigned shift) {
return __shfl_down_sync(mask, var, shift);
}

__device__ __inline__ double shfl_down(unsigned mask, int idx, unsigned lane_id, unsigned shift) {
return shfl(mask, idx, lane_id + shift);
}
#endif
#endif

} // namespace gpu
Expand Down
Loading
Loading