diff --git a/arbor/backends/gpu/shared_state.cpp b/arbor/backends/gpu/shared_state.cpp index a15a3f9545..74fe7aed15 100644 --- a/arbor/backends/gpu/shared_state.cpp +++ b/arbor/backends/gpu/shared_state.cpp @@ -22,8 +22,6 @@ #include "util/range.hpp" #include "util/strprintf.hpp" -#include - using arb::memory::make_const_view; namespace arb { @@ -37,6 +35,8 @@ void take_samples_impl( void add_scalar(std::size_t n, arb_value_type* data, arb_value_type v); +void apply_diffusive_concentration_delta_impl(std::size_t, arb_value_type*, arb_value_type* const *, arb_index_type const *); + // GPU-side minmax: consider CUDA kernel replacement. std::pair minmax_value_impl(arb_size_type n, const arb_value_type* v) { auto v_copy = memory::on_host(memory::const_device_view(v, n)); @@ -201,6 +201,12 @@ shared_state::shared_state(task_system_handle tp, add_scalar(temperature_degC.size(), temperature_degC.data(), -273.15); } +void shared_state::apply_diffusive_concentration_delta() { + for (auto& [name, state] : ion_data) { + apply_diffusive_concentration_delta_impl(state.Xd_contribution.size(), state.Xd_.data(), state.Xd_contribution_d.data(), state.Xd_index_d.data()); + } +} + void shared_state::update_prng_state(mechanism& m) { if (!m.mech_.n_random_variables) return; auto const mech_id = m.mechanism_id(); @@ -250,20 +256,23 @@ void shared_state::instantiate(mechanism& m, store.globals_ = std::vector(m.mech_.n_globals); // Set ion views + arb_size_type n_ions_with_written_xd = 0; for (auto idx: make_span(m.mech_.n_ions)) { auto ion = m.mech_.ions[idx].name; auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion); ion_state* oion = ptr_by_key(ion_data, ion_binding); if (!oion) throw arbor_internal_error("gpu/mechanism: mechanism holds ion with no corresponding shared state"); - auto& ion_state = store.ion_states_[idx]; - ion_state = {0}; - ion_state.current_density = oion->iX_.data(); - ion_state.reversal_potential = oion->eX_.data(); - ion_state.internal_concentration = oion->Xi_.data(); - ion_state.external_concentration = oion->Xo_.data(); - ion_state.diffusive_concentration = oion->Xd_.data(); - ion_state.ionic_charge = oion->charge.data(); - ion_state.conductivity = oion->gX_.data(); + auto& ion_state_ = store.ion_states_[idx]; // arb_ion_state + ion_state_ = {0}; + ion_state_.current_density = oion->iX_.data(); + ion_state_.reversal_potential = oion->eX_.data(); + ion_state_.internal_concentration = oion->Xi_.data(); + ion_state_.external_concentration = oion->Xo_.data(); + ion_state_.diffusive_concentration = oion->Xd_.data(); + ion_state_.ionic_charge = oion->charge.data(); + ion_state_.conductivity = oion->gX_.data(); + + n_ions_with_written_xd += m.mech_.ions[idx].write_diff_concentration; } // If there are no sites (is this ever meaningful?) there is nothing more to do. @@ -272,7 +281,7 @@ void shared_state::instantiate(mechanism& m, // Allocate and initialize state and parameter vectors with default values. { // Allocate bulk storage - std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1)*width_padded + m.mech_.n_globals; + std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 + n_ions_with_written_xd)*width_padded + m.mech_.n_globals; store.data_ = array(count, NAN); chunk_writer writer(store.data_.data(), width_padded); @@ -295,6 +304,11 @@ void shared_state::instantiate(mechanism& m, for (auto idx: make_span(m.mech_.n_state_vars)) { store.state_vars_[idx] = writer.fill(m.mech_.state_vars[idx].default_value); } + // Set diffusive concentration deltas if needed + for (auto idx: make_span(m.mech_.n_ions)) { + store.ion_states_[idx].diffusive_concentration_delta = + m.mech_.ions[idx].write_diff_concentration ? writer.fill(0) : nullptr; + } // Assign global scalar parameters. NB: Last chunk, since it breaks the width striding. for (auto idx: make_span(m.mech_.n_globals)) store.globals_[idx] = m.mech_.globals[idx].default_value; for (auto& [k, v]: overrides.globals) { @@ -332,6 +346,31 @@ void shared_state::instantiate(mechanism& m, auto indices = util::index_into(pos_data.cv, ni); std::vector mech_ion_index(indices.begin(), indices.end()); store.ion_states_[idx].index = writer.append_with_padding(mech_ion_index, 0); + + if (m.mech_.ions[idx].write_diff_concentration) { + auto& Xd_contribution_map = oion->Xd_contribution_map; + auto& Xd_contribution = oion->Xd_contribution; + auto& Xd_index = oion->Xd_index; + auto& Xd_contribution_d = oion->Xd_contribution_d; + auto& Xd_index_d = oion->Xd_index_d; + util::append( + Xd_contribution_map, + util::transform_view( + util::make_span(width), + [mech_ion_index, d = store.ion_states_[idx].diffusive_concentration_delta](const auto& i) { + return std::make_pair(d+i, mech_ion_index[i]); + })); + // sort contribution map according to index and transpose from AoS to SoA + util::stable_sort_by(Xd_contribution_map, [](const auto& p) { return p.second; }); + Xd_contribution.clear(); Xd_contribution.reserve(Xd_contribution_map.size()); + Xd_index.clear(); Xd_index.reserve(Xd_contribution_map.size()); + for (auto [ptr, idx_] : Xd_contribution_map) { + Xd_contribution.push_back(ptr); + Xd_index.push_back(idx_); + } + Xd_contribution_d = memory::on_gpu(Xd_contribution); + Xd_index_d = memory::on_gpu(Xd_index); + } } m.ppack_.multiplicity = mult_in_place? writer.append_with_padding(pos_data.multiplicity, 0): nullptr; diff --git a/arbor/backends/gpu/shared_state.cu b/arbor/backends/gpu/shared_state.cu index 403067d70f..e87866b1d3 100644 --- a/arbor/backends/gpu/shared_state.cu +++ b/arbor/backends/gpu/shared_state.cu @@ -7,6 +7,7 @@ #include #include +#include namespace arb { namespace gpu { @@ -40,6 +41,20 @@ __global__ void take_samples_impl( } } +__global__ void apply_diffusive_concentration_delta_impl( + std::size_t n, + arb_value_type * __restrict__ const Xd, + arb_value_type * const * __restrict__ const Xd_contribution, + arb_index_type const * __restrict__ const Xd_index) +{ + const unsigned i = threadIdx.x+blockIdx.x*blockDim.x; + const unsigned mask = gpu::ballot(0xffffffff, i> Xd_contribution_map; + std::vector Xd_contribution; + std::vector Xd_index; + memory::device_vector Xd_contribution_d; + memory::device_vector Xd_index_d; + solver_ptr solver = nullptr; ion_state() = default; @@ -224,6 +230,8 @@ struct ARB_ARBOR_API shared_state: shared_state_base>>&); + void apply_diffusive_concentration_delta(); + void update_prng_state(mechanism&); void zero_currents(); diff --git a/arbor/backends/multicore/shared_state.cpp b/arbor/backends/multicore/shared_state.cpp index a32ea19b0f..dff975f589 100644 --- a/arbor/backends/multicore/shared_state.cpp +++ b/arbor/backends/multicore/shared_state.cpp @@ -329,6 +329,17 @@ std::size_t extend_width(const arb::mechanism& mech, std::size_t width) { } } // anonymous namespace +void shared_state::apply_diffusive_concentration_delta() { + for (auto& [name, state] : ion_data) { + auto* Xd_ = state.Xd_.data(); + const auto& Xd_c = state.Xd_contribution; + const auto& Xd_i = state.Xd_index; + for (auto i : util::count_along(Xd_c)) { + Xd_[Xd_i[i]] += std::exchange(*Xd_c[i], 0); + } + } +} + void shared_state::update_prng_state(mechanism& m) { if (!m.mech_.n_random_variables) return; const auto mech_id = m.mechanism_id(); @@ -418,6 +429,7 @@ void shared_state::instantiate(arb::mechanism& m, store.ion_states_.resize(m.mech_.n_ions); m.ppack_.ion_states = store.ion_states_.data(); // Set ion views + arb_size_type n_ions_with_written_xd = 0; for (auto idx: make_span(m.mech_.n_ions)) { auto ion = m.mech_.ions[idx].name; auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion); @@ -433,6 +445,8 @@ void shared_state::instantiate(arb::mechanism& m, ion_state.diffusive_concentration = oion->Xd_.data(); ion_state.ionic_charge = oion->charge.data(); ion_state.conductivity = oion->gX_.data(); + + n_ions_with_written_xd += m.mech_.ions[idx].write_diff_concentration; } // Initialize state and parameter vectors with default values. @@ -446,7 +460,7 @@ void shared_state::instantiate(arb::mechanism& m, // Allocate bulk storage std::size_t value_width_padded = extend_width(m, pos_data.cv.size()); store.value_width_padded = value_width_padded; - std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 + + std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 + n_ions_with_written_xd + random_number_storage)*value_width_padded + m.mech_.n_globals; store.data_ = array(count, NAN, pad); chunk_writer writer(store.data_.data(), value_width_padded); @@ -470,6 +484,11 @@ void shared_state::instantiate(arb::mechanism& m, for (auto idx: make_span(m.mech_.n_state_vars)) { m.ppack_.state_vars[idx] = writer.fill(m.mech_.state_vars[idx].default_value); } + // Set diffusive concentration deltas if needed + for (auto idx: make_span(m.mech_.n_ions)) { + m.ppack_.ion_states[idx].diffusive_concentration_delta = + m.mech_.ions[idx].write_diff_concentration ? writer.fill(0) : nullptr; + } // Set random numbers for (auto idx_v: make_span(num_random_numbers_per_cv)) for (auto idx_c: make_span(cbprng::cache_size())) @@ -530,7 +549,30 @@ void shared_state::instantiate(arb::mechanism& m, m.ppack_.ion_states[idx].index = writer.append(indices, util::back(indices)); // Check SIMD constraints arb_assert(compatible_index_constraints(node_index, util::range_n(m.ppack_.ion_states[idx].index, index_width_padded), m.iface_.partition_width)); + + if (m.mech_.ions[idx].write_diff_concentration) { + auto mech_ion_index = m.ppack_.ion_states[idx].index; + auto& Xd_contribution_map = oion->Xd_contribution_map; + auto& Xd_contribution = oion->Xd_contribution; + auto& Xd_index = oion->Xd_index; + util::append( + Xd_contribution_map, + util::transform_view( + util::make_span(width), + [mech_ion_index, d = store.ion_states_[idx].diffusive_concentration_delta](const auto& i) { + return std::make_pair(d+i, mech_ion_index[i]); + })); + // sort contribution map according to index and transpose from AoS to SoA + util::stable_sort_by(Xd_contribution_map, [](const auto& p) { return p.second; }); + Xd_contribution.clear(); Xd_contribution.reserve(Xd_contribution_map.size()); + Xd_index.clear(); Xd_index.reserve(Xd_contribution_map.size()); + for (auto [ptr, idx] : Xd_contribution_map) { + Xd_contribution.push_back(ptr); + Xd_index.push_back(idx); + } + } } + if (mult_in_place) m.ppack_.multiplicity = writer.append(pos_data.multiplicity, 0); // `peer_index` holds the peer CV of each CV in node_index. // Peer CVs are only filled for gap junction mechanisms. They are used diff --git a/arbor/backends/multicore/shared_state.hpp b/arbor/backends/multicore/shared_state.hpp index 6d0d49c804..d132a1e012 100644 --- a/arbor/backends/multicore/shared_state.hpp +++ b/arbor/backends/multicore/shared_state.hpp @@ -73,6 +73,10 @@ struct ARB_ARBOR_API ion_state { array charge; // charge of ionic species (global value, length 1) + std::vector> Xd_contribution_map; + std::vector Xd_contribution; + std::vector Xd_index; + solver_ptr solver = nullptr; ion_state() = default; @@ -231,6 +235,8 @@ struct ARB_ARBOR_API shared_state: const mechanism_layout&, const std::vector>>&); + void apply_diffusive_concentration_delta(); + void update_prng_state(mechanism&); void zero_currents(); diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp index 94716b21c5..0e68cfcf48 100644 --- a/arbor/fvm_lowered_cell_impl.hpp +++ b/arbor/fvm_lowered_cell_impl.hpp @@ -213,6 +213,8 @@ fvm_integration_result fvm_lowered_cell_impl::integrate( m->update_current(); } + state_->apply_diffusive_concentration_delta(); + // Add stimulus current contributions. // NOTE: performed after dt, time_to calculation, in case we want to // use mean current contributions as opposed to point sample. @@ -236,11 +238,15 @@ fvm_integration_result fvm_lowered_cell_impl::integrate( m->update_state(); } + state_->apply_diffusive_concentration_delta(); + // Update ion concentrations. PE(advance:integrate:ionupdate); update_ion_state(); PL(); + // TODO: can `write_ions` affect xd? + // voltage mechs run now; after the cable_solver, but before the // threshold test for (auto& m: voltage_mechanisms_) { diff --git a/arbor/include/arbor/gpu/cuda_api.hpp b/arbor/include/arbor/gpu/cuda_api.hpp index 48bc96f262..532b9ec8f9 100644 --- a/arbor/include/arbor/gpu/cuda_api.hpp +++ b/arbor/include/arbor/gpu/cuda_api.hpp @@ -139,17 +139,6 @@ inline float gpu_atomic_sub(float* address, float val) { /// Warp-Level Primitives -__device__ __inline__ double shfl(unsigned mask, double x, int lane) -{ - auto tmp = static_cast(x); - auto lo = static_cast(tmp); - auto hi = static_cast(tmp >> 32); - hi = __shfl_sync(mask, static_cast(hi), lane, warpSize); - lo = __shfl_sync(mask, static_cast(lo), lane, warpSize); - return static_cast(static_cast(hi) << 32 | - static_cast(lo)); -} - __device__ __inline__ unsigned ballot(unsigned mask, unsigned is_root) { return __ballot_sync(mask, is_root); } @@ -158,24 +147,15 @@ __device__ __inline__ unsigned any(unsigned mask, unsigned width) { return __any_sync(mask, width); } -#ifdef __NVCC__ -__device__ __inline__ double shfl_up(unsigned mask, int idx, unsigned lane_id, unsigned shift) { - return __shfl_up_sync(mask, idx, shift); -} - -__device__ __inline__ double shfl_down(unsigned mask, int idx, unsigned lane_id, unsigned shift) { - return __shfl_down_sync(mask, idx, shift); +template +__device__ __inline__ T shfl_up(unsigned mask, T var, unsigned lane_id, unsigned shift) { + return __shfl_up_sync(mask, var, shift); } -#else -__device__ __inline__ double shfl_up(unsigned mask, int idx, unsigned lane_id, unsigned shift) { - return shfl(mask, idx, lane_id - shift); +template +__device__ __inline__ T shfl_down(unsigned mask, T var, unsigned lane_id, unsigned shift) { + return __shfl_down_sync(mask, var, shift); } - -__device__ __inline__ double shfl_down(unsigned mask, int idx, unsigned lane_id, unsigned shift) { - return shfl(mask, idx, lane_id + shift); -} -#endif #endif } // namespace gpu diff --git a/arbor/include/arbor/gpu/hip_api.hpp b/arbor/include/arbor/gpu/hip_api.hpp index fbdba3151f..019f235a8e 100644 --- a/arbor/include/arbor/gpu/hip_api.hpp +++ b/arbor/include/arbor/gpu/hip_api.hpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -118,6 +119,14 @@ inline float gpu_atomic_sub(float* address, float val) { /// Warp-level Primitives +template +__device__ __inline__ +std::enable_if_t< !std::is_same_v, double>, std::decay_t> +shfl(T x, int lane) +{ + return __shfl(x, lane); +} + __device__ __inline__ double shfl(double x, int lane) { auto tmp = static_cast(x); @@ -137,12 +146,14 @@ __device__ __inline__ unsigned any(unsigned mask, unsigned width) { return __any(width); } -__device__ __inline__ double shfl_up(unsigned mask, int idx, unsigned lane_id, unsigned shift) { - return shfl(idx, lane_id - shift); +template +__device__ __inline__ T shfl_up(unsigned mask, T var, unsigned lane_id, unsigned shift) { + return shfl(var, (int)lane_id - shift); } -__device__ __inline__ double shfl_down(unsigned mask, int idx, unsigned lane_id, unsigned shift) { - return shfl(idx, lane_id + shift); +template +__device__ __inline__ T shfl_down(unsigned mask, T var, unsigned lane_id, unsigned shift) { + return shfl(var, (int)lane_id + shift); } } // namespace gpu diff --git a/arbor/include/arbor/mechanism_abi.h b/arbor/include/arbor/mechanism_abi.h index bf0a9b30ba..76e9a46917 100644 --- a/arbor/include/arbor/mechanism_abi.h +++ b/arbor/include/arbor/mechanism_abi.h @@ -20,7 +20,7 @@ extern "C" { // Version #define ARB_MECH_ABI_VERSION_MAJOR 0 -#define ARB_MECH_ABI_VERSION_MINOR 6 +#define ARB_MECH_ABI_VERSION_MINOR 7 #define ARB_MECH_ABI_VERSION_PATCH 0 #define ARB_MECH_ABI_VERSION ((ARB_MECH_ABI_VERSION_MAJOR * 10000L * 10000L) + (ARB_MECH_ABI_VERSION_MAJOR * 10000L) + ARB_MECH_ABI_VERSION_PATCH) @@ -59,6 +59,7 @@ typedef struct arb_ion_state { arb_value_type* internal_concentration; arb_value_type* external_concentration; arb_value_type* diffusive_concentration; + arb_value_type* diffusive_concentration_delta; arb_value_type* ionic_charge; arb_index_type* index; } arb_ion_state; @@ -193,6 +194,7 @@ typedef struct arb_ion_info { const char* name; bool write_int_concentration; bool write_ext_concentration; + bool write_diff_concentration; bool use_diff_concentration; bool write_rev_potential; bool read_rev_potential; diff --git a/modcc/blocks.hpp b/modcc/blocks.hpp index 1e12b54287..ef2a001cf5 100644 --- a/modcc/blocks.hpp +++ b/modcc/blocks.hpp @@ -45,6 +45,9 @@ struct IonDep { bool writes_concentration_int() const { return writes_variable(name + "i"); }; + bool writes_concentration_diff() const { + return writes_variable(name + "d"); + }; bool writes_concentration_ext() const { return writes_variable(name + "o"); }; diff --git a/modcc/printer/cprinter.cpp b/modcc/printer/cprinter.cpp index 36ec914e6d..59fd8eb562 100644 --- a/modcc/printer/cprinter.cpp +++ b/modcc/printer/cprinter.cpp @@ -447,6 +447,7 @@ static std::string index_i_name(const std::string& index_var) { namespace { // Access through ppack std::string data_via_ppack(const indexed_variable_info& i) { return pp_var_pfx + i.data_var; } + std::string delta_data_via_ppack(const indexed_variable_info& i) { return pp_var_pfx + i.data_var + "_delta"; } std::string node_index_i_name(const indexed_variable_info& i) { return i.node_index_var + "i_"; } std::string source_index_i_name(const index_prop& i) { return i.source_var + "i_"; } std::string source_var(const index_prop& i) { return pp_var_pfx + i.source_var; } @@ -464,6 +465,15 @@ namespace { return o << data_via_ppack(wrap.d) << '[' << (wrap.d.scalar() ? "0": i_name) << ']'; } }; + + struct deref_delta { + indexed_variable_info d; + deref_delta(indexed_variable_info d): d(d) {} + + friend std::ostream& operator<<(std::ostream& o, const deref_delta& wrap) { + return o << delta_data_via_ppack(wrap.d) << '[' << (wrap.d.scalar() ? "0": "i_") << ']'; + } + }; } // Return the indices that need to be read at the beginning @@ -494,7 +504,7 @@ std::list gather_indexed_vars(const std::vector& ind indices.push_back(outer_index_prop); } } - else { + else if (!d.outer_index_var().empty()) { // Need to read 1 index: outer[index] index_prop outer_index_prop = {d.outer_index_var(), index, d.index_var_kind}; auto it = std::find(indices.begin(), indices.end(), outer_index_prop); @@ -562,9 +572,13 @@ void emit_api_body(std::ostream& out, APIMethod* method, const ApiFlags& flags) std::stringstream v; v << deref(d); var = v.str(); } if (d.additive && flags.use_additive) { + std::string delta_var; + { + std::stringstream v; v << deref_delta(d); delta_var = v.str(); + } out << fmt::format("{3} -= {0};\n" - "{0} = fma({1}{2}, {3}, {0});\n", - var, scale, weight, name); + "{4} = {1}{2}*{3};\n", + var, scale, weight, name, delta_var); } else if (write_voltage) { // SAFETY: we only ever allow *one* V-PROCESS per CV, so this is OK. @@ -783,6 +797,7 @@ void emit_simd_state_update(std::ostream& out, auto ext = external->name(); auto name = from->name(); auto data = data_via_ppack(d); + auto delta_data = delta_data_via_ppack(d); auto node = node_index_i_name(d); auto index = index_i_name(d.outer_index_var()); @@ -801,26 +816,31 @@ void emit_simd_state_update(std::ostream& out, if (d.additive && flags.use_additive) { if (d.index_var_kind == index_kind::node) { if (constraint == simd_expr_constraint::contiguous) { - out << fmt::format("indirect({} + {}, simd_width_) = S::mul({}, {});\n", - data, node, weight, scaled); + // We need this instead of simple assignment! + out << fmt::format("{{\n" + " simd_value t_{0}0_ = {0};\n" + " assign(t_{0}0_, indirect({1} + {2}, simd_width_));\n" + " {0} = S::sub({0}, t_{0}0_);\n" + " indirect({5} + index_, simd_width_) = S::mul({3}, {4});\n" + "}}\n", + name, data, node, weight, scaled, delta_data); } else { - // We need this instead of simple assignment! - out << fmt::format("{{\n" - " simd_value t_{}0_ = simd_cast(0.0);\n" - " assign(t_{}0_, indirect({}, simd_cast({}), simd_width_, constraint_category_));\n" - " {} = S::sub({}, t_{}0_);\n" - " indirect({}, simd_cast({}), simd_width_, constraint_category_) += S::mul({}, {});\n" - "}}\n", - name, - name, data, node, - scaled, scaled, name, - data, node, weight, scaled); + // We need this instead of simple assignment! + out << fmt::format("{{\n" + " simd_value t_{0}0_ = {0};\n" + " assign(t_{0}0_, indirect({1}, simd_cast({2}), simd_width_, constraint_category_));\n" + " {0} = S::sub({0}, t_{0}0_);\n" + " indirect({5} + index_, simd_width_) = S::mul({3}, {4});\n" + "}}\n", + name, data, node, weight, scaled, delta_data); } } else { - out << fmt::format("indirect({}, {}, simd_width_, index_constraint::none) = S::mul({}, {});\n", - data, index, weight, scaled); + //out << fmt::format("indirect({}, {}, simd_width_, index_constraint::none) = S::mul({}, {});\n", + // data, index, weight, scaled); + //only additive node variables allowed + throw compiler_exception("Cannot write to additive non-node variables: "+external->to_string()); } } else if (write_voltage) { diff --git a/modcc/printer/gpuprinter.cpp b/modcc/printer/gpuprinter.cpp index 04857c0aec..10a0f4cdd2 100644 --- a/modcc/printer/gpuprinter.cpp +++ b/modcc/printer/gpuprinter.cpp @@ -217,7 +217,7 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri "}}\n\n"), pp_var_pfx); } - emit_api_kernel(state_api); + emit_api_kernel(state_api, true); emit_api_kernel(current_api, true); emit_api_kernel(write_ions_api); @@ -232,11 +232,12 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri " const auto tid_ = (begin_ + ii_)->mech_index;\n" " if ((ii_ > 0) && ((begin_ + (ii_ - 1))->mech_index == tid_)) return;\n" " for (auto i_ = begin_ + ii_; i_ < end_; ++i_) {{\n" + " int n_ = 0;\n" " if (i_->mech_index != tid_) break;\n" " [[maybe_unused]] auto {0} = i_->weight;\n"), net_receive_api->args().empty() ? "weight" : net_receive_api->args().front()->is_argument()->name()); out << indent << indent << indent; - emit_api_body_cu(out, net_receive_api, ApiFlags{}.point(is_point_proc).loop(false).iface(false)); + emit_api_body_cu(out, net_receive_api, ApiFlags{}.point(is_point_proc).additive(true).loop(false).iface(false)); out << popindent << "}\n" << popindent << "}\n" << popindent << "}\n\n"; } @@ -433,6 +434,16 @@ namespace { << (wrap.v.scalar()? "0": index_i_name(index_var)) << ']'; } }; + + struct deref_delta { + indexed_variable_info v; + deref_delta(indexed_variable_info v): v(v) {} + + friend std::ostream& operator<<(std::ostream& o, const deref_delta& wrap) { + return o << pp_var_pfx + wrap.v.data_var + "_delta" << '[' + << (wrap.v.scalar()? "0": "tid_") << ']'; + } + }; } void emit_state_read_cu(std::ostream& out, LocalVariable* local, const ApiFlags& flags) { @@ -475,12 +486,7 @@ void emit_state_update_cu(std::ostream& out, if (d.additive && flags.use_additive) { out << name << " -= " << var << ";\n"; - if (flags.is_point) { - out << fmt::format("::arb::gpu::reduce_by_key({}*{}, {}, {}, lane_mask_);\n", weight, name, data, index); - } - else { - out << var << " = fma(" << weight << ", " << name << ", " << var << ");\n"; - } + out << deref_delta(d) << " = " << weight << "*" << name << ";\n"; } else if (write_voltage) { /* SAFETY: diff --git a/modcc/printer/infoprinter.cpp b/modcc/printer/infoprinter.cpp index 596b7b80d0..97e7637aa3 100644 --- a/modcc/printer/infoprinter.cpp +++ b/modcc/printer/infoprinter.cpp @@ -52,10 +52,10 @@ ARB_LIBMODCC_API std::string build_info_header(const Module& m, const printer_op id.unit_string(), val, lo, hi); }; auto fmt_ion = [&](const auto& ion) { - return fmt::format(FMT_COMPILE("{{ \"{}\", {}, {}, {}, {}, {}, {}, {}, {} }}"), + return fmt::format(FMT_COMPILE("{{ \"{}\", {}, {}, {}, {}, {}, {}, {}, {}, {} }}"), ion.name, ion.writes_concentration_int(), ion.writes_concentration_ext(), - ion.uses_concentration_diff(), + ion.writes_concentration_diff(), ion.uses_concentration_diff(), ion.writes_rev_potential(), ion.uses_rev_potential(), ion.uses_valence(), ion.verifies_valence(), ion.expected_valence); }; diff --git a/python/test/fixtures.py b/python/test/fixtures.py index 11b9011750..5e80b41d59 100644 --- a/python/test/fixtures.py +++ b/python/test/fixtures.py @@ -173,6 +173,19 @@ def dummy_catalogue(repo_path): return arbor.load_catalogue(str(cat_path)) +@_singleton_fixture +@repo_path() +def diffusion_catalogue(repo_path): + """ + Fixture that returns an `arbor.catalogue` + which contains mechanisms `neuron_with_diffusion` + and `synapse_with_diffusion`. + """ + path = repo_path / "test" / "unit" / "diffusion" + cat_path = _build_cat("diffusion", path) + return arbor.load_catalogue(str(cat_path)) + + @_fixture class empty_recipe(arbor.recipe): """ diff --git a/python/test/readme.md b/python/test/readme.md index 515366df87..ac4cc673e0 100644 --- a/python/test/readme.md +++ b/python/test/readme.md @@ -39,6 +39,6 @@ In subfolders `unit`/`unit_distributed`: ## Naming convention -- modules: `test_xxxs` (ending with `s` since module can consist of multiple classes) -- class(es): `TestXxxs` (ending with `s` since class can consist of multiple test functions) +- modules: `test_xxxs` (if applicable use the plural with `s` as modules can comprise multiple classes) +- class(es): `TestXxxs` (if applicable use the plural with `s` as classes can comprise multiple test functions) - functions: `test_yyy` diff --git a/python/test/unit/test_diffusion.py b/python/test/unit/test_diffusion.py new file mode 100644 index 0000000000..476a50f4c8 --- /dev/null +++ b/python/test/unit/test_diffusion.py @@ -0,0 +1,476 @@ +# -*- coding: utf-8 -*- + +import unittest +import arbor as A +import numpy as np +from .. import fixtures + +""" +Tests for the concentration and amount of diffusive particles across time and morphology. +Three different morphological structures are considered: 1 segment ("soma only"), 2 segments +("soma with dendrite"), and 3 segments ("soma with two dendrites"). + +NOTE: Internally, Arbor only knows concentrations. Thus, particle amounts have to be computed + from concentrations by integrating over the volume of the morphology. The total amount + of particles should be conserved unless there is deliberate injection or removal of + particles. +""" + + +# --------------------------------------------------------------------------------------- +# recipe class +class recipe(A.recipe): + # Constructor + # - cat: catalogue of custom mechanisms + # - cell: cell description + # - probes: list of probes + # - inject_remove: list of dictionaries of the form [ {"time" :