Skip to content

Commit

Permalink
Switch std::string -> hashes for label resolution. (#2215)
Browse files Browse the repository at this point in the history
Use the simple but well-known FNV-1a hash function to map
`cell_tag_type` aka `std::string` to an `uint64_t`
for label resolution. The former type has a size of 32B or more and the
latter 8B, thus cutting the storage and bandwidth
requirements by 3/4. 

The hash function is implemented from the reference given on the
authors' website/wikipedia and is extremely
simple. If we ever experience issues, we might consider switching this
to something of higher quality via an
external library, candidates are `xxHASH` and `Murmur3`.

https://github.com/Cyan4973/xxHash

Note: This should further relieve the memory pressure on larger scale
simulation as formulated in #1969 and make
#2005 less urgent.

There is no performance impact (at laptop scale), but the memory savings
are worth it.
  • Loading branch information
thorstenhater authored Nov 29, 2023
1 parent db868d3 commit c391775
Show file tree
Hide file tree
Showing 21 changed files with 339 additions and 212 deletions.
5 changes: 2 additions & 3 deletions arbor/benchmark_cell_group.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <chrono>
#include <exception>

#include <arbor/arbexcept.hpp>
#include <arbor/benchmark_cell.hpp>
Expand Down Expand Up @@ -40,8 +39,8 @@ benchmark_cell_group::benchmark_cell_group(const std::vector<cell_gid_type>& gid
for (const auto& c: cells_) {
cg_sources.add_cell();
cg_targets.add_cell();
cg_sources.add_label(c.source, {0, 1});
cg_targets.add_label(c.target, {0, 1});
cg_sources.add_label(hash_value(c.source), {0, 1});
cg_targets.add_label(hash_value(c.target), {0, 1});
}

benchmark_cell_group::reset();
Expand Down
18 changes: 12 additions & 6 deletions arbor/cable_cell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ struct cable_cell_impl {
decor decorations;

// The placeable label to lid_range map
dynamic_typed_map<constant_type<std::unordered_multimap<cell_tag_type, lid_range>>::type> labeled_lid_ranges;
dynamic_typed_map<constant_type<std::unordered_multimap<hash_type, lid_range>>::type> labeled_lid_ranges;

cable_cell_impl(const arb::morphology& m, const label_dict& labels, const decor& decorations):
provider(m, labels),
Expand Down Expand Up @@ -120,7 +120,7 @@ struct cable_cell_impl {
}

template <typename Item>
void place(const locset& ls, const Item& item, const cell_tag_type& label) {
void place(const locset& ls, const Item& item, const hash_type& label) {
auto& mm = get_location_map(item);
cell_lid_type& lid = placed_count.get<Item>();
cell_lid_type first = lid;
Expand Down Expand Up @@ -226,7 +226,8 @@ void cable_cell_impl::init(const decor& d) {
for (const auto& p: d.placements()) {
auto& where = std::get<0>(p);
auto& label = std::get<2>(p);
std::visit([this, &where, &label] (auto&& what) {return this->place(where, what, label);}, std::get<1>(p));
std::visit([this, &where, &label] (auto&& what) {return this->place(where, what, label); },
std::get<1>(p));
}
}

Expand Down Expand Up @@ -280,16 +281,21 @@ const cable_cell_parameter_set& cable_cell::default_parameters() const {
return impl_->decorations.defaults();
}

const std::unordered_multimap<cell_tag_type, lid_range>& cable_cell::detector_ranges() const {
const cable_cell::lid_range_map& cable_cell::detector_ranges() const {
return impl_->labeled_lid_ranges.get<threshold_detector>();
}

const std::unordered_multimap<cell_tag_type, lid_range>& cable_cell::synapse_ranges() const {
const cable_cell::lid_range_map& cable_cell::synapse_ranges() const {
return impl_->labeled_lid_ranges.get<synapse>();
}

const std::unordered_multimap<cell_tag_type, lid_range>& cable_cell::junction_ranges() const {
const cable_cell::lid_range_map& cable_cell::junction_ranges() const {
return impl_->labeled_lid_ranges.get<junction>();
}

cell_tag_type decor::tag_of(hash_type hash) const {
if (!hashes_.count(hash)) throw arbor_internal_error{util::pprintf("Unknown hash for {}.", std::to_string(hash))};
return hashes_.at(hash);
}

} // namespace arb
12 changes: 8 additions & 4 deletions arbor/cable_cell_param.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
#include <cfloat>
#include <cmath>
#include <memory>
#include <numeric>
#include <vector>
#include <variant>
#include <tuple>
Expand All @@ -10,7 +7,9 @@
#include <arbor/cable_cell_param.hpp>
#include <arbor/s_expr.hpp>

#include <arbor/util/hash_def.hpp>
#include "util/maputil.hpp"
#include "util/strprintf.hpp"

namespace arb {

Expand Down Expand Up @@ -120,7 +119,12 @@ decor& decor::paint(region where, paintable what) {
}

decor& decor::place(locset where, placeable what, cell_tag_type label) {
placements_.emplace_back(std::move(where), std::move(what), std::move(label));
auto hash = hash_value(label);
if (hashes_.count(hash) && hashes_.at(hash) != label) {
throw arbor_internal_error{util::strprintf("Hash collision {} ./. {}", label, hashes_.at(hash))};
}
placements_.emplace_back(std::move(where), std::move(what), hash);
hashes_.emplace(hash, label);
return *this;
}

Expand Down
12 changes: 5 additions & 7 deletions arbor/communication/mpi_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,11 @@ struct mpi_context_impl {
}

cell_label_range gather_cell_label_range(const cell_label_range& local_ranges) const {
std::vector<cell_size_type> sizes;
std::vector<cell_tag_type> labels;
std::vector<lid_range> ranges;
sizes = mpi::gather_all(local_ranges.sizes(), comm_);
labels = mpi::gather_all(local_ranges.labels(), comm_);
ranges = mpi::gather_all(local_ranges.ranges(), comm_);
return cell_label_range(sizes, labels, ranges);
cell_label_range res;
res.sizes = mpi::gather_all(local_ranges.sizes, comm_);
res.labels = mpi::gather_all(local_ranges.labels, comm_);
res.ranges = mpi::gather_all(local_ranges.ranges, comm_);
return res;
}

cell_labels_and_gids gather_cell_labels_and_gids(const cell_labels_and_gids& local_labels_and_gids) const {
Expand Down
61 changes: 27 additions & 34 deletions arbor/fvm_lowered_cell_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,15 +315,27 @@ fvm_detector_info get_detector_info(arb_size_type max,
return { max, std::move(cv), std::move(threshold), ctx };
}

template <typename Backend>
void fvm_lowered_cell_impl<Backend>::add_probes(const std::vector<cell_gid_type>& gids,
const std::vector<cable_cell>& cells,
const recipe& rec,
const fvm_cv_discretization& D,
const std::unordered_map<std::string, mechanism*>& mechptr_by_name,
const fvm_mechanism_data& mech_data,
const std::vector<target_handle>& target_handles,
probe_association_map& probe_map) {
inline cell_size_type
add_labels(cell_label_range& clr, const cable_cell::lid_range_map& ranges) {
clr.add_cell();
cell_size_type count = 0;
std::unordered_map<hash_type, cell_tag_type> hashes;
for (const auto& [label, range]: ranges) {
clr.add_label(label, range);
count += (range.end - range.begin);
}
return count;
}

template <typename Backend> void
fvm_lowered_cell_impl<Backend>::add_probes(const std::vector<cell_gid_type>& gids,
const std::vector<cable_cell>& cells,
const recipe& rec,
const fvm_cv_discretization& D,
const std::unordered_map<std::string, mechanism*>& mechptr_by_name,
const fvm_mechanism_data& mech_data,
const std::vector<target_handle>& target_handles,
probe_association_map& probe_map) {
auto ncell = gids.size();

std::vector<fvm_probe_data> probe_data;
Expand All @@ -343,9 +355,9 @@ void fvm_lowered_cell_impl<Backend>::add_probes(const std::vector<cell_gid_type>
}
}

template <typename Backend>
fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(const std::vector<cell_gid_type>& gids,
const recipe& rec) {
template <typename Backend> fvm_initialization_data
fvm_lowered_cell_impl<Backend>::initialize(const std::vector<cell_gid_type>& gids,
const recipe& rec) {
using std::any_cast;
using util::count_along;
using util::make_span;
Expand Down Expand Up @@ -375,28 +387,9 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(const std::ve
for (auto i : util::make_span(ncell)) {
auto gid = gids[i];
const auto& c = cells[i];

fvm_info.source_data.add_cell();
fvm_info.target_data.add_cell();
fvm_info.gap_junction_data.add_cell();

unsigned count = 0;
for (const auto& [label, range]: c.detector_ranges()) {
fvm_info.source_data.add_label(label, range);
count+=(range.end - range.begin);
}
fvm_info.num_sources[gid] = count;

count = 0;
for (const auto& [label, range]: c.synapse_ranges()) {
fvm_info.target_data.add_label(label, range);
count+=(range.end - range.begin);
}
fvm_info.num_targets[gid] = count;

for (const auto& [label, range]: c.junction_ranges()) {
fvm_info.gap_junction_data.add_label(label, range);
}
fvm_info.num_sources[gid] = add_labels(fvm_info.source_data, c.detector_ranges());
fvm_info.num_targets[gid] = add_labels(fvm_info.target_data, c.synapse_ranges());
add_labels(fvm_info.gap_junction_data, c.junction_ranges());
}

cable_cell_global_properties global_props;
Expand Down
10 changes: 5 additions & 5 deletions arbor/include/arbor/cable_cell.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ using cable_cell_location_map = static_typed_map<location_assignment,
synapse, junction, i_clamp, threshold_detector>;

// High-level abstract representation of a cell.
class ARB_SYMBOL_VISIBLE cable_cell {
public:
struct ARB_SYMBOL_VISIBLE cable_cell {
using lid_range_map = std::unordered_multimap<hash_type, lid_range>;
using index_type = cell_lid_type;
using size_type = cell_local_size_type;
using value_type = double;
Expand Down Expand Up @@ -311,9 +311,9 @@ class ARB_SYMBOL_VISIBLE cable_cell {
const cable_cell_parameter_set& default_parameters() const;

// The labeled lid_ranges of sources, targets and gap_junctions on the cell;
const std::unordered_multimap<cell_tag_type, lid_range>& detector_ranges() const;
const std::unordered_multimap<cell_tag_type, lid_range>& synapse_ranges() const;
const std::unordered_multimap<cell_tag_type, lid_range>& junction_ranges() const;
const lid_range_map& detector_ranges() const;
const lid_range_map& synapse_ranges() const;
const lid_range_map& junction_ranges() const;

private:
std::unique_ptr<cable_cell_impl, void (*)(cable_cell_impl*)> impl_;
Expand Down
7 changes: 4 additions & 3 deletions arbor/include/arbor/cable_cell_param.hpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
#pragma once

#include <cmath>
#include <memory>
#include <optional>
#include <unordered_map>
#include <string>
#include <variant>
#include <any>

#include <arbor/export.hpp>
#include <arbor/arbexcept.hpp>
Expand Down Expand Up @@ -313,8 +311,9 @@ struct ARB_ARBOR_API cable_cell_parameter_set {
// are to be applied to a morphology in a cable_cell.
class ARB_ARBOR_API decor {
std::vector<std::pair<region, paintable>> paintings_;
std::vector<std::tuple<locset, placeable, cell_tag_type>> placements_;
std::vector<std::tuple<locset, placeable, hash_type>> placements_;
cable_cell_parameter_set defaults_;
std::unordered_map<hash_type, cell_tag_type> hashes_;

public:
const auto& paintings() const {return paintings_; }
Expand All @@ -324,6 +323,8 @@ class ARB_ARBOR_API decor {
decor& paint(region, paintable);
decor& place(locset, placeable, cell_tag_type);
decor& set_default(defaultable);

cell_tag_type tag_of(hash_type) const;
};

ARB_ARBOR_API extern cable_cell_parameter_set neuron_parameter_defaults;
Expand Down
4 changes: 4 additions & 0 deletions arbor/include/arbor/common_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

namespace arb {

// Internal hashes use this 64bit id

using hash_type = std::size_t;

// For identifying cells globally.

using cell_gid_type = std::uint32_t;
Expand Down
2 changes: 1 addition & 1 deletion arbor/include/arbor/cv_policy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@

namespace arb {

class cable_cell;
struct cable_cell;

struct cv_policy_base {
virtual locset cv_boundary_points(const cable_cell& cell) const = 0;
Expand Down
77 changes: 65 additions & 12 deletions arbor/include/arbor/util/hash_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,80 @@
*/

#include <cstddef>
#include <typeindex>
#include <string_view>
#include <functional>

// Helpers for forming hash values of compounds objects.

namespace arb {

inline std::size_t hash_value_combine(std::size_t n) {
return n;
namespace detail {

// Non-cryptographic hash function for mapping strings to internal
// identifiers. Concretely, FNV-1a hash function taken from
//
// http://www.isthe.com/chongo/tech/comp/fnv/index.html
//
// NOTE: It may be worth it considering different hash functions in
// the future that have better characteristic, xxHash or Murmur
// look interesting but are more complex and likely require adding
// external dependencies.
// NOTE: this is the obligatory comment on a better hash function
// that will be here until the end of time.

template <typename T>
inline constexpr std::size_t internal_hash(T&& data) {
using D = std::decay_t<T>;
constexpr std::size_t prime = 0x100000001b3;
constexpr std::size_t offset_basis = 0xcbf29ce484222325;
static_assert(!std::is_pointer_v<D> || std::is_same_v<D, void*> || std::is_convertible_v<T, std::string_view>,
"Pointer types except void* will not be hashed.");
if constexpr (std::is_convertible_v<T, std::string_view>) {
std::size_t hash = offset_basis;
for (uint8_t byte: std::string_view{data}) {
hash = hash ^ byte;
hash = hash * prime;
}
return hash;
}
if constexpr (std::is_integral_v<D>) {
unsigned long long bytes = data;
std::size_t hash = offset_basis;
for (int ix = 0; ix < sizeof(data); ++ix) {
uint8_t byte = bytes & 255;
bytes >>= 8;
hash = hash ^ byte;
hash = hash * prime;
}
return hash;
}
if constexpr (std::is_pointer_v<D>) {
unsigned long long bytes = reinterpret_cast<unsigned long long>(data);
std::size_t hash = offset_basis;
for (int ix = 0; ix < sizeof(data); ++ix) {
uint8_t byte = bytes & 255;
bytes >>= 8;
hash = hash ^ byte;
hash = hash * prime;
}
return hash;
}
return std::hash<D>{}(data);
}

template <typename... T>
std::size_t hash_value_combine(std::size_t n, std::size_t m, T... tail) {
constexpr std::size_t prime2 = 54517;
return hash_value_combine(prime2*n + m, tail...);
inline
std::size_t hash_value_combine(std::size_t n) { return n; }

template <typename T, typename... Ts>
std::size_t hash_value_combine(std::size_t n, const T& head, const Ts&... tail) {
constexpr std::size_t prime = 54517;
return hash_value_combine(prime*n + internal_hash(head), tail...);
}

template <typename... T>
std::size_t hash_value(const T&... t) {
constexpr std::size_t prime1 = 93481;
return hash_value_combine(prime1, std::hash<T>{}(t)...);
}

// User facing API
template <typename... T>
std::size_t hash_value(const T&... ts) { return detail::hash_value_combine(0, ts...); }
}

#define ARB_DEFINE_HASH(type,...)\
Expand Down
Loading

0 comments on commit c391775

Please sign in to comment.