Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove internal Profiler in favour of Tracy #2266

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@
[submodule "ext/units"]
path = ext/units
url = https://github.com/LLNL/units.git
[submodule "ext/tracy"]
path = ext/tracy
url = https://github.com/wolfpld/tracy.git
53 changes: 45 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
check_ipo_supported(RESULT HAVE_LTO OUTPUT ERR_LTO)
if(NOT DEFINED CMAKE_INTERPROCEDURAL_OPTIMIZATION)
if(HAVE_LTO)
message (STATUS "LTO support found, enabling")
message (VERBOSE "LTO support found, enabling")
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
message(STATUS "No LTO: ${ERR_LTO}")
Expand All @@ -32,6 +32,7 @@ endif()
# redirected from the terminal (e.g. when using ninja or a pager).

option(ARBDEV_COLOR "Always produce ANSI-colored output (GNU/Clang only)." OFF)
mark_as_advanced(FORCE ARBDEV_COLOR)

#----------------------------------------------------------
# Configure-time build options for Arbor:
Expand All @@ -53,10 +54,12 @@ mark_as_advanced(ARB_USE_HWLOC)
# Use externally built modcc?

set(ARB_MODCC "" CACHE STRING "path to external modcc NMODL compiler")
mark_as_advanced(FORCE ARB_MODCC)

# Use libunwind to generate stack traces on errors?

option(ARB_BACKTRACE "Enable stacktraces on assertion and exceptions (requires Boost)." OFF)
mark_as_advanced(FORCE ARB_BACKTRACE)

# Specify GPU build type

Expand All @@ -77,8 +80,9 @@ option(ARB_USE_BUNDLED_LIBS "Use bundled 3rd party libraries" OFF)
# CPU architecture. Recorded in installed target, for downstream dependencies
# to use.
# Useful, for example, when a user wants to compile with target-specific
# optimization flags.
# optimization flags.
set(ARB_CXX_FLAGS_TARGET "" CACHE STRING "Optional additional flags for compilation")
mark_as_advanced(FORCE ARB_CXX_FLAGS_TARGET)

#----------------------------------------------------------
# Debug support
Expand All @@ -94,7 +98,10 @@ mark_as_advanced(ARB_CAT_VERBOSE)

option(ARB_WITH_MPI "build with MPI support" OFF)

option(ARB_WITH_PROFILING "use built-in profiling" OFF)
option(ARB_WITH_PROFILING "enable Tracy profiling" OFF)
cmake_dependent_option(ARB_WITH_STACK_PROFILING "enable stack collection in profiling" OFF "ARB_WITH_PROFILING" OFF)
cmake_dependent_option(ARB_WITH_MEMORY_PROFILING "enable memory in profiling" OFF "ARB_WITH_PROFILING" OFF)
mark_as_advanced(FORCE ARB_WITH_STACK_PROFILING ARB_WITH_MEMORY_PROFILING)

option(ARB_WITH_ASSERTIONS "enable arb_assert() assertions in code" OFF)

Expand Down Expand Up @@ -266,7 +273,7 @@ set(ARB_USE_BUNDLED_UNITS ON CACHE STRING "Use bundled LLNL units.")
cmake_dependent_option(ARB_USE_BUNDLED_JSON "Use bundled Niels Lohmann's json library." ON "ARB_USE_BUNDLED_LIBS" OFF)
if(NOT ARB_USE_BUNDLED_JSON)
find_package(nlohmann_json 3.11.2 CONFIG REQUIRED)
message(STATUS "Using external JSON = ${nlohmann_json_VERSION}")
message(VERBOSE "Using external JSON = ${nlohmann_json_VERSION}")
endif()

cmake_dependent_option(ARB_USE_BUNDLED_RANDOM123 "Use bundled Random123 lib." ON "ARB_USE_BUNDLED_LIBS" OFF)
Expand Down Expand Up @@ -300,14 +307,27 @@ else()
# Abort configuration on this branch. FATAL_ERROR is the message() mode that
# stops processing and generation; the previous first argument "FATAL," is not
# a mode keyword, so it was treated as message text and configuration carried on.
message(FATAL_ERROR "TODO: At the time of Arbor 0.10.0 there is no Spack package")
endif()


# Reduce CMake log clutter
set(_saved_CMAKE_MESSAGE_LOG_LEVEL ${CMAKE_MESSAGE_LOG_LEVEL})
set(CMAKE_MESSAGE_LOG_LEVEL WARNING)
add_subdirectory(ext)
set(CMAKE_MESSAGE_LOG_LEVEL ${_saved_CMAKE_MESSAGE_LOG_LEVEL})

install(TARGETS ext-hwloc EXPORT arbor-targets)
install(TARGETS ext-random123 EXPORT arbor-targets)
target_link_libraries(arbor-public-deps INTERFACE ext-units)
install(TARGETS ext-units EXPORT arbor-targets)
install(TARGETS units compile_flags_target EXPORT arbor-targets)

# Hide dependency options
mark_as_advanced(FORCE JSON_CI JSON_Diagnostics JSON_DisableEnumSerialization JSON_GlobalUDLs JSON_ImplicitConversions JSON_Install JSON_LegacyDiscardedValueComparison JSON_MultipleHeaders JSON_SystemInclude)
mark_as_advanced(FORCE TRACY_ENABLE TRACY_ON_DEMAND TRACY_CALLSTACK TRACY_NO_CALLSTACK TRACY_NO_CALLSTACK_INLINES TRACY_ONLY_LOCALHOST TRACY_NO_BROADCAST TRACY_ONLY_IPV4 TRACY_NO_CODE_TRANSFER TRACY_NO_CONTEXT_SWITCH TRACY_NO_EXIT TRACY_NO_SAMPLING TRACY_NO_VERIFY TRACY_NO_VSYNC_CAPTURE TRACY_NO_FRAME_IMAGE TRACY_NO_SYSTEM_TRACING TRACY_PATCHABLE_NOPSLEDS TRACY_DELAYED_INIT TRACY_MANUAL_LIFETIME TRACY_FIBERS TRACY_NO_CRASH_HANDLER TRACY_TIMER_FALLBACK TRACY_CALLSTACK TRACY_DELAYED_INIT TRACY_ENABLE TRACY_FIBERS TRACY_MANUAL_LIFETIME TRACY_NO_BROADCAST TRACY_NO_CALLSTACK TRACY_NO_CALLSTACK_INLINES TRACY_NO_CODE_TRANSFER TRACY_NO_CONTEXT_SWITCH TRACY_NO_CRASH_HANDLER TRACY_NO_EXIT TRACY_NO_FRAME_IMAGE TRACY_NO_SAMPLING TRACY_NO_SYSTEM_TRACING TRACY_NO_VERIFY TRACY_NO_VSYNC_CAPTURE TRACY_ONLY_IPV4 TRACY_ONLY_LOCALHOST TRACY_ON_DEMAND TRACY_PATCHABLE_NOPSLEDS TRACY_STATIC TRACY_TIMER_FALLBACK)
mark_as_advanced(FORCE ARB_USE_BUNDLED_FMT ARB_USE_BUNDLED_GTEST ARB_USE_BUNDLED_JSON ARB_USE_BUNDLED_PUGIXML ARB_USE_BUNDLED_PYBIND11 ARB_USE_BUNDLED_RANDOM123 ARB_USE_BUNDLED_UNITS)

# Hide advanced OSX configuration options
mark_as_advanced(FORCE CMAKE_OSX_ARCHITECTURES CMAKE_OSX_DEPLOYMENT_TARGET CMAKE_OSX_SYSROOT)


# Keep track of packages we need to add to the generated CMake config
# file for arbor.

Expand Down Expand Up @@ -339,10 +359,10 @@ mark_as_advanced(ARB_SVE_WIDTH)
if (ARB_SVE_WIDTH STREQUAL "auto")
get_sve_length(ARB_HAS_SVE ARB_SVE_BITS)
if (ARB_HAS_SVE)
message(STATUS "SVE detected with vector size = ${ARB_SVE_BITS} bits")
message(VERBOSE "SVE detected with vector size = ${ARB_SVE_BITS} bits")
set(ARB_CXX_SVE_FLAGS " -msve-vector-bits=${ARB_SVE_BITS}")
else()
message(STATUS "NO SVE detected")
message(VERBOSE "NO SVE detected")
set(ARB_CXX_SVE_FLAGS "")
endif()
else()
Expand All @@ -369,7 +389,24 @@ target_compile_options(arborio-private-deps INTERFACE ${ARB_CXX_FLAGS_TARGET_FUL

if(ARB_WITH_PROFILING)
    # Expose the profiling switch to the sources through the config-defs target.
    target_compile_definitions(arbor-config-defs INTERFACE ARB_HAVE_PROFILING)

    # Declare Tracy's own options with an ON default before its project is read.
    option(TRACY_ENABLE "" ON)
    option(TRACY_ON_DEMAND "" ON)

    # Add the Tracy sources from ext/tracy (target: TracyClient, alias
    # Tracy::TracyClient). The message log level is set to WARNING only around
    # the add_subdirectory() call and then put back to its previous value.
    set(_saved_CMAKE_MESSAGE_LOG_LEVEL ${CMAKE_MESSAGE_LOG_LEVEL})
    set(CMAKE_MESSAGE_LOG_LEVEL WARNING)
    add_subdirectory(ext/tracy)
    set(CMAKE_MESSAGE_LOG_LEVEL ${_saved_CMAKE_MESSAGE_LOG_LEVEL})

    # Link the Tracy client into Arbor's private dependencies and record it
    # in the list of export dependencies.
    target_link_libraries(arbor-private-deps INTERFACE Tracy::TracyClient)
    list(APPEND arbor_export_dependencies "Tracy::TracyClient")

    # Optional extras; each one only adds its own compile definition.
    if(ARB_WITH_STACK_PROFILING)
        target_compile_definitions(arbor-config-defs INTERFACE ARB_HAVE_STACK_PROFILING)
    endif()
    if(ARB_WITH_MEMORY_PROFILING)
        target_compile_definitions(arbor-config-defs INTERFACE ARB_HAVE_MEMORY_PROFILING)
    endif()
endif()

if(ARB_WITH_ASSERTIONS)
target_compile_definitions(arbor-config-defs INTERFACE ARB_HAVE_ASSERTIONS)
endif()
Expand Down Expand Up @@ -401,7 +438,7 @@ endif()

if(${Python3_FOUND})
set(PYTHON_EXECUTABLE "${Python3_EXECUTABLE}")
message(STATUS "PYTHON_EXECUTABLE: ${PYTHON_EXECUTABLE}")
message(VERBOSE "PYTHON_EXECUTABLE: ${PYTHON_EXECUTABLE}")
endif()


Expand Down
5 changes: 4 additions & 1 deletion arbor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(arbor_sources
label_resolution.cpp
lif_cell_group.cpp
cable_cell_group.cpp
mechanism.cpp
mechcat.cpp
mechinfo.cpp
memory/gpu_wrappers.cpp
Expand All @@ -50,7 +51,6 @@ set(arbor_sources
profile/memory_meter.cpp
profile/meter_manager.cpp
profile/power_meter.cpp
profile/profiler.cpp
schedule.cpp
spike_event_io.cpp
spike_source_cell_group.cpp
Expand Down Expand Up @@ -122,7 +122,10 @@ install(TARGETS arbor-private-headers EXPORT arbor-targets)
# variable, build_all_mods target. Note: CMake source file properties are
# directory-local.

set(_saved_CMAKE_MESSAGE_LOG_LEVEL ${CMAKE_MESSAGE_LOG_LEVEL})
set(CMAKE_MESSAGE_LOG_LEVEL WARNING)
add_subdirectory(../mechanisms "${CMAKE_BINARY_DIR}/mechanisms")
set(CMAKE_MESSAGE_LOG_LEVEL ${_saved_CMAKE_MESSAGE_LOG_LEVEL})
set_source_files_properties(${arbor-builtin-mechanisms} DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTIES GENERATED TRUE)

if(ARB_WITH_CUDA_CLANG OR ARB_WITH_HIP_CLANG)
Expand Down
4 changes: 4 additions & 0 deletions arbor/backends/gpu/diffusion_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "diffusion.hpp"
#include "forest.hpp"
#include "fine.hpp"
#include "profile/profiler_macro.hpp"

namespace arb {
namespace gpu {
Expand Down Expand Up @@ -402,6 +403,7 @@ struct diffusion_state {
// current density [A/m²]
// conductivity [kS/m²]
void assemble(const value_type dt, const_view concentration, const_view voltage, const_view current, const_view conductivity, arb_value_type q) {
PROFILE_ZONE();
assemble_diffusion(d.data(),
rhs.data(),
invariant_d.data(),
Expand All @@ -417,6 +419,7 @@ struct diffusion_state {
}

void solve(array& to) {
PROFILE_ZONE();
solve_diffusion(rhs.data(),
d.data(),
u.data(),
Expand All @@ -438,6 +441,7 @@ struct diffusion_state {
const_view current,
const_view conductivity,
arb_value_type q) {
PROFILE_ZONE();
assemble(dt, concentration, voltage, current, conductivity, q);
solve(concentration);
}
Expand Down
6 changes: 5 additions & 1 deletion arbor/backends/gpu/matrix_state_fine.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#pragma once

#include <cstring>

#include <vector>
#include <type_traits>

Expand All @@ -15,6 +14,7 @@
#include "fine.hpp"
#include "matrix_fine.hpp"
#include "forest.hpp"
#include "profile/profiler_macro.hpp"

namespace arb {
namespace gpu {
Expand Down Expand Up @@ -409,6 +409,7 @@ struct matrix_state_fine {
// current density [A/m²]
// conductivity [kS/m²]
void assemble(const T dt, const_view voltage, const_view current, const_view conductivity) {
PROFILE_ZONE();
assemble_matrix_fine(
d.data(),
rhs.data(),
Expand All @@ -424,6 +425,7 @@ struct matrix_state_fine {
}

void solve(array& to) {
PROFILE_ZONE();
solve_matrix_fine(rhs.data(),
d.data(),
u.data(),
Expand All @@ -450,13 +452,15 @@ struct matrix_state_fine {

private:
// Copy `from` into `to` by calling scatter() with the `perm` index array.
// Asserts (via arb_assert) that `from` holds matrix_size entries and `to`
// holds data_size entries before the call.
// NOTE(review): the exact element mapping is defined by scatter()/perm, which
// are not visible here — confirm against their definitions.
void flat_to_packed(const array& from, array& to ) {
PROFILE_ZONE();
arb_assert(from.size()==matrix_size);
arb_assert(to.size()==data_size);

scatter(from.data(), to.data(), perm.data(), perm.size());
}

void packed_to_flat(const array& from, array& to ) {
PROFILE_ZONE();
arb_assert(from.size()==data_size);
arb_assert(to.size()==matrix_size);

Expand Down
8 changes: 8 additions & 0 deletions arbor/backends/gpu/shared_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "util/meta.hpp"
#include "util/range.hpp"
#include "util/strprintf.hpp"
#include "profile/profiler_macro.hpp"

#include <iostream>

Expand Down Expand Up @@ -161,6 +162,7 @@ void istim_state::reset() {
}

void istim_state::add_current(const arb_value_type time, array& current_density) {
PROFILE_ZONE();
ppack_.time = time;
ppack_.current_density = current_density.data();
istim_add_current_impl((int)size(), ppack_);
Expand Down Expand Up @@ -202,6 +204,7 @@ shared_state::shared_state(task_system_handle tp,
}

void shared_state::update_prng_state(mechanism& m) {
PROFILE_ZONE();
if (!m.mech_.n_random_variables) return;
auto const mech_id = m.mechanism_id();
auto& store = storage[mech_id];
Expand All @@ -213,6 +216,8 @@ void shared_state::instantiate(mechanism& m,
const mechanism_overrides& overrides,
const mechanism_layout& pos_data,
const std::vector<std::pair<std::string, std::vector<arb_value_type>>>& params) {
PROFILE_ZONE();
ANNOTATE_ZONE(m.mech_.name, strlen(m.mech_.name));
assert(m.iface_.backend == arb_backend_kind_gpu);
using util::make_range;
using util::make_span;
Expand Down Expand Up @@ -355,6 +360,7 @@ void shared_state::instantiate(mechanism& m,
}

void shared_state::reset() {
PROFILE_ZONE();
memory::copy(init_voltage, voltage);
memory::fill(current_density, 0);
memory::fill(conductivity, 0);
Expand All @@ -369,6 +375,7 @@ void shared_state::reset() {
}

void shared_state::zero_currents() {
PROFILE_ZONE();
memory::fill(current_density, 0);
memory::fill(conductivity, 0);
for (auto& i: ion_data) {
Expand All @@ -382,6 +389,7 @@ std::pair<arb_value_type, arb_value_type> shared_state::voltage_bounds() const {
}

void shared_state::take_samples() {
PROFILE_ZONE();
sample_events.mark();
if (!sample_events.empty()) {
const auto state = sample_events.marked_events();
Expand Down
1 change: 1 addition & 0 deletions arbor/backends/multicore/cable_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ struct cable_solver {
// * will likewise overwrite the first argument with the solution
template<typename T>
void solve(T& rhs, const value_type dt, const_view current, const_view conductivity) {
PROFILE_ZONE();
value_type * const ARB_NO_ALIAS d_ = d.data();
value_type * const ARB_NO_ALIAS r_ = rhs.data();

Expand Down
3 changes: 3 additions & 0 deletions arbor/backends/multicore/diffusion_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <memory/memory.hpp>

#include "multicore_common.hpp"
#include "profile/profiler_macro.hpp"

namespace arb {
namespace multicore {
Expand Down Expand Up @@ -77,6 +78,7 @@ struct diffusion_solver {
const_view current,
const_view conductivity,
arb_value_type q) {
PROFILE_ZONE();
auto cell_cv_part = util::partition_view(cell_cv_divs);
index_type ncells = cell_cv_part.size();
// loop over submatrices
Expand All @@ -101,6 +103,7 @@ struct diffusion_solver {
// Separate solver; analogous with cable solver
template<typename T>
void solve(T& rhs) {
PROFILE_ZONE();
// loop over submatrices
for (const auto& [first, last]: util::partition_view(cell_cv_divs)) {
if (first >= last) continue; // skip cell with no CVs
Expand Down
5 changes: 0 additions & 5 deletions arbor/backends/multicore/fvm.hpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
#pragma once

#include <string>
#include <vector>

#include <arbor/mechanism.hpp>

#include "backends/event.hpp"
#include "backends/multicore/event_stream.hpp"
#include "backends/multicore/multicore_common.hpp"
#include "backends/multicore/shared_state.hpp"
#include "backends/multicore/diffusion_solver.hpp"
#include "backends/multicore/cable_solver.hpp"
#include "backends/multicore/threshold_watcher.hpp"
#include "execution_context.hpp"
#include "util/padded_alloc.hpp"
#include "util/range.hpp"
#include "util/rangeutil.hpp"

Expand Down
1 change: 0 additions & 1 deletion arbor/backends/multicore/multicore_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
//
// Defines array, iarray, and specialized multi-event stream classes.

#include <utility>
#include <vector>

#include <arbor/fvm_types.hpp>
Expand Down
Loading
Loading