Skip to content

Commit

Permalink
Extend debug utilities for stuck/errored tracks (#1451)
Browse files Browse the repository at this point in the history
* Add a reference to the debug_print symbol
* Change variable name for consistency
* Add helper function to kill active tracks
* Add test
* Add option to export Geant4 geometry at runtime
* Write geometry file in example
* Improve accel SetupOptions docs
* Add Geant4 UI for geometry export option
* Improve ObserverPtr semantics
* Add a global params pointer for detailed debug information
* Fix unit test failure from using '@global' in JSON ORANGE
  • Loading branch information
sethrj authored Oct 16, 2024
1 parent 86152b9 commit 5c15b54
Show file tree
Hide file tree
Showing 29 changed files with 465 additions and 198 deletions.
7 changes: 4 additions & 3 deletions doc/development/testing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,9 @@ can call a global function to print the full state of the current track::
},
...

When unit testing, a "status checker" class is inserted into the stepping loop,
which enables more verbose/useful debug output. (Instead of printing an ID, it
will reference the actual value.)
If the stepping loop "hangs" (i.e., the number of steps seems unbounded) and
you have access to a debugger, you can call the ``Stepper::kill_active`` method
to kill all active tracks and (on CPU) print detailed debug information about
them.

.. _debug_print: https://github.com/celeritas-project/celeritas/pull/1304
1 change: 1 addition & 0 deletions example/accel/simple-offload.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class DetectorConstruction final : public G4VUserDetectorConstruction
"Aluminium", 13., 26.98 * g / mole, 2.700 * g / cm3}}
{
setup_options.make_along_step = celeritas::UniformAlongStepFactory();
setup_options.geometry_output_file = "simple-example.gdml";
}

G4VPhysicalVolume* Construct() final
Expand Down
14 changes: 8 additions & 6 deletions src/accel/SetupOptions.hh
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,10 @@ struct SetupOptions
std::string output_file;
//! Filename for ROOT dump of physics data
std::string physics_output_file;
//! Filename to dump a HepMC3 copy of offloaded tracks as events
//! Filename to dump a ROOT/HepMC3 copy of offloaded tracks as events
std::string offload_output_file;
//! Filename to dump a GDML file for debugging inside frameworks
std::string geometry_output_file;
//!@}

//!@{
Expand Down Expand Up @@ -150,24 +152,24 @@ struct SetupOptions
short int max_field_substeps{100};
//!@}

//!@{
//! \name Sensitive detector options
//! Sensitive detector options
SDSetupOptions sd;
//!@}

//!@{
//! \name Physics options
//! Ignore the following EM process names
//! Do not use Celeritas physics for the given Geant4 process names
VecString ignore_processes;
//!@}

//!@{
//! \name CUDA options
//! Per-thread stack size (may be needed for VecGeom) [B]
size_type cuda_stack_size{};
//! Dynamic heap size (may be needed for VecGeom) [B]
size_type cuda_heap_size{};
//! Sync the GPU at every kernel for timing
bool action_times{false};
//! Launch all kernels on the default stream
//! Launch all kernels on the default stream for debugging
bool default_stream{false};
//!@}

Expand Down
5 changes: 4 additions & 1 deletion src/accel/SetupOptionsMessenger.cc
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,10 @@ SetupOptionsMessenger::SetupOptionsMessenger(SetupOptions* options)
"Filename for ROOT dump of physics data");
add_cmd(&options->offload_output_file,
"offloadOutputFile",
"Filename for copy of offloaded tracks as events");
"Filename for HepMC3/ROOT dump of offloaded tracks");
add_cmd(&options->geometry_output_file,
"geometryOutputFile",
"Filename for GDML export");
add_cmd(&options->max_num_tracks,
"maxNumTracks",
"Number of track \"slots\" to be transported simultaneously");
Expand Down
3 changes: 2 additions & 1 deletion src/accel/SetupOptionsMessenger.hh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ struct SetupOptions;
geometryFile | Override detector geometry with a custom GDML
outputFile | Filename for JSON diagnostic output
physicsOutputFile | Filename for ROOT dump of physics data
offloadOutputFile | Filename for HepMC3 copy of offloaded tracks as events
offloadOutputFile | Filename for HepMC3/ROOT dump of offloaded tracks
geometryOutputFile | Filename for GDML export
maxNumTracks | Number of tracks to be transported simultaneously
maxNumEvents | Maximum number of events in use
maxNumSteps | Limit on number of step iterations before aborting
Expand Down
11 changes: 11 additions & 0 deletions src/accel/SharedParams.cc
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,17 @@ void SharedParams::initialize_core(SetupOptions const& options)
export_root(*imported);
}

if (!options.geometry_output_file.empty())
{
CELER_VALIDATE(options.geometry_file.empty(),
<< "the 'geometry_output_file' option cannot be used "
"when manually loading a geometry (the "
"'geometry_file' option is also set)");

write_geant_geometry(GeantImporter::get_world_volume(),
options.geometry_output_file);
}

CoreParams::Input params;

// Create registries
Expand Down
1 change: 1 addition & 0 deletions src/celeritas/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ celeritas_polysource(global/alongstep/AlongStepGeneralLinearAction)
celeritas_polysource(global/alongstep/AlongStepNeutralAction)
celeritas_polysource(global/alongstep/AlongStepUniformMscAction)
celeritas_polysource(global/alongstep/AlongStepRZMapFieldMscAction)
celeritas_polysource(global/detail/KillActive)
celeritas_polysource(global/detail/TrackSlotUtils)
celeritas_polysource(neutron/model/ChipsNeutronElasticModel)
celeritas_polysource(neutron/model/NeutronInelasticModel)
Expand Down
14 changes: 7 additions & 7 deletions src/celeritas/global/ActionLauncher.device.hh
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,13 @@ class ActionLauncher : public KernelLauncher<F>

// Launch a kernel for the wrapped executor
void operator()(CoreState<MemSpace::device> const& state,
F const& call_thread) const;
F const& execute_thread) const;

// Launch with reduced grid size for when tracks are sorted
void operator()(StepActionT const& action,
CoreParams const& params,
CoreState<MemSpace::device> const& state,
F const& call_thread) const;
F const& execute_thread) const;
};

//---------------------------------------------------------------------------//
Expand Down Expand Up @@ -104,10 +104,10 @@ ActionLauncher<F>::ActionLauncher(StepActionT const& action,
*/
template<class F>
void ActionLauncher<F>::operator()(CoreState<MemSpace::device> const& state,
F const& call_thread) const
F const& execute_thread) const
{
return (*this)(
range(ThreadId{state.size()}), state.stream_id(), call_thread);
range(ThreadId{state.size()}), state.stream_id(), execute_thread);
}

//---------------------------------------------------------------------------//
Expand All @@ -120,20 +120,20 @@ template<class F>
void ActionLauncher<F>::operator()(StepActionT const& action,
CoreParams const& params,
CoreState<MemSpace::device> const& state,
F const& call_thread) const
F const& execute_thread) const
{
if (state.has_action_range()
&& is_action_sorted(action.order(), params.init()->track_order()))
{
// Launch on a subset of threads
return (*this)(state.get_action_range(action.action_id()),
state.stream_id(),
call_thread);
execute_thread);
}
else
{
// Not partitioned by action: launch on all threads
return (*this)(state, call_thread);
return (*this)(state, execute_thread);
}
}

Expand Down
13 changes: 0 additions & 13 deletions src/celeritas/global/ActionSequence.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,6 @@ void ActionSequence::step(CoreParams const& params, CoreState<M>& state)
stream = celeritas::device().stream(state.stream_id()).get();
}

if constexpr (M == MemSpace::host)
{
if (status_checker_)
{
g_debug_executing_params = &params;
}
}

// When running a single track slot on host, we can preemptively skip
// inapplicable post-step actions
auto const skip_post_action = [&](auto const& action) {
Expand Down Expand Up @@ -143,11 +135,6 @@ void ActionSequence::step(CoreParams const& params, CoreState<M>& state)
}
}
}

if (M == MemSpace::host && status_checker_)
{
g_debug_executing_params = nullptr;
}
}

//---------------------------------------------------------------------------//
Expand Down
3 changes: 3 additions & 0 deletions src/celeritas/global/CoreParams.cc
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,9 @@ CoreParams::CoreParams(Input input) : input_(std::move(input))
// Save maximum number of streams
scalars.max_streams = input_.max_streams;

// Save non-owning pointer to core params for host diagnostics
scalars.host_core_params = ObserverPtr{this};

// Save host reference
host_ref_ = build_params_refs<MemSpace::host>(input_, scalars);
if (celeritas::device())
Expand Down
8 changes: 8 additions & 0 deletions src/celeritas/global/CoreTrackData.hh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "corecel/Assert.hh"
#include "corecel/data/Collection.hh"
#include "corecel/data/ObserverPtr.hh"
#include "celeritas/Types.hh"
#include "celeritas/em/data/WentzelOKVIData.hh"
#include "celeritas/geo/GeoData.hh"
Expand All @@ -25,6 +26,9 @@

namespace celeritas
{
//---------------------------------------------------------------------------//
class CoreParams;

//---------------------------------------------------------------------------//
/*!
* Memspace-independent core variables.
Expand All @@ -41,6 +45,10 @@ struct CoreScalars

StreamId::size_type max_streams{0};

// Non-owning pointer to core params ONLY for diagnostics:
// see DebugIO.json.cc
ObserverPtr<CoreParams const, MemSpace::host> host_core_params{nullptr};

//! True if assigned and valid
explicit CELER_FUNCTION operator bool() const
{
Expand Down
21 changes: 6 additions & 15 deletions src/celeritas/global/Debug.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,12 @@ void debug_print_impl(T const& view)
} // namespace

//---------------------------------------------------------------------------//
/*!
* In the stepping loop, for interactive breakpoints, a debug pointer.
*
* This is accessible when:
* - Inside an \c execute call (i.e., during stepping)
* - Using CoreParams on host
* - The status checker is enabled
*
* ... or if running inside a unit test that sets them.
*
* \warning This is not thread safe: it should only be used in single-threaded
* (or track-parallel) execution modes, and ONLY inside an interactive
* debugger. See celeritas/track/Debug.hh .
*/
CoreParams const* g_debug_executing_params{nullptr};
std::ostream& operator<<(std::ostream& os, StreamableTrack const& track_wrap)
{
nlohmann::json j = track_wrap.track;
os << j.dump();
return os;
}

//---------------------------------------------------------------------------//
#define DEFINE_DEBUG_PRINT(TYPE) \
Expand Down
13 changes: 10 additions & 3 deletions src/celeritas/global/Debug.hh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
//---------------------------------------------------------------------------//
//! \file celeritas/global/Debug.hh
//! \brief Utilities *only* for interactive debugging
//! \brief Utilities for interactive debugging and diagnostic output
//---------------------------------------------------------------------------//
#pragma once

#include <iosfwd>

#include "celeritas/geo/GeoFwd.hh"

namespace celeritas
Expand All @@ -20,8 +22,13 @@ class ParticleTrackView;
class SimTrackView;

//---------------------------------------------------------------------------//
// Params during an execute call, ONLY for interactive debugging
extern CoreParams const* g_debug_executing_params;
//! Print a track to the given stream
//!
//! Thin wrapper used to select the \c operator<< overload declared below.
//! It stores only a const reference to the track view (it does not own or
//! copy it), so the referenced view must remain valid while the wrapper is
//! being streamed.
struct StreamableTrack
{
//! Track view to be printed (non-owning reference)
CoreTrackView const& track;
};

std::ostream& operator<<(std::ostream&, StreamableTrack const&);

//---------------------------------------------------------------------------//
// Print everything that can be printed about a core track view
Expand Down
Loading

0 comments on commit 5c15b54

Please sign in to comment.