diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 53aacf0a54..936ad6a3f6 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -73,6 +73,8 @@ The Axom project release numbers follow [Semantic Versioning](http://semver.org/ - Added Sidre function `View::clear()`. - Core now provides an `axom::ArrayView` that provides view/indexing semantics over a raw pointer. This replaces the external buffer logic previously provided by `axom::Array`. +- `axom::Array` is now GPU-compatible, in particular via a memory space template parameter and via + extensions to `axom::ArrayView` that allow for copying into kernels and transfers between memory spaces. ### Changed - `MFEMSidreDataCollection` now reuses FESpace/QSpace objects with the same basis diff --git a/src/axom/core/Array.hpp b/src/axom/core/Array.hpp index b92edfa727..79c887dded 100644 --- a/src/axom/core/Array.hpp +++ b/src/axom/core/Array.hpp @@ -8,7 +8,6 @@ #include "axom/config.hpp" // for compile-time defines #include "axom/core/Macros.hpp" // for axom macros -#include "axom/core/memory_management.hpp" // for memory allocation functions #include "axom/core/utilities/Utilities.hpp" // for processAbort() #include "axom/core/Types.hpp" // for IndexType definition #include "axom/core/ArrayBase.hpp" @@ -20,24 +19,8 @@ namespace axom { -// TODO: Add this as a non-type template parameter to Array/View -// The intent is that there will also be a "Dynamic" or "Polymorphic" -// resource type -// enum MemoryResourceType -// { -// Host, -// Device, -// Unified, -// Pinned, -// Constant, -// File, -// NoOp, -// Shared, -// Unknown -// }; - // Forward declare the templated classes and operator function(s) -template +template class Array; /*! @@ -81,14 +64,15 @@ class Array; * \see https://en.cppreference.com/w/cpp/named_req * */ -template -class Array : public ArrayBase> +template +class Array : public ArrayBase> { public: static constexpr double DEFAULT_RESIZE_RATIO = 2.0; static constexpr IndexType MIN_DEFAULT_CAPACITY = 32; using value_type = T; - using ArrayIterator = ArrayIteratorBase>; + static constexpr MemorySpace space = SPACE; + using ArrayIterator = ArrayIteratorBase>; public: /// \name Native Storage Array Constructors @@ -120,10 +104,20 @@ class Array : public ArrayBase> * \post size() == num_elements * \post getResizeRatio() == DEFAULT_RESIZE_RATIO */ - template ::type* = nullptr> + template ::type* = nullptr> Array(IndexType num_elements, IndexType capacity = 0, - int allocator_id = axom::getDefaultAllocatorID()); + int allocator_id = axom::detail::getAllocatorID()); + + /// \overload + template ::type* = nullptr> + Array(IndexType num_elements, IndexType capacity = 0); /*! * \brief Generic constructor for an Array of arbitrary dimension @@ -137,7 +131,9 @@ class Array : public ArrayBase> * \post size() == num_elements * \post getResizeRatio() == DEFAULT_RESIZE_RATIO */ - template + template ::value>::type* = nullptr> Array(Args... args); /*! @@ -145,13 +141,22 @@ class Array : public ArrayBase> * * \param [in] allocator_id the ID of the allocator to use (optional) */ - Array(const Array& other, int allocator_id = axom::getDefaultAllocatorID()); + Array(const Array& other, + int allocator_id = axom::detail::getAllocatorID()); /*! * \brief Move constructor for an Array instance */ Array(Array&& other); + /*! + * \brief Constructor for transferring between memory spaces + * + * \param [in] other The array in a different memory space to copy from + */ + template + Array(const ArrayBase& other); + /// @} /// \name Array copy and move operators @@ -222,8 +227,8 @@ class Array : public ArrayBase> */ /// @{ - inline T* data() { return m_data; } - inline const T* data() const { return m_data; } + AXOM_HOST_DEVICE inline T* data() { return m_data; } + AXOM_HOST_DEVICE inline const T* data() const { return m_data; } /// @} @@ -271,7 +276,6 @@ class Array : public ArrayBase> * \note The size increases by 1. * */ - template ::type* = nullptr> void insert(IndexType pos, const T& value); /*! @@ -285,7 +289,6 @@ class Array : public ArrayBase> * * \return ArrayIterator to inserted value */ - template ::type* = nullptr> ArrayIterator insert(ArrayIterator pos, const T& value); /*! @@ -318,7 +321,6 @@ class Array : public ArrayBase> * * \return ArrayIterator to first element inserted (pos if n == 0) */ - template ::type* = nullptr> ArrayIterator insert(ArrayIterator pos, IndexType n, const T* values); /*! @@ -335,7 +337,6 @@ class Array : public ArrayBase> * * \pre pos <= m_num_elements. */ - template ::type* = nullptr> void insert(IndexType pos, IndexType n, const T& value); /*! @@ -354,11 +355,10 @@ class Array : public ArrayBase> * * \return ArrayIterator to first element inserted (pos if n == 0) */ - template ::type* = nullptr> ArrayIterator insert(ArrayIterator pos, IndexType n, const T& value); // Make the overload "visible" - using ArrayBase>::insert; + using ArrayBase>::insert; /*! * \brief Appends an Array to the end of the calling object @@ -371,7 +371,7 @@ class Array : public ArrayBase> template void append(const ArrayBase& other) { - ArrayBase>::insert(size(), other); + ArrayBase>::insert(size(), other); } /*! @@ -481,7 +481,7 @@ class Array : public ArrayBase> /*! * \brief Return the number of elements stored in the data array. */ - inline IndexType size() const { return m_num_elements; } + AXOM_HOST_DEVICE inline IndexType size() const { return m_num_elements; } /*! * \brief Update the number of elements stored in the data array. @@ -494,7 +494,7 @@ class Array : public ArrayBase> /*! * \brief Exchanges the contents of this Array with the other. */ - void swap(Array& other); + void swap(Array& other); /*! * \brief Get the ratio by which the capacity increases upon dynamic resize. @@ -587,15 +587,17 @@ using MCArray = Array; //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ -template -Array::Array() : m_allocator_id(axom::getDefaultAllocatorID()) +template +Array::Array() + : m_allocator_id(axom::detail::getAllocatorID()) { } -template -template -Array::Array(Args... args) - : ArrayBase>(args...) - , m_allocator_id(axom::getDefaultAllocatorID()) +template +template ::value>::type*> +Array::Array(Args... args) + : ArrayBase>(args...) + , m_allocator_id(axom::detail::getAllocatorID()) { static_assert(sizeof...(Args) == DIM, "Array size must match number of dimensions"); @@ -606,32 +608,62 @@ Array::Array(Args... args) } //------------------------------------------------------------------------------ -template -template ::type*> -Array::Array(IndexType num_elements, IndexType capacity, int allocator_id) +template +template ::type*> +Array::Array(IndexType num_elements, + IndexType capacity, + int allocator_id) : m_allocator_id(allocator_id) { initialize(num_elements, capacity); } //------------------------------------------------------------------------------ -template -Array::Array(const Array& other, int allocator_id) - : ArrayBase>( - static_cast>&>(other)) - , m_allocator_id(allocator_id) +template +template ::type*> +Array::Array(IndexType num_elements, IndexType capacity) + : m_allocator_id(axom::detail::getAllocatorID()) +{ + initialize(num_elements, capacity); +} + +//------------------------------------------------------------------------------ +template +Array::Array(const Array& other, int allocator_id) + : ArrayBase>( + static_cast>&>(other)) + , m_allocator_id(SPACE == MemorySpace::Dynamic + ? allocator_id + : axom::detail::getAllocatorID()) { +// We can't template/SFINAE away the allocator_id parameter since this is a copy +// constructor, so we just ignore the allocator ID if the memory space isn't Dynamic. +// We can warn the user that their input is being ignored, though. +#ifdef AXOM_DEBUG + if(SPACE != MemorySpace::Dynamic && + allocator_id != axom::detail::getAllocatorID()) + { + std::cerr << "Incorrect allocator ID was provided for an Array object with " + "explicit memory space\n"; + } +#endif initialize(other.size(), other.capacity()); axom::copy(m_data, other.data(), m_num_elements * sizeof(T)); } //------------------------------------------------------------------------------ -template -Array::Array(Array&& other) - : ArrayBase>( - static_cast>&&>(std::move(other))) +template +Array::Array(Array&& other) + : ArrayBase>( + static_cast>&&>(std::move(other))) , m_resize_ratio(0.0) - , m_allocator_id(axom::getDefaultAllocatorID()) + , m_allocator_id(axom::detail::getAllocatorID()) { m_data = other.m_data; m_num_elements = other.m_num_elements; @@ -646,8 +678,24 @@ Array::Array(Array&& other) } //------------------------------------------------------------------------------ -template -Array::~Array() +template +template +Array::Array(const ArrayBase& other) + : ArrayBase>(other) + , m_allocator_id(axom::detail::getAllocatorID()) +{ + initialize(static_cast(other).size(), + static_cast(other).size()); + // axom::copy is aware of pointers registered in Umpire, so this will handle + // the transfer between memory spaces + axom::copy(m_data, + static_cast(other).data(), + m_num_elements * sizeof(T)); +} + +//------------------------------------------------------------------------------ +template +Array::~Array() { if(m_data != nullptr) { @@ -658,8 +706,8 @@ Array::~Array() } //------------------------------------------------------------------------------ -template -inline void Array::fill(const T& value) +template +inline void Array::fill(const T& value) { for(IndexType i = 0; i < m_num_elements; i++) { @@ -668,8 +716,8 @@ inline void Array::fill(const T& value) } //------------------------------------------------------------------------------ -template -inline void Array::set(const T* elements, IndexType n, IndexType pos) +template +inline void Array::set(const T* elements, IndexType n, IndexType pos) { assert(elements != nullptr); assert(pos >= 0); @@ -682,8 +730,8 @@ inline void Array::set(const T* elements, IndexType n, IndexType pos) } //------------------------------------------------------------------------------ -template -inline void Array::clear() +template +inline void Array::clear() { // This most likely needs to be a call to erase() instead. for(IndexType i = 0; i < m_num_elements; ++i) @@ -695,29 +743,29 @@ inline void Array::clear() } //------------------------------------------------------------------------------ -template -template ::type*> -inline void Array::insert(IndexType pos, const T& value) +template +inline void Array::insert(IndexType pos, const T& value) { + static_assert(DIM == 1, "Insertion not supported for multidimensional Arrays"); reserveForInsert(1, pos); m_data[pos] = value; } //------------------------------------------------------------------------------ -template -template ::type*> -inline typename Array::ArrayIterator Array::insert( - Array::ArrayIterator pos, +template +inline typename Array::ArrayIterator Array::insert( + Array::ArrayIterator pos, const T& value) { + static_assert(DIM == 1, "Insertion not supported for multidimensional Arrays"); assert(pos >= begin() && pos <= end()); insert(pos - begin(), value); return pos; } //------------------------------------------------------------------------------ -template -inline void Array::insert(IndexType pos, IndexType n, const T* values) +template +inline void Array::insert(IndexType pos, IndexType n, const T* values) { assert(values != nullptr); reserveForInsert(n, pos); @@ -728,23 +776,23 @@ inline void Array::insert(IndexType pos, IndexType n, const T* values) } //------------------------------------------------------------------------------ -template -template ::type*> -inline typename Array::ArrayIterator Array::insert( - Array::ArrayIterator pos, +template +inline typename Array::ArrayIterator Array::insert( + Array::ArrayIterator pos, IndexType n, const T* values) { + static_assert(DIM == 1, "Insertion not supported for multidimensional Arrays"); assert(pos >= begin() && pos <= end()); insert(pos - begin(), n, values); return pos; } //------------------------------------------------------------------------------ -template -template ::type*> -inline void Array::insert(IndexType pos, IndexType n, const T& value) +template +inline void Array::insert(IndexType pos, IndexType n, const T& value) { + static_assert(DIM == 1, "Insertion not supported for multidimensional Arrays"); reserveForInsert(n, pos); for(IndexType i = 0; i < n; ++i) { @@ -753,22 +801,22 @@ inline void Array::insert(IndexType pos, IndexType n, const T& value) } //------------------------------------------------------------------------------ -template -template ::type*> -inline typename Array::ArrayIterator Array::insert( - Array::ArrayIterator pos, +template +inline typename Array::ArrayIterator Array::insert( + Array::ArrayIterator pos, IndexType n, const T& value) { + static_assert(DIM == 1, "Insertion not supported for multidimensional Arrays"); assert(pos >= begin() && pos <= end()); insert(pos - begin(), n, value); return pos; } //------------------------------------------------------------------------------ -template -inline typename Array::ArrayIterator Array::erase( - Array::ArrayIterator pos) +template +inline typename Array::ArrayIterator Array::erase( + Array::ArrayIterator pos) { assert(pos >= begin() && pos < end()); int counter = 0; @@ -786,10 +834,10 @@ inline typename Array::ArrayIterator Array::erase( } //------------------------------------------------------------------------------ -template -inline typename Array::ArrayIterator Array::erase( - Array::ArrayIterator first, - Array::ArrayIterator last) +template +inline typename Array::ArrayIterator Array::erase( + Array::ArrayIterator first, + Array::ArrayIterator last) { assert(first >= begin() && first < end()); assert(last >= first && last <= end()); @@ -827,19 +875,19 @@ inline typename Array::ArrayIterator Array::erase( } //------------------------------------------------------------------------------ -template +template template -inline void Array::emplace(IndexType pos, Args&&... args) +inline void Array::emplace(IndexType pos, Args&&... args) { reserveForInsert(1, pos); m_data[pos] = std::move(T(std::forward(args)...)); } //------------------------------------------------------------------------------ -template +template template -inline typename Array::ArrayIterator Array::emplace( - Array::ArrayIterator pos, +inline typename Array::ArrayIterator Array::emplace( + Array::ArrayIterator pos, Args&&... args) { assert(pos >= begin() && pos <= end()); @@ -848,9 +896,9 @@ inline typename Array::ArrayIterator Array::emplace( } //------------------------------------------------------------------------------ -template +template template -inline void Array::resize(Args... args) +inline void Array::resize(Args... args) { static_assert(sizeof...(Args) == DIM, "Array size must match number of dimensions"); @@ -859,8 +907,8 @@ inline void Array::resize(Args... args) assert(detail::allNonNegative(tmp_args)); const auto new_num_elements = detail::packProduct(tmp_args); - static_cast>&>(*this) = - ArrayBase> {static_cast(args)...}; + static_cast>&>(*this) = + ArrayBase> {static_cast(args)...}; if(new_num_elements > m_capacity) { @@ -871,10 +919,10 @@ inline void Array::resize(Args... args) } //------------------------------------------------------------------------------ -template -inline void Array::swap(Array& other) +template +inline void Array::swap(Array& other) { - ArrayBase>::swap(other); + ArrayBase>::swap(other); T* temp_data = m_data; IndexType temp_num_elements = m_num_elements; IndexType temp_capacity = m_capacity; @@ -892,8 +940,9 @@ inline void Array::swap(Array& other) } //------------------------------------------------------------------------------ -template -inline void Array::initialize(IndexType num_elements, IndexType capacity) +template +inline void Array::initialize(IndexType num_elements, + IndexType capacity) { assert(num_elements >= 0); @@ -917,8 +966,8 @@ inline void Array::initialize(IndexType num_elements, IndexType capacity } //------------------------------------------------------------------------------ -template -inline T* Array::reserveForInsert(IndexType n, IndexType pos) +template +inline T* Array::reserveForInsert(IndexType n, IndexType pos) { assert(n >= 0); assert(pos >= 0); @@ -947,8 +996,8 @@ inline T* Array::reserveForInsert(IndexType n, IndexType pos) } //------------------------------------------------------------------------------ -template -inline void Array::updateNumElements(IndexType new_num_elements) +template +inline void Array::updateNumElements(IndexType new_num_elements) { assert(new_num_elements >= 0); assert(new_num_elements <= m_capacity); @@ -956,8 +1005,8 @@ inline void Array::updateNumElements(IndexType new_num_elements) } //------------------------------------------------------------------------------ -template -inline void Array::setCapacity(IndexType new_capacity) +template +inline void Array::setCapacity(IndexType new_capacity) { assert(new_capacity >= 0); @@ -973,8 +1022,8 @@ inline void Array::setCapacity(IndexType new_capacity) } //------------------------------------------------------------------------------ -template -inline void Array::dynamicRealloc(IndexType new_num_elements) +template +inline void Array::dynamicRealloc(IndexType new_num_elements) { assert(m_resize_ratio >= 1.0); IndexType new_capacity = new_num_elements * m_resize_ratio + 0.5; diff --git a/src/axom/core/ArrayBase.hpp b/src/axom/core/ArrayBase.hpp index 52358fa9d4..04cc0d0b3c 100644 --- a/src/axom/core/ArrayBase.hpp +++ b/src/axom/core/ArrayBase.hpp @@ -8,13 +8,15 @@ #include "axom/config.hpp" // for compile-time defines #include "axom/core/Macros.hpp" // for axom macros +#include "axom/core/memory_management.hpp" // for memory allocation functions #include "axom/core/utilities/Utilities.hpp" // for processAbort() #include "axom/core/Types.hpp" // for IndexType definition +#include "axom/core/StackArray.hpp" +#include "axom/core/numerics/matvecops.hpp" // for dot_product // C/C++ includes -#include // for std::array #include // for std::cerr and std::ostream -#include // for std::inner_product +#include // for std::accumulate namespace axom { @@ -94,6 +96,19 @@ class ArrayBase updateStrides(); } + /*! + * \brief Copy constructor for arrays of different type + * Because the element type (T) and dimension (DIM) are still locked down, + * this function is nominally used for copying ArrayBase metadata from + * Array <-> ArrayView and/or Array-like objects whose data are in different + * memory spaces + */ + template + ArrayBase(const ArrayBase& other) + : m_dims(other.shape()) + , m_strides(other.strides()) + { } + /*! * \brief Dimension-aware accessor, returns a reference to the given value. * @@ -106,22 +121,20 @@ class ArrayBase */ template ::type> - T& operator()(Args... args) + AXOM_HOST_DEVICE T& operator()(Args... args) { - IndexType indices[] = {static_cast(args)...}; - IndexType idx = - std::inner_product(indices, indices + DIM, m_strides.begin(), 0); + const IndexType indices[] = {static_cast(args)...}; + const IndexType idx = numerics::dot_product(indices, m_strides.begin(), DIM); assert(inBounds(idx)); return asDerived().data()[idx]; } /// \overload template ::type> - const T& operator()(Args... args) const + AXOM_HOST_DEVICE const T& operator()(Args... args) const { - IndexType indices[] = {static_cast(args)...}; - IndexType idx = - std::inner_product(indices, indices + DIM, m_strides.begin(), 0); + const IndexType indices[] = {static_cast(args)...}; + const IndexType idx = numerics::dot_product(indices, m_strides.begin(), DIM); assert(inBounds(idx)); return asDerived().data()[idx]; } @@ -138,13 +151,13 @@ class ArrayBase * * \pre 0 <= idx < m_num_elements */ - T& operator[](const IndexType idx) + AXOM_HOST_DEVICE T& operator[](const IndexType idx) { assert(inBounds(idx)); return asDerived().data()[idx]; } /// \overload - const T& operator[](const IndexType idx) const + AXOM_HOST_DEVICE const T& operator[](const IndexType idx) const { assert(inBounds(idx)); return asDerived().data()[idx]; @@ -159,10 +172,16 @@ class ArrayBase } /// \brief Returns the dimensions of the Array - const std::array& shape() const { return m_dims; } + AXOM_HOST_DEVICE const StackArray& shape() const + { + return m_dims; + } /// \brief Returns the strides of the Array - const std::array& strides() const { return m_strides; } + AXOM_HOST_DEVICE const StackArray& strides() const + { + return m_strides; + } /*! * \brief Appends an Array to the end of the calling object @@ -221,9 +240,12 @@ class ArrayBase private: /// \brief Returns a reference to the Derived CRTP object - see https://www.fluentcpp.com/2017/05/12/curiously-recurring-template-pattern/ - ArrayType& asDerived() { return static_cast(*this); } + AXOM_HOST_DEVICE ArrayType& asDerived() + { + return static_cast(*this); + } /// \overload - const ArrayType& asDerived() const + AXOM_HOST_DEVICE const ArrayType& asDerived() const { return static_cast(*this); } @@ -232,7 +254,7 @@ class ArrayBase /// @{ /*! \brief Test if idx is within bounds */ - inline bool inBounds(IndexType idx) const + AXOM_HOST_DEVICE inline bool inBounds(IndexType idx) const { return idx >= 0 && idx < asDerived().size(); } @@ -240,9 +262,9 @@ class ArrayBase protected: /// \brief The sizes (extents?) in each dimension - std::array m_dims; + StackArray m_dims; /// \brief The strides in each dimension - std::array m_strides; + StackArray m_strides; }; /// \brief Array implementation specific to 1D Arrays @@ -252,6 +274,11 @@ class ArrayBase public: ArrayBase(IndexType = 0) { } + // Empy implementation because no member data + template + ArrayBase(const ArrayBase&) + { } + /*! * \brief Push a value to the back of the array. * @@ -282,9 +309,11 @@ class ArrayBase void emplace_back(Args&&... args); /// \brief Returns the dimensions of the Array - // FIXME: std::array is used for consistency with multidim case, should we just return the scalar? // Double curly braces needed for C++11 prior to resolution of CWG issue 1720 - std::array shape() const { return {{asDerived().size()}}; } + AXOM_HOST_DEVICE StackArray shape() const + { + return {{asDerived().size()}}; + } /*! * \brief Accessor, returns a reference to the given value. @@ -297,13 +326,13 @@ class ArrayBase * \pre 0 <= idx < m_num_elements */ /// @{ - T& operator[](const IndexType idx) + AXOM_HOST_DEVICE T& operator[](const IndexType idx) { assert(inBounds(idx)); return asDerived().data()[idx]; } /// \overload - const T& operator[](const IndexType idx) const + AXOM_HOST_DEVICE const T& operator[](const IndexType idx) const { assert(inBounds(idx)); return asDerived().data()[idx]; @@ -337,9 +366,12 @@ class ArrayBase private: /// \brief Returns a reference to the Derived CRTP object - see https://www.fluentcpp.com/2017/05/12/curiously-recurring-template-pattern/ - ArrayType& asDerived() { return static_cast(*this); } + AXOM_HOST_DEVICE ArrayType& asDerived() + { + return static_cast(*this); + } /// \overload - const ArrayType& asDerived() const + AXOM_HOST_DEVICE const ArrayType& asDerived() const { return static_cast(*this); } @@ -348,7 +380,7 @@ class ArrayBase /// @{ /*! \brief Test if idx is within bounds */ - inline bool inBounds(IndexType idx) const + AXOM_HOST_DEVICE inline bool inBounds(IndexType idx) const { return idx >= 0 && idx < asDerived().size(); } @@ -368,12 +400,13 @@ template inline std::ostream& print(std::ostream& os, const ArrayBase& array) { -#if defined(AXOM_USE_UMPIRE) && defined(AXOM_USE_CUDA) - // FIXME: Re-add check for umpire::resource::Constant as well, but this will crash - // if there exists no allocator for Constant memory. Is there a more fine-grained - // approach we can use to see what allocators are available before trying to get their IDs? - if(static_cast(array).getAllocatorID() == - axom::getUmpireResourceAllocatorID(umpire::resource::Device)) +#if defined(AXOM_USE_UMPIRE) && defined(UMPIRE_ENABLE_DEVICE) + const int alloc_id = static_cast(array).getAllocatorID(); + if(alloc_id == axom::getUmpireResourceAllocatorID(umpire::resource::Device) + #ifdef UMPIRE_ENABLE_CONST + || alloc_id == axom::getUmpireResourceAllocatorID(umpire::resource::Constant) + #endif + ) { std::cerr << "Cannot print Array allocated on the GPU" << std::endl; utilities::processAbort(); @@ -477,6 +510,25 @@ bool allNonNegative(const T (&arr)[N]) return true; } +/// \brief Indirection needed to dodge an MSVC compiler bug +template +struct all_types_are_integral_impl : std::true_type +{ }; + +template +struct all_types_are_integral_impl +{ + static constexpr bool value = std::is_integral::value && + all_types_are_integral_impl::value; +}; + +/// \brief Checks if all types in a parameter pack are integral +template +struct all_types_are_integral +{ + static constexpr bool value = all_types_are_integral_impl::value; +}; + } // namespace detail } /* namespace axom */ diff --git a/src/axom/core/ArrayView.hpp b/src/axom/core/ArrayView.hpp index 9f709a0d39..c92fbf16c6 100644 --- a/src/axom/core/ArrayView.hpp +++ b/src/axom/core/ArrayView.hpp @@ -13,7 +13,7 @@ namespace axom { // Forward declare the templated classes and operator function(s) -template +template class ArrayView; /// \name ArrayView to wrap a pointer and provide indexing semantics @@ -30,16 +30,17 @@ class ArrayView; * \tparam DIM The dimension of the array. * */ -template -class ArrayView : public ArrayBase> +template +class ArrayView : public ArrayBase> { public: using value_type = T; static constexpr int dimension = DIM; - using ArrayViewIterator = ArrayIteratorBase>; + static constexpr MemorySpace space = SPACE; + using ArrayViewIterator = ArrayIteratorBase>; /// \brief Default constructor - ArrayView() = default; + ArrayView() : m_allocator_id(axom::detail::getAllocatorID()) { } /*! * \brief Generic constructor for an ArrayView of arbitrary dimension with external data @@ -54,10 +55,24 @@ class ArrayView : public ArrayBase> template ArrayView(T* data, Args... args); + /*! + * \brief Constructor for transferring between memory spaces + * + * \param [in] other The array in a different memory space to copy from + * + * \note The parameter is non-const because \a other can be modified through the constructed View + * + * \note This constructor is left implicit to allow for convenient function calls that convert + * from \p Array -> \p ArrayView or from dynamic memory spaces to an \p ArrayView of explicitly specified + * space. + */ + template + ArrayView(ArrayBase& other); + /*! * \brief Return the number of elements stored in the data array. */ - inline IndexType size() const { return m_num_elements; } + inline AXOM_HOST_DEVICE IndexType size() const { return m_num_elements; } /*! * \brief Returns an ArrayViewIterator to the first element of the Array @@ -83,23 +98,30 @@ class ArrayView : public ArrayBase> */ /// @{ - inline T* data() { return m_data; } - inline const T* data() const { return m_data; } + AXOM_HOST_DEVICE inline T* data() + { +#ifdef AXOM_DEVICE_CODE + static_assert(SPACE != MemorySpace::Constant, + "Cannot modify Constant memory from device code"); +#endif + return m_data; + } + AXOM_HOST_DEVICE inline const T* data() const { return m_data; } /// @} /*! * \brief Get the ID for the umpire allocator - * - * FIXME: This is just a stand-in impl, extend this class to support wrapping of GPU pointers */ - int getAllocatorID() const { return axom::getDefaultAllocatorID(); } + int getAllocatorID() const { return m_allocator_id; } private: T* m_data = nullptr; /// \brief The full number of elements in the array /// i.e., 3 for a 1D Array of size 3, 9 for a 3x3 2D array, etc IndexType m_num_elements = 0; + /// \brief The allocator ID for the memory space in which m_data was allocated + int m_allocator_id; }; /// \brief Helper alias for multi-component arrays @@ -111,17 +133,59 @@ using MCArrayView = ArrayView; //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ -template +template template -ArrayView::ArrayView(T* data, Args... args) - : ArrayBase>(args...) +ArrayView::ArrayView(T* data, Args... args) + : ArrayBase>(args...) , m_data(data) + , m_allocator_id(axom::detail::getAllocatorID()) { static_assert(sizeof...(Args) == DIM, "Array size must match number of dimensions"); // Intel hits internal compiler error when casting as part of function call IndexType tmp_args[] = {args...}; m_num_elements = detail::packProduct(tmp_args); + +#ifdef AXOM_USE_UMPIRE + // If we have Umpire, we can try and see what space the pointer is allocated in + // Probably not worth checking this if SPACE != Dynamic, we *could* error out + // if e.g., the user gives a host pointer to ArrayView, but even + // Thrust doesn't guard against this. + + // FIXME: Is it worth trying to get rid of this at compile time? + // (using a workaround since we don't have "if constexpr") + if(SPACE == MemorySpace::Dynamic) + { + auto& rm = umpire::ResourceManager::getInstance(); + if(rm.hasAllocator(data)) + { + auto alloc = rm.getAllocator(data); + m_allocator_id = alloc.getId(); + } + } +#endif +} + +//------------------------------------------------------------------------------ +template +template +ArrayView::ArrayView(ArrayBase& other) + : ArrayBase>(other) + , m_data(static_cast(other).data()) + , m_num_elements(static_cast(other).size()) + , m_allocator_id(static_cast(other).getAllocatorID()) +{ +#ifdef AXOM_DEBUG + // If it's not dynamic, the allocator ID from the argument array has to match the template param. + // If that's not the case then things have gone horribly wrong somewhere. + if(SPACE != MemorySpace::Dynamic && + m_allocator_id != axom::detail::getAllocatorID()) + { + std::cerr << "Input argument allocator does not match the explicitly " + "provided memory space\n"; + utilities::processAbort(); + } +#endif } } /* namespace axom */ diff --git a/src/axom/core/StackArray.hpp b/src/axom/core/StackArray.hpp index 5d23b1cfad..780dd9de1a 100644 --- a/src/axom/core/StackArray.hpp +++ b/src/axom/core/StackArray.hpp @@ -57,9 +57,57 @@ struct StackArray /// @} + /*! + * \brief Begin/end iterators + */ + /// @{ + + AXOM_HOST_DEVICE T* begin() noexcept { return &m_data[0]; } + AXOM_HOST_DEVICE const T* begin() const noexcept { return &m_data[0]; } + + AXOM_HOST_DEVICE T* end() noexcept { return &m_data[0] + N; } + AXOM_HOST_DEVICE const T* end() const noexcept { return &m_data[0] + N; } + + /// @} + T m_data[N]; }; +/*! + * \brief Equality comparison operator for StackArray + * + * \param [in] lhs left StackArray to compare + * \param [in] rhs right StackArray to compare + * \return true if the StackArrays have the same element values + */ +template +AXOM_HOST_DEVICE bool operator==(const StackArray& lhs, + const StackArray& rhs) +{ + for(int i = 0; i < N; i++) + { + if(lhs[i] != rhs[i]) + { + return false; + } + } + return true; +} + +/*! + * \brief Inequality comparison operator for StackArray + * + * \param [in] lhs left StackArray to compare + * \param [in] rhs right StackArray to compare + * \return true if the StackArrays have different element values + */ +template +AXOM_HOST_DEVICE bool operator!=(const StackArray& lhs, + const StackArray& rhs) +{ + return !(lhs == rhs); +} + } /* namespace axom */ #endif /* AXOM_STACKARRAY_HPP_ */ diff --git a/src/axom/core/docs/sphinx/core_containers.rst b/src/axom/core/docs/sphinx/core_containers.rst index bc32d9b794..e4d96b822e 100644 --- a/src/axom/core/docs/sphinx/core_containers.rst +++ b/src/axom/core/docs/sphinx/core_containers.rst @@ -17,6 +17,10 @@ Axom Core contains the ``Array``, ``ArrayView``, and ``StackArray`` classes. Among other things, these data containers facilitate porting code that uses ``std::vector`` to GPUs. +##### +Array +##### + ``Array`` is a multidimensional contiguous container template. In the 1-dimensional case, this class behaves similar to ``std::vector``. In higher dimensions, some vector-like functionality, such as ``push_back``, are not @@ -37,7 +41,7 @@ by default. To return all extra memory, an application can call Use ``reserve()`` when the number of nodes is known a priori, or use a constructor that takes an actual size and capacity when possible. -.. note:: The Array destructor deallocates and returns all memory associated +.. note:: The ``Array`` destructor deallocates and returns all memory associated with it to the system. Here's an example showing how to use ``Array`` instead of ``std::vector``. @@ -79,6 +83,10 @@ The output of this example is:: [8, 0, -1] ] +######### +ArrayView +######### + It is also often useful to wrap an external, user-supplied buffer without taking ownership of the data. For this purpose Axom provides the ``ArrayView`` class, which is a lightweight wrapper over a buffer that provides one- or multi-dimensional indexing/reshaping semantics. @@ -137,6 +145,106 @@ The output of this example is:: Range-based for loop over ArrayView c yields: 1 5 6 9 1 4 Standard for loop over ArrayView c yields: 1 5 6 9 1 4 +######################## +Using Arrays in GPU Code +######################## + +Instead of writing kernels and device functions that operate on raw pointers, we can use ``ArrayView`` +in device code. The basic "workflow" for this process is as follows: + + 1. Create an ``Array`` allocated in device-accessible memory via either specifying an allocator ID + or using a class template parameter for the desired memory space. + 2. Write a kernel that accepts an ``ArrayView`` parameter **by value**, not by reference or pointer. + 3. Create an ``ArrayView`` from the ``Array`` to call the function. For non-templated kernels + an implicit conversion is provided. + + +The full template signature for ``Array`` (``ArrayView`` has an analogous signature) is +``Array``. Of particular interest +is the last parameter, which specifies the memory space in which the array's data are allocated. +The default, ``Dynamic``, means that the memory space is set via an allocator ID at runtime. + +.. note:: Allocating ``Array`` s in different memory spaces is only possible when Umpire is available. + To learn more about Umpire, see the `Umpire documentation `_ + +Setting the ``MemorySpace`` to an option other than ``Dynamic`` (for example, ``MemorySpace::Device``) provides +a compile-time guarantee that data can always be accessed from a GPU. "Locking down" the memory space at +compile time can help to prevent illegal memory accesses and segmentation faults when pointers are dereferenced +from the wrong execution space. + +To summarize, there are a couple different options for creating an ``ArrayView``. +Consider a function that takes as an argument an ``ArrayView`` on the device: + +.. literalinclude:: ../../examples/core_containers.cpp + :start-after: _basic_array_function_start + :end-before: _basic_array_function_end + :language: C++ + +To create an argument to this function we can select the space either at runtime or at compile-time as follows: + +.. literalinclude:: ../../examples/core_containers.cpp + :start-after: _basic_array_device_create_start + :end-before: _basic_array_device_create_end + :language: C++ + +The first way we can create the required ``ArrayView`` is by implicit conversion, which also simplifies +the process of "locking down" a ``MemorySpace::Dynamic`` array to an explicit memory space - ``MemorySpace:Device`` in this case. + +.. literalinclude:: ../../examples/core_containers.cpp + :start-after: _basic_array_device_implicit_start + :end-before: _basic_array_device_implicit_end + :language: C++ + +.. warning:: If we had attempted to convert from a ``MemorySpace::Dynamic`` array that had been allocated in host memory, + for example, an error would be produced at runtime. + +We can also explicitly construct the ``ArrayView`` before calling the function. + +.. literalinclude:: ../../examples/core_containers.cpp + :start-after: _basic_array_device_explicit_start + :end-before: _basic_array_device_explicit_end + :language: C++ + +A more realistic example of this functionality involves a GPU kernel requiring +that its argument arrays be allocated in a specific memory space. +To illustrate how different memory spaces can be required, the following kernel requires that its +input arrays ``A`` and ``B`` are in unified memory and its output array ``C`` is in device memory. + +.. literalinclude:: ../../examples/core_containers.cpp + :start-after: _cuda_kernel_start + :end-before: _cuda_kernel_end + :language: C++ + +The following snippet illustrates how one would create and initialize the inputs/outputs to this kernel. + +.. literalinclude:: ../../examples/core_containers.cpp + :start-after: _cuda_array_create_start + :end-before: _cuda_array_create_end + :language: C++ + +.. note:: Unless the Dynamic memory space is in use, the ``Array`` constructor will + ignore an allocator ID that doesn't match its memory space, and in debug + builds will print a warning at runtime. + +We can now launch the kernel and display the results via a transfer back to host-accessible memory: + +.. literalinclude:: ../../examples/core_containers.cpp + :start-after: _cuda_array_call_start + :end-before: _cuda_array_call_end + :language: C++ + +If RAJA is available, we can also use Axom's acceleration utilities to perform an operation on the GPU +via a lambda: + +.. literalinclude:: ../../examples/core_containers.cpp + :start-after: _array_w_raja_start + :end-before: _array_w_raja_end + :language: C++ + +########## +StackArray +########## + The ``StackArray`` class is a work-around for a limitation in older versions of the nvcc compiler, which do not capture arrays on the stack in device lambdas. More details are in the API documentation and in the tests. diff --git a/src/axom/core/examples/CMakeLists.txt b/src/axom/core/examples/CMakeLists.txt index c9a7945890..9100cb8c87 100644 --- a/src/axom/core/examples/CMakeLists.txt +++ b/src/axom/core/examples/CMakeLists.txt @@ -34,12 +34,19 @@ foreach(example_source ${example_sources}) endforeach() if(ENABLE_CUDA) - blt_add_executable( - NAME ${exe_name}_cuda_on_ex + blt_add_executable( + NAME core_acceleration_cuda_on_ex SOURCES core_acceleration.cpp OUTPUT_DIR ${EXAMPLE_OUTPUT_DIRECTORY} DEPENDS_ON axom cuda FOLDER axom/core/examples ) + + blt_add_executable( + NAME core_containers_cuda_on_ex + SOURCES core_containers.cpp + OUTPUT_DIR ${EXAMPLE_OUTPUT_DIRECTORY} + DEPENDS_ON axom cuda + FOLDER axom/core/examples ) endif() blt_add_executable( diff --git a/src/axom/core/examples/core_containers.cpp b/src/axom/core/examples/core_containers.cpp index 95e10e8d68..fb7f84c6b5 100644 --- a/src/axom/core/examples/core_containers.cpp +++ b/src/axom/core/examples/core_containers.cpp @@ -22,15 +22,9 @@ #include "axom/core/Macros.hpp" #include "axom/core/memory_management.hpp" -#ifdef WIN32 - #include "windows.h" -void sleep(int numSeconds) -{ - int numMilliSecs = numSeconds * 1000; - Sleep(numMilliSecs); -} -#else - #include // for sleep() +#ifdef AXOM_USE_RAJA + #include "axom/core/execution/execution_space.hpp" + #include "axom/core/execution/for_all.hpp" #endif // C/C++ includes @@ -176,8 +170,137 @@ void demoArrayBasic() // _iteration_end } +// The following example requires CUDA + Umpire + unified memory +// FIXME: HIP +#if defined(AXOM_USE_UMPIRE) && defined(AXOM_USE_CUDA) && \ + defined(__CUDACC__) && defined(UMPIRE_ENABLE_UM) + #define AXOM_CONTAINERS_EXAMPLE_ON_DEVICE +#endif + +#ifdef AXOM_CONTAINERS_EXAMPLE_ON_DEVICE + +// _cuda_kernel_start +// Aliases used for convenience +using UnifiedIntArrayView = axom::ArrayView; +using DeviceIntArrayView = axom::ArrayView; + +__global__ void add(const UnifiedIntArrayView A, + const UnifiedIntArrayView B, + DeviceIntArrayView C) +{ + for(int i = 0; i < A.size(); i++) + { + C[i] = A[i] + B[i]; + } +} +// _cuda_kernel_end + +// _basic_array_function_start +void takesDeviceArrayView(axom::ArrayView) { } +// _basic_array_function_end +#endif + +void demoArrayDevice() +{ +#ifdef AXOM_CONTAINERS_EXAMPLE_ON_DEVICE + // _basic_array_device_create_start + constexpr int N = 10; + // An device array can be constructed by either specifying the corresponding allocator ID... + const int device_allocator_id = axom::getUmpireResourceAllocatorID( + umpire::resource::MemoryResourceType::Device); + axom::Array device_array_dynamic(N, N, device_allocator_id); + // ...or by providing the memory space via template parameter: + axom::Array device_array_template(N); + // _basic_array_device_create_end + + // _basic_array_device_implicit_start + takesDeviceArrayView(device_array_dynamic); + takesDeviceArrayView(device_array_template); + // _basic_array_device_implicit_end + + // _basic_array_device_explicit_start + axom::ArrayView view_of_dynamic_array( + device_array_dynamic); + takesDeviceArrayView(view_of_dynamic_array); + axom::ArrayView view_of_template_array( + device_array_template); + takesDeviceArrayView(view_of_template_array); + // _basic_array_device_explicit_end + + // _cuda_array_create_start + const int allocator_id = axom::getUmpireResourceAllocatorID( + umpire::resource::MemoryResourceType::Unified); + + // The last template parameter specifies a memory space. + // Its default value is Dynamic, which lets the user specify the + // memory space at runtime with a memory allocator ID. The + // third constructor parameter specifies the allocator. + // If this argument is not provided host memory will be allocated. + axom::Array A_dynamic(N, N, allocator_id); + + // We also have the option to "lock down" the memory space to allow for + // compile-time guarantees against dereferencing pointers in the wrong memory space. + axom::Array B_unified(N); + + // Despite having different types, both of these arrays are in unified memory. + for(int i = 0; i < N; i++) + { + A_dynamic[i] = i * 5; + B_unified[i] = i * 2; + } + + // The result array is allocated in device memory + axom::Array C_device(N); + + // _cuda_array_create_end + // _cuda_array_call_start + + // Passing by reference is not possible for CUDA kernels, so the three arrays + // are converted to corresponding ArrayViews that are "shallow copies" of the + // original Array. + // Note that even though A's memory space has not been locked down at compile time, + // we are able to pass it as an argument - it will be implicitly converted to an ArrayView + // of the correct type. Also note that if we had not constructed A with the UM allocator ID, + // this conversion would fail and produce an error at runtime. + add<<<1, 1>>>(A_dynamic, B_unified, C_device); + + // Since our result array is in device memory, we copy it to host memory so we can view it. + axom::Array C_host = C_device; + std::cout << "Array C_host = " << C_host << std::endl; + + // Since by default allocations happen in host memory, we could have also used a dynamic array (the default) + axom::Array C_dynamic = C_device; + std::cout << "Array C_dynamic = " << C_dynamic << std::endl; + // _cuda_array_call_end + + #ifdef AXOM_USE_RAJA + // _array_w_raja_start + // To use a lambda as a kernel, we create the ArrayViews explicitly. + const UnifiedIntArrayView A_view = A_dynamic; + const UnifiedIntArrayView B_view = B_unified; + // Create a new array for our RAJA result + axom::Array C_device_raja(N); + DeviceIntArrayView C_view = C_device_raja; + + // Declare the lambda mutable so our copy of C_view (captured by value) is mutable + axom::for_all>( + 0, + N, + [=] AXOM_HOST_DEVICE(axom::IndexType i) mutable { + C_view[i] = A_view[i] + B_view[i] + 1; + }); + + // Finally, copy things over to host memory so we can display the data + axom::Array C_host_raja = C_view; + std::cout << "Array C_host_raja = " << C_host_raja << std::endl; + // _array_w_raja_end + #endif +#endif +} + int main(int AXOM_UNUSED_PARAM(argc), char** AXOM_UNUSED_PARAM(argv)) { demoArrayBasic(); + demoArrayDevice(); return 0; } diff --git a/src/axom/core/memory_management.hpp b/src/axom/core/memory_management.hpp index 00f3a1fecf..761b2b3784 100644 --- a/src/axom/core/memory_management.hpp +++ b/src/axom/core/memory_management.hpp @@ -15,6 +15,7 @@ #include "umpire/config.hpp" #include "umpire/ResourceManager.hpp" #include "umpire/op/MemoryOperationRegistry.hpp" + #include "umpire/resource/MemoryResourceTypes.hpp" #include "umpire/strategy/QuickPool.hpp" #else #include // for std::memcpy @@ -25,6 +26,25 @@ namespace axom { constexpr int INVALID_ALLOCATOR_ID = -1; +/*! + * \brief Memory spaces supported by Array-like types + * + * This abstraction is not implemented using Umpire's MemoryResourceType enum + * in order to also include a "Dynamic" option as a default template parameter + * for Array-like types + */ +enum class MemorySpace +{ + Dynamic, +#ifdef AXOM_USE_UMPIRE + Host, + Device, + Unified, + Pinned, + Constant +#endif +}; + /// \name Memory Management Routines /// @{ @@ -251,6 +271,59 @@ inline void copy(void* dst, const void* src, std::size_t numbytes) noexcept #endif } +namespace detail +{ +/// \brief Translates between the MemorySpace enum and Umpire allocator IDs +template +inline int getAllocatorID(); + +template <> +inline int getAllocatorID() +{ + return axom::getDefaultAllocatorID(); +} + +#ifdef AXOM_USE_UMPIRE + +template <> +inline int getAllocatorID() +{ + return axom::getUmpireResourceAllocatorID( + umpire::resource::MemoryResourceType::Host); +} + +template <> +inline int getAllocatorID() +{ + return axom::getUmpireResourceAllocatorID( + umpire::resource::MemoryResourceType::Device); +} + +template <> +inline int getAllocatorID() +{ + return axom::getUmpireResourceAllocatorID( + umpire::resource::MemoryResourceType::Unified); +} + +template <> +inline int getAllocatorID() +{ + return axom::getUmpireResourceAllocatorID( + umpire::resource::MemoryResourceType::Pinned); +} + +template <> +inline int getAllocatorID() +{ + return axom::getUmpireResourceAllocatorID( + umpire::resource::MemoryResourceType::Constant); +} + +#endif + +} // namespace detail + } // namespace axom #endif /* AXOM_MEMORYMANAGEMENT_HPP_ */ diff --git a/src/axom/core/tests/core_array.hpp b/src/axom/core/tests/core_array.hpp index 1bb85ade2a..2909e82b93 100644 --- a/src/axom/core/tests/core_array.hpp +++ b/src/axom/core/tests/core_array.hpp @@ -679,8 +679,8 @@ void check_swap(Array& v) EXPECT_EQ(v_two, v_two_copy); } -template -void check_alloc(Array& v, const int& id) +template +void check_alloc(Array& v, const int id) { // Verify allocation EXPECT_EQ(v.getAllocatorID(), id); @@ -733,6 +733,175 @@ void check_external_view(ArrayView& v) EXPECT_EQ(data_ptr, v.data()); } +// FIXME: HIP +#if defined(__CUDACC__) && defined(AXOM_USE_UMPIRE) + +template +__global__ void assign_raw(T* data, int N) +{ + for(int i = 0; i < N; i++) + { + data[i] = i; + } +} + +template +__global__ void assign_view(ArrayView view) +{ + for(int i = 0; i < view.size(); i++) + { + view[i] = i * 2; + } +} + +/*! + * \brief Check that an array can be modified/accessed from device code + * \param [in] v the array to check. + */ +template +void check_device(Array& v) +{ + const IndexType size = v.size(); + // Then assign to it via a raw device pointer + assign_raw<<<1, 1>>>(v.data(), size); + + // Check the contents of the array by assigning to a Dynamic array + // The default Umpire allocator should be Host, so we can access it from the CPU + Array check_raw_array_dynamic = v; + EXPECT_EQ(check_raw_array_dynamic.size(), size); + for(int i = 0; i < check_raw_array_dynamic.size(); i++) + { + EXPECT_EQ(check_raw_array_dynamic[i], i); + } + + // Then check the contents by assigning to an explicitly Host array + Array check_raw_array_host = v; + EXPECT_EQ(check_raw_array_host.size(), size); + for(int i = 0; i < check_raw_array_host.size(); i++) + { + EXPECT_EQ(check_raw_array_host[i], i); + } + + // Then modify the underlying data via a view + ArrayView view(v); + assign_view<<<1, 1>>>(view); + + // Check the contents of the array by assigning to a Dynamic array + // The default Umpire allocator should be Host, so we can access it from the CPU + Array check_view_array_dynamic = view; + EXPECT_EQ(check_view_array_dynamic.size(), size); + for(int i = 0; i < check_view_array_dynamic.size(); i++) + { + EXPECT_EQ(check_view_array_dynamic[i], i * 2); + } + + // Then check the contents by assigning to an explicitly Host array + Array check_view_array_host = view; + EXPECT_EQ(check_view_array_host.size(), size); + for(int i = 0; i < check_view_array_host.size(); i++) + { + EXPECT_EQ(check_view_array_host[i], i * 2); + } +} + +template +__global__ void assign_raw_2d(T* data, int M, int N) +{ + for(int i = 0; i < N; i++) + { + for(int j = 0; j < N; j++) + { + data[i * N + j] = i * i + j; + } + } +} + +template +__global__ void assign_view_2d(ArrayView view) +{ + for(int i = 0; i < view.shape()[0]; i++) + { + for(int j = 0; j < view.shape()[1]; j++) + { + view(i, j) = j * j + i; + } + } +} + +/*! + * \brief Check that a 2D array can be modified/accessed from device code + * \param [in] v the array to check. + */ +template +void check_device_2D(Array& v) +{ + const IndexType size = v.size(); + const IndexType M = v.shape()[0]; + const IndexType N = v.shape()[1]; + // Then assign to it via a raw device pointer + assign_raw_2d<<<1, 1>>>(v.data(), M, N); + + // Check the contents of the array by assigning to a Dynamic array + // The default Umpire allocator should be Host, so we can access it from the CPU + Array check_raw_array_dynamic = v; + EXPECT_EQ(check_raw_array_dynamic.size(), size); + EXPECT_EQ(check_raw_array_dynamic.shape(), v.shape()); + + for(int i = 0; i < M; i++) + { + for(int j = 0; j < N; j++) + { + EXPECT_EQ(check_raw_array_dynamic(i, j), i * i + j); + } + } + + // Then check the contents by assigning to an explicitly Host array + Array check_raw_array_host = v; + EXPECT_EQ(check_raw_array_host.size(), size); + EXPECT_EQ(check_raw_array_host.shape(), v.shape()); + + for(int i = 0; i < M; i++) + { + for(int j = 0; j < N; j++) + { + EXPECT_EQ(check_raw_array_host(i, j), i * i + j); + } + } + + // Then modify the underlying data via a view + ArrayView view(v); + assign_view_2d<<<1, 1>>>(view); + + // Check the contents of the array by assigning to a Dynamic array + // The default Umpire allocator should be Host, so we can access it from the CPU + Array check_view_array_dynamic = view; + EXPECT_EQ(check_view_array_dynamic.size(), size); + EXPECT_EQ(check_view_array_dynamic.shape(), v.shape()); + + for(int i = 0; i < M; i++) + { + for(int j = 0; j < N; j++) + { + EXPECT_EQ(check_view_array_dynamic(i, j), j * j + i); + } + } + + // Then check the contents by assigning to an explicitly Host array + Array check_view_array_host = view; + EXPECT_EQ(check_view_array_host.size(), size); + EXPECT_EQ(check_view_array_host.shape(), v.shape()); + + for(int i = 0; i < M; i++) + { + for(int j = 0; j < N; j++) + { + EXPECT_EQ(check_view_array_host(i, j), j * j + i); + } + } +} + +#endif // defined(__CUDACC__) && defined(AXOM_USE_UMPIRE) + } /* end namespace internal */ //------------------------------------------------------------------------------ @@ -913,18 +1082,68 @@ TEST(core_array, checkAlloc) #endif }; - for(int id : memory_locations) + for(double ratio = 1.0; ratio <= 2.0; ratio += 0.5) { - for(double ratio = 1.0; ratio <= 2.0; ratio += 0.5) + for(IndexType capacity = 4; capacity <= 512; capacity *= 2) { - for(IndexType capacity = 4; capacity <= 512; capacity *= 2) + // First use the dynamic option + for(int id : memory_locations) { - Array v_int(capacity, capacity, id); + Array v_int(capacity, capacity, id); internal::check_alloc(v_int, id); - Array v_double(capacity, capacity, id); + Array v_double(capacity, + capacity, + id); internal::check_alloc(v_double, id); } +// Then, if Umpire is available, we can use the space as an explicit template parameter +#ifdef AXOM_USE_UMPIRE + #ifdef UMPIRE_ENABLE_DEVICE + Array v_int_device(capacity, capacity); + internal::check_alloc( + v_int_device, + axom::getUmpireResourceAllocatorID(umpire::resource::Device)); + Array v_double_device(capacity, + capacity); + internal::check_alloc( + v_double_device, + axom::getUmpireResourceAllocatorID(umpire::resource::Device)); + #endif + #ifdef UMPIRE_ENABLE_UM + Array v_int_unified(capacity, capacity); + internal::check_alloc( + v_int_unified, + axom::getUmpireResourceAllocatorID(umpire::resource::Unified)); + Array v_double_unified(capacity, + capacity); + internal::check_alloc( + v_double_unified, + axom::getUmpireResourceAllocatorID(umpire::resource::Unified)); + #endif + #ifdef UMPIRE_ENABLE_CONST + Array v_int_const(capacity, capacity); + internal::check_alloc( + v_int_const, + axom::getUmpireResourceAllocatorID(umpire::resource::Constant)); + Array v_double_const(capacity, + capacity); + internal::check_alloc( + v_double_const, + axom::getUmpireResourceAllocatorID(umpire::resource::Constant)); + #endif + #ifdef UMPIRE_ENABLE_PINNED + Array v_int_pinned(capacity, capacity); + internal::check_alloc( + v_int_pinned, + axom::getUmpireResourceAllocatorID(umpire::resource::Pinned)); + Array v_double_pinned(capacity, + capacity); + internal::check_alloc( + v_double_pinned, + axom::getUmpireResourceAllocatorID(umpire::resource::Pinned)); + #endif +#endif } } } @@ -1105,7 +1324,7 @@ TEST(core_array, check_multidimensional) v_int.fill(MAGIC_INT); // Make sure the number of elements and contents are correct EXPECT_EQ(v_int.size(), 2 * 2); - std::array expected_shape = {2, 2}; + StackArray expected_shape = {2, 2}; EXPECT_EQ(v_int.shape(), expected_shape); for(const auto val : v_int) { @@ -1123,7 +1342,7 @@ TEST(core_array, check_multidimensional) v_int_flat[1] = 2; v_int_flat[2] = 3; v_int_flat[3] = 4; - std::array expected_flat_shape = {4}; + StackArray expected_flat_shape = {4}; EXPECT_EQ(v_int_flat.shape(), expected_flat_shape); for(int i = 0; i < v_int_flat.size(); i++) @@ -1135,7 +1354,7 @@ TEST(core_array, check_multidimensional) Array v_double(4, 3, 2); v_double.fill(MAGIC_DOUBLE); EXPECT_EQ(v_double.size(), 4 * 3 * 2); - std::array expected_double_shape = {4, 3, 2}; + StackArray expected_double_shape = {4, 3, 2}; EXPECT_EQ(v_double.shape(), expected_double_shape); for(const auto val : v_double) { @@ -1174,7 +1393,7 @@ TEST(core_array, check_multidimensional_view) ArrayView v_int_view(v_int_arr, 2, 2); // Make sure the number of elements and contents are correct EXPECT_EQ(v_int_view.size(), 2 * 2); - std::array expected_shape = {2, 2}; + StackArray expected_shape = {2, 2}; EXPECT_EQ(v_int_view.shape(), expected_shape); for(const auto val : v_int_view) { @@ -1189,7 +1408,7 @@ TEST(core_array, check_multidimensional_view) // FIXME: Should we add a std::initializer_list ctor? int v_int_flat_arr[] = {1, 2, 3, 4}; ArrayView v_int_flat_view(v_int_flat_arr, 4); - std::array expected_flat_shape = {4}; + StackArray expected_flat_shape = {4}; EXPECT_EQ(v_int_flat_view.shape(), expected_flat_shape); for(int i = 0; i < v_int_flat_view.size(); i++) @@ -1202,7 +1421,7 @@ TEST(core_array, check_multidimensional_view) std::fill_n(v_double_arr, 4 * 3 * 2, MAGIC_DOUBLE); ArrayView v_double_view(v_double_arr, 4, 3, 2); EXPECT_EQ(v_double_view.size(), 4 * 3 * 2); - std::array expected_double_shape = {4, 3, 2}; + StackArray expected_double_shape = {4, 3, 2}; EXPECT_EQ(v_double_view.shape(), expected_double_shape); for(const auto val : v_double_view) { @@ -1231,4 +1450,63 @@ TEST(core_array, check_multidimensional_view) } } +//------------------------------------------------------------------------------ +TEST(core_array, checkDevice) +{ +// FIXME: HIP +#if !defined(__CUDACC__) || !defined(AXOM_USE_UMPIRE) || \ + !defined(UMPIRE_ENABLE_DEVICE) + GTEST_SKIP() + << "CUDA is not available, skipping tests that use Array in device code"; +#else + for(IndexType capacity = 2; capacity < 512; capacity *= 2) + { + // Allocate a Dynamic array in Device memory + Array v_int_dynamic( + capacity, + capacity, + axom::getUmpireResourceAllocatorID(umpire::resource::Device)); + + internal::check_device(v_int_dynamic); + + Array v_double_dynamic( + capacity, + capacity, + axom::getUmpireResourceAllocatorID(umpire::resource::Device)); + + internal::check_device(v_double_dynamic); + + // Then allocate an explicitly Device array + Array v_int_device(capacity, capacity); + internal::check_device(v_int_device); + + Array v_double_device(capacity, + capacity); + internal::check_device(v_double_device); + } +#endif +} + +//------------------------------------------------------------------------------ +TEST(core_array, checkDevice2D) +{ +// FIXME: HIP +#if !defined(__CUDACC__) || !defined(AXOM_USE_UMPIRE) || \ + !defined(UMPIRE_ENABLE_DEVICE) + GTEST_SKIP() + << "CUDA is not available, skipping tests that use Array in device code"; +#else + for(IndexType capacity = 2; capacity < 512; capacity *= 2) + { + // Allocate an explicitly Device array + Array v_int_device(capacity, capacity); + internal::check_device_2D(v_int_device); + + Array v_double_device(capacity, + capacity); + internal::check_device_2D(v_double_device); + } +#endif +} + } /* end namespace axom */ diff --git a/src/axom/sidre/core/Array.hpp b/src/axom/sidre/core/Array.hpp index 29254250dc..5d8f5b3920 100644 --- a/src/axom/sidre/core/Array.hpp +++ b/src/axom/sidre/core/Array.hpp @@ -32,7 +32,7 @@ constexpr axom::IndexType ZERO = 0; namespace detail { inline void describeViewImpl(TypeID T_type, - const std::array& shape, + const StackArray& shape, View* view) { SLIC_ASSERT(view != nullptr); @@ -43,7 +43,7 @@ inline void describeViewImpl(TypeID T_type, } inline void describeViewImpl(TypeID T_type, - const std::array& shape, + const StackArray& shape, View* view) { SLIC_ASSERT(view != nullptr);