From 3624a5a096071e611f92d9bacc29765f4698f26c Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Mon, 25 Mar 2024 16:36:08 -0600 Subject: [PATCH 01/34] KokkosComm::irecv Add an interface to KokkosComm:Req to register a callable at wait(). Use that interface to invoke unpack operations (if needed) at wait(). Add packer_type to MpiArgs. Make failures off rank 0 somewhat visible in the unit tests. --- docs/api/core.rst | 8 +- src/KokkosComm.hpp | 5 + src/KokkosComm_request.hpp | 23 ++++- src/impl/KokkosComm_irecv.hpp | 78 ++++++++++++++ src/impl/KokkosComm_packer.hpp | 5 +- unit_tests/CMakeLists.txt | 1 + unit_tests/test_isendirecv.cpp | 181 +++++++++++++++++++++++++++++++++ 7 files changed, 297 insertions(+), 4 deletions(-) create mode 100644 src/impl/KokkosComm_irecv.hpp create mode 100644 unit_tests/test_isendirecv.cpp diff --git a/docs/api/core.rst b/docs/api/core.rst index fb3a895a..fbdc72fa 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -32,6 +32,10 @@ Core * - ``MPI_Reduce`` - ``reduce`` - ✓ + * - MPI_Send + - send + - ✓ + - ✓ Point-to-point -------------- @@ -133,10 +137,10 @@ Related Types .. cpp:function:: void KokkosComm::Req::wait() - Call MPI_Wait on the held MPI_Request and drop copies of any previous arguments to Req::keep_until_wait(). + Call MPI_Wait on the held MPI_Request and drop copies of any previous arguments to Req::drop_at_wait(). .. cpp:function:: template \ - void KokkosComm::Req::keep_until_wait(const View &v) + void KokkosComm::Req::drop_at_wait(const View &v) Extend the lifetime of v at least until Req::wait() is called. This is useful to prevent a View from being destroyed during an asynchronous MPI operation. diff --git a/src/KokkosComm.hpp b/src/KokkosComm.hpp index 84ab90ea..03d64d6a 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm.hpp @@ -43,4 +43,9 @@ void recv(const ExecSpace &space, RecvView &sv, int src, int tag, MPI_Comm comm) return Impl::recv(space, sv, src, tag, comm); } +template +Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) { + return Impl::irecv(space, rv, src, tag, comm); +} + } // namespace KokkosComm diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp index 34372478..1a3096fb 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm_request.hpp @@ -23,6 +23,21 @@ namespace KokkosComm { class Req { + // a type-erased callable + struct CallableBase { + virtual ~CallableBase() {} + + virtual void operator()() = 0; + }; + template + struct CallableHolder : CallableBase { + CallableHolder(const F &f) : f_(f) {} + + virtual void operator()() override { return f_(); } + + F f_; + }; + // a type-erased view. Request uses these to keep temporary views alive for // the lifetime of "Immediate" MPI operations struct ViewHolderBase { @@ -38,6 +53,7 @@ class Req { Record() : req_(MPI_REQUEST_NULL) {} MPI_Request req_; std::vector> until_waits_; + std::vector> at_waits_; }; public: @@ -52,10 +68,15 @@ class Req { // keep a reference to this view around until wait() is called template - void keep_until_wait(const View &v) { + void drop_at_wait(const View &v) { record_->until_waits_.push_back(std::make_shared>(v)); } + template + void call_and_drop_at_wait(const Callable &c) { + record_->at_waits_.push_back(std::make_shared>(c)); + } + private: std::shared_ptr record_; }; diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp new file mode 100644 index 00000000..b54a346f --- /dev/null +++ b/src/impl/KokkosComm_irecv.hpp @@ -0,0 +1,78 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include "KokkosComm_pack_traits.hpp" +#include "KokkosComm_traits.hpp" + +// impl +#include "KokkosComm_include_mpi.hpp" + +namespace KokkosComm::Impl { + +template +struct IrecvUnpacker { + IrecvUnpacker(const ExecSpace &space, RecvView &rv, MpiArgs &args) + : space_(space), rv_(rv), args_(args) {} + + void operator()() { + Kokkos::Tools::pushRegion("KokkosComm::Impl::IrecvUnpacker::operator()"); + MpiArgs::packer_type::unpack_into(space_, rv_, args_.view); + space_.fence(); + Kokkos::Tools::popRegion(); + } + + ExecSpace space_; + RecvView rv_; + MpiArgs args_; +}; + +/* FIXME: If RecvView is a Kokkos view, it can be a const ref + same is true for an mdspan? +*/ +template +Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, + MPI_Comm comm) { + Kokkos::Tools::pushRegion("KokkosComm::Impl::irecv"); + + using KCT = KokkosComm::Traits; + using KCPT = KokkosComm::PackTraits; + + KokkosComm::Req req; + + if (KCPT::needs_unpack(rv)) { + using Packer = typename KCPT::packer_type; + using Args = typename Packer::args_type; + + Args args = Packer::allocate_packed_for(space, "packed", rv); + space.fence(); + MPI_Irecv(KCT::data_handle(args.view), args.count, args.datatype, src, tag, + comm, &req.mpi_req()); + req.call_and_drop_at_wait(IrecvUnpacker{space, rv, args}); + + } else { + using RecvScalar = typename RecvView::value_type; + MPI_Irecv(KCT::data_handle(rv), KCT::span(rv), mpi_type_v, src, + tag, comm, &req.mpi_req()); + } + return req; + + Kokkos::Tools::popRegion(); +} +} // namespace KokkosComm::Impl \ No newline at end of file diff --git a/src/impl/KokkosComm_packer.hpp b/src/impl/KokkosComm_packer.hpp index 47937bc2..c2b7f699 100644 --- a/src/impl/KokkosComm_packer.hpp +++ b/src/impl/KokkosComm_packer.hpp @@ -25,6 +25,9 @@ namespace Packer { template struct MpiArgs { + using packer_type = + Packer; // the type of the packer that produced these arguments + View view; MPI_Datatype datatype; int count; @@ -71,7 +74,7 @@ struct DeepCopy { template struct MpiDatatype { using non_const_packed_view_type = View; - using args_type = MpiArgs; + using args_type = MpiArgs>; // don't actually allocate - return the provided view, but with // a datatype that describes the data in the view diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 029ff87d..09212db1 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -44,6 +44,7 @@ target_link_libraries(test-mpi MPI::MPI_CXX) # Kokkos Comm tests add_executable(test-main test_main.cpp test_gtest_mpi.cpp + test_isendirecv.cpp test_isendrecv.cpp test_reduce.cpp test_sendrecv.cpp diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_isendirecv.cpp new file mode 100644 index 00000000..9893066f --- /dev/null +++ b/unit_tests/test_isendirecv.cpp @@ -0,0 +1,181 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#if KOKKOSCOMM_ENABLE_MDSPAN +#if KOKKOSCOMM_MDSPAN_IN_EXPERIMENTAL +#include +#define MDSPAN_PREFIX() experimental:: +#else +#include +#define MDSPAN_PREFIX() +#endif + +using std::MDSPAN_PREFIX() dextents; +using std::MDSPAN_PREFIX() extents; +using std::MDSPAN_PREFIX() layout_stride; +using std::MDSPAN_PREFIX() mdspan; +#endif // KOKKOSCOMM_ENABLE_MDSPAN + +#include + +#include "KokkosComm.hpp" + +template +class IsendIrecv : public testing::Test { + public: + using Scalar = T; +}; + +using ScalarTypes = + ::testing::Types, + Kokkos::complex, int, unsigned, int64_t, size_t>; +TYPED_TEST_SUITE(IsendIrecv, ScalarTypes); + +TYPED_TEST(IsendIrecv, 1D_contig) { + Kokkos::View a("a", 1000); + + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + } + + if (0 == rank) { + int dst = 1; + Kokkos::parallel_for( + a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); + KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, + dst, 0, MPI_COMM_WORLD); + req.wait(); + } else if (1 == rank) { + int src = 0; + KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, + src, 0, MPI_COMM_WORLD); + req.wait(); + int errs; + Kokkos::parallel_reduce( + a.extent(0), + KOKKOS_LAMBDA(const int &i, int &lsum) { + lsum += a(i) != typename TestFixture::Scalar(i); + }, + errs); + ASSERT_EQ(errs, 0); + } +} + +TYPED_TEST(IsendIrecv, 1D_noncontig) { + // this is C-style layout, i.e. b(0,0) is next to b(0,1) + Kokkos::View b("a", 10, + 10); + auto a = + Kokkos::subview(b, Kokkos::ALL, 2); // take column 2 (non-contiguous) + + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + if (0 == rank) { + int dst = 1; + Kokkos::parallel_for( + a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); + KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, + dst, 0, MPI_COMM_WORLD); + req.wait(); + } else if (1 == rank) { + int src = 0; + KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, + src, 0, MPI_COMM_WORLD); + req.wait(); + int errs; + Kokkos::parallel_reduce( + a.extent(0), + KOKKOS_LAMBDA(const int &i, int &lsum) { + lsum += a(i) != typename TestFixture::Scalar(i); + }, + errs); + ASSERT_EQ(errs, 0); + } +} +#if 0 + +#if KOKKOSCOMM_ENABLE_MDSPAN + +TYPED_TEST(IsendIrecv, 1D_mdspan_contig) { + using ScalarType = typename TestFixture::Scalar; + + std::vector v(100); + auto a = mdspan(&v[2], 13); // 13 scalars starting at index 2 + + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + if (0 == rank) { + int dst = 1; + for (size_t i = 0; i < a.extent(0); ++i) { + a[i] = i; + } + KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, + dst, 0, MPI_COMM_WORLD); + req.wait(); + } else if (1 == rank) { + int src = 0; + KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, + MPI_COMM_WORLD); + int errs = 0; + for (size_t i = 0; i < a.extent(0); ++i) { + errs += (a[i] != ScalarType(i)); + } + ASSERT_EQ(errs, 0); + } +} + +TYPED_TEST(IsendIrecv, 1D_mdspan_noncontig) { + using ScalarType = typename TestFixture::Scalar; + + std::vector v(100); + + using ExtentsType = dextents; + ExtentsType shape{10}; + std::array strides{10}; + + mdspan a( + &v[2], layout_stride::mapping{shape, strides}); + + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + if (0 == rank) { + int dst = 1; + for (size_t i = 0; i < a.extent(0); ++i) { + a[i] = i; + } + KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, + dst, 0, MPI_COMM_WORLD); + req.wait(); + } else if (1 == rank) { + int src = 0; + KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, + MPI_COMM_WORLD); + int errs = 0; + for (size_t i = 0; i < a.extent(0); ++i) { + errs += (a[i] != ScalarType(i)); + } + ASSERT_EQ(errs, 0); + } +} +#endif + +#endif // KOKKOSCOMM_ENABLE_MDSPAN \ No newline at end of file From d45bf657f03b10c621cbd5c9713f1f5cdfbcfac9 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Tue, 26 Mar 2024 10:09:43 -0600 Subject: [PATCH 02/34] docs: update KokkosComm::Req --- docs/api/core.rst | 6 ++++++ docs/design.rst | 20 +++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/docs/api/core.rst b/docs/api/core.rst index fbdc72fa..a01e195a 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -144,3 +144,9 @@ Related Types Extend the lifetime of v at least until Req::wait() is called. This is useful to prevent a View from being destroyed during an asynchronous MPI operation. + + .. cpp:function:: template \ + void KokkosComm::Req::call_and_drop_at_wait(const Callable &c) + + Store a copy of ``c``, and invoke ``c()`` when ``wait`` is called. + Destroy the copy of ``c`` afterwards. diff --git a/docs/design.rst b/docs/design.rst index aa169280..7e26efec 100644 --- a/docs/design.rst +++ b/docs/design.rst @@ -1,10 +1,24 @@ Design ====== -Asynchronous MPI operations and view lifetimes ----------------------------------------------- +Asynchronous MPI operations, View Lifetimes, and Packing +-------------------------------------------------------- -"Immediate" functions (e.g. `isend`) return a `KokkosComm::Req`, which can be `wait()`-ed to block until the input view can be reused. `Req` also manages the lifetimes of any intermediate views needed for packing the data, releasing those views when `wait()` is complete. +Asynchronous MPI operations use an ``MPI_Request``, which is a handle that can be used to refer to the operation later (e.g., ``MPI_Wait``). + +KokkosComm has an analogous concept, `KokkosComm::Req`. +"Immediate" functions (e.g. `isend`) return a `KokkosComm::Req`, which can be `wait()`-ed to block until the input view can be reused. + +There are three consequences + +First, to ensure compatibility with MPI semantics, KokkosComm immediate functions will call the corresponding MPI function before they return. + +Second, the KokkosComm packing strategy may require that an intermediate view be allocated, and this view needs to have a lifetime at least as long as the communication. +The ``KokkosComm::Req::drop_at_wait`` interface allows the `KokkosComm::Req` to hold those views until ``wait`` is called. + +Third, for asynchronous receive operations, the packing strategy may require that the buffer provided by the underlying MPI operation be further unpacked. +The ``KokkosComm::Req::call_and_drop_at_wait`` allows the `KokkosComm::Req` to execute (and then drop) callback functors when ``wait`` is called. +For example, `KokkosComm::irecv` uses this functionality to attach an unpacking operation to the `KokkosComm::Req::wait` call. Non-contiguous Data ------------------- From 5eddb34a835a3e9218912fb8bbd85c78e2c9d8f7 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Tue, 26 Mar 2024 10:09:56 -0600 Subject: [PATCH 03/34] add Invokable concept --- src/impl/KokkosComm_concepts.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/impl/KokkosComm_concepts.hpp b/src/impl/KokkosComm_concepts.hpp index 1eb36996..346f637e 100644 --- a/src/impl/KokkosComm_concepts.hpp +++ b/src/impl/KokkosComm_concepts.hpp @@ -25,5 +25,7 @@ concept KokkosView = Kokkos::is_view_v; template concept KokkosExecutionSpace = Kokkos::is_execution_space_v; +template +concept Invokable = std::is_invocable_v; } // namespace KokkosComm From 6815a0c19dd68d8b7671cfb95a718bce8e184b3b Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Tue, 26 Mar 2024 10:10:39 -0600 Subject: [PATCH 04/34] Req: use Invokable --- src/KokkosComm_request.hpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp index 1a3096fb..a41097cd 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm_request.hpp @@ -24,18 +24,18 @@ namespace KokkosComm { class Req { // a type-erased callable - struct CallableBase { - virtual ~CallableBase() {} + struct InvokableHolderBase { + virtual ~InvokableHolderBase() {} virtual void operator()() = 0; }; - template - struct CallableHolder : CallableBase { - CallableHolder(const F &f) : f_(f) {} + template + struct InvokableHolder : InvokableHolderBase { + InvokableHolder(const Fn &f) : f_(f) {} virtual void operator()() override { return f_(); } - F f_; + Fn f_; }; // a type-erased view. Request uses these to keep temporary views alive for @@ -53,7 +53,7 @@ class Req { Record() : req_(MPI_REQUEST_NULL) {} MPI_Request req_; std::vector> until_waits_; - std::vector> at_waits_; + std::vector> at_waits_; }; public: @@ -72,9 +72,9 @@ class Req { record_->until_waits_.push_back(std::make_shared>(v)); } - template - void call_and_drop_at_wait(const Callable &c) { - record_->at_waits_.push_back(std::make_shared>(c)); + template + void call_and_drop_at_wait(const Fn &f) { + record_->at_waits_.push_back(std::make_shared>(f)); } private: From 1c5859f10a212b07a086dc9c402ba26497d9ad1e Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Tue, 26 Mar 2024 10:10:53 -0600 Subject: [PATCH 05/34] unit_tests: enable IsendIrecv mdspan tests --- unit_tests/test_isendirecv.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_isendirecv.cpp index 9893066f..3a5bdd9f 100644 --- a/unit_tests/test_isendirecv.cpp +++ b/unit_tests/test_isendirecv.cpp @@ -109,7 +109,6 @@ TYPED_TEST(IsendIrecv, 1D_noncontig) { ASSERT_EQ(errs, 0); } } -#if 0 #if KOKKOSCOMM_ENABLE_MDSPAN @@ -131,9 +130,10 @@ TYPED_TEST(IsendIrecv, 1D_mdspan_contig) { dst, 0, MPI_COMM_WORLD); req.wait(); } else if (1 == rank) { - int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, - MPI_COMM_WORLD); + int src = 0; + KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, + src, 0, MPI_COMM_WORLD); + req.wait(); int errs = 0; for (size_t i = 0; i < a.extent(0); ++i) { errs += (a[i] != ScalarType(i)); @@ -166,9 +166,10 @@ TYPED_TEST(IsendIrecv, 1D_mdspan_noncontig) { dst, 0, MPI_COMM_WORLD); req.wait(); } else if (1 == rank) { - int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, - MPI_COMM_WORLD); + int src = 0; + KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, + src, 0, MPI_COMM_WORLD); + req.wait(); int errs = 0; for (size_t i = 0; i < a.extent(0); ++i) { errs += (a[i] != ScalarType(i)); @@ -176,6 +177,5 @@ TYPED_TEST(IsendIrecv, 1D_mdspan_noncontig) { ASSERT_EQ(errs, 0); } } -#endif #endif // KOKKOSCOMM_ENABLE_MDSPAN \ No newline at end of file From 63bcfe0d25907b67d31d48b515beee7b60e4fb31 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Tue, 26 Mar 2024 10:17:31 -0600 Subject: [PATCH 06/34] rename Req members --- src/KokkosComm_request.hpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp index a41097cd..486d3dad 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm_request.hpp @@ -23,7 +23,8 @@ namespace KokkosComm { class Req { - // a type-erased callable + // a type-erased callable. Req uses these to attach callbacks to be executed + // at wait struct InvokableHolderBase { virtual ~InvokableHolderBase() {} @@ -52,8 +53,8 @@ class Req { struct Record { Record() : req_(MPI_REQUEST_NULL) {} MPI_Request req_; - std::vector> until_waits_; - std::vector> at_waits_; + std::vector> wait_drops_; + std::vector> wait_callbacks_; }; public: @@ -63,18 +64,24 @@ class Req { void wait() { MPI_Wait(&(record_->req_), MPI_STATUS_IGNORE); - record_->until_waits_.clear(); // drop any views we're keeping alive until wait() + record_->wait_drops_ + .clear(); // drop any views we're keeping alive until wait() + for (auto &c : record_->wait_callbacks_) { + (*c)(); + } + record_->wait_callbacks_.clear(); } // keep a reference to this view around until wait() is called template void drop_at_wait(const View &v) { - record_->until_waits_.push_back(std::make_shared>(v)); + record_->wait_drops_.push_back(std::make_shared>(v)); } template void call_and_drop_at_wait(const Fn &f) { - record_->at_waits_.push_back(std::make_shared>(f)); + record_->wait_callbacks_.push_back( + std::make_shared>(f)); } private: From 95641b11f5217c6e39f8fd75eec37e27004047d5 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 27 Mar 2024 14:51:05 -0600 Subject: [PATCH 07/34] revert drop_at_wait back to keep_until_wait --- docs/api/core.rst | 4 ++-- docs/design.rst | 2 +- src/KokkosComm_request.hpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/api/core.rst b/docs/api/core.rst index a01e195a..2ad829d3 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -137,10 +137,10 @@ Related Types .. cpp:function:: void KokkosComm::Req::wait() - Call MPI_Wait on the held MPI_Request and drop copies of any previous arguments to Req::drop_at_wait(). + Call MPI_Wait on the held MPI_Request and drop copies of any previous arguments to Req::keep_until_wait(). .. cpp:function:: template \ - void KokkosComm::Req::drop_at_wait(const View &v) + void KokkosComm::Req::keep_until_wait(const View &v) Extend the lifetime of v at least until Req::wait() is called. This is useful to prevent a View from being destroyed during an asynchronous MPI operation. diff --git a/docs/design.rst b/docs/design.rst index 7e26efec..d2cfacc8 100644 --- a/docs/design.rst +++ b/docs/design.rst @@ -14,7 +14,7 @@ There are three consequences First, to ensure compatibility with MPI semantics, KokkosComm immediate functions will call the corresponding MPI function before they return. Second, the KokkosComm packing strategy may require that an intermediate view be allocated, and this view needs to have a lifetime at least as long as the communication. -The ``KokkosComm::Req::drop_at_wait`` interface allows the `KokkosComm::Req` to hold those views until ``wait`` is called. +The ``KokkosComm::Req::keep_until_wait`` interface allows the `KokkosComm::Req` to hold those views until ``wait`` is called. Third, for asynchronous receive operations, the packing strategy may require that the buffer provided by the underlying MPI operation be further unpacked. The ``KokkosComm::Req::call_and_drop_at_wait`` allows the `KokkosComm::Req` to execute (and then drop) callback functors when ``wait`` is called. diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp index 486d3dad..7bc85918 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm_request.hpp @@ -74,7 +74,7 @@ class Req { // keep a reference to this view around until wait() is called template - void drop_at_wait(const View &v) { + void keep_until_wait(const View &v) { record_->wait_drops_.push_back(std::make_shared>(v)); } From 64b20d531811f77c69dc4284a2254ca17f066572 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 27 Mar 2024 14:54:43 -0600 Subject: [PATCH 08/34] formatting --- src/KokkosComm.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/KokkosComm.hpp b/src/KokkosComm.hpp index 03d64d6a..4b27f7c3 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm.hpp @@ -44,7 +44,8 @@ void recv(const ExecSpace &space, RecvView &sv, int src, int tag, MPI_Comm comm) } template -Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) { +Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, + MPI_Comm comm) { return Impl::irecv(space, rv, src, tag, comm); } From eb5a88e0aea2839aa83de27c76b984d2454acd38 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 27 Mar 2024 14:57:15 -0600 Subject: [PATCH 09/34] Use default virtual dtor --- src/KokkosComm_request.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp index 7bc85918..2f4ca5a5 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm_request.hpp @@ -26,7 +26,7 @@ class Req { // a type-erased callable. Req uses these to attach callbacks to be executed // at wait struct InvokableHolderBase { - virtual ~InvokableHolderBase() {} + virtual ~InvokableHolderBase() = default; virtual void operator()() = 0; }; From c237d59e63c96df16f668101438795992cec01b4 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 27 Mar 2024 14:57:34 -0600 Subject: [PATCH 10/34] Don't require void return type on Invokable --- src/KokkosComm_request.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp index 2f4ca5a5..3c5fd430 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm_request.hpp @@ -34,7 +34,7 @@ class Req { struct InvokableHolder : InvokableHolderBase { InvokableHolder(const Fn &f) : f_(f) {} - virtual void operator()() override { return f_(); } + virtual void operator()() override { f_(); } Fn f_; }; From e1ce0b5dd75a40e7a076cc7f7ae0bf96acd431bf Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 27 Mar 2024 15:05:33 -0600 Subject: [PATCH 11/34] Keep irecv view alive until wait --- src/impl/KokkosComm_irecv.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp index b54a346f..a7e589e1 100644 --- a/src/impl/KokkosComm_irecv.hpp +++ b/src/impl/KokkosComm_irecv.hpp @@ -70,6 +70,7 @@ Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, using RecvScalar = typename RecvView::value_type; MPI_Irecv(KCT::data_handle(rv), KCT::span(rv), mpi_type_v, src, tag, comm, &req.mpi_req()); + req.keep_until_wait(rv); } return req; From 27305951a489c96a788d74e072c9851f92308c44 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 28 Mar 2024 14:07:08 -0600 Subject: [PATCH 12/34] Formatting docs/design.rst Co-authored-by: Daniel Arndt --- docs/design.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design.rst b/docs/design.rst index d2cfacc8..de65a8e2 100644 --- a/docs/design.rst +++ b/docs/design.rst @@ -11,7 +11,7 @@ KokkosComm has an analogous concept, `KokkosComm::Req`. There are three consequences -First, to ensure compatibility with MPI semantics, KokkosComm immediate functions will call the corresponding MPI function before they return. +- First, to ensure compatibility with MPI semantics, KokkosComm immediate functions will call the corresponding MPI function before they return. Second, the KokkosComm packing strategy may require that an intermediate view be allocated, and this view needs to have a lifetime at least as long as the communication. The ``KokkosComm::Req::keep_until_wait`` interface allows the `KokkosComm::Req` to hold those views until ``wait`` is called. From 5e09b95c63044ec9a631d60826de6c4f5d7bc670 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 28 Mar 2024 14:07:15 -0600 Subject: [PATCH 13/34] Formatting docs/design.rst Co-authored-by: Daniel Arndt --- docs/design.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design.rst b/docs/design.rst index de65a8e2..189ab592 100644 --- a/docs/design.rst +++ b/docs/design.rst @@ -13,7 +13,7 @@ There are three consequences - First, to ensure compatibility with MPI semantics, KokkosComm immediate functions will call the corresponding MPI function before they return. -Second, the KokkosComm packing strategy may require that an intermediate view be allocated, and this view needs to have a lifetime at least as long as the communication. +- Second, the KokkosComm packing strategy may require that an intermediate view be allocated, and this view needs to have a lifetime at least as long as the communication. The ``KokkosComm::Req::keep_until_wait`` interface allows the `KokkosComm::Req` to hold those views until ``wait`` is called. Third, for asynchronous receive operations, the packing strategy may require that the buffer provided by the underlying MPI operation be further unpacked. From 3849c9aeb101bd9f98469bbe6b3f0e1290b4fbae Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 28 Mar 2024 14:07:24 -0600 Subject: [PATCH 14/34] Formatting docs/design.rst Co-authored-by: Daniel Arndt --- docs/design.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design.rst b/docs/design.rst index 189ab592..185750f9 100644 --- a/docs/design.rst +++ b/docs/design.rst @@ -16,7 +16,7 @@ There are three consequences - Second, the KokkosComm packing strategy may require that an intermediate view be allocated, and this view needs to have a lifetime at least as long as the communication. The ``KokkosComm::Req::keep_until_wait`` interface allows the `KokkosComm::Req` to hold those views until ``wait`` is called. -Third, for asynchronous receive operations, the packing strategy may require that the buffer provided by the underlying MPI operation be further unpacked. +- Third, for asynchronous receive operations, the packing strategy may require that the buffer provided by the underlying MPI operation be further unpacked. The ``KokkosComm::Req::call_and_drop_at_wait`` allows the `KokkosComm::Req` to execute (and then drop) callback functors when ``wait`` is called. For example, `KokkosComm::irecv` uses this functionality to attach an unpacking operation to the `KokkosComm::Req::wait` call. From 303700644965fbc0d96b0ab96a3ae7826716f5c9 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 5 Apr 2024 09:52:15 -0600 Subject: [PATCH 15/34] Irecv: fix include ordering --- src/KokkosComm.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/KokkosComm.hpp b/src/KokkosComm.hpp index 4b27f7c3..cee121bb 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm.hpp @@ -16,8 +16,12 @@ #pragma once -#include "KokkosComm_collective.hpp" #include "KokkosComm_version.hpp" +#include "KokkosComm_concepts.hpp" +#include "KokkosComm_request.hpp" + +#include "KokkosComm_collective.hpp" +#include "KokkosComm_irecv.hpp" #include "KokkosComm_isend.hpp" #include "KokkosComm_recv.hpp" #include "KokkosComm_send.hpp" From 536711b8195cedaffc2db3d2193ed9d70e9e83f6 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Apr 2024 16:46:02 -0600 Subject: [PATCH 16/34] irecv: remove mdspan --- src/KokkosComm.hpp | 2 +- src/impl/KokkosComm_irecv.hpp | 7 ++----- src/impl/KokkosComm_packer.hpp | 2 +- unit_tests/test_isendirecv.cpp | 15 --------------- 4 files changed, 4 insertions(+), 22 deletions(-) diff --git a/src/KokkosComm.hpp b/src/KokkosComm.hpp index cee121bb..e43711fb 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm.hpp @@ -47,7 +47,7 @@ void recv(const ExecSpace &space, RecvView &sv, int src, int tag, MPI_Comm comm) return Impl::recv(space, sv, src, tag, comm); } -template +template Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) { return Impl::irecv(space, rv, src, tag, comm); diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp index a7e589e1..1f6ae307 100644 --- a/src/impl/KokkosComm_irecv.hpp +++ b/src/impl/KokkosComm_irecv.hpp @@ -26,7 +26,7 @@ namespace KokkosComm::Impl { -template +template struct IrecvUnpacker { IrecvUnpacker(const ExecSpace &space, RecvView &rv, MpiArgs &args) : space_(space), rv_(rv), args_(args) {} @@ -43,10 +43,7 @@ struct IrecvUnpacker { MpiArgs args_; }; -/* FIXME: If RecvView is a Kokkos view, it can be a const ref - same is true for an mdspan? -*/ -template +template Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) { Kokkos::Tools::pushRegion("KokkosComm::Impl::irecv"); diff --git a/src/impl/KokkosComm_packer.hpp b/src/impl/KokkosComm_packer.hpp index c2b7f699..74fe1eef 100644 --- a/src/impl/KokkosComm_packer.hpp +++ b/src/impl/KokkosComm_packer.hpp @@ -23,7 +23,7 @@ namespace KokkosComm::Impl { namespace Packer { -template +template struct MpiArgs { using packer_type = Packer; // the type of the packer that produced these arguments diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_isendirecv.cpp index 3a5bdd9f..3da3b244 100644 --- a/unit_tests/test_isendirecv.cpp +++ b/unit_tests/test_isendirecv.cpp @@ -14,21 +14,6 @@ // //@HEADER -#if KOKKOSCOMM_ENABLE_MDSPAN -#if KOKKOSCOMM_MDSPAN_IN_EXPERIMENTAL -#include -#define MDSPAN_PREFIX() experimental:: -#else -#include -#define MDSPAN_PREFIX() -#endif - -using std::MDSPAN_PREFIX() dextents; -using std::MDSPAN_PREFIX() extents; -using std::MDSPAN_PREFIX() layout_stride; -using std::MDSPAN_PREFIX() mdspan; -#endif // KOKKOSCOMM_ENABLE_MDSPAN - #include #include "KokkosComm.hpp" From 689885020df120b1c3ee1bbacca7788bc5587130 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Apr 2024 16:48:56 -0600 Subject: [PATCH 17/34] irecv: remove mdspan tests --- unit_tests/test_isendirecv.cpp | 72 ++-------------------------------- 1 file changed, 3 insertions(+), 69 deletions(-) diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_isendirecv.cpp index 3da3b244..058ea05f 100644 --- a/unit_tests/test_isendirecv.cpp +++ b/unit_tests/test_isendirecv.cpp @@ -18,6 +18,8 @@ #include "KokkosComm.hpp" +#include "view_builder.hpp" + template class IsendIrecv : public testing::Test { public: @@ -95,72 +97,4 @@ TYPED_TEST(IsendIrecv, 1D_noncontig) { } } -#if KOKKOSCOMM_ENABLE_MDSPAN - -TYPED_TEST(IsendIrecv, 1D_mdspan_contig) { - using ScalarType = typename TestFixture::Scalar; - - std::vector v(100); - auto a = mdspan(&v[2], 13); // 13 scalars starting at index 2 - - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - if (0 == rank) { - int dst = 1; - for (size_t i = 0; i < a.extent(0); ++i) { - a[i] = i; - } - KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, - dst, 0, MPI_COMM_WORLD); - req.wait(); - } else if (1 == rank) { - int src = 0; - KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, - src, 0, MPI_COMM_WORLD); - req.wait(); - int errs = 0; - for (size_t i = 0; i < a.extent(0); ++i) { - errs += (a[i] != ScalarType(i)); - } - ASSERT_EQ(errs, 0); - } -} - -TYPED_TEST(IsendIrecv, 1D_mdspan_noncontig) { - using ScalarType = typename TestFixture::Scalar; - - std::vector v(100); - - using ExtentsType = dextents; - ExtentsType shape{10}; - std::array strides{10}; - - mdspan a( - &v[2], layout_stride::mapping{shape, strides}); - - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - - if (0 == rank) { - int dst = 1; - for (size_t i = 0; i < a.extent(0); ++i) { - a[i] = i; - } - KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, - dst, 0, MPI_COMM_WORLD); - req.wait(); - } else if (1 == rank) { - int src = 0; - KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, - src, 0, MPI_COMM_WORLD); - req.wait(); - int errs = 0; - for (size_t i = 0; i < a.extent(0); ++i) { - errs += (a[i] != ScalarType(i)); - } - ASSERT_EQ(errs, 0); - } -} - -#endif // KOKKOSCOMM_ENABLE_MDSPAN \ No newline at end of file +#endif // KOKKOSCOMM_ENABLE_MDSPAN From 1766ae5f3405394b08448eed90b932a97399f9e5 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Apr 2024 17:15:19 -0600 Subject: [PATCH 18/34] irecv: test utility to make different kinds of views --- unit_tests/view_builder.hpp | 53 +++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 unit_tests/view_builder.hpp diff --git a/unit_tests/view_builder.hpp b/unit_tests/view_builder.hpp new file mode 100644 index 00000000..57cf31c9 --- /dev/null +++ b/unit_tests/view_builder.hpp @@ -0,0 +1,53 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +struct contig {}; +struct noncontig {}; + +template +struct ViewBuilder; + + +template +struct ViewBuilder { + static auto view(noncontig, int e0) { + + // this is C-style layout, i.e. v(0,0) is next to v(0,1) + Kokkos::View v("", e0, 2); + return Kokkos::subview(v, Kokkos::ALL, 1); // take column 1 + } + + static auto view(contig, int e0) { + return Kokkos::View("", e0); + } +}; + +template +struct ViewBuilder { + static auto view(noncontig, int e0, int e1) { + + Kokkos::View v("", e0, e1, 2); + return Kokkos::subview(v, Kokkos::ALL, Kokkos::ALL, 1); + } + + static auto view(contig, int e0, int e1) { + return Kokkos::View("", e0, e1); + } +}; \ No newline at end of file From f75daab42302837993caf130d13cf96ee2c80ad6 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Apr 2024 17:15:37 -0600 Subject: [PATCH 19/34] view_builder.hpp: formatting --- unit_tests/view_builder.hpp | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/unit_tests/view_builder.hpp b/unit_tests/view_builder.hpp index 57cf31c9..2e43bf7d 100644 --- a/unit_tests/view_builder.hpp +++ b/unit_tests/view_builder.hpp @@ -24,30 +24,25 @@ struct noncontig {}; template struct ViewBuilder; - template struct ViewBuilder { - static auto view(noncontig, int e0) { - - // this is C-style layout, i.e. v(0,0) is next to v(0,1) - Kokkos::View v("", e0, 2); - return Kokkos::subview(v, Kokkos::ALL, 1); // take column 1 - } + static auto view(noncontig, int e0) { + // this is C-style layout, i.e. v(0,0) is next to v(0,1) + Kokkos::View v("", e0, 2); + return Kokkos::subview(v, Kokkos::ALL, 1); // take column 1 + } - static auto view(contig, int e0) { - return Kokkos::View("", e0); - } + static auto view(contig, int e0) { return Kokkos::View("", e0); } }; template struct ViewBuilder { - static auto view(noncontig, int e0, int e1) { - - Kokkos::View v("", e0, e1, 2); - return Kokkos::subview(v, Kokkos::ALL, Kokkos::ALL, 1); - } - - static auto view(contig, int e0, int e1) { - return Kokkos::View("", e0, e1); - } + static auto view(noncontig, int e0, int e1) { + Kokkos::View v("", e0, e1, 2); + return Kokkos::subview(v, Kokkos::ALL, Kokkos::ALL, 1); + } + + static auto view(contig, int e0, int e1) { + return Kokkos::View("", e0, e1); + } }; \ No newline at end of file From b4277375a0076f9e9442aa81399c08ef3e723311 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Apr 2024 17:15:48 -0600 Subject: [PATCH 20/34] irecv: add 2d view tests --- unit_tests/test_isendirecv.cpp | 87 +++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 7 deletions(-) diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_isendirecv.cpp index 058ea05f..61826a7a 100644 --- a/unit_tests/test_isendirecv.cpp +++ b/unit_tests/test_isendirecv.cpp @@ -32,7 +32,7 @@ using ScalarTypes = TYPED_TEST_SUITE(IsendIrecv, ScalarTypes); TYPED_TEST(IsendIrecv, 1D_contig) { - Kokkos::View a("a", 1000); + auto a = ViewBuilder::view(contig{}, 1013); int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -65,14 +65,15 @@ TYPED_TEST(IsendIrecv, 1D_contig) { } TYPED_TEST(IsendIrecv, 1D_noncontig) { - // this is C-style layout, i.e. b(0,0) is next to b(0,1) - Kokkos::View b("a", 10, - 10); auto a = - Kokkos::subview(b, Kokkos::ALL, 2); // take column 2 (non-contiguous) + ViewBuilder::view(noncontig{}, 1013); - int rank; + int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + } if (0 == rank) { int dst = 1; @@ -97,4 +98,76 @@ TYPED_TEST(IsendIrecv, 1D_noncontig) { } } -#endif // KOKKOSCOMM_ENABLE_MDSPAN +TYPED_TEST(IsendIrecv, 2D_contig) { + auto a = + ViewBuilder::view(contig{}, 137, 17); + + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + } + + using Policy = Kokkos::MDRangePolicy>; + Policy policy({0, 0}, {a.extent(0), a.extent(1)}); + + if (0 == rank) { + int dst = 1; + Kokkos::parallel_for( + policy, KOKKOS_LAMBDA(int i, int j) { a(i, j) = i * a.extent(0) + j; }); + KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, + dst, 0, MPI_COMM_WORLD); + req.wait(); + } else if (1 == rank) { + int src = 0; + KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, + src, 0, MPI_COMM_WORLD); + req.wait(); + int errs; + Kokkos::parallel_reduce( + policy, + KOKKOS_LAMBDA(int i, int j, int &lsum) { + lsum += a(i, j) != typename TestFixture::Scalar(i * a.extent(0) + j); + }, + errs); + ASSERT_EQ(errs, 0); + } +} + +TYPED_TEST(IsendIrecv, 2D_noncontig) { + auto a = + ViewBuilder::view(noncontig{}, 137, 17); + + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + } + + using Policy = Kokkos::MDRangePolicy>; + Policy policy({0, 0}, {a.extent(0), a.extent(1)}); + + if (0 == rank) { + int dst = 1; + Kokkos::parallel_for( + policy, KOKKOS_LAMBDA(int i, int j) { a(i, j) = i * a.extent(0) + j; }); + KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, + dst, 0, MPI_COMM_WORLD); + req.wait(); + } else if (1 == rank) { + int src = 0; + KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, + src, 0, MPI_COMM_WORLD); + req.wait(); + int errs; + Kokkos::parallel_reduce( + policy, + KOKKOS_LAMBDA(int i, int j, int &lsum) { + lsum += a(i, j) != typename TestFixture::Scalar(i * a.extent(0) + j); + }, + errs); + ASSERT_EQ(errs, 0); + } +} From 123d36ad6d9f1c9c29681910d9cacf98b732c66e Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 09:24:22 -0600 Subject: [PATCH 21/34] design.rst: fix bullet list formatting --- docs/design.rst | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/docs/design.rst b/docs/design.rst index 185750f9..b1c3ed38 100644 --- a/docs/design.rst +++ b/docs/design.rst @@ -11,19 +11,14 @@ KokkosComm has an analogous concept, `KokkosComm::Req`. There are three consequences -- First, to ensure compatibility with MPI semantics, KokkosComm immediate functions will call the corresponding MPI function before they return. - -- Second, the KokkosComm packing strategy may require that an intermediate view be allocated, and this view needs to have a lifetime at least as long as the communication. -The ``KokkosComm::Req::keep_until_wait`` interface allows the `KokkosComm::Req` to hold those views until ``wait`` is called. - -- Third, for asynchronous receive operations, the packing strategy may require that the buffer provided by the underlying MPI operation be further unpacked. -The ``KokkosComm::Req::call_and_drop_at_wait`` allows the `KokkosComm::Req` to execute (and then drop) callback functors when ``wait`` is called. -For example, `KokkosComm::irecv` uses this functionality to attach an unpacking operation to the `KokkosComm::Req::wait` call. +* First, to ensure compatibility with MPI semantics, KokkosComm immediate functions will call the corresponding MPI function before they return. +* Second, the KokkosComm packing strategy may require that an intermediate view be allocated, and this view needs to have a lifetime at least as long as the communication. The ``KokkosComm::Req::keep_until_wait`` interface allows the `KokkosComm::Req` to hold those views until ``wait`` is called. +* Third, for asynchronous receive operations, the packing strategy may require that the buffer provided by the underlying MPI operation be further unpacked. The ``KokkosComm::Req::call_and_drop_at_wait`` allows the `KokkosComm::Req` to execute (and then drop) callback functors when ``wait`` is called. For example, `KokkosComm::irecv` uses this functionality to attach an unpacking operation to the `KokkosComm::Req::wait` call. Non-contiguous Data ------------------- -- Packer::DeepCopy uses `Kokkos::deep_copy` to handle packing and unpacking of non-contiguous `Kokkos::View`. This requires an intermediate allocation, which only works for Kokkos Views, see `Device Data`_. +* Packer::DeepCopy uses `Kokkos::deep_copy` to handle packing and unpacking of non-contiguous `Kokkos::View`. This requires an intermediate allocation, which only works for Kokkos Views, see `Device Data`_. Device Data ----------- From b9a1b6317f4a3d16b30224a8020f2ef968b7709d Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 09:26:27 -0600 Subject: [PATCH 22/34] core.rst: fix MPI ops table --- docs/api/core.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/api/core.rst b/docs/api/core.rst index 2ad829d3..4cefd946 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -32,10 +32,6 @@ Core * - ``MPI_Reduce`` - ``reduce`` - ✓ - * - MPI_Send - - send - - ✓ - - ✓ Point-to-point -------------- From 4b76cfa0ba8106f0f340dac044ff6bdc124d5822 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 09:39:37 -0600 Subject: [PATCH 23/34] test_isendirecv: factor out common code --- unit_tests/test_isendirecv.cpp | 107 ++++++++++----------------------- 1 file changed, 32 insertions(+), 75 deletions(-) diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_isendirecv.cpp index 61826a7a..58566c21 100644 --- a/unit_tests/test_isendirecv.cpp +++ b/unit_tests/test_isendirecv.cpp @@ -20,6 +20,8 @@ #include "view_builder.hpp" +namespace { + template class IsendIrecv : public testing::Test { public: @@ -31,8 +33,10 @@ using ScalarTypes = Kokkos::complex, int, unsigned, int64_t, size_t>; TYPED_TEST_SUITE(IsendIrecv, ScalarTypes); -TYPED_TEST(IsendIrecv, 1D_contig) { - auto a = ViewBuilder::view(contig{}, 1013); +template +void test_1d(const View1D &a) { + static_assert(View1D::rank == 1, ""); + using Scalar = typename View1D::non_const_value_type; int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -56,51 +60,16 @@ TYPED_TEST(IsendIrecv, 1D_contig) { int errs; Kokkos::parallel_reduce( a.extent(0), - KOKKOS_LAMBDA(const int &i, int &lsum) { - lsum += a(i) != typename TestFixture::Scalar(i); - }, + KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); ASSERT_EQ(errs, 0); } } -TYPED_TEST(IsendIrecv, 1D_noncontig) { - auto a = - ViewBuilder::view(noncontig{}, 1013); - - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size < 2) { - GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; - } - - if (0 == rank) { - int dst = 1; - Kokkos::parallel_for( - a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, - dst, 0, MPI_COMM_WORLD); - req.wait(); - } else if (1 == rank) { - int src = 0; - KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, - src, 0, MPI_COMM_WORLD); - req.wait(); - int errs; - Kokkos::parallel_reduce( - a.extent(0), - KOKKOS_LAMBDA(const int &i, int &lsum) { - lsum += a(i) != typename TestFixture::Scalar(i); - }, - errs); - ASSERT_EQ(errs, 0); - } -} - -TYPED_TEST(IsendIrecv, 2D_contig) { - auto a = - ViewBuilder::view(contig{}, 137, 17); +template +void test_2d(const View2D &a) { + static_assert(View2D::rank == 2, ""); + using Scalar = typename View2D::non_const_value_type; int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -128,46 +97,34 @@ TYPED_TEST(IsendIrecv, 2D_contig) { Kokkos::parallel_reduce( policy, KOKKOS_LAMBDA(int i, int j, int &lsum) { - lsum += a(i, j) != typename TestFixture::Scalar(i * a.extent(0) + j); + lsum += a(i, j) != Scalar(i * a.extent(0) + j); }, errs); ASSERT_EQ(errs, 0); } } -TYPED_TEST(IsendIrecv, 2D_noncontig) { - auto a = - ViewBuilder::view(noncontig{}, 137, 17); +TYPED_TEST(IsendIrecv, 1D_contig) { + auto a = ViewBuilder::view(contig{}, 1013); + test_1d(a); +} - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size < 2) { - GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; - } +TYPED_TEST(IsendIrecv, 1D_noncontig) { + auto a = + ViewBuilder::view(noncontig{}, 1013); + test_1d(a); +} - using Policy = Kokkos::MDRangePolicy>; - Policy policy({0, 0}, {a.extent(0), a.extent(1)}); +TYPED_TEST(IsendIrecv, 2D_contig) { + auto a = + ViewBuilder::view(contig{}, 137, 17); + test_2d(a); +} - if (0 == rank) { - int dst = 1; - Kokkos::parallel_for( - policy, KOKKOS_LAMBDA(int i, int j) { a(i, j) = i * a.extent(0) + j; }); - KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, - dst, 0, MPI_COMM_WORLD); - req.wait(); - } else if (1 == rank) { - int src = 0; - KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, - src, 0, MPI_COMM_WORLD); - req.wait(); - int errs; - Kokkos::parallel_reduce( - policy, - KOKKOS_LAMBDA(int i, int j, int &lsum) { - lsum += a(i, j) != typename TestFixture::Scalar(i * a.extent(0) + j); - }, - errs); - ASSERT_EQ(errs, 0); - } +TYPED_TEST(IsendIrecv, 2D_noncontig) { + auto a = + ViewBuilder::view(noncontig{}, 137, 17); + test_2d(a); } + +} // namespace From c79e9315bfd3a683e4bdbe1904f510bf12c684f8 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 09:41:56 -0600 Subject: [PATCH 24/34] design.rst: drop mdspan-related reference --- docs/design.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design.rst b/docs/design.rst index b1c3ed38..d80d5ede 100644 --- a/docs/design.rst +++ b/docs/design.rst @@ -18,7 +18,7 @@ There are three consequences Non-contiguous Data ------------------- -* Packer::DeepCopy uses `Kokkos::deep_copy` to handle packing and unpacking of non-contiguous `Kokkos::View`. This requires an intermediate allocation, which only works for Kokkos Views, see `Device Data`_. +* Packer::DeepCopy uses `Kokkos::deep_copy` to handle packing and unpacking of non-contiguous `Kokkos::View`. This requires an intermediate allocation. Device Data ----------- From 1a1d64985241d23f8675a595ddd87dadcff3a3e8 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 10:33:04 -0600 Subject: [PATCH 25/34] Restrict KokkosComm::Req interface through pimpl --- src/KokkosComm.hpp | 2 +- src/KokkosComm_request.hpp | 67 ++-------------------- src/impl/KokkosComm_irecv.hpp | 16 +++--- src/impl/KokkosComm_isend.hpp | 6 +- src/impl/KokkosComm_request_impl.hpp | 84 ++++++++++++++++++++++++++++ 5 files changed, 103 insertions(+), 72 deletions(-) create mode 100644 src/impl/KokkosComm_request_impl.hpp diff --git a/src/KokkosComm.hpp b/src/KokkosComm.hpp index e43711fb..49f2c2c1 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm.hpp @@ -50,7 +50,7 @@ void recv(const ExecSpace &space, RecvView &sv, int src, int tag, MPI_Comm comm) template Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) { - return Impl::irecv(space, rv, src, tag, comm); + return Req(Impl::irecv(space, rv, src, tag, comm)); } } // namespace KokkosComm diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp index 3c5fd430..8d10fe9e 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm_request.hpp @@ -18,74 +18,19 @@ #include -#include "KokkosComm_include_mpi.hpp" +#include "KokkosComm_request_impl.hpp" namespace KokkosComm { class Req { - // a type-erased callable. Req uses these to attach callbacks to be executed - // at wait - struct InvokableHolderBase { - virtual ~InvokableHolderBase() = default; - - virtual void operator()() = 0; - }; - template - struct InvokableHolder : InvokableHolderBase { - InvokableHolder(const Fn &f) : f_(f) {} - - virtual void operator()() override { f_(); } - - Fn f_; - }; - - // a type-erased view. Request uses these to keep temporary views alive for - // the lifetime of "Immediate" MPI operations - struct ViewHolderBase { - virtual ~ViewHolderBase() {} - }; - template - struct ViewHolder : ViewHolderBase { - ViewHolder(const V &v) : v_(v) {} - V v_; - }; - - struct Record { - Record() : req_(MPI_REQUEST_NULL) {} - MPI_Request req_; - std::vector> wait_drops_; - std::vector> wait_callbacks_; - }; - public: - Req() : record_(std::make_shared()) {} - - MPI_Request &mpi_req() { return record_->req_; } - - void wait() { - MPI_Wait(&(record_->req_), MPI_STATUS_IGNORE); - record_->wait_drops_ - .clear(); // drop any views we're keeping alive until wait() - for (auto &c : record_->wait_callbacks_) { - (*c)(); - } - record_->wait_callbacks_.clear(); - } - - // keep a reference to this view around until wait() is called - template - void keep_until_wait(const View &v) { - record_->wait_drops_.push_back(std::make_shared>(v)); - } - - template - void call_and_drop_at_wait(const Fn &f) { - record_->wait_callbacks_.push_back( - std::make_shared>(f)); - } + Req() : impl_(std::make_shared()) {} + Req(const std::shared_ptr &impl) : impl_(impl) {} + MPI_Request &mpi_req() { return impl_->mpi_req(); } + void wait() { impl_->wait(); } private: - std::shared_ptr record_; + std::shared_ptr impl_; }; } // namespace KokkosComm \ No newline at end of file diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp index 1f6ae307..03a65f77 100644 --- a/src/impl/KokkosComm_irecv.hpp +++ b/src/impl/KokkosComm_irecv.hpp @@ -16,6 +16,8 @@ #pragma once +#include + #include #include "KokkosComm_pack_traits.hpp" @@ -44,14 +46,14 @@ struct IrecvUnpacker { }; template -Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, - MPI_Comm comm) { +std::shared_ptr irecv(const ExecSpace &space, RecvView &rv, int src, + int tag, MPI_Comm comm) { Kokkos::Tools::pushRegion("KokkosComm::Impl::irecv"); using KCT = KokkosComm::Traits; using KCPT = KokkosComm::PackTraits; - KokkosComm::Req req; + auto req = std::make_shared(); if (KCPT::needs_unpack(rv)) { using Packer = typename KCPT::packer_type; @@ -60,14 +62,14 @@ Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, Args args = Packer::allocate_packed_for(space, "packed", rv); space.fence(); MPI_Irecv(KCT::data_handle(args.view), args.count, args.datatype, src, tag, - comm, &req.mpi_req()); - req.call_and_drop_at_wait(IrecvUnpacker{space, rv, args}); + comm, &req->mpi_req()); + req->call_and_drop_at_wait(IrecvUnpacker{space, rv, args}); } else { using RecvScalar = typename RecvView::value_type; MPI_Irecv(KCT::data_handle(rv), KCT::span(rv), mpi_type_v, src, - tag, comm, &req.mpi_req()); - req.keep_until_wait(rv); + tag, comm, &req->mpi_req()); + req->keep_until_wait(rv); } return req; diff --git a/src/impl/KokkosComm_isend.hpp b/src/impl/KokkosComm_isend.hpp index 1e2a8ba4..94f7a330 100644 --- a/src/impl/KokkosComm_isend.hpp +++ b/src/impl/KokkosComm_isend.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include #include @@ -35,7 +35,7 @@ template (); using KCT = KokkosComm::Traits; using KCPT = KokkosComm::PackTraits; @@ -69,7 +69,7 @@ KokkosComm::Req isend(const ExecSpace &space, const SendView &sv, int dest, int using SendScalar = typename SendView::value_type; mpi_isend_fn(KCT::data_handle(sv), KCT::span(sv), mpi_type_v, dest, tag, comm, &req.mpi_req()); if (KCT::is_reference_counted()) { - req.keep_until_wait(sv); + req->keep_until_wait(sv); } } diff --git a/src/impl/KokkosComm_request_impl.hpp b/src/impl/KokkosComm_request_impl.hpp new file mode 100644 index 00000000..50a92e2f --- /dev/null +++ b/src/impl/KokkosComm_request_impl.hpp @@ -0,0 +1,84 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include "KokkosComm_include_mpi.hpp" + +namespace KokkosComm::Impl { + +class Req { + // a type-erased callable. Req uses these to attach callbacks to be executed + // at wait + struct InvokableHolderBase { + virtual ~InvokableHolderBase() = default; + + virtual void operator()() = 0; + }; + template + struct InvokableHolder : InvokableHolderBase { + InvokableHolder(const Fn &f) : f_(f) {} + + virtual void operator()() override { f_(); } + + Fn f_; + }; + + // a type-erased view. Request uses these to keep temporary views alive for + // the lifetime of "Immediate" MPI operations + struct ViewHolderBase { + virtual ~ViewHolderBase() {} + }; + template + struct ViewHolder : ViewHolderBase { + ViewHolder(const V &v) : v_(v) {} + V v_; + }; + + public: + Req() : req_(MPI_REQUEST_NULL) {} + + MPI_Request &mpi_req() { return req_; } + + void wait() { + MPI_Wait(&req_, MPI_STATUS_IGNORE); + wait_drops_.clear(); // drop any views we're keeping alive until wait() + for (auto &c : wait_callbacks_) { + (*c)(); + } + wait_callbacks_.clear(); + } + + // keep a reference to this view around until wait() is called + template + void keep_until_wait(const View &v) { + wait_drops_.push_back(std::make_shared>(v)); + } + + template + void call_and_drop_at_wait(const Fn &f) { + wait_callbacks_.push_back(std::make_shared>(f)); + } + + private: + MPI_Request req_; + std::vector> wait_drops_; + std::vector> wait_callbacks_; +}; + +} // namespace KokkosComm::Impl \ No newline at end of file From 2348031ab2b2a9ba5612879a4001fb0bb9ef6354 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 10:36:08 -0600 Subject: [PATCH 26/34] newlines --- src/KokkosComm_request.hpp | 2 +- src/impl/KokkosComm_irecv.hpp | 2 +- src/impl/KokkosComm_request_impl.hpp | 2 +- unit_tests/view_builder.hpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp index 8d10fe9e..ce2f11a1 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm_request.hpp @@ -33,4 +33,4 @@ class Req { std::shared_ptr impl_; }; -} // namespace KokkosComm \ No newline at end of file +} // namespace KokkosComm diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp index 03a65f77..b20cb458 100644 --- a/src/impl/KokkosComm_irecv.hpp +++ b/src/impl/KokkosComm_irecv.hpp @@ -75,4 +75,4 @@ std::shared_ptr irecv(const ExecSpace &space, RecvView &rv, int src, Kokkos::Tools::popRegion(); } -} // namespace KokkosComm::Impl \ No newline at end of file +} // namespace KokkosComm::Impl diff --git a/src/impl/KokkosComm_request_impl.hpp b/src/impl/KokkosComm_request_impl.hpp index 50a92e2f..2b7ade04 100644 --- a/src/impl/KokkosComm_request_impl.hpp +++ b/src/impl/KokkosComm_request_impl.hpp @@ -81,4 +81,4 @@ class Req { std::vector> wait_callbacks_; }; -} // namespace KokkosComm::Impl \ No newline at end of file +} // namespace KokkosComm::Impl diff --git a/unit_tests/view_builder.hpp b/unit_tests/view_builder.hpp index 2e43bf7d..67675bdf 100644 --- a/unit_tests/view_builder.hpp +++ b/unit_tests/view_builder.hpp @@ -45,4 +45,4 @@ struct ViewBuilder { static auto view(contig, int e0, int e1) { return Kokkos::View("", e0, e1); } -}; \ No newline at end of file +}; From 61bbc249ed6e0a673faccc8dd3331d9baf1522b5 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 10:49:08 -0600 Subject: [PATCH 27/34] core.rst: remove non-existent Req members --- docs/api/core.rst | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/docs/api/core.rst b/docs/api/core.rst index 4cefd946..ab38bced 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -125,7 +125,7 @@ Related Types .. cpp:class:: KokkosComm::Req - A wrapper around an MPI_Request that can also extend the lifetime of Views. + A communication handle representing an asychronous communication. The communication is not complete until ``wait`` is called. .. cpp:function:: MPI_Request &KokkosComm::Req::mpi_req() @@ -133,16 +133,5 @@ Related Types .. cpp:function:: void KokkosComm::Req::wait() - Call MPI_Wait on the held MPI_Request and drop copies of any previous arguments to Req::keep_until_wait(). + Call MPI_Wait on the held MPI_Request and complete any internal communication-related operations. - .. cpp:function:: template \ - void KokkosComm::Req::keep_until_wait(const View &v) - - Extend the lifetime of v at least until Req::wait() is called. - This is useful to prevent a View from being destroyed during an asynchronous MPI operation. - - .. cpp:function:: template \ - void KokkosComm::Req::call_and_drop_at_wait(const Callable &c) - - Store a copy of ``c``, and invoke ``c()`` when ``wait`` is called. - Destroy the copy of ``c`` afterwards. From b73339caf33ee6b044156105a4f394f86f5aedcb Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 10:49:30 -0600 Subject: [PATCH 28/34] improve comments on Impl::Req --- src/impl/KokkosComm_request_impl.hpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/impl/KokkosComm_request_impl.hpp b/src/impl/KokkosComm_request_impl.hpp index 2b7ade04..8a12f911 100644 --- a/src/impl/KokkosComm_request_impl.hpp +++ b/src/impl/KokkosComm_request_impl.hpp @@ -64,12 +64,20 @@ class Req { wait_callbacks_.clear(); } - // keep a reference to this view around until wait() is called + // Keep a reference to this view around until wait() is called. + // This is used when a managed Kokkos::View is provided to an + // asychronous communication routine, to ensure that view is + // still alive for the entire duration of the routine. template void keep_until_wait(const View &v) { wait_drops_.push_back(std::make_shared>(v)); } + // When wait() is called: execute f() and then let f go out of scope. + // Every stored f is called before any stored f is dropped. + // This function can be used by an unpacking routine to attach some + // unpacking logic to a communication that needs to be executed + // after the underlying MPI operation is done. template void call_and_drop_at_wait(const Fn &f) { wait_callbacks_.push_back(std::make_shared>(f)); From 8090c571c4e012555b6785d690a31167a5c8def2 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 11:17:23 -0600 Subject: [PATCH 29/34] Clarify async / fencing behavior of isend --- docs/api/core.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/api/core.rst b/docs/api/core.rst index ab38bced..f66c00c2 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -40,6 +40,11 @@ Point-to-point Req KokkosComm::isend(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) Wrapper for ``MPI_Isend``, ``MPI_Irsend`` and ``MPI_Issend``. + The communication operation will be inserted into ``space``. + The caller may safely call this function on data previously produced by operations in ``space`` without first fencing ```space```. + + .. warning:: + Even if ``space`` is fenced after the call to this function, the communication operation is not complete until the ``wait`` operation on the returned ``Req`` is called. :param space: The execution space to operate in :param sv: The data to send From 2fbfc59ea42d08c6dd02b4b55f9926d8b326f886 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 8 May 2024 12:04:14 -0600 Subject: [PATCH 30/34] space fencing in requests --- src/impl/KokkosComm_irecv.hpp | 2 +- src/impl/KokkosComm_request_impl.hpp | 41 ++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp index b20cb458..06eadb2e 100644 --- a/src/impl/KokkosComm_irecv.hpp +++ b/src/impl/KokkosComm_irecv.hpp @@ -36,7 +36,6 @@ struct IrecvUnpacker { void operator()() { Kokkos::Tools::pushRegion("KokkosComm::Impl::IrecvUnpacker::operator()"); MpiArgs::packer_type::unpack_into(space_, rv_, args_.view); - space_.fence(); Kokkos::Tools::popRegion(); } @@ -64,6 +63,7 @@ std::shared_ptr irecv(const ExecSpace &space, RecvView &rv, int src, MPI_Irecv(KCT::data_handle(args.view), args.count, args.datatype, src, tag, comm, &req->mpi_req()); req->call_and_drop_at_wait(IrecvUnpacker{space, rv, args}); + req->fence_at_wait(space); } else { using RecvScalar = typename RecvView::value_type; diff --git a/src/impl/KokkosComm_request_impl.hpp b/src/impl/KokkosComm_request_impl.hpp index 8a12f911..b505be4c 100644 --- a/src/impl/KokkosComm_request_impl.hpp +++ b/src/impl/KokkosComm_request_impl.hpp @@ -50,20 +50,46 @@ class Req { V v_; }; + // a type-erased view. Request uses these to keep temporary views alive for + // the lifetime of "Immediate" MPI operations + struct SpaceHolderBase { + virtual ~SpaceHolderBase() {} + + virtual void fence() = 0; + }; + template + struct SpaceHolder : SpaceHolderBase { + SpaceHolder(const ES &es) : es_(es) {} + + virtual void fence() override { es_.fence("KokkosComm::Req::wait()"); } + + ES es_; + }; + public: Req() : req_(MPI_REQUEST_NULL) {} MPI_Request &mpi_req() { return req_; } - void wait() { + void wait_async() { MPI_Wait(&req_, MPI_STATUS_IGNORE); - wait_drops_.clear(); // drop any views we're keeping alive until wait() for (auto &c : wait_callbacks_) { (*c)(); } + + // drop the references to anything that was kept alive until wait + wait_drops_.clear(); wait_callbacks_.clear(); } + void wait() { + wait_async(); + if (wait_fence_) { + wait_fence_->fence(); + } + wait_fence_ = nullptr; + } + // Keep a reference to this view around until wait() is called. // This is used when a managed Kokkos::View is provided to an // asychronous communication routine, to ensure that view is @@ -83,10 +109,21 @@ class Req { wait_callbacks_.push_back(std::make_shared>(f)); } + template + void fence_at_wait(const ExecSpace &space) { + // TODO: only fence once if the same space is provided multiple times + if (wait_fence_) { + Kokkos::abort("Req is already fencing a space!"); + } + + wait_fence_ = std::make_shared>(space); + } + private: MPI_Request req_; std::vector> wait_drops_; std::vector> wait_callbacks_; + std::shared_ptr wait_fence_; }; } // namespace KokkosComm::Impl From dcb918e3747e457ce46f92b70fea8bd964c2fac2 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 10 May 2024 15:53:00 -0600 Subject: [PATCH 31/34] irecv: update wait semantics --- docs/api/core.rst | 18 ++++++++++++++++-- src/KokkosComm_request.hpp | 3 +++ src/impl/KokkosComm_irecv.hpp | 2 -- src/impl/KokkosComm_request_impl.hpp | 22 ++++++++++------------ 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/docs/api/core.rst b/docs/api/core.rst index f66c00c2..4cf9f305 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -130,7 +130,9 @@ Related Types .. cpp:class:: KokkosComm::Req - A communication handle representing an asychronous communication. The communication is not complete until ``wait`` is called. + A communication handle representing an asychronous communication and an associated Kokkos execution space instance. The handle is scoped to the space instance used in the communication call that produced it. + + .. cpp:function:: MPI_Request &KokkosComm::Req::mpi_req() @@ -138,5 +140,17 @@ Related Types .. cpp:function:: void KokkosComm::Req::wait() - Call MPI_Wait on the held MPI_Request and complete any internal communication-related operations. + Require that the communication be completed before any further work can be exected in the associated execution space instance. May or may not fence. Consider the following example. + + .. code-block:: c++ + :linenos: + + using KC = KokkosComm; + Kokkos::parallel_for(space, ...); + auto req = KC::isend(space, ...); // isend 1 + Kokkos::parallel_for(space, ...); // runs concurrently with isend 1, does not touch send view + req.wait(); // blocks space until isend 1 is complete. May or may not fence. + Kokkos::parallel_for(space, ...); // safe to overwrite the send buffer + space.fence(); // wait for all to complete + Here, ``parallel_for`` on line 6 can overwrite the send buffer because ``req.wait()`` means that isend 1 must be done before additional work can be done in ``space``. This MAY be achieved by an internal call to ``space.fence()``, but some other mechanism may be used. If the host thread wants to be sure that the communication is done, it must separately call ``space.fence()``. \ No newline at end of file diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp index ce2f11a1..409a591a 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm_request.hpp @@ -27,6 +27,9 @@ class Req { Req() : impl_(std::make_shared()) {} Req(const std::shared_ptr &impl) : impl_(impl) {} MPI_Request &mpi_req() { return impl_->mpi_req(); } + + // The communication must be done before the associated Kokkos execution space + // can do any further work. The host may or may not be fenced. void wait() { impl_->wait(); } private: diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp index 06eadb2e..b8b87d9f 100644 --- a/src/impl/KokkosComm_irecv.hpp +++ b/src/impl/KokkosComm_irecv.hpp @@ -63,8 +63,6 @@ std::shared_ptr irecv(const ExecSpace &space, RecvView &rv, int src, MPI_Irecv(KCT::data_handle(args.view), args.count, args.datatype, src, tag, comm, &req->mpi_req()); req->call_and_drop_at_wait(IrecvUnpacker{space, rv, args}); - req->fence_at_wait(space); - } else { using RecvScalar = typename RecvView::value_type; MPI_Irecv(KCT::data_handle(rv), KCT::span(rv), mpi_type_v, src, diff --git a/src/impl/KokkosComm_request_impl.hpp b/src/impl/KokkosComm_request_impl.hpp index b505be4c..df5a6096 100644 --- a/src/impl/KokkosComm_request_impl.hpp +++ b/src/impl/KokkosComm_request_impl.hpp @@ -71,7 +71,10 @@ class Req { MPI_Request &mpi_req() { return req_; } - void wait_async() { + // The communication must be done before the held execution space can do any + // further work. For MPI, this is achieved by blocking the host thread and + // fencing the execution space. + void wait() { MPI_Wait(&req_, MPI_STATUS_IGNORE); for (auto &c : wait_callbacks_) { (*c)(); @@ -80,14 +83,11 @@ class Req { // drop the references to anything that was kept alive until wait wait_drops_.clear(); wait_callbacks_.clear(); - } - void wait() { - wait_async(); - if (wait_fence_) { - wait_fence_->fence(); + if (exec_space_) { + exec_space_->fence(); } - wait_fence_ = nullptr; + exec_space_ = nullptr; } // Keep a reference to this view around until wait() is called. @@ -111,19 +111,17 @@ class Req { template void fence_at_wait(const ExecSpace &space) { - // TODO: only fence once if the same space is provided multiple times - if (wait_fence_) { + if (exec_space_) { Kokkos::abort("Req is already fencing a space!"); } - - wait_fence_ = std::make_shared>(space); + exec_space_ = std::make_shared>(space); } private: MPI_Request req_; std::vector> wait_drops_; std::vector> wait_callbacks_; - std::shared_ptr wait_fence_; + std::shared_ptr exec_space_; }; } // namespace KokkosComm::Impl From 442458d8ae4b25e7154c98165c0262fc0153d3d7 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 10 May 2024 15:55:32 -0600 Subject: [PATCH 32/34] irecv: clang-format-14 --- src/KokkosComm.hpp | 5 ++--- src/impl/KokkosComm_irecv.hpp | 12 ++++-------- src/impl/KokkosComm_packer.hpp | 8 ++++---- unit_tests/test_isendirecv.cpp | 34 ++++++++++------------------------ unit_tests/view_builder.hpp | 4 +--- 5 files changed, 21 insertions(+), 42 deletions(-) diff --git a/src/KokkosComm.hpp b/src/KokkosComm.hpp index 49f2c2c1..32103e71 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm.hpp @@ -34,7 +34,7 @@ namespace KokkosComm { template Req isend(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { - return Impl::isend(space, sv, dest, tag, comm); + return Req(Impl::isend(space, sv, dest, tag, comm)); } template @@ -48,8 +48,7 @@ void recv(const ExecSpace &space, RecvView &sv, int src, int tag, MPI_Comm comm) } template -Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, - MPI_Comm comm) { +Req irecv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) { return Req(Impl::irecv(space, rv, src, tag, comm)); } diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp index b8b87d9f..8ba60421 100644 --- a/src/impl/KokkosComm_irecv.hpp +++ b/src/impl/KokkosComm_irecv.hpp @@ -30,8 +30,7 @@ namespace KokkosComm::Impl { template struct IrecvUnpacker { - IrecvUnpacker(const ExecSpace &space, RecvView &rv, MpiArgs &args) - : space_(space), rv_(rv), args_(args) {} + IrecvUnpacker(const ExecSpace &space, RecvView &rv, MpiArgs &args) : space_(space), rv_(rv), args_(args) {} void operator()() { Kokkos::Tools::pushRegion("KokkosComm::Impl::IrecvUnpacker::operator()"); @@ -45,8 +44,7 @@ struct IrecvUnpacker { }; template -std::shared_ptr irecv(const ExecSpace &space, RecvView &rv, int src, - int tag, MPI_Comm comm) { +std::shared_ptr irecv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) { Kokkos::Tools::pushRegion("KokkosComm::Impl::irecv"); using KCT = KokkosComm::Traits; @@ -60,13 +58,11 @@ std::shared_ptr irecv(const ExecSpace &space, RecvView &rv, int src, Args args = Packer::allocate_packed_for(space, "packed", rv); space.fence(); - MPI_Irecv(KCT::data_handle(args.view), args.count, args.datatype, src, tag, - comm, &req->mpi_req()); + MPI_Irecv(KCT::data_handle(args.view), args.count, args.datatype, src, tag, comm, &req->mpi_req()); req->call_and_drop_at_wait(IrecvUnpacker{space, rv, args}); } else { using RecvScalar = typename RecvView::value_type; - MPI_Irecv(KCT::data_handle(rv), KCT::span(rv), mpi_type_v, src, - tag, comm, &req->mpi_req()); + MPI_Irecv(KCT::data_handle(rv), KCT::span(rv), mpi_type_v, src, tag, comm, &req->mpi_req()); req->keep_until_wait(rv); } return req; diff --git a/src/impl/KokkosComm_packer.hpp b/src/impl/KokkosComm_packer.hpp index 74fe1eef..1f4c4e81 100644 --- a/src/impl/KokkosComm_packer.hpp +++ b/src/impl/KokkosComm_packer.hpp @@ -25,8 +25,7 @@ namespace Packer { template struct MpiArgs { - using packer_type = - Packer; // the type of the packer that produced these arguments + using packer_type = Packer; // the type of the packer that produced these arguments View view; MPI_Datatype datatype; @@ -40,7 +39,8 @@ template struct DeepCopy { using non_const_packed_view_type = Kokkos::View; - using args_type = MpiArgs; + + using args_type = MpiArgs>; template static args_type allocate_packed_for(const ExecSpace &space, const std::string &label, const View &src) { @@ -74,7 +74,7 @@ struct DeepCopy { template struct MpiDatatype { using non_const_packed_view_type = View; - using args_type = MpiArgs>; + using args_type = MpiArgs>; // don't actually allocate - return the provided view, but with // a datatype that describes the data in the view diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_isendirecv.cpp index 58566c21..eb9038f9 100644 --- a/unit_tests/test_isendirecv.cpp +++ b/unit_tests/test_isendirecv.cpp @@ -29,8 +29,7 @@ class IsendIrecv : public testing::Test { }; using ScalarTypes = - ::testing::Types, - Kokkos::complex, int, unsigned, int64_t, size_t>; + ::testing::Types, Kokkos::complex, int, unsigned, int64_t, size_t>; TYPED_TEST_SUITE(IsendIrecv, ScalarTypes); template @@ -49,19 +48,15 @@ void test_1d(const View1D &a) { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, - dst, 0, MPI_COMM_WORLD); + KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); req.wait(); } else if (1 == rank) { int src = 0; - KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, - src, 0, MPI_COMM_WORLD); + KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); req.wait(); int errs; Kokkos::parallel_reduce( - a.extent(0), - KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, - errs); + a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); ASSERT_EQ(errs, 0); } } @@ -85,21 +80,15 @@ void test_2d(const View2D &a) { int dst = 1; Kokkos::parallel_for( policy, KOKKOS_LAMBDA(int i, int j) { a(i, j) = i * a.extent(0) + j; }); - KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, - dst, 0, MPI_COMM_WORLD); + KokkosComm::Req req = KokkosComm::isend(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); req.wait(); } else if (1 == rank) { int src = 0; - KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, - src, 0, MPI_COMM_WORLD); + KokkosComm::Req req = KokkosComm::irecv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); req.wait(); int errs; Kokkos::parallel_reduce( - policy, - KOKKOS_LAMBDA(int i, int j, int &lsum) { - lsum += a(i, j) != Scalar(i * a.extent(0) + j); - }, - errs); + policy, KOKKOS_LAMBDA(int i, int j, int &lsum) { lsum += a(i, j) != Scalar(i * a.extent(0) + j); }, errs); ASSERT_EQ(errs, 0); } } @@ -110,20 +99,17 @@ TYPED_TEST(IsendIrecv, 1D_contig) { } TYPED_TEST(IsendIrecv, 1D_noncontig) { - auto a = - ViewBuilder::view(noncontig{}, 1013); + auto a = ViewBuilder::view(noncontig{}, 1013); test_1d(a); } TYPED_TEST(IsendIrecv, 2D_contig) { - auto a = - ViewBuilder::view(contig{}, 137, 17); + auto a = ViewBuilder::view(contig{}, 137, 17); test_2d(a); } TYPED_TEST(IsendIrecv, 2D_noncontig) { - auto a = - ViewBuilder::view(noncontig{}, 137, 17); + auto a = ViewBuilder::view(noncontig{}, 137, 17); test_2d(a); } diff --git a/unit_tests/view_builder.hpp b/unit_tests/view_builder.hpp index 67675bdf..934587a4 100644 --- a/unit_tests/view_builder.hpp +++ b/unit_tests/view_builder.hpp @@ -42,7 +42,5 @@ struct ViewBuilder { return Kokkos::subview(v, Kokkos::ALL, Kokkos::ALL, 1); } - static auto view(contig, int e0, int e1) { - return Kokkos::View("", e0, e1); - } + static auto view(contig, int e0, int e1) { return Kokkos::View("", e0, e1); } }; From 7f473976b9572ed5a41cb76b7e2522b44c8b2aa1 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 10 May 2024 16:11:04 -0600 Subject: [PATCH 33/34] irecv: fix some goofed types --- src/impl/KokkosComm_irecv.hpp | 5 ++++- src/impl/KokkosComm_isend.hpp | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp index 8ba60421..5e43a225 100644 --- a/src/impl/KokkosComm_irecv.hpp +++ b/src/impl/KokkosComm_irecv.hpp @@ -57,9 +57,12 @@ std::shared_ptr irecv(const ExecSpace &space, RecvView &rv, int src, int ta using Args = typename Packer::args_type; Args args = Packer::allocate_packed_for(space, "packed", rv); - space.fence(); + space.fence(); // make sure allocation is done MPI_Irecv(KCT::data_handle(args.view), args.count, args.datatype, src, tag, comm, &req->mpi_req()); req->call_and_drop_at_wait(IrecvUnpacker{space, rv, args}); + // req.wait() promises that communication is done before any future work put into space after wait can proceed. + // For MPI, wait uses MPI_Wait to make sure the communication is done, which blocks the host, thereby preventing + // later work from being submitted to space, so no space fence is needed at wait. } else { using RecvScalar = typename RecvView::value_type; MPI_Irecv(KCT::data_handle(rv), KCT::span(rv), mpi_type_v, src, tag, comm, &req->mpi_req()); diff --git a/src/impl/KokkosComm_isend.hpp b/src/impl/KokkosComm_isend.hpp index 94f7a330..2ab4db28 100644 --- a/src/impl/KokkosComm_isend.hpp +++ b/src/impl/KokkosComm_isend.hpp @@ -63,11 +63,11 @@ KokkosComm::Req isend(const ExecSpace &space, const SendView &sv, int dest, int MpiArgs args = Packer::pack(space, sv); space.fence(); - mpi_isend_fn(KCT::data_handle(args.view), args.count, args.datatype, dest, tag, comm, &req.mpi_req()); - req.keep_until_wait(args.view); + mpi_isend_fn(KCT::data_handle(args.view), args.count, args.datatype, dest, tag, comm, &req->mpi_req()); + req->keep_until_wait(args.view); } else { using SendScalar = typename SendView::value_type; - mpi_isend_fn(KCT::data_handle(sv), KCT::span(sv), mpi_type_v, dest, tag, comm, &req.mpi_req()); + mpi_isend_fn(KCT::data_handle(sv), KCT::span(sv), mpi_type_v, dest, tag, comm, &req->mpi_req()); if (KCT::is_reference_counted()) { req->keep_until_wait(sv); } From 7a404e2279ef9a40af9cadc9adb214ebb51f3536 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 10 May 2024 16:15:49 -0600 Subject: [PATCH 34/34] irecv: docs --- docs/api/core.rst | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/docs/api/core.rst b/docs/api/core.rst index 4cf9f305..2bb7a475 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -36,6 +36,25 @@ Core Point-to-point -------------- +.. cpp:function:: template \ + Req KokkosComm::irecv(const ExecSpace &space, const RecvView &rv, int src, int tag, MPI_Comm comm) + + Wrapper for ``MPI_Irecv``. + The communication operation will be inserted into ``space``. + The caller may safely call this function on data previously produced by operations in ``space`` without first fencing ```space```. + + .. warning:: + Even if ``space`` is fenced after the call to this function, the communication operation is not complete until the ``wait`` operation on the returned ``Req`` is called. + + :param space: The execution space to operate in + :param rv: The view to recveive data into + :param src: the source rank + :param tag: the MPI tag + :param comm: the MPI communicator + :tparam RecvView: The type of the received-to Kokkos::View + :tparam ExecSpace: A Kokkos execution space to operate in + :returns: A KokkosComm::Req representing the asynchronous communication and any lifetime-extended views, associated with the provided execution space instance. + .. cpp:function:: template \ Req KokkosComm::isend(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) @@ -54,7 +73,7 @@ Point-to-point :tparam SendMode: A CommMode_ to use. If unspecified, defaults to a synchronous ``MPI_Issend`` if ``KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE`` is defined, otherwise defaults to a standard ``MPI_Isend``. :tparam SendView: A Kokkos::View to send :tparam ExecSpace: A Kokkos execution space to operate in - :returns: A KokkosComm::Req representing the asynchronous communication and any lifetime-extended views. + :returns: A KokkosComm::Req representing the asynchronous communication and any lifetime-extended views, associated with the provided execution space instance. .. cpp:function:: template \ void KokkosComm::send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm)