diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index e0bef50d..e45ed897 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -34,6 +34,8 @@ unset(BENCHMARK_ENABLE_TESTING) add_executable(perf_test-main test_main.cpp test_sendrecv.cpp test_2dhalo.cpp + test_osu_latency_sendrecv.cpp + test_osu_latency_isendirecv.cpp ) if(KOKKOSCOMM_ENABLE_TESTS) kokkoscomm_add_cxx_flags(TARGET perf_test-main) diff --git a/perf_tests/test_osu_latency_isendirecv.cpp b/perf_tests/test_osu_latency_isendirecv.cpp new file mode 100644 index 00000000..645708d4 --- /dev/null +++ b/perf_tests/test_osu_latency_isendirecv.cpp @@ -0,0 +1,94 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +// Adapted from the OSU Benchmarks +// Copyright (c) 2002-2024 the Network-Based Computing Laboratory +// (NBCL), The Ohio State University. 
+
+#include "test_utils.hpp"
+#include "KokkosComm.hpp"
+
+// One step of the KokkosComm non-blocking latency test.
+// Rank 0 starts an isend of `v` to rank 1 with tag 1 and waits on the returned
+// request; rank 1 starts the matching irecv from rank 0 and waits on it. Any
+// other rank does nothing. The unnamed benchmark::State parameter is unused.
+template <typename Space, typename View>
+void osu_latency_Kokkos_Comm_isendirecv(benchmark::State &, MPI_Comm comm, const Space &space, int rank,
+                                        const View &v) {
+  if (rank == 0) {
+    KokkosComm::Req sendreq = KokkosComm::isend(space, v, 1, 1, comm);
+    sendreq.wait();
+  } else if (rank == 1) {
+    KokkosComm::Req recvreq = KokkosComm::irecv(v, 0, 1, comm);
+    recvreq.wait();
+  }
+}
+
+// One step of the raw-MPI non-blocking latency test.
+// Every rank first enters MPI_Barrier(comm). Rank 0 then posts an MPI_Irecv
+// from rank 1 (tag 0) into `v` and waits on it; rank 1 posts the matching
+// MPI_Isend to rank 0 and waits on it. Any other rank only takes part in the
+// barrier.
+// NOTE(review): the direction (1 -> 0), the tag and the barrier all differ
+// from osu_latency_Kokkos_Comm_isendirecv above; if do_iteration times the
+// whole call, the barrier is charged to the MPI numbers only - confirm this is
+// intended.
+// NOTE(review): the template argument of mpi_type was lost in this copy of the
+// patch; the view's value_type is assumed - confirm.
+template <typename View>
+void osu_latency_MPI_isendirecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) {
+  MPI_Barrier(comm);
+  if (rank == 0) {
+    // Each request lives only in the branch that actually uses it.
+    MPI_Request recvreq;
+    MPI_Irecv(v.data(), v.size(), KokkosComm::Impl::mpi_type<typename View::value_type>(), 1, 0, comm, &recvreq);
+    MPI_Wait(&recvreq, MPI_STATUS_IGNORE);
+  } else if (rank == 1) {
+    MPI_Request sendreq;
+    MPI_Isend(v.data(), v.size(), KokkosComm::Impl::mpi_type<typename View::value_type>(), 0, 0, comm, &sendreq);
+    MPI_Wait(&sendreq, MPI_STATUS_IGNORE);
+  }
+}
+
+// Benchmark entry point for the KokkosComm isend/irecv variant.
+// Reports an error and returns when MPI_COMM_WORLD does not have exactly two
+// ranks. Otherwise it allocates a view with state.range(0) elements and hands
+// osu_latency_Kokkos_Comm_isendirecv to do_iteration once per benchmark
+// iteration. After the loop the "bytes" counter is set to twice the view's
+// element count.
+void benchmark_osu_latency_KokkosComm_isendirecv(benchmark::State &state) {
+  int rank = 0;
+  int size = 0;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  if (size != 2) {
+    state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks");
+    // Nothing to measure: leave before allocating the view or setting counters.
+    return;
+  }
+
+  auto space = Kokkos::DefaultExecutionSpace();
+  // NOTE(review): the view's template arguments were lost in this copy of the
+  // patch; char elements are assumed because the "bytes" counter below is
+  // derived from a.size() without a sizeof factor - confirm.
+  using view_type = Kokkos::View<char *>;
+  view_type a("A", state.range(0));
+
+  while (state.KeepRunning()) {
+    do_iteration(state, MPI_COMM_WORLD, osu_latency_Kokkos_Comm_isendirecv<Kokkos::DefaultExecutionSpace, view_type>,
+                 space, rank, a);
+  }
+  state.counters["bytes"] = a.size() * 2;
+}
+
+// Benchmark entry point for the raw-MPI isend/irecv variant.
+// Same structure as the KokkosComm entry point, but no execution space is
+// created and osu_latency_MPI_isendirecv is the function handed to
+// do_iteration.
+void benchmark_osu_latency_MPI_isendirecv(benchmark::State &state) {
+  int rank = 0;
+  int size = 0;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  if (size != 2) {
+    state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks");
+    // Nothing to measure: leave before allocating the view or setting counters.
+    return;
+  }
+
+  // NOTE(review): element type assumed to be char, see the "bytes" counter - confirm.
+  using view_type = Kokkos::View<char *>;
+  view_type a("A", state.range(0));
+
+  while (state.KeepRunning()) {
+    do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_isendirecv<view_type>, rank, a);
+  }
+  state.counters["bytes"] = a.size() * 2;
+}
+
+// Register both isend/irecv variants with manual timing, results reported in
+// microseconds, and one range argument swept from 1 to 1000 with a multiplier
+// of 2. The benchmark functions read that argument through state.range(0) and
+// use it as the view extent.
+BENCHMARK(benchmark_osu_latency_KokkosComm_isendirecv)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond)
+    ->RangeMultiplier(2)
+    ->Range(1, 1000);
+BENCHMARK(benchmark_osu_latency_MPI_isendirecv)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond)
+    ->RangeMultiplier(2)
+    ->Range(1, 1000);
diff --git a/perf_tests/test_osu_latency_sendrecv.cpp b/perf_tests/test_osu_latency_sendrecv.cpp
new file mode 100644
index 00000000..5c9be783
--- /dev/null
+++ b/perf_tests/test_osu_latency_sendrecv.cpp
@@ -0,0 +1,89 @@
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 4.0
+// Copyright (2022) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
+// See https://kokkos.org/LICENSE for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//@HEADER
+
+// Adapted from the OSU Benchmarks
+// Copyright (c) 2002-2024 the Network-Based Computing Laboratory
+// (NBCL), The Ohio State University.
+
+#include "test_utils.hpp"
+#include "KokkosComm.hpp"
+
+// One step of the KokkosComm blocking latency test.
+// Rank 0 sends `v` to rank 1 with tag 0 through KokkosComm::send; rank 1
+// receives it from rank 0 through KokkosComm::recv. Any other rank does
+// nothing. The unnamed benchmark::State parameter is unused.
+template <typename Space, typename View>
+void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, const View &v) {
+  if (rank == 0) {
+    KokkosComm::send(space, v, 1, 0, comm);
+  } else if (rank == 1) {
+    KokkosComm::recv(space, v, 0, 0, comm);
+  }
+}
+
+// One step of the raw-MPI blocking latency test.
+// Every rank first enters MPI_Barrier(comm). Rank 0 then receives into `v`
+// from rank 1 (tag 0) with MPI_Recv; rank 1 sends `v` to rank 0 with MPI_Send.
+// Any other rank only takes part in the barrier.
+// NOTE(review): the direction (1 -> 0) and the barrier differ from
+// osu_latency_Kokkos_Comm_sendrecv above; if do_iteration times the whole
+// call, the barrier is charged to the MPI numbers only - confirm this is
+// intended.
+// NOTE(review): the template argument of mpi_type was lost in this copy of the
+// patch; the view's value_type is assumed - confirm.
+template <typename View>
+void osu_latency_MPI_sendrecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) {
+  MPI_Barrier(comm);
+  if (rank == 0) {
+    MPI_Recv(v.data(), v.size(), KokkosComm::Impl::mpi_type<typename View::value_type>(), 1, 0, comm,
+             MPI_STATUS_IGNORE);
+  } else if (rank == 1) {
+    MPI_Send(v.data(), v.size(), KokkosComm::Impl::mpi_type<typename View::value_type>(), 0, 0, comm);
+  }
+}
+
+// Benchmark entry point for the KokkosComm send/recv variant.
+// Reports an error and returns when MPI_COMM_WORLD does not have exactly two
+// ranks. Otherwise it allocates a view with state.range(0) elements and hands
+// osu_latency_Kokkos_Comm_sendrecv to do_iteration once per benchmark
+// iteration. After the loop the "bytes" counter is set to twice the view's
+// element count.
+void benchmark_osu_latency_KokkosComm_sendrecv(benchmark::State &state) {
+  int rank = 0;
+  int size = 0;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  if (size != 2) {
+    state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks");
+    // Nothing to measure: leave before allocating the view or setting counters.
+    return;
+  }
+
+  auto space = Kokkos::DefaultExecutionSpace();
+  // NOTE(review): the view's template arguments were lost in this copy of the
+  // patch; char elements are assumed because the "bytes" counter below is
+  // derived from a.size() without a sizeof factor - confirm.
+  using view_type = Kokkos::View<char *>;
+  view_type a("A", state.range(0));
+
+  while (state.KeepRunning()) {
+    do_iteration(state, MPI_COMM_WORLD, osu_latency_Kokkos_Comm_sendrecv<Kokkos::DefaultExecutionSpace, view_type>,
+                 space, rank, a);
+  }
+  state.counters["bytes"] = a.size() * 2;
+}
+
+// Benchmark entry point for the raw-MPI send/recv variant.
+// Same structure as the KokkosComm entry point, but no execution space is
+// created and osu_latency_MPI_sendrecv is the function handed to do_iteration.
+void benchmark_osu_latency_MPI_sendrecv(benchmark::State &state) {
+  int rank = 0;
+  int size = 0;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  if (size != 2) {
+    state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks");
+    // Nothing to measure: leave before allocating the view or setting counters.
+    return;
+  }
+
+  // NOTE(review): element type assumed to be char, see the "bytes" counter - confirm.
+  using view_type = Kokkos::View<char *>;
+  view_type a("A", state.range(0));
+
+  while (state.KeepRunning()) {
+    do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_sendrecv<view_type>, rank, a);
+  }
+  state.counters["bytes"] = a.size() * 2;
+}
+
+// Register both send/recv variants with manual timing, results reported in
+// microseconds, and one range argument swept from 1 to 1000 with a multiplier
+// of 2; the benchmark functions use that argument as the view extent.
+BENCHMARK(benchmark_osu_latency_KokkosComm_sendrecv)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond)
+    ->RangeMultiplier(2)
+    ->Range(1, 1000);
+BENCHMARK(benchmark_osu_latency_MPI_sendrecv)
+    ->UseManualTime()
+    ->Unit(benchmark::kMicrosecond)
+    ->RangeMultiplier(2)
+    ->Range(1, 1000);