From eeb4aecdef5c216f57fe98e9fa3609f608a85949 Mon Sep 17 00:00:00 2001 From: nicoleavans Date: Wed, 26 Jun 2024 10:29:38 -0600 Subject: [PATCH 1/5] add osu latency tests --- perf_tests/CMakeLists.txt | 2 + perf_tests/test_osu_latency_isendirecv.cpp | 99 ++++++++++++++++++++++ perf_tests/test_osu_latency_sendrecv.cpp | 89 +++++++++++++++++++ 3 files changed, 190 insertions(+) create mode 100644 perf_tests/test_osu_latency_isendirecv.cpp create mode 100644 perf_tests/test_osu_latency_sendrecv.cpp diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index e0bef50d..e45ed897 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -34,6 +34,8 @@ unset(BENCHMARK_ENABLE_TESTING) add_executable(perf_test-main test_main.cpp test_sendrecv.cpp test_2dhalo.cpp + test_osu_latency_sendrecv.cpp + test_osu_latency_isendirecv.cpp ) if(KOKKOSCOMM_ENABLE_TESTS) kokkoscomm_add_cxx_flags(TARGET perf_test-main) diff --git a/perf_tests/test_osu_latency_isendirecv.cpp b/perf_tests/test_osu_latency_isendirecv.cpp new file mode 100644 index 00000000..900cf810 --- /dev/null +++ b/perf_tests/test_osu_latency_isendirecv.cpp @@ -0,0 +1,99 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +// Adapted from the OSU Benchmarks +// Copyright (c) 2002-2024 the Network-Based Computing Laboratory +// (NBCL), The Ohio State University. + +#include "test_utils.hpp" +#include "KokkosComm.hpp" + +template +void osu_latency_Kokkos_Comm_isendirecv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, + const View &v) { + if (rank == 0) { + KokkosComm::Req sendreq = KokkosComm::isend(space, v, 1, 1, comm); + sendreq.wait(); + } else if (rank == 1) { + KokkosComm::Req recvreq = KokkosComm::irecv(v, 0, 1, comm); + recvreq.wait(); + } +} + +template +void osu_latency_MPI_isendirecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { + MPI_Barrier(comm); + MPI_Request sendreq, recvreq; + if (rank == 0) { + MPI_Irecv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, &recvreq); + MPI_Wait(&recvreq, MPI_STATUS_IGNORE); + } else if (rank == 1) { + MPI_Isend(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm, &sendreq); + MPI_Wait(&sendreq, MPI_STATUS_IGNORE); + } +} + +void benchmark_osu_latency_KokkosComm_isendirecv(benchmark::State &state) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size != 2) { + state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); + } + + auto space = Kokkos::DefaultExecutionSpace(); + using view_type = Kokkos::View; + view_type a("", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, MPI_COMM_WORLD, osu_latency_Kokkos_Comm_isendirecv, + space, rank, a); + } + state.counters["bytes"] = a.size() * 2; +} + +void benchmark_osu_latency_MPI_isendirecv(benchmark::State &state) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size != 2) { + state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks"); + } + + using view_type = Kokkos::View; + view_type a("", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_isendirecv, rank, a); + } + state.counters["bytes"] = a.size() * 2; +} + +// BENCHMARK(benchmark_osu_latency_KokkosComm_isendirecv)->UseManualTime()->Unit(benchmark::kMicrosecond)->RangeMultiplier(2)->Range(1, +// 32 * 1024 * 1024); +// BENCHMARK(benchmark_osu_latency_MPI_isendirecv)->UseManualTime()->Unit(benchmark::kMicrosecond)->RangeMultiplier(2)->Range(1, +// 32 * 1024 * 1024); + +BENCHMARK(benchmark_osu_latency_KokkosComm_isendirecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(2) + ->Range(1, 1024); +BENCHMARK(benchmark_osu_latency_MPI_isendirecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(2) + ->Range(1, 1024); \ No newline at end of file diff --git a/perf_tests/test_osu_latency_sendrecv.cpp b/perf_tests/test_osu_latency_sendrecv.cpp new file mode 100644 index 00000000..fa2d03be --- /dev/null +++ b/perf_tests/test_osu_latency_sendrecv.cpp @@ -0,0 +1,89 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +// Adapted from the OSU Benchmarks +// Copyright (c) 2002-2024 the Network-Based Computing Laboratory +// (NBCL), The Ohio State University. + +#include "test_utils.hpp" +#include "KokkosComm.hpp" + +template +void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, const View &v) { + if (rank == 0) { + KokkosComm::send(space, v, 1, 0, comm); + } else if (rank == 1) { + KokkosComm::recv(space, v, 0, 0, comm); + } +} + +template +void osu_latency_MPI_sendrecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { + MPI_Barrier(comm); + if (rank == 0) { + MPI_Recv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, + MPI_STATUS_IGNORE); + } else if (rank == 1) { + MPI_Send(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm); + } +} + +void benchmark_osu_latency_KokkosComm_sendrecv(benchmark::State &state) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size != 2) { + state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); + } + + auto space = Kokkos::DefaultExecutionSpace(); + using view_type = Kokkos::View; + view_type a("", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, MPI_COMM_WORLD, osu_latency_Kokkos_Comm_sendrecv, + space, rank, a); + } + state.counters["bytes"] = a.size() * 2; +} + +void benchmark_osu_latency_MPI_sendrecv(benchmark::State &state) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size != 2) { + state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks"); + } + + using view_type = Kokkos::View; + view_type a("", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_sendrecv, rank, a); + } + state.counters["bytes"] = a.size() * 2; +} + +BENCHMARK(benchmark_osu_latency_KokkosComm_sendrecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(2) + ->Range(1, 32 * 1024 * 1024); +BENCHMARK(benchmark_osu_latency_MPI_sendrecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(2) + ->Range(1, 32 * 1024 * 1024); \ No newline at end of file From a0971df866b964bc89c5c6dc71a527473363af05 Mon Sep 17 00:00:00 2001 From: nicoleavans Date: Wed, 26 Jun 2024 10:34:18 -0600 Subject: [PATCH 2/5] remove comment, reduce testing range --- perf_tests/test_osu_latency_isendirecv.cpp | 9 ++------- perf_tests/test_osu_latency_sendrecv.cpp | 4 ++-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/perf_tests/test_osu_latency_isendirecv.cpp b/perf_tests/test_osu_latency_isendirecv.cpp index 900cf810..46b7f030 100644 --- a/perf_tests/test_osu_latency_isendirecv.cpp +++ b/perf_tests/test_osu_latency_isendirecv.cpp @@ -82,18 +82,13 @@ void benchmark_osu_latency_MPI_isendirecv(benchmark::State &state) { state.counters["bytes"] = a.size() * 2; } -// BENCHMARK(benchmark_osu_latency_KokkosComm_isendirecv)->UseManualTime()->Unit(benchmark::kMicrosecond)->RangeMultiplier(2)->Range(1, -// 32 * 1024 * 1024); -// BENCHMARK(benchmark_osu_latency_MPI_isendirecv)->UseManualTime()->Unit(benchmark::kMicrosecond)->RangeMultiplier(2)->Range(1, -// 32 * 1024 * 1024); - BENCHMARK(benchmark_osu_latency_KokkosComm_isendirecv) ->UseManualTime() ->Unit(benchmark::kMicrosecond) ->RangeMultiplier(2) - ->Range(1, 1024); + ->Range(1, 1000); BENCHMARK(benchmark_osu_latency_MPI_isendirecv) ->UseManualTime() ->Unit(benchmark::kMicrosecond) ->RangeMultiplier(2) - ->Range(1, 1024); \ No newline at end of file + ->Range(1, 1000); \ No newline at end of file diff --git a/perf_tests/test_osu_latency_sendrecv.cpp b/perf_tests/test_osu_latency_sendrecv.cpp index fa2d03be..0ad1e861 100644 --- a/perf_tests/test_osu_latency_sendrecv.cpp +++ b/perf_tests/test_osu_latency_sendrecv.cpp @@ -81,9 +81,9 @@ BENCHMARK(benchmark_osu_latency_KokkosComm_sendrecv) ->UseManualTime() ->Unit(benchmark::kMicrosecond) ->RangeMultiplier(2) - ->Range(1, 32 * 1024 * 1024); + ->Range(1, 1000); BENCHMARK(benchmark_osu_latency_MPI_sendrecv) ->UseManualTime() ->Unit(benchmark::kMicrosecond) ->RangeMultiplier(2) - ->Range(1, 32 * 1024 * 1024); \ No newline at end of file + ->Range(1, 1000); \ No newline at end of file From 1c91516cdaa8d1097b3be4744fd20f7486225f33 Mon Sep 17 00:00:00 2001 From: Christian Nicole Avans Date: Wed, 26 Jun 2024 11:28:45 -0600 Subject: [PATCH 3/5] newline Co-authored-by: Carl Pearson --- perf_tests/test_osu_latency_sendrecv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf_tests/test_osu_latency_sendrecv.cpp b/perf_tests/test_osu_latency_sendrecv.cpp index 0ad1e861..cb5feb28 100644 --- a/perf_tests/test_osu_latency_sendrecv.cpp +++ b/perf_tests/test_osu_latency_sendrecv.cpp @@ -86,4 +86,4 @@ BENCHMARK(benchmark_osu_latency_MPI_sendrecv) ->UseManualTime() ->Unit(benchmark::kMicrosecond) ->RangeMultiplier(2) - ->Range(1, 1000); \ No newline at end of file + ->Range(1, 1000); From 002a1f01ac04a4144c51cbef9b11fb314057e3bc Mon Sep 17 00:00:00 2001 From: nicoleavans Date: Wed, 26 Jun 2024 13:29:07 -0600 Subject: [PATCH 4/5] add view labels --- perf_tests/test_osu_latency_isendirecv.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perf_tests/test_osu_latency_isendirecv.cpp b/perf_tests/test_osu_latency_isendirecv.cpp index 46b7f030..645708d4 100644 --- a/perf_tests/test_osu_latency_isendirecv.cpp +++ b/perf_tests/test_osu_latency_isendirecv.cpp @@ -56,7 +56,7 @@ void benchmark_osu_latency_KokkosComm_isendirecv(benchmark::State &state) { auto space = Kokkos::DefaultExecutionSpace(); using view_type = Kokkos::View; - view_type a("", state.range(0)); + view_type a("A", state.range(0)); while (state.KeepRunning()) { do_iteration(state, MPI_COMM_WORLD, osu_latency_Kokkos_Comm_isendirecv, @@ -74,7 +74,7 @@ void benchmark_osu_latency_MPI_isendirecv(benchmark::State &state) { } using view_type = Kokkos::View; - view_type a("", state.range(0)); + view_type a("A", state.range(0)); while (state.KeepRunning()) { do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_isendirecv, rank, a); From 9f51209527a8c69d981df99f65db278a72d9c57d Mon Sep 17 00:00:00 2001 From: nicoleavans Date: Wed, 26 Jun 2024 13:30:24 -0600 Subject: [PATCH 5/5] add view labels --- perf_tests/test_osu_latency_sendrecv.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perf_tests/test_osu_latency_sendrecv.cpp b/perf_tests/test_osu_latency_sendrecv.cpp index cb5feb28..5c9be783 100644 --- a/perf_tests/test_osu_latency_sendrecv.cpp +++ b/perf_tests/test_osu_latency_sendrecv.cpp @@ -51,7 +51,7 @@ void benchmark_osu_latency_KokkosComm_sendrecv(benchmark::State &state) { auto space = Kokkos::DefaultExecutionSpace(); using view_type = Kokkos::View; - view_type a("", state.range(0)); + view_type a("A", state.range(0)); while (state.KeepRunning()) { do_iteration(state, MPI_COMM_WORLD, osu_latency_Kokkos_Comm_sendrecv, @@ -69,7 +69,7 @@ void benchmark_osu_latency_MPI_sendrecv(benchmark::State &state) { } using view_type = Kokkos::View; - view_type a("", state.range(0)); + view_type a("A", state.range(0)); while (state.KeepRunning()) { do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_sendrecv, rank, a);