From b9a73d00a4101d602eacc74ba93cf077e9b00869 Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Mon, 11 Jan 2021 18:45:38 +0100 Subject: [PATCH 01/29] Update xnack option --- CMakeLists.txt | 8 ++++++-- test/extra/CMakeLists.txt | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d963b43d..7bcc89ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,8 +68,12 @@ include(cmake/Dependencies.cmake) set(VERSION_STRING "2.10.7") rocm_setup_version(VERSION ${VERSION_STRING}) -# AMD targets -set(AMDGPU_TARGETS gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") +# Use target ID syntax if supported for AMDGPU_TARGETS +if(TARGET_ID_SUPPORT) + set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") +else() + set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") +endif() # Print configuration summary include(cmake/Summary.cmake) diff --git a/test/extra/CMakeLists.txt b/test/extra/CMakeLists.txt index 0a6b9578..808870f7 100644 --- a/test/extra/CMakeLists.txt +++ b/test/extra/CMakeLists.txt @@ -96,8 +96,12 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") -# AMD targets -set(AMDGPU_TARGETS gfx803;gfx900;gfx906 CACHE STRING "List of specific machine types for library to target") +# Use target ID syntax if supported for AMDGPU_TARGETS +if(TARGET_ID_SUPPORT) + set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") +else() + set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") +endif() # Enable testing (ctest) enable_testing() From 5c35ae15263c265210975c61b2ba335d7bb65115 Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Wed, 13 Jan 2021 01:43:40 +0100 Subject: [PATCH 02/29] Add missing TARGET_ID_SUPPORT --- CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7bcc89ba..055fddc1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,15 @@ include(cmake/Dependencies.cmake) set(VERSION_STRING "2.10.7") rocm_setup_version(VERSION ${VERSION_STRING}) +# Detect compiler support for target ID +if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) + execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help" + OUTPUT_VARIABLE CXX_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) +endif() + # Use target ID syntax if supported for AMDGPU_TARGETS if(TARGET_ID_SUPPORT) set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") From 98f05b870f0a472f57854e2be3876c6290dcbdbb Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Wed, 13 Jan 2021 01:46:58 +0100 Subject: [PATCH 03/29] Update gitlab CI --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 00ae9326..8828a1e8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -35,7 +35,7 @@ variables: # Local build options LOCAL_CXXFLAGS: "" LOCAL_CMAKE_OPTIONS: "" - ROCM_LATEST_PATH: "/opt/rocm-3.7.0/" + ROCM_LATEST_PATH: "/opt/rocm-4.0.0/" # hipCUB with rocPRIM backend .rocm: From 286823d4a39ea844532645325575a97c6b6b9a40 Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Wed, 13 Jan 2021 09:42:09 +0100 Subject: [PATCH 04/29] Move setting of AMDGPU_TARGET before find_package(hip) --- CMakeLists.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 055fddc1..36258cbb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,22 @@ endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath") +# Detect compiler support for target ID +if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) + execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help" + OUTPUT_VARIABLE CXX_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) +endif() + +# Use target ID syntax if supported for AMDGPU_TARGETS +if(TARGET_ID_SUPPORT) + set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") +else() + set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") +endif() + # Verify that hcc compiler is used on ROCM platform include(cmake/VerifyCompiler.cmake) @@ -68,22 +84,6 @@ include(cmake/Dependencies.cmake) set(VERSION_STRING "2.10.7") rocm_setup_version(VERSION ${VERSION_STRING}) -# Detect compiler support for target ID -if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) - execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help" - OUTPUT_VARIABLE CXX_OUTPUT - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_STRIP_TRAILING_WHITESPACE) - string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) -endif() - -# Use target ID syntax if supported for AMDGPU_TARGETS -if(TARGET_ID_SUPPORT) - set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") -else() - set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") -endif() - # Print configuration summary include(cmake/Summary.cmake) print_configuration_summary() From 8ac2cb292164d97420d6560b1be061c729c475ce Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Wed, 13 Jan 2021 09:47:01 +0100 Subject: [PATCH 05/29] Move setting of AMDGPU_TARGET before find_package(hip) in extra test --- test/extra/CMakeLists.txt | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/test/extra/CMakeLists.txt b/test/extra/CMakeLists.txt index 808870f7..919141aa 100644 --- a/test/extra/CMakeLists.txt +++ b/test/extra/CMakeLists.txt @@ -32,6 +32,22 @@ list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake /opt/rocm/hip/cmake # FindHIP.cmake ) +# Detect compiler support for target ID +if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) + execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help" + OUTPUT_VARIABLE CXX_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) +endif() + +# Use target ID syntax if supported for AMDGPU_TARGETS +if(TARGET_ID_SUPPORT) + set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") +else() + set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") +endif() + # Verify that hcc compiler is used on ROCM platform include(VerifyCompiler) @@ -96,13 +112,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") -# Use target ID syntax if supported for AMDGPU_TARGETS -if(TARGET_ID_SUPPORT) - set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") -else() - set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") -endif() - # Enable testing (ctest) enable_testing() From 1ccfa23ac053e1d04e0ca4f2dae30156a5c55adc Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Sat, 16 Jan 2021 00:34:47 +0000 Subject: [PATCH 06/29] Updating CHANGELOG and version for ROCm 4.1 --- CHANGELOG.md | 6 +++++- CMakeLists.txt | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebc12793..32618d4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,11 @@ See README.md on how to build the hipCUB documentation using Doxygen. -## [Unreleased hipCUB-2.10.7 for ROCm 4.0.0] +## [Unreleased hipCUB-2.10.8 for ROCm 4.1.0] +### Added +- Support for DiscardOutputIterator + +## [hipCUB-2.10.7 for ROCm 4.0.0] ### Added - No new features diff --git a/CMakeLists.txt b/CMakeLists.txt index d963b43d..a0f7e9dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -65,7 +65,7 @@ option(BUILD_BENCHMARK "Build benchmarks" OFF) include(cmake/Dependencies.cmake) # Setup VERSION -set(VERSION_STRING "2.10.7") +set(VERSION_STRING "2.10.8") rocm_setup_version(VERSION ${VERSION_STRING}) # AMD targets From 551c5194c21caa3d4f79a7b2d9b3e7f3bb35c224 Mon Sep 17 00:00:00 2001 From: Navid Date: Mon, 25 Jan 2021 15:33:58 +0100 Subject: [PATCH 07/29] Added iterator test for constant, counting and transform iterators --- .../iterator/constant_input_iterator.hpp | 40 +++ .../iterator/counting_input_iterator.hpp | 41 +++ test/hipcub/CMakeLists.txt | 1 + test/hipcub/test_hipcub_iterators.cpp | 278 ++++++++++++++++++ 4 files changed, 360 insertions(+) create mode 100644 hipcub/include/hipcub/iterator/constant_input_iterator.hpp create mode 100644 hipcub/include/hipcub/iterator/counting_input_iterator.hpp create mode 100644 test/hipcub/test_hipcub_iterators.cpp diff --git a/hipcub/include/hipcub/iterator/constant_input_iterator.hpp b/hipcub/include/hipcub/iterator/constant_input_iterator.hpp new file mode 100644 index 00000000..7cf6465b --- /dev/null +++ b/hipcub/include/hipcub/iterator/constant_input_iterator.hpp @@ -0,0 +1,40 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_CONSTANT_INPUT_ITERATOR_HPP_ +#define HIPCUB_CONSTANT_INPUT_ITERATOR_HPP_ + +#ifdef __HIP_PLATFORM_HCC__ + #include "../backend/rocprim/iterator/constant_input_iterator.hpp" +#elif defined(__HIP_PLATFORM_NVCC__) + #include "../config.hpp" + #include +#endif + +#endif // HIPCUB_ITERATOR_DISCARD_OUTPUT__HPP_ diff --git a/hipcub/include/hipcub/iterator/counting_input_iterator.hpp b/hipcub/include/hipcub/iterator/counting_input_iterator.hpp new file mode 100644 index 00000000..4890dc48 --- /dev/null +++ b/hipcub/include/hipcub/iterator/counting_input_iterator.hpp @@ -0,0 +1,41 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_COUNTING_INPUT_ITERATOR_HPP_ +#define HIPCUB_COUNTING_INPUT_ITERATOR_HPP_ + +#ifdef __HIP_PLATFORM_HCC__ + #include "../backend/rocprim/iterator/counting_input_iterator.hpp" +#elif defined(__HIP_PLATFORM_NVCC__) + #include "../config.hpp" + #include +#endif + +#endif // HIPCUB_ITERATOR_DISCARD_OUTPUT__HPP_ + diff --git a/test/hipcub/CMakeLists.txt b/test/hipcub/CMakeLists.txt index 40289cae..5b1ed661 100644 --- a/test/hipcub/CMakeLists.txt +++ b/test/hipcub/CMakeLists.txt @@ -105,3 +105,4 @@ add_hipcub_test("hipcub.DeviceSelect" test_hipcub_device_select.cpp) add_hipcub_test("hipcub.UtilPtx" test_hipcub_util_ptx.cpp) add_hipcub_test("hipcub.WarpReduce" test_hipcub_warp_reduce.cpp) add_hipcub_test("hipcub.WarpScan" test_hipcub_warp_scan.cpp) +add_hipcub_test("hipcub.Iterator" test_hipcub_iterators.cpp) diff --git a/test/hipcub/test_hipcub_iterators.cpp b/test/hipcub/test_hipcub_iterators.cpp new file mode 100644 index 00000000..5e5306c8 --- /dev/null +++ b/test/hipcub/test_hipcub_iterators.cpp @@ -0,0 +1,278 @@ +// MIT License +// +// Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include +#include + +#include "hipcub/iterator/constant_input_iterator.hpp" +#include "hipcub/iterator/counting_input_iterator.hpp" +#include "hipcub/iterator/transform_input_iterator.hpp" + +#include "common_test_header.hpp" + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +#define INTEGER_SEED (0) + +//hipcub::CachingDeviceAllocator g_allocator(true); + +// Params for tests +template +struct IteratorParams +{ + using input_type = InputType; +}; + +template +class HipcubIteratorTests : public ::testing::Test +{ + public: + using input_type = typename Params::input_type; +}; + +typedef ::testing::Types< + //IteratorParams, + //IteratorParams, + IteratorParams + //IteratorParams, + //IteratorParams +> HipcubIteratorTestsParams; + +static std::vector base_values = {0, 99}; + +// TODO need to implement the seeding like CUB +template +__host__ __device__ __forceinline__ void +InitValue(uint32_t seed, T& value, uint32_t index = 0) +{ + (void) seed; + value = (index > 0); +} + +template +struct TransformOp +{ + // Increment transform + __host__ __device__ __forceinline__ T operator()(T input) const + { + T addend; + InitValue(INTEGER_SEED, addend, 1); + return input + addend; + } +}; + +struct SelectOp +{ + template + __host__ __device__ __forceinline__ bool operator()(T input) + { + (void) input; + return true; + } +}; + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + +/** +* Test random access input iterator +*/ +template +__global__ void Kernel( + InputIteratorT d_in, + T *d_out, + InputIteratorT *d_itrs) +{ + d_out[0] = *d_in; // Value at offset 0 + d_out[1] = d_in[100]; // Value at offset 100 + d_out[2] = *(d_in + 1000); // Value at offset 1000 + d_out[3] = *(d_in + 10000); // Value at offset 10000 + + d_in++; + d_out[4] = d_in[0]; // Value at offset 1 + + d_in += 20; + d_out[5] = d_in[0]; // Value at offset 21 + d_itrs[0] = d_in; // Iterator at offset 21 + + d_in -= 10; + d_out[6] = d_in[0]; // Value at offset 11; + + d_in -= 11; + d_out[7] = d_in[0]; // Value at offset 0 + d_itrs[1] = d_in; // Iterator at offset 0 +} + +template +void iterator_test_function(IteratorType d_itr, std::vector &h_reference) +{ + std::vector output(h_reference.size()); + + IteratorType *d_itrs = NULL; + HIP_CHECK(hipMalloc(&d_itrs, sizeof(IteratorType) * 2)); + + IteratorType *h_itrs = (IteratorType*)malloc(sizeof(IteratorType) * 2); + + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + // Run unguarded kernel + Kernel<<<1, 1>>>(d_itr, device_output, d_itrs); + + hipPeekAtLastError(); + hipDeviceSynchronize(); + + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + h_itrs, d_itrs, + sizeof(IteratorType) * 2, + hipMemcpyDeviceToHost + ) + ); + + for(size_t i = 0; i < output.size(); i++) + { + ASSERT_EQ(output[i], h_reference[i]); + } + + IteratorType h_itr = d_itr + 21; + ASSERT_TRUE(h_itr == h_itrs[0]); + + ASSERT_TRUE(d_itr == h_itrs[1]); +} + +TYPED_TEST_CASE(HipcubIteratorTests, HipcubIteratorTestsParams); + +TYPED_TEST(HipcubIteratorTests, TestConstant) +{ + using T = typename TestFixture::input_type; + using IteratorType = hipcub::ConstantInputIterator; + + constexpr uint32_t array_size = 8; + + std::vector h_reference(array_size); + + for(uint32_t base_index = 0; base_index < base_values.size(); base_index++) + { + T base_value = (T)base_values[base_index]; + + IteratorType d_itr(base_value); + + for(uint32_t i = 0; i < h_reference.size(); i++) + { + h_reference[i] = base_value; + } + + iterator_test_function(d_itr, h_reference); + } + +} + +TYPED_TEST(HipcubIteratorTests, TestCounting) +{ + using T = typename TestFixture::input_type; + using IteratorType = hipcub::CountingInputIterator; + + constexpr uint32_t array_size = 8; + + std::vector h_reference(array_size); + + for(uint32_t base_index = 0; base_index < base_values.size(); base_index++) + { + T base_value = (T)base_values[base_index]; + + IteratorType d_itr(base_value); + + h_reference[0] = base_value + 0; // Value at offset 0 + h_reference[1] = base_value + 100; // Value at offset 100 + h_reference[2] = base_value + 1000; // Value at offset 1000 + h_reference[3] = base_value + 10000; // Value at offset 10000 + h_reference[4] = base_value + 1; // Value at offset 1 + h_reference[5] = base_value + 21; // Value at offset 21 + h_reference[6] = base_value + 11; // Value at offset 11 + h_reference[7] = base_value + 0; // Value at offset 0; + + iterator_test_function(d_itr, h_reference); + } + +} + +TYPED_TEST(HipcubIteratorTests, TestTransform) +{ + using T = typename TestFixture::input_type; + using CastT = typename TestFixture::input_type; + using IteratorType = hipcub::TransformInputIterator, CastT*>; + + constexpr int TEST_VALUES = 11000; + + //T *h_data = new T[TEST_VALUES]; + std::vector h_data(TEST_VALUES); + for (int i = 0; i < TEST_VALUES; ++i) + { + InitValue(INTEGER_SEED, h_data[i], i); + } + + // Allocate device arrays + T *d_data = NULL; + //g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES); + HIP_CHECK(hipMalloc(&d_data, h_data.size() * sizeof(typename decltype(h_data)::value_type))); + //cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice); + + HIP_CHECK( + hipMemcpy( + d_data, h_data.data(), + TEST_VALUES * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + TransformOp op; + + // Initialize reference data + constexpr uint32_t array_size = 8; + std::vector h_reference(array_size); + h_reference[0] = op(h_data[0]); // Value at offset 0 + h_reference[1] = op(h_data[100]); // Value at offset 100 + h_reference[2] = op(h_data[1000]); // Value at offset 1000 + h_reference[3] = op(h_data[10000]); // Value at offset 10000 + h_reference[4] = op(h_data[1]); // Value at offset 1 + h_reference[5] = op(h_data[21]); // Value at offset 21 + h_reference[6] = op(h_data[11]); // Value at offset 11 + h_reference[7] = op(h_data[0]); // Value at offset 0; + + IteratorType d_itr((CastT*) d_data, op); + + iterator_test_function(d_itr, h_reference); +} From d46d3fab526b1bedcc88941550eeac5057971d19 Mon Sep 17 00:00:00 2001 From: Navid Saremi Date: Wed, 27 Jan 2021 15:22:31 +0100 Subject: [PATCH 08/29] Added TexObjInputIterator and TexRefInputIterator to hipcub. --- .../iterator/tex_obj_input_iterator.hpp | 35 ++++- .../iterator/tex_ref_input_iterator.hpp | 80 +++++++++++ .../iterator/tex_obj_input_iterator.hpp | 41 ++++++ .../iterator/tex_ref_input_iterator.hpp | 42 ++++++ test/hipcub/test_hipcub_iterators.cpp | 130 +++++++++++++++++- 5 files changed, 323 insertions(+), 5 deletions(-) create mode 100644 hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp create mode 100644 hipcub/include/hipcub/iterator/tex_obj_input_iterator.hpp create mode 100644 hipcub/include/hipcub/iterator/tex_ref_input_iterator.hpp diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp index 18291f20..38759043 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp @@ -1,7 +1,7 @@ /****************************************************************************** * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2017-2021, Advanced Micro Devices, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,7 +40,38 @@ template< typename T, typename OffsetT = std::ptrdiff_t > -using TexObjInputIterator = ::rocprim::texture_cache_iterator; +class TexObjInputIterator : public ::rocprim::texture_cache_iterator +{ + public: + template + inline + hipError_t BindTexture(Qualified* ptr, + size_t bytes = size_t(-1), + size_t texture_offset = 0) + { + return ::rocprim::texture_cache_iterator::bind_texture(ptr, bytes, texture_offset); + } + + inline hipError_t UnbindTexture() + { + return ::rocprim::texture_cache_iterator::unbind_texture(); + } + + ROCPRIM_HOST_DEVICE inline + ~TexObjInputIterator() = default; + + ROCPRIM_HOST_DEVICE inline + TexObjInputIterator() : ::rocprim::texture_cache_iterator() + { + } + + ROCPRIM_HOST_DEVICE inline + TexObjInputIterator(const ::rocprim::texture_cache_iterator other) + : ::rocprim::texture_cache_iterator(other) + { + } + +}; END_HIPCUB_NAMESPACE diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp new file mode 100644 index 00000000..f1f63c37 --- /dev/null +++ b/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp @@ -0,0 +1,80 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2017-2021, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_ROCPRIM_ITERATOR_TEX_REF_INPUT_ITERATOR_HPP_ +#define HIPCUB_ROCPRIM_ITERATOR_TEX_REF_INPUT_ITERATOR_HPP_ + +#include "../../../config.hpp" + +#include + +BEGIN_HIPCUB_NAMESPACE + +template< + typename T, + int UNIQUE_ID, // Unused parameter for compatibility with original definition in cub + typename OffsetT = std::ptrdiff_t +> +class TexRefInputIterator : public ::rocprim::texture_cache_iterator +{ + public: + template + inline + hipError_t BindTexture(Qualified* ptr, + size_t bytes = size_t(-1), + size_t texture_offset = 0) + { + return ::rocprim::texture_cache_iterator::bind_texture(ptr, bytes, texture_offset); + } + + inline hipError_t UnbindTexture() + { + return ::rocprim::texture_cache_iterator::unbind_texture(); + } + + ROCPRIM_HOST_DEVICE inline + ~TexRefInputIterator() = default; + + ROCPRIM_HOST_DEVICE inline + TexRefInputIterator() : ::rocprim::texture_cache_iterator() + { + } + + ROCPRIM_HOST_DEVICE inline + TexRefInputIterator(const ::rocprim::texture_cache_iterator other) + : ::rocprim::texture_cache_iterator(other) + { + } + +}; + +END_HIPCUB_NAMESPACE + +#endif // HIPCUB_ROCPRIM_ITERATOR_TEX_OBJ_INPUT_ITERATOR_HPP_ + diff --git a/hipcub/include/hipcub/iterator/tex_obj_input_iterator.hpp b/hipcub/include/hipcub/iterator/tex_obj_input_iterator.hpp new file mode 100644 index 00000000..eaa0d628 --- /dev/null +++ b/hipcub/include/hipcub/iterator/tex_obj_input_iterator.hpp @@ -0,0 +1,41 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_TEX_OBJ_INPUT_ITERATOR_HPP_ +#define HIPCUB_TEX_OBJ_INPUT_ITERATOR_HPP_ + +#ifdef __HIP_PLATFORM_HCC__ + #include "../backend/rocprim/iterator/tex_obj_input_iterator.hpp" +#elif defined(__HIP_PLATFORM_NVCC__) + #include "../config.hpp" + #include +#endif + +#endif // HIPCUB_ITERATOR_DISCARD_OUTPUT__HPP_ + diff --git a/hipcub/include/hipcub/iterator/tex_ref_input_iterator.hpp b/hipcub/include/hipcub/iterator/tex_ref_input_iterator.hpp new file mode 100644 index 00000000..80bd23de --- /dev/null +++ b/hipcub/include/hipcub/iterator/tex_ref_input_iterator.hpp @@ -0,0 +1,42 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_TEX_REF_INPUT_ITERATOR_HPP_ +#define HIPCUB_TEX_REF_INPUT_ITERATOR_HPP_ + +#ifdef __HIP_PLATFORM_HCC__ + #include "../backend/rocprim/iterator/tex_ref_input_iterator.hpp" +#elif defined(__HIP_PLATFORM_NVCC__) + #include "../config.hpp" + #include +#endif + +#endif // HIPCUB_ITERATOR_DISCARD_OUTPUT__HPP_ + + diff --git a/test/hipcub/test_hipcub_iterators.cpp b/test/hipcub/test_hipcub_iterators.cpp index 5e5306c8..6a0ac212 100644 --- a/test/hipcub/test_hipcub_iterators.cpp +++ b/test/hipcub/test_hipcub_iterators.cpp @@ -27,9 +27,21 @@ #include "hipcub/iterator/constant_input_iterator.hpp" #include "hipcub/iterator/counting_input_iterator.hpp" #include "hipcub/iterator/transform_input_iterator.hpp" +#include "hipcub/iterator/tex_obj_input_iterator.hpp" +#include "hipcub/iterator/tex_ref_input_iterator.hpp" + +#include "hipcub/util_allocator.hpp" + +#if 0 +//#include +//#include +//#include +//#include +#endif #include "common_test_header.hpp" +hipcub::CachingDeviceAllocator g_allocator(true); //--------------------------------------------------------------------- // Globals, constants and typedefs @@ -197,7 +209,6 @@ TYPED_TEST(HipcubIteratorTests, TestConstant) iterator_test_function(d_itr, h_reference); } - } TYPED_TEST(HipcubIteratorTests, TestCounting) @@ -226,7 +237,6 @@ TYPED_TEST(HipcubIteratorTests, TestCounting) iterator_test_function(d_itr, h_reference); } - } TYPED_TEST(HipcubIteratorTests, TestTransform) @@ -248,7 +258,7 @@ TYPED_TEST(HipcubIteratorTests, TestTransform) T *d_data = NULL; //g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES); HIP_CHECK(hipMalloc(&d_data, h_data.size() * sizeof(typename decltype(h_data)::value_type))); - //cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice); + //hipMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, hipMemcpyHostToDevice); HIP_CHECK( hipMemcpy( @@ -276,3 +286,117 @@ TYPED_TEST(HipcubIteratorTests, TestTransform) iterator_test_function(d_itr, h_reference); } + +TYPED_TEST(HipcubIteratorTests, TestTexObj) +{ + using T = typename TestFixture::input_type; + using CastT = typename TestFixture::input_type; + using IteratorType = hipcub::TexObjInputIterator; + + // + // Test iterator manipulation in kernel + // + + constexpr uint32_t TEST_VALUES = 11000; + constexpr uint32_t DUMMY_OFFSET = 500; + constexpr uint32_t DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; + + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) + { + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; + + //T *h_data = new T[TEST_VALUES]; + std::vector h_data(TEST_VALUES); + std::vector output = test_utils::get_random_data(TEST_VALUES, 2, 200, seed_value); + + // Allocate device arrays + T *d_data = NULL; + T *d_dummy = NULL; + g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES); + hipMemcpy(d_data, h_data.data(), sizeof(T) * TEST_VALUES, hipMemcpyHostToDevice); + + g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES); + hipMemcpy(d_dummy, h_data.data() + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, hipMemcpyHostToDevice); + + // Initialize reference data + constexpr uint32_t array_size = 8; + std::vector h_reference(array_size); + h_reference[0] = h_data[0]; // Value at offset 0 + h_reference[1] = h_data[100]; // Value at offset 100 + h_reference[2] = h_data[1000]; // Value at offset 1000 + h_reference[3] = h_data[10000]; // Value at offset 10000 + h_reference[4] = h_data[1]; // Value at offset 1 + h_reference[5] = h_data[21]; // Value at offset 21 + h_reference[6] = h_data[11]; // Value at offset 11 + h_reference[7] = h_data[0]; // Value at offset 0; + + // Create and bind obj-based test iterator + IteratorType d_obj_itr; + d_obj_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES); + + iterator_test_function(d_obj_itr, h_reference); + } +} + +TYPED_TEST(HipcubIteratorTests, TestTexRef) +{ + using T = typename TestFixture::input_type; + using CastT = typename TestFixture::input_type; + using IteratorType = hipcub::TexRefInputIterator; + + // + // Test iterator manipulation in kernel + // + + constexpr uint32_t TEST_VALUES = 11000; + constexpr uint32_t DUMMY_OFFSET = 500; + constexpr uint32_t DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; + + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) + { + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; + + //T *h_data = new T[TEST_VALUES]; + std::vector h_data(TEST_VALUES); + std::vector output = test_utils::get_random_data(TEST_VALUES, 2, 200, seed_value); + + // Allocate device arrays + T *d_data = NULL; + T *d_dummy = NULL; + g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES); + hipMemcpy(d_data, h_data.data(), sizeof(T) * TEST_VALUES, hipMemcpyHostToDevice); + + g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES); + hipMemcpy(d_dummy, h_data.data() + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, hipMemcpyHostToDevice); + + // Initialize reference data + constexpr uint32_t array_size = 8; + std::vector h_reference(array_size); + h_reference[0] = h_data[0]; // Value at offset 0 + h_reference[1] = h_data[100]; // Value at offset 100 + h_reference[2] = h_data[1000]; // Value at offset 1000 + h_reference[3] = h_data[10000]; // Value at offset 10000 + h_reference[4] = h_data[1]; // Value at offset 1 + h_reference[5] = h_data[21]; // Value at offset 21 + h_reference[6] = h_data[11]; // Value at offset 11 + h_reference[7] = h_data[0]; // Value at offset 0; + + // Create and bind ref-based test iterator + IteratorType d_ref_itr; + d_ref_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES); + + // Create and bind dummy iterator of same type to check with interferance + IteratorType d_ref_itr2; + d_ref_itr2.BindTexture((CastT*) d_dummy, sizeof(T) * DUMMY_TEST_VALUES); + + iterator_test_function(d_ref_itr, h_reference); + } +} + +TYPED_TEST(HipcubIteratorTests, TestTexTransform) +{ +} + +TYPED_TEST(HipcubIteratorTests, TestCacheModified) +{ +} From 3ce95dcbc57131ee6fe65628d54274ebd8ab5771 Mon Sep 17 00:00:00 2001 From: Navid Saremi Date: Thu, 28 Jan 2021 11:39:27 +0100 Subject: [PATCH 09/29] Enabled more types for testing --- test/hipcub/test_hipcub_iterators.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/hipcub/test_hipcub_iterators.cpp b/test/hipcub/test_hipcub_iterators.cpp index 6a0ac212..5ef043b9 100644 --- a/test/hipcub/test_hipcub_iterators.cpp +++ b/test/hipcub/test_hipcub_iterators.cpp @@ -66,10 +66,10 @@ class HipcubIteratorTests : public ::testing::Test }; typedef ::testing::Types< - //IteratorParams, - //IteratorParams, - IteratorParams - //IteratorParams, + IteratorParams, + IteratorParams, + IteratorParams, + IteratorParams //IteratorParams > HipcubIteratorTestsParams; @@ -307,7 +307,7 @@ TYPED_TEST(HipcubIteratorTests, TestTexObj) //T *h_data = new T[TEST_VALUES]; std::vector h_data(TEST_VALUES); - std::vector output = test_utils::get_random_data(TEST_VALUES, 2, 200, seed_value); + std::vector output = test_utils::get_random_data(TEST_VALUES, T(2), T(200), seed_value); // Allocate device arrays T *d_data = NULL; @@ -358,7 +358,7 @@ TYPED_TEST(HipcubIteratorTests, TestTexRef) //T *h_data = new T[TEST_VALUES]; std::vector h_data(TEST_VALUES); - std::vector output = test_utils::get_random_data(TEST_VALUES, 2, 200, seed_value); + std::vector output = test_utils::get_random_data(TEST_VALUES, T(2), T(200), seed_value); // Allocate device arrays T *d_data = NULL; From 172d8b3ccf6382ca64554a504bf408ea263bd219 Mon Sep 17 00:00:00 2001 From: Navid Saremi Date: Thu, 28 Jan 2021 12:08:25 +0100 Subject: [PATCH 10/29] Added texture transform test --- test/hipcub/test_hipcub_iterators.cpp | 47 ++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/test/hipcub/test_hipcub_iterators.cpp b/test/hipcub/test_hipcub_iterators.cpp index 5ef043b9..a8f79c72 100644 --- a/test/hipcub/test_hipcub_iterators.cpp +++ b/test/hipcub/test_hipcub_iterators.cpp @@ -77,7 +77,7 @@ static std::vector base_values = {0, 99}; // TODO need to implement the seeding like CUB template -__host__ __device__ __forceinline__ void +__host__ __device__ __forceinline__ void InitValue(uint32_t seed, T& value, uint32_t index = 0) { (void) seed; @@ -395,6 +395,51 @@ TYPED_TEST(HipcubIteratorTests, TestTexRef) TYPED_TEST(HipcubIteratorTests, TestTexTransform) { + using T = typename TestFixture::input_type; + using CastT = typename TestFixture::input_type; + using TextureIteratorType = hipcub::TexRefInputIterator; + + constexpr uint32_t TEST_VALUES = 11000; + + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) + { + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; + + //T *h_data = new T[TEST_VALUES]; + std::vector h_data(TEST_VALUES); + std::vector output = test_utils::get_random_data(TEST_VALUES, T(2), T(200), seed_value); + + // Allocate device arrays + T *d_data = NULL; + g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES); + hipMemcpy(d_data, h_data.data(), sizeof(T) * TEST_VALUES, hipMemcpyHostToDevice); + + TransformOp op; + + // Initialize reference data + constexpr uint32_t array_size = 8; + std::vector h_reference(array_size); + h_reference[0] = op(h_data[0]); // Value at offset 0 + h_reference[1] = op(h_data[100]); // Value at offset 100 + h_reference[2] = op(h_data[1000]); // Value at offset 1000 + h_reference[3] = op(h_data[10000]); // Value at offset 10000 + h_reference[4] = op(h_data[1]); // Value at offset 1 + h_reference[5] = op(h_data[21]); // Value at offset 21 + h_reference[6] = op(h_data[11]); // Value at offset 11 + h_reference[7] = op(h_data[0]); // Value at offset 0; + + // Create and bind ref-based test iterator + TextureIteratorType d_tex_itr; + d_tex_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES); + + // Create transform iterator + hipcub::TransformInputIterator, TextureIteratorType> xform_itr(d_tex_itr, op); + + iterator_test_function< + hipcub::TransformInputIterator, TextureIteratorType>, + T> + (xform_itr, h_reference); + } } TYPED_TEST(HipcubIteratorTests, TestCacheModified) From 12d897aff229eddd1c3c292fe3799a570ab18583 Mon Sep 17 00:00:00 2001 From: Navid Saremi Date: Mon, 1 Feb 2021 12:38:27 +0100 Subject: [PATCH 11/29] Added arg index iterator and some cleanup --- .../iterator/arg_index_input_iterator.hpp | 41 +++++++++++++++++++ test/hipcub/test_hipcub_iterators.cpp | 32 +++++++-------- 2 files changed, 56 insertions(+), 17 deletions(-) create mode 100644 hipcub/include/hipcub/iterator/arg_index_input_iterator.hpp diff --git a/hipcub/include/hipcub/iterator/arg_index_input_iterator.hpp b/hipcub/include/hipcub/iterator/arg_index_input_iterator.hpp new file mode 100644 index 00000000..76b00eb0 --- /dev/null +++ b/hipcub/include/hipcub/iterator/arg_index_input_iterator.hpp @@ -0,0 +1,41 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_ARG_INDEX_INPUT_ITERATOR_HPP_ +#define HIPCUB_ARG_INDEX_INPUT_ITERATOR_HPP_ + +#ifdef __HIP_PLATFORM_HCC__ + #include "../backend/rocprim/iterator/arg_index_input_iterator.hpp" +#elif defined(__HIP_PLATFORM_NVCC__) + #include "../config.hpp" + #include +#endif + +#endif // HIPCUB_ITERATOR_DISCARD_OUTPUT__HPP_ + diff --git a/test/hipcub/test_hipcub_iterators.cpp b/test/hipcub/test_hipcub_iterators.cpp index a8f79c72..7dbbda9b 100644 --- a/test/hipcub/test_hipcub_iterators.cpp +++ b/test/hipcub/test_hipcub_iterators.cpp @@ -24,6 +24,7 @@ #include #include +#include "hipcub/iterator/arg_index_input_iterator.hpp" #include "hipcub/iterator/constant_input_iterator.hpp" #include "hipcub/iterator/counting_input_iterator.hpp" #include "hipcub/iterator/transform_input_iterator.hpp" @@ -32,13 +33,6 @@ #include "hipcub/util_allocator.hpp" -#if 0 -//#include -//#include -//#include -//#include -#endif - #include "common_test_header.hpp" hipcub::CachingDeviceAllocator g_allocator(true); @@ -150,7 +144,7 @@ void iterator_test_function(IteratorType d_itr, std::vector &h_reference) IteratorType *h_itrs = (IteratorType*)malloc(sizeof(IteratorType) * 2); T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + g_allocator.DeviceAllocate((void**)&device_output, output.size() * sizeof(typename decltype(output)::value_type)); // Run unguarded kernel Kernel<<<1, 1>>>(d_itr, device_output, d_itrs); @@ -181,8 +175,9 @@ void iterator_test_function(IteratorType d_itr, std::vector &h_reference) IteratorType h_itr = d_itr + 21; ASSERT_TRUE(h_itr == h_itrs[0]); - ASSERT_TRUE(d_itr == h_itrs[1]); + + g_allocator.DeviceFree(device_output); } TYPED_TEST_CASE(HipcubIteratorTests, HipcubIteratorTestsParams); @@ -247,7 +242,6 @@ TYPED_TEST(HipcubIteratorTests, TestTransform) constexpr int TEST_VALUES = 11000; - //T *h_data = new T[TEST_VALUES]; std::vector h_data(TEST_VALUES); for (int i = 0; i < TEST_VALUES; ++i) { @@ -256,9 +250,7 @@ TYPED_TEST(HipcubIteratorTests, TestTransform) // Allocate device arrays T *d_data = NULL; - //g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES); - HIP_CHECK(hipMalloc(&d_data, h_data.size() * sizeof(typename decltype(h_data)::value_type))); - //hipMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, hipMemcpyHostToDevice); + g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES); HIP_CHECK( hipMemcpy( @@ -285,6 +277,8 @@ TYPED_TEST(HipcubIteratorTests, TestTransform) IteratorType d_itr((CastT*) d_data, op); iterator_test_function(d_itr, h_reference); + + g_allocator.DeviceFree(d_data); } TYPED_TEST(HipcubIteratorTests, TestTexObj) @@ -335,6 +329,9 @@ TYPED_TEST(HipcubIteratorTests, TestTexObj) d_obj_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES); iterator_test_function(d_obj_itr, h_reference); + + g_allocator.DeviceFree(d_data); + g_allocator.DeviceFree(d_dummy); } } @@ -390,6 +387,9 @@ TYPED_TEST(HipcubIteratorTests, TestTexRef) d_ref_itr2.BindTexture((CastT*) d_dummy, sizeof(T) * DUMMY_TEST_VALUES); iterator_test_function(d_ref_itr, h_reference); + + g_allocator.DeviceFree(d_data); + g_allocator.DeviceFree(d_dummy); } } @@ -439,9 +439,7 @@ TYPED_TEST(HipcubIteratorTests, TestTexTransform) hipcub::TransformInputIterator, TextureIteratorType>, T> (xform_itr, h_reference); - } -} -TYPED_TEST(HipcubIteratorTests, TestCacheModified) -{ + g_allocator.DeviceFree(d_data); + } } From eacc3b16b90d608c3e11be046f59c26dd11e7c83 Mon Sep 17 00:00:00 2001 From: Navid Saremi Date: Mon, 1 Feb 2021 15:29:16 +0100 Subject: [PATCH 12/29] Fix namespace issue in texture iterators --- .../backend/rocprim/iterator/tex_obj_input_iterator.hpp | 6 +++--- .../backend/rocprim/iterator/tex_ref_input_iterator.hpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp index 38759043..0fd76661 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp @@ -57,15 +57,15 @@ class TexObjInputIterator : public ::rocprim::texture_cache_iterator return ::rocprim::texture_cache_iterator::unbind_texture(); } - ROCPRIM_HOST_DEVICE inline + HIPCUB_HOST_DEVICE inline ~TexObjInputIterator() = default; - ROCPRIM_HOST_DEVICE inline + HIPCUB_HOST_DEVICE inline TexObjInputIterator() : ::rocprim::texture_cache_iterator() { } - ROCPRIM_HOST_DEVICE inline + HIPCUB_HOST_DEVICE inline TexObjInputIterator(const ::rocprim::texture_cache_iterator other) : ::rocprim::texture_cache_iterator(other) { diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp index f1f63c37..e5386e9e 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp @@ -58,15 +58,15 @@ class TexRefInputIterator : public ::rocprim::texture_cache_iterator return ::rocprim::texture_cache_iterator::unbind_texture(); } - ROCPRIM_HOST_DEVICE inline + HIPCUB_HOST_DEVICE inline ~TexRefInputIterator() = default; - ROCPRIM_HOST_DEVICE inline + HIPCUB_HOST_DEVICE inline TexRefInputIterator() : ::rocprim::texture_cache_iterator() { } - ROCPRIM_HOST_DEVICE inline + HIPCUB_HOST_DEVICE inline TexRefInputIterator(const ::rocprim::texture_cache_iterator other) : ::rocprim::texture_cache_iterator(other) { From 5240370b2a792ef11db87e7e4cc47e3f75cb636d Mon Sep 17 00:00:00 2001 From: Navid Saremi Date: Tue, 2 Feb 2021 13:48:58 +0100 Subject: [PATCH 13/29] Added DevicePartition to hipcub --- .../backend/cub/device/device_partition.hpp | 101 +++++ .../rocprim/device/device_partition.hpp | 102 +++++ .../hipcub/device/device_partition.hpp | 40 ++ test/hipcub/CMakeLists.txt | 1 + test/hipcub/identity_iterator.hpp | 161 ++++++++ test/hipcub/test_hipcub_device_partition.cpp | 371 ++++++++++++++++++ test/hipcub/test_utils.hpp | 9 + 7 files changed, 785 insertions(+) create mode 100644 hipcub/include/hipcub/backend/cub/device/device_partition.hpp create mode 100644 hipcub/include/hipcub/backend/rocprim/device/device_partition.hpp create mode 100644 hipcub/include/hipcub/device/device_partition.hpp create mode 100644 test/hipcub/identity_iterator.hpp create mode 100644 test/hipcub/test_hipcub_device_partition.cpp diff --git a/hipcub/include/hipcub/backend/cub/device/device_partition.hpp b/hipcub/include/hipcub/backend/cub/device/device_partition.hpp new file mode 100644 index 00000000..bc742fe4 --- /dev/null +++ b/hipcub/include/hipcub/backend/cub/device/device_partition.hpp @@ -0,0 +1,101 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2017-2021, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_CUB_DEVICE_DEVICE_PARTITION_HPP_ +#define HIPCUB_CUB_DEVICE_DEVICE_PARTITION_HPP_ + +#include "../../../config.hpp" + +#include + +BEGIN_HIPCUB_NAMESPACE + +struct DevicePartition +{ + template < + typename InputIteratorT, + typename FlagIterator, + typename OutputIteratorT, + typename NumSelectedIteratorT> + HIPCUB_HOST_DEVICE __forceinline__ + static hipError_t Flagged( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + FlagIterator d_flags, ///< [in] Pointer to the input sequence of selection flags + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) + int num_items, ///< [in] Total number of items to select from + hipStream_t stream = 0, ///< [in] [optional] hip stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + return DevicePartition::Flagged( + d_temp_storage, + temp_storage_bytes, + d_in, + d_flags, + d_out, + d_num_selected_out, + num_items, + stream, + debug_synchronous); + } + + template < + typename InputIteratorT, + typename OutputIteratorT, + typename NumSelectedIteratorT, + typename SelectOp> + HIPCUB_HOST_DEVICE __forceinline__ + static hipError_t If( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) + int num_items, ///< [in] Total number of items to select from + SelectOp select_op, ///< [in] Unary selection operator + hipStream_t stream = 0, ///< [in] [optional] hip stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + return DevicePartition::If(d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + d_num_selected_out, + num_items, + select_op, + stream, + debug_synchronous); + } +}; + +END_HIPCUB_NAMESPACE + +#endif // HIPCUB_CUB_DEVICE_DEVICE_PARTITION_HPP_ diff --git a/hipcub/include/hipcub/backend/rocprim/device/device_partition.hpp b/hipcub/include/hipcub/backend/rocprim/device/device_partition.hpp new file mode 100644 index 00000000..3d312bd6 --- /dev/null +++ b/hipcub/include/hipcub/backend/rocprim/device/device_partition.hpp @@ -0,0 +1,102 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_ROCPRIM_DEVICE_PARTITION_HPP_ +#define HIPCUB_ROCPRIM_DEVICE_PARTITION_HPP_ + +#include "../../../config.hpp" + +#include + +BEGIN_HIPCUB_NAMESPACE + +struct DevicePartition +{ + template < + typename InputIteratorT, + typename FlagIterator, + typename OutputIteratorT, + typename NumSelectedIteratorT> + HIPCUB_HOST_DEVICE __forceinline__ + static hipError_t Flagged( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + FlagIterator d_flags, ///< [in] Pointer to the input sequence of selection flags + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) + int num_items, ///< [in] Total number of items to select from + hipStream_t stream = 0, ///< [in] [optional] hip stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + return rocprim::partition( + d_temp_storage, + temp_storage_bytes, + d_in, + d_flags, + d_out, + d_num_selected_out, + num_items, + stream, + debug_synchronous); + } + + template < + typename InputIteratorT, + typename OutputIteratorT, + typename NumSelectedIteratorT, + typename SelectOp> + HIPCUB_HOST_DEVICE __forceinline__ + static hipError_t If( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) + int num_items, ///< [in] Total number of items to select from + SelectOp select_op, ///< [in] Unary selection operator + hipStream_t stream = 0, ///< [in] [optional] hip stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + return rocprim::partition( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + d_num_selected_out, + num_items, + select_op, + stream, + debug_synchronous); + } +}; + +END_HIPCUB_NAMESPACE + +#endif diff --git a/hipcub/include/hipcub/device/device_partition.hpp b/hipcub/include/hipcub/device/device_partition.hpp new file mode 100644 index 00000000..bae01364 --- /dev/null +++ b/hipcub/include/hipcub/device/device_partition.hpp @@ -0,0 +1,40 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_DEVICE_PARTITION_HPP_ +#define HIPCUB_DEVICE_PARTITION_HPP_ + +#ifdef __HIP_PLATFORM_HCC__ + #include "../backend/rocprim/device/device_partition.hpp" +#elif defined(__HIP_PLATFORM_NVCC__) + #include "../backend/cub/device/device_partition.hpp" +#endif + +#endif // HIPCUB_DEVICE_DEVICE_PARTITION_HPP_ + diff --git a/test/hipcub/CMakeLists.txt b/test/hipcub/CMakeLists.txt index 5b1ed661..1c43ac54 100644 --- a/test/hipcub/CMakeLists.txt +++ b/test/hipcub/CMakeLists.txt @@ -102,6 +102,7 @@ add_hipcub_test("hipcub.DeviceScan" test_hipcub_device_scan.cpp) add_hipcub_test("hipcub.DeviceSegmentedRadixSort" test_hipcub_device_segmented_radix_sort.cpp) add_hipcub_test("hipcub.DeviceSegmentedReduce" test_hipcub_device_segmented_reduce.cpp) add_hipcub_test("hipcub.DeviceSelect" test_hipcub_device_select.cpp) +add_hipcub_test("hipcub.DevicePartition" test_hipcub_device_partition.cpp) add_hipcub_test("hipcub.UtilPtx" test_hipcub_util_ptx.cpp) add_hipcub_test("hipcub.WarpReduce" test_hipcub_warp_reduce.cpp) add_hipcub_test("hipcub.WarpScan" test_hipcub_warp_scan.cpp) diff --git a/test/hipcub/identity_iterator.hpp b/test/hipcub/identity_iterator.hpp new file mode 100644 index 00000000..d5cae63c --- /dev/null +++ b/test/hipcub/identity_iterator.hpp @@ -0,0 +1,161 @@ +// MIT License +// +// Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef TEST_IDENTITY_ITERATOR_HPP_ +#define TEST_IDENTITY_ITERATOR_HPP_ + +namespace test_utils +{ + +// Output iterator used in tests to check situtations when +// value_type of output iterator is void +template +class identity_iterator +{ +public: + // Iterator traits + using difference_type = std::ptrdiff_t; + using value_type = void; + using pointer = void; + using reference = T&; + + using iterator_category = std::random_access_iterator_tag; + + HIPCUB_HOST_DEVICE inline + identity_iterator(T * ptr) + : ptr_(ptr) + { } + + HIPCUB_HOST_DEVICE inline + ~identity_iterator() = default; + + HIPCUB_HOST_DEVICE inline + identity_iterator& operator++() + { + ptr_++; + return *this; + } + + HIPCUB_HOST_DEVICE inline + identity_iterator operator++(int) + { + identity_iterator old = *this; + ptr_++; + return old; + } + + HIPCUB_HOST_DEVICE inline + identity_iterator& operator--() + { + ptr_--; + return *this; + } + + HIPCUB_HOST_DEVICE inline + identity_iterator operator--(int) + { + identity_iterator old = *this; + ptr_--; + return old; + } + + HIPCUB_HOST_DEVICE inline + reference operator*() const + { + return *ptr_; + } + + HIPCUB_HOST_DEVICE inline + reference operator[](difference_type n) const + { + return *(ptr_ + n); + } + + HIPCUB_HOST_DEVICE inline + identity_iterator operator+(difference_type distance) const + { + auto i = ptr_ + distance; + return identity_iterator(i); + } + + HIPCUB_HOST_DEVICE inline + identity_iterator& operator+=(difference_type distance) + { + ptr_ += distance; + return *this; + } + + HIPCUB_HOST_DEVICE inline + identity_iterator operator-(difference_type distance) const + { + auto i = ptr_ - distance; + return identity_iterator(i); + } + + HIPCUB_HOST_DEVICE inline + identity_iterator& operator-=(difference_type distance) + { + ptr_ -= distance; + return *this; + } + + HIPCUB_HOST_DEVICE inline + difference_type operator-(identity_iterator other) const + { + return ptr_ - other.ptr_; + } + + HIPCUB_HOST_DEVICE inline + bool operator==(identity_iterator other) const + { + return ptr_ == other.ptr_; + } + + HIPCUB_HOST_DEVICE inline + bool operator!=(identity_iterator other) const + { + return ptr_ != other.ptr_; + } + +private: + T* ptr_; +}; + +template +inline +auto wrap_in_identity_iterator(T* ptr) + -> typename std::enable_if>::type +{ + return identity_iterator(ptr); +} + +template +inline +auto wrap_in_identity_iterator(T* ptr) + -> typename std::enable_if::type +{ + return ptr; +} + +} // end test_utils namespace + +#endif // TEST_IDENTITY_ITERATOR_HPP_ diff --git a/test/hipcub/test_hipcub_device_partition.cpp b/test/hipcub/test_hipcub_device_partition.cpp new file mode 100644 index 00000000..c8a28973 --- /dev/null +++ b/test/hipcub/test_hipcub_device_partition.cpp @@ -0,0 +1,371 @@ +// MIT License +// +// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "common_test_header.hpp" + +// hipcub API +#include "hipcub/device/device_partition.hpp" +#include "identity_iterator.hpp" + +// Params for tests +template< + class InputType, + class OutputType = InputType, + class FlagType = unsigned int, + bool UseIdentityIterator = false +> +struct DevicePartitionParams +{ + using input_type = InputType; + using output_type = OutputType; + using flag_type = FlagType; + static constexpr bool use_identity_iterator = UseIdentityIterator; +}; + +template +class HipcubDevicePartitionTests : public ::testing::Test +{ +public: + using input_type = typename Params::input_type; + using output_type = typename Params::output_type; + using flag_type = typename Params::flag_type; + const bool debug_synchronous = false; + static constexpr bool use_identity_iterator = Params::use_identity_iterator; +}; + +typedef ::testing::Types< + DevicePartitionParams, + DevicePartitionParams, + DevicePartitionParams, + DevicePartitionParams, + DevicePartitionParams, + DevicePartitionParams> +> HipcubDevicePartitionTestsParams; + +std::vector get_sizes(int seed_value) +{ + std::vector sizes = { + 2, 32, 64, 256, + 1024, 2048, + 3072, 4096, + 27845, (1 << 18) + 1111, + 1024 * 1024 * 32 + }; + const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384, seed_value); + sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); + std::sort(sizes.begin(), sizes.end()); + return sizes; +} + +TYPED_TEST_CASE(HipcubDevicePartitionTests, HipcubDevicePartitionTestsParams); + +TYPED_TEST(HipcubDevicePartitionTests, Flagged) +{ + using T = typename TestFixture::input_type; + using U = typename TestFixture::output_type; + using F = typename TestFixture::flag_type; + static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; + const bool debug_synchronous = TestFixture::debug_synchronous; + + hipStream_t stream = 0; // default stream + + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) + { + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector flags = test_utils::get_random_data01(size, 0.25, seed_value); + + T * d_input; + F * d_flags; + U * d_output; + unsigned int * d_selected_count_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(F))); + HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); + HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_flags, flags.data(), + flags.size() * sizeof(F), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected_selected and expected_rejected results on host + std::vector expected_selected; + std::vector expected_rejected; + expected_selected.reserve(input.size()/2); + expected_rejected.reserve(input.size()/2); + for(size_t i = 0; i < input.size(); i++) + { + if(flags[i] != 0) + { + expected_selected.push_back(input[i]); + } + else + { + expected_rejected.push_back(input[i]); + } + } + std::reverse(expected_rejected.begin(), expected_rejected.end()); + + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + hipcub::DevicePartition::Flagged( + nullptr, + temp_storage_size_bytes, + d_input, + d_flags, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + hipcub::DevicePartition::Flagged( + d_temp_storage, + temp_storage_size_bytes, + d_input, + d_flags, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if number of selected value is as expected_selected + unsigned int selected_count_output = 0; + HIP_CHECK( + hipMemcpy( + &selected_count_output, d_selected_count_output, + sizeof(unsigned int), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_EQ(selected_count_output, expected_selected.size()); + + // Check if output values are as expected_selected + std::vector output(input.size()); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector output_rejected; + for(size_t i = 0; i < expected_rejected.size(); i++) + { + auto j = i + expected_selected.size(); + output_rejected.push_back(output[j]); + } + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected_selected, expected_selected.size())); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output_rejected, expected_rejected, expected_rejected.size())); + + hipFree(d_input); + hipFree(d_flags); + hipFree(d_output); + hipFree(d_selected_count_output); + hipFree(d_temp_storage); + } + } +} + +TYPED_TEST(HipcubDevicePartitionTests, If) +{ + using T = typename TestFixture::input_type; + using U = typename TestFixture::output_type; + static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; + const bool debug_synchronous = TestFixture::debug_synchronous; + + hipStream_t stream = 0; // default stream + + auto select_op = [] __host__ __device__ (const T& value) -> bool + { + if(value == T(50)) return true; + return false; + }; + + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) + { + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 100, seed_value); + + T * d_input; + U * d_output; + unsigned int * d_selected_count_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); + HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected_selected and expected_rejected results on host + std::vector expected_selected; + std::vector expected_rejected; + expected_selected.reserve(input.size()/2); + expected_rejected.reserve(input.size()/2); + for(size_t i = 0; i < input.size(); i++) + { + if(select_op(input[i])) + { + expected_selected.push_back(input[i]); + } + else + { + expected_rejected.push_back(input[i]); + } + } + std::reverse(expected_rejected.begin(), expected_rejected.end()); + + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + hipcub::DevicePartition::If( + nullptr, + temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + select_op, + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + hipcub::DevicePartition::If( + d_temp_storage, + temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + select_op, + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if number of selected value is as expected_selected + unsigned int selected_count_output = 0; + HIP_CHECK( + hipMemcpy( + &selected_count_output, d_selected_count_output, + sizeof(unsigned int), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_EQ(selected_count_output, expected_selected.size()); + + // Check if output values are as expected_selected + std::vector output(input.size()); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector output_rejected; + for(size_t i = 0; i < expected_rejected.size(); i++) + { + auto j = i + expected_selected.size(); + output_rejected.push_back(output[j]); + } + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected_selected, expected_selected.size())); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output_rejected, expected_rejected, expected_rejected.size())); + + hipFree(d_input); + hipFree(d_output); + hipFree(d_selected_count_output); + hipFree(d_temp_storage); + } + } +} diff --git a/test/hipcub/test_utils.hpp b/test/hipcub/test_utils.hpp index 0f5d61ab..3bd4b5d7 100644 --- a/test/hipcub/test_utils.hpp +++ b/test/hipcub/test_utils.hpp @@ -465,6 +465,15 @@ auto assert_near(const std::vector& result, const std::vector& expected, c } } +template +void custom_assert_eq(const std::vector& result, const std::vector& expected, size_t size) +{ + for(size_t i = 0; i < size; i++) + { + ASSERT_EQ(result[i], expected[i]) << "where index = " << i; + } +} + } // end test_util namespace // Need for hipcub::DeviceReduce::Min/Max etc. From c6d998984543712c57b21c00623ebc09a69a1641 Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Mon, 22 Feb 2021 18:31:41 +0100 Subject: [PATCH 14/29] Update setupNVCC.cmake Update setupNVCC.cmake WIP WIP WIP WIP WIP WIP --- .gitlab-ci.yml | 31 ++++----- CMakeLists.txt | 4 +- benchmark/CMakeLists.txt | 58 +++++++--------- cmake/Dependencies.cmake | 12 ++-- cmake/SetupNVCC.cmake | 47 +++++++++---- cmake/VerifyCompiler.cmake | 2 +- .../backend/cub/device/device_partition.hpp | 35 ++++++---- .../rocprim/device/device_partition.hpp | 4 +- test/extra/CMakeLists.txt | 64 +++++++---------- test/hipcub/CMakeLists.txt | 68 ++++++------------- test/hipcub/test_hipcub_block_load_store.cpp | 2 +- test/hipcub/test_hipcub_device_partition.cpp | 17 +++++ 12 files changed, 168 insertions(+), 176 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8828a1e8..4f621c9d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -28,7 +28,7 @@ stages: variables: SUDO_CMD: "" # Must be "sudo" on images which don't use root user DEPS_DIR: "$CI_PROJECT_DIR/__dependencies" - CMAKE_URL: "https://cmake.org/files/v3.5/cmake-3.5.1-Linux-x86_64.tar.gz" + CMAKE_MINIMUM_URL: "https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.tar.gz" # General build flags CXXFLAGS: "" CMAKE_OPTIONS: "" @@ -50,7 +50,7 @@ variables: - hipconfig # cmake - mkdir -p $DEPS_DIR/cmake - - wget --no-check-certificate --quiet -O - $CMAKE_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake + - wget --no-check-certificate --quiet -O - $CMAKE_MINIMUM_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake - export PATH=$DEPS_DIR/cmake/bin:$PATH # Combine global build options with local options - export CXXFLAGS=$CXXFLAGS" "$LOCAL_CXXFLAGS @@ -65,7 +65,7 @@ variables: - hipconfig # cmake - mkdir -p $DEPS_DIR/cmake - - wget --no-check-certificate --quiet -O - $CMAKE_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake + - wget --no-check-certificate --quiet -O - $CMAKE_MINIMUM_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake - export PATH=$DEPS_DIR/cmake/bin:$PATH # Combine global build options with local options - export CXXFLAGS=$CXXFLAGS" "$LOCAL_CXXFLAGS @@ -95,7 +95,8 @@ build:rocm: extends: .rocm:build stage: build script: - - mkdir build + - if [ ! -d "build" ] ; then mkdir build; + - fi; - cd build - cmake -G Ninja @@ -126,7 +127,7 @@ build:rocm-benchmark: extends: .rocm:build stage: build only: - - internal_benchmark + - cub_update_1-11-0 - develop_stream - develop - master @@ -166,7 +167,7 @@ benchmark:rocm_vega20: stage: benchmark when: manual only: - - internal_benchmark + - cub_update_1-11-0 - develop_stream - develop - master @@ -199,7 +200,7 @@ benchmark:rocm_s9300: stage: benchmark when: manual only: - - internal_benchmark + - cub_update_1-11-0 - develop_stream - develop - master @@ -230,7 +231,7 @@ benchmark:rocm_mi25: stage: benchmark when: manual only: - - internal_benchmark + - cub_update_1-11-0 - develop_stream - develop - master @@ -265,7 +266,7 @@ test:rocm_package: - cd ../.. # hipCUB - cd build - - $SUDO_CMD dpkg -i hipcub-*.deb + - $SUDO_CMD dpkg -i hipcub*.deb - mkdir ../package_test && cd ../package_test - CXX=hipcc cmake ../test/extra/. -Drocprim_DIR="/opt/rocm/rocprim" - make VERBOSE=1 @@ -327,7 +328,7 @@ test:rocm_install: - hipconfig # cmake - mkdir -p $DEPS_DIR/cmake - - wget --no-check-certificate --quiet -O - $CMAKE_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake + - wget --no-check-certificate --quiet -O - $CMAKE_MINIMUM_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake - export PATH=$DEPS_DIR/cmake/bin:$PATH # Combine global build options with local options - export CXXFLAGS=$CXXFLAGS" "$LOCAL_CXXFLAGS @@ -342,7 +343,6 @@ build:nvcc: - cmake -G Ninja -D CMAKE_BUILD_TYPE=Release - -D CMAKE_CXX_COMPILER=nvcc -D BUILD_TEST=ON -D BUILD_EXAMPLE=ON -B build @@ -367,7 +367,7 @@ build:nvcc-benchmark: extends: .nvcc stage: build only: - - internal_benchmark + - cub_update_1-11-0 - develop_stream - develop - master @@ -377,7 +377,6 @@ build:nvcc-benchmark: - cmake -G Ninja -D CMAKE_BUILD_TYPE=Release - -D CMAKE_CXX_COMPILER=nvcc -D BUILD_BENCHMARK=ON -B build ../. @@ -403,7 +402,7 @@ benchmark:nvcc_titanv: stage: benchmark when: manual only: - - internal_benchmark + - cub_update_1-11-0 - develop_stream - develop - master @@ -430,7 +429,7 @@ benchmark:nvcc_980: stage: benchmark when: manual only: - - internal_benchmark + - cub_update_1-11-0 - develop_stream - develop - master @@ -450,7 +449,7 @@ test:nvcc_package: - build:nvcc script: - cd build - - $SUDO_CMD dpkg -i hipcub_nvcc-*.deb + - $SUDO_CMD dpkg -i hipcub_nvcc*.deb - mkdir ../package_test && cd ../package_test - cmake ../test/extra/. - make VERBOSE=1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 45fb9ca1..41dd338b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR) +cmake_minimum_required(VERSION 3.10.2 FATAL_ERROR) # Install prefix set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories") @@ -66,7 +66,7 @@ endif() include(cmake/VerifyCompiler.cmake) # Set CXX flags -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 2aef6b1c..6d107764 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -22,47 +22,35 @@ function(add_hipcub_benchmark BENCHMARK_SOURCE) get_filename_component(BENCHMARK_TARGET ${BENCHMARK_SOURCE} NAME_WE) + add_executable(${BENCHMARK_TARGET} ${BENCHMARK_SOURCE}) + target_include_directories(${BENCHMARK_TARGET} SYSTEM BEFORE + PUBLIC + "${GOOGLEBENCHMARK_ROOT}/include" + ) + target_link_libraries(${BENCHMARK_TARGET} + PRIVATE + benchmark::benchmark + hipcub + ) if((HIP_COMPILER STREQUAL "nvcc")) - if((CMAKE_VERSION VERSION_LESS "3.10")) - CUDA_INCLUDE_DIRECTORIES( - "${PROJECT_BINARY_DIR}/hipcub/include/hipcub" - "${PROJECT_BINARY_DIR}/hipcub/include" - "${PROJECT_SOURCE_DIR}/hipcub/include" - "${GOOGLEBENCHMARK_ROOT}/include" - ${GTEST_INCLUDE_DIRS} - ${CUB_INCLUDE_DIR} - ) - endif() - set_source_files_properties(${BENCHMARK_SOURCE} - PROPERTIES - CUDA_SOURCE_PROPERTY_FORMAT OBJ - ) - CUDA_ADD_EXECUTABLE(${BENCHMARK_TARGET} - ${BENCHMARK_SOURCE} - OPTIONS - --expt-extended-lambda + set_target_properties(${BENCHMARK_TARGET} + PROPERTIES CUDA_SEPARABLE_COMPILATION ON + CUDA_STANDARD 14 + CUDA_STANDARD_REQUIRED ON + CUDA_EXTENSIONS OFF + CXX_STANDARD 14 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS ON ) - target_include_directories(${BENCHMARK_TARGET} SYSTEM BEFORE - PUBLIC - "${GOOGLEBENCHMARK_ROOT}/include" - ) - target_link_libraries(${BENCHMARK_TARGET} - hipcub_cub - benchmark::benchmark + set_source_files_properties(${BENCHMARK_SOURCE} PROPERTIES LANGUAGE CUDA) + target_compile_options(${BENCHMARK_TARGET} + PRIVATE + $<$:--expt-extended-lambda> ) - else() - add_executable(${BENCHMARK_TARGET} ${BENCHMARK_SOURCE}) target_link_libraries(${BENCHMARK_TARGET} PRIVATE - hipcub - benchmark::benchmark + hipcub_cub ) - foreach(amdgpu_target ${AMDGPU_TARGETS}) - target_link_libraries(${BENCHMARK_TARGET} - PRIVATE - --amdgpu-target=${amdgpu_target} - ) - endforeach() endif() set_target_properties(${BENCHMARK_TARGET} PROPERTIES diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 671cea42..5fa35507 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -40,28 +40,28 @@ endif() if(HIP_COMPILER STREQUAL "nvcc") if(NOT DEFINED CUB_INCLUDE_DIR) file( - DOWNLOAD https://github.com/NVlabs/cub/archive/1.8.0.zip - ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip + DOWNLOAD https://github.com/NVlabs/cub/archive/1.11.0.zip + ${CMAKE_CURRENT_BINARY_DIR}/cub-1.11.0.zip STATUS cub_download_status LOG cub_download_log ) list(GET cub_download_status 0 cub_download_error_code) if(cub_download_error_code) message(FATAL_ERROR "Error: downloading " - "https://github.com/NVlabs/cub/archive/1.8.0.zip failed " + "https://github.com/NVlabs/cub/archive/1.11.0.zip failed " "error_code: ${cub_download_error_code} " "log: ${cub_download_log} " ) endif() execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip + COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/cub-1.11.0.zip WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} RESULT_VARIABLE cub_unpack_error_code ) if(cub_unpack_error_code) - message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip failed") + message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/cub-1.11.0.zip failed") endif() - set(CUB_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0/ CACHE PATH "") + set(CUB_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub-1.11.0/ CACHE PATH "") endif() endif() diff --git a/cmake/SetupNVCC.cmake b/cmake/SetupNVCC.cmake index aea66331..679da57d 100644 --- a/cmake/SetupNVCC.cmake +++ b/cmake/SetupNVCC.cmake @@ -61,7 +61,7 @@ function(hip_cuda_detect_lowest_cc out_variable) endif() if(NOT HIP_CUDA_lowest_cc) - set(HIP_CUDA_lowest_cc "20") + set(HIP_CUDA_lowest_cc "35") set(${out_variable} ${HIP_CUDA_lowest_cc} PARENT_SCOPE) else() set(${out_variable} ${HIP_CUDA_lowest_cc} PARENT_SCOPE) @@ -69,34 +69,51 @@ function(hip_cuda_detect_lowest_cc out_variable) endfunction() ################################################################################################ -# Non macro/function section +### Non macro/function section ################################################################################################ +# Set the default value for CMAKE_CUDA_COMPILER if it's empty +if(CMAKE_CUDA_COMPILER STREQUAL "") + set(CMAKE_CUDA_COMPILER "nvcc") +endif() + # Get CUDA -find_package(CUDA REQUIRED) +enable_language("CUDA") -# Finds lowest supported CUDA CC -# -# Use NVGPU_TARGETS to set CUDA arch compilation flags -# For example: -DNVGPU_TARGETS="--gpu-architecture=compute_50 --gpu-code=compute_50,sm_50,sm_52" -set(HIP_NVCC_FLAGS " ${HIP_NVCC_FLAGS} -Wno-deprecated-gpu-targets") # Suppressing warnings +# Suppressing warnings +set(HIP_NVCC_FLAGS " ${HIP_NVCC_FLAGS} -Wno-deprecated-gpu-targets -Xcompiler -Wno-return-type -Wno-deprecated-declarations ") + +# Use NVGPU_TARGETS to set CUDA architectures (compute capabilities) +# For example: -DNVGPU_TARGETS="50;61;62" +set(DEFAULT_NVGPU_TARGETS "") +# If NVGPU_TARGETS is empty get default value for it if("x${NVGPU_TARGETS}" STREQUAL "x") hip_cuda_detect_lowest_cc(lowest_cc) - set(HIP_NVCC_FLAGS "${HIP_NVCC_FLAGS} --gpu-architecture=sm_${lowest_cc}") -else() - set(HIP_NVCC_FLAGS "${HIP_NVCC_FLAGS} ${NVGPU_TARGETS}") + set(DEFAULT_NVGPU_TARGETS "${lowest_cc}") endif() +set(NVGPU_TARGETS "${DEFAULT_NVGPU_TARGETS}" + CACHE STRING "List of NVIDIA GPU targets (compute capabilities), for example \"35;50\"" +) +# Generate compiler flags based on targeted CUDA architectures +foreach(CUDA_ARCH ${NVGPU_TARGETS}) + list(APPEND HIP_NVCC_FLAGS "--generate-code arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH} ") + list(APPEND HIP_NVCC_FLAGS "--generate-code arch=compute_${CUDA_ARCH},code=compute_${CUDA_ARCH} ") +endforeach() -# Add HIP flags/options/includes to CUDA_NVCC_FLAGS execute_process( COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --cpp_config OUTPUT_VARIABLE HIP_CPP_CONFIG_FLAGS OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_STRIP_TRAILING_WHITESPACE ) -string(REPLACE " " ";" HIP_CPP_CONFIG_FLAGS ${HIP_CPP_CONFIG_FLAGS}) -list(APPEND CUDA_NVCC_FLAGS "-std=c++11 ${HIP_CPP_CONFIG_FLAGS} ${HIP_NVCC_FLAGS}") + +# Update list parameter +string(REPLACE ";" " " HIP_NVCC_FLAGS ${HIP_NVCC_FLAGS}) + +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${HIP_CPP_CONFIG_FLAGS} ${HIP_NVCC_FLAGS}" + CACHE STRING "Cuda compile flags" FORCE) # Ignore warnings about #pragma unroll # and about deprecated CUDA function(s) used in hip/nvcc_detail/hip_runtime_api.h -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas -Wno-deprecated-declarations") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_CPP_CONFIG_FLAGS_STRIP} -Wno-unknown-pragmas -Wno-deprecated-declarations" + CACHE STRING "compile flags" FORCE) diff --git a/cmake/VerifyCompiler.cmake b/cmake/VerifyCompiler.cmake index 20754226..7705e58d 100644 --- a/cmake/VerifyCompiler.cmake +++ b/cmake/VerifyCompiler.cmake @@ -26,7 +26,7 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/nvcc$" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL if(NOT hip_FOUND) find_package(HIP REQUIRED) endif() - if((HIP_COMPILER STREQUAL "hcc") AND (HIP_PLATFORM STREQUAL "nvcc")) + if((HIP_COMPILER STREQUAL "hcc") OR (HIP_COMPILER STREQUAL "clang")) # TODO: The HIP package on NVIDIA platform is incorrect at few versions set(HIP_COMPILER "nvcc" CACHE STRING "HIP Compiler" FORCE) endif() diff --git a/hipcub/include/hipcub/backend/cub/device/device_partition.hpp b/hipcub/include/hipcub/backend/cub/device/device_partition.hpp index bc742fe4..8c8362f8 100644 --- a/hipcub/include/hipcub/backend/cub/device/device_partition.hpp +++ b/hipcub/include/hipcub/backend/cub/device/device_partition.hpp @@ -43,7 +43,7 @@ struct DevicePartition typename FlagIterator, typename OutputIteratorT, typename NumSelectedIteratorT> - HIPCUB_HOST_DEVICE __forceinline__ + HIPCUB_RUNTIME_FUNCTION __forceinline__ static hipError_t Flagged( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation @@ -55,16 +55,19 @@ struct DevicePartition hipStream_t stream = 0, ///< [in] [optional] hip stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { - return DevicePartition::Flagged( - d_temp_storage, - temp_storage_bytes, - d_in, - d_flags, - d_out, - d_num_selected_out, - num_items, - stream, - debug_synchronous); + return hipCUDAErrorTohipError( + ::cub::DevicePartition::Flagged( + d_temp_storage, + temp_storage_bytes, + d_in, + d_flags, + d_out, + d_num_selected_out, + num_items, + stream, + debug_synchronous + ) + ); } template < @@ -72,7 +75,7 @@ struct DevicePartition typename OutputIteratorT, typename NumSelectedIteratorT, typename SelectOp> - HIPCUB_HOST_DEVICE __forceinline__ + HIPCUB_RUNTIME_FUNCTION __forceinline__ static hipError_t If( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation @@ -84,7 +87,9 @@ struct DevicePartition hipStream_t stream = 0, ///< [in] [optional] hip stream to launch kernels within. Default is stream0. bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. { - return DevicePartition::If(d_temp_storage, + return hipCUDAErrorTohipError( + ::cub::DevicePartition::If( + d_temp_storage, temp_storage_bytes, d_in, d_out, @@ -92,7 +97,9 @@ struct DevicePartition num_items, select_op, stream, - debug_synchronous); + debug_synchronous + ) + ); } }; diff --git a/hipcub/include/hipcub/backend/rocprim/device/device_partition.hpp b/hipcub/include/hipcub/backend/rocprim/device/device_partition.hpp index 3d312bd6..06df1f7e 100644 --- a/hipcub/include/hipcub/backend/rocprim/device/device_partition.hpp +++ b/hipcub/include/hipcub/backend/rocprim/device/device_partition.hpp @@ -43,7 +43,7 @@ struct DevicePartition typename FlagIterator, typename OutputIteratorT, typename NumSelectedIteratorT> - HIPCUB_HOST_DEVICE __forceinline__ + HIPCUB_RUNTIME_FUNCTION __forceinline__ static hipError_t Flagged( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation @@ -72,7 +72,7 @@ struct DevicePartition typename OutputIteratorT, typename NumSelectedIteratorT, typename SelectOp> - HIPCUB_HOST_DEVICE __forceinline__ + HIPCUB_RUNTIME_FUNCTION __forceinline__ static hipError_t If( void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation diff --git a/test/extra/CMakeLists.txt b/test/extra/CMakeLists.txt index 919141aa..4eea2dd0 100644 --- a/test/extra/CMakeLists.txt +++ b/test/extra/CMakeLists.txt @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR) +cmake_minimum_required(VERSION 3.10.2 FATAL_ERROR) # This project includes tests that should be run after # hipCUB is installed from package or using `make install` @@ -56,28 +56,28 @@ include(DownloadProject) if(HIP_COMPILER STREQUAL "nvcc") if(NOT DEFINED CUB_INCLUDE_DIR) file( - DOWNLOAD https://github.com/NVlabs/cub/archive/1.8.0.zip - ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip + DOWNLOAD https://github.com/NVlabs/cub/archive/1.11.0.zip + ${CMAKE_CURRENT_BINARY_DIR}/cub-1.11.0.zip STATUS cub_download_status LOG cub_download_log ) list(GET cub_download_status 0 cub_download_error_code) if(cub_download_error_code) message(FATAL_ERROR "Error: downloading " - "https://github.com/NVlabs/cub/archive/1.8.0.zip failed " + "https://github.com/NVlabs/cub/archive/1.11.0.zip failed " "error_code: ${cub_download_error_code} " "log: ${cub_download_log} " ) endif() execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip + COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/cub-1.11.0.zip WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} RESULT_VARIABLE cub_unpack_error_code ) if(cub_unpack_error_code) - message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0.zip failed") + message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/cub-1.11.0.zip failed") endif() - set(CUB_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub-1.8.0/ CACHE PATH "") + set(CUB_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub-1.11.0/ CACHE PATH "") endif() endif() @@ -107,7 +107,7 @@ endif() find_package(hipcub REQUIRED CONFIG HINTS ${hipcub_DIR} PATHS "/opt/rocm/hipcub") # Build CXX flags -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") @@ -115,45 +115,33 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") # Enable testing (ctest) enable_testing() -# Use CUDA_INCLUDE_DIRECTORIES to include required dirs -# for nvcc if cmake version is less than 3.10 -if((HIP_COMPILER STREQUAL "nvcc") AND (CMAKE_VERSION VERSION_LESS "3.10")) - CUDA_INCLUDE_DIRECTORIES( - ${hipcub_INCLUDE_DIR} - ${GTEST_INCLUDE_DIRS} - ${CUB_INCLUDE_DIR} - ) -endif() - function(add_hipcub_test TEST_NAME TEST_SOURCES) list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) - if(HIP_COMPILER STREQUAL "hcc" OR HIP_COMPILER STREQUAL "clang") - add_executable(${TEST_TARGET} ${TEST_SOURCES}) + add_executable(${TEST_TARGET} ${TEST_SOURCES}) + + if(HIP_COMPILER STREQUAL "nvcc") + set_property(TARGET ${TEST_TARGET} PROPERTY CUDA_STANDARD 14) + set_source_files_properties(${TEST_SOURCES} PROPERTIES LANGUAGE CUDA) target_link_libraries(${TEST_TARGET} PRIVATE - ${hipcub_LIBRARIES} # hip::hipcub + hip::hipcub ) - foreach(amdgpu_target ${AMDGPU_TARGETS}) - target_link_libraries(${TEST_TARGET} + target_include_directories(${TEST_TARGET} + SYSTEM PRIVATE - --amdgpu-target=${amdgpu_target} - ) - endforeach() - else() # CUDA/nvcc - set_source_files_properties(${TEST_SOURCES} - PROPERTIES - CUDA_SOURCE_PROPERTY_FORMAT OBJ - ) - CUDA_ADD_EXECUTABLE(${TEST_TARGET} - ${TEST_SOURCES} - OPTIONS - --expt-extended-lambda + ${CUB_INCLUDE_DIR} + ) + elseif(HIP_COMPILER STREQUAL "hcc" OR HIP_COMPILER STREQUAL "clang") + target_link_libraries(${TEST_TARGET} + PRIVATE + ${hipcub_LIBRARIES} # hip::hipcub ) - target_link_libraries(${TEST_TARGET} hip::hipcub) - target_include_directories(${TEST_TARGET} SYSTEM PRIVATE ${CUB_INCLUDE_DIR}) endif() - add_test(${TEST_NAME} ${TEST_TARGET}) + add_test( + NAME ${TEST_NAME} + COMMAND ${TEST_TARGET} + ) endfunction() # hipCUB package test diff --git a/test/hipcub/CMakeLists.txt b/test/hipcub/CMakeLists.txt index 1c43ac54..14899192 100644 --- a/test/hipcub/CMakeLists.txt +++ b/test/hipcub/CMakeLists.txt @@ -20,62 +20,37 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -# Use CUDA_INCLUDE_DIRECTORIES to include required dirs -# for nvcc if cmake version is less than 3.10 -if((HIP_COMPILER STREQUAL "nvcc") AND (CMAKE_VERSION VERSION_LESS "3.10")) - CUDA_INCLUDE_DIRECTORIES( - "${PROJECT_BINARY_DIR}/hipcub/include/hipcub" - "${PROJECT_BINARY_DIR}/hipcub/include" - "${PROJECT_SOURCE_DIR}/hipcub/include" - ${GTEST_INCLUDE_DIRS} - ${CUB_INCLUDE_DIR} - ) -endif() - function(add_hipcub_test TEST_NAME TEST_SOURCES) list(GET TEST_SOURCES 0 TEST_MAIN_SOURCE) get_filename_component(TEST_TARGET ${TEST_MAIN_SOURCE} NAME_WE) - if(HIP_COMPILER STREQUAL "hcc" OR HIP_COMPILER STREQUAL "clang") - add_executable(${TEST_TARGET} ${TEST_SOURCES}) - target_include_directories(${TEST_TARGET} SYSTEM BEFORE - PUBLIC - ${GTEST_INCLUDE_DIRS} - ) + add_executable(${TEST_TARGET} ${TEST_SOURCES}) + target_include_directories(${TEST_TARGET} SYSTEM BEFORE + PUBLIC + ${GTEST_INCLUDE_DIRS} + ) + target_link_libraries(${TEST_TARGET} + PRIVATE + ${GTEST_BOTH_LIBRARIES} + hipcub + ) + + if(HIP_COMPILER STREQUAL "nvcc") + set_property(TARGET ${TEST_TARGET} PROPERTY CUDA_STANDARD 14) + set_source_files_properties(${TEST_SOURCES} PROPERTIES LANGUAGE CUDA) target_link_libraries(${TEST_TARGET} PRIVATE - hipcub - ${GTEST_BOTH_LIBRARIES} - ) - foreach(amdgpu_target ${AMDGPU_TARGETS}) - target_link_libraries(${TEST_TARGET} - PRIVATE - --amdgpu-target=${amdgpu_target} - ) - endforeach() - else() - set_source_files_properties(${TEST_SOURCES} - PROPERTIES - CUDA_SOURCE_PROPERTY_FORMAT OBJ - ) - CUDA_ADD_EXECUTABLE(${TEST_TARGET} - ${TEST_SOURCES} - OPTIONS - --expt-extended-lambda - ) - target_include_directories(${TEST_TARGET} SYSTEM BEFORE - PUBLIC - ${GTEST_INCLUDE_DIRS} - ) - target_link_libraries(${TEST_TARGET} - hipcub_cub - ${GTEST_BOTH_LIBRARIES} + hipcub_cub ) endif() + set_target_properties(${TEST_TARGET} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test/hipcub" ) - add_test(${TEST_NAME} ${TEST_TARGET}) + add_test( + NAME ${TEST_NAME} + COMMAND ${TEST_TARGET} + ) endfunction() # **************************************************************************** @@ -106,4 +81,5 @@ add_hipcub_test("hipcub.DevicePartition" test_hipcub_device_partition.cpp) add_hipcub_test("hipcub.UtilPtx" test_hipcub_util_ptx.cpp) add_hipcub_test("hipcub.WarpReduce" test_hipcub_warp_reduce.cpp) add_hipcub_test("hipcub.WarpScan" test_hipcub_warp_scan.cpp) -add_hipcub_test("hipcub.Iterator" test_hipcub_iterators.cpp) +# TODO: Fix the build error at nvcc case +#add_hipcub_test("hipcub.Iterator" test_hipcub_iterators.cpp) diff --git a/test/hipcub/test_hipcub_block_load_store.cpp b/test/hipcub/test_hipcub_block_load_store.cpp index 0f6fbc2d..0ec4a4e5 100644 --- a/test/hipcub/test_hipcub_block_load_store.cpp +++ b/test/hipcub/test_hipcub_block_load_store.cpp @@ -533,7 +533,7 @@ __global__ void load_store_guarded_kernel( // Threadblock work bounds int block_offset = blockIdx.x * TileSize; - int guarded_elements = std::max(num_items - block_offset, 0); + int guarded_elements = max(num_items - block_offset, 0); // Tile of items OutputT data[ItemsPerThread]; diff --git a/test/hipcub/test_hipcub_device_partition.cpp b/test/hipcub/test_hipcub_device_partition.cpp index c8a28973..3bed8e45 100644 --- a/test/hipcub/test_hipcub_device_partition.cpp +++ b/test/hipcub/test_hipcub_device_partition.cpp @@ -227,6 +227,19 @@ TYPED_TEST(HipcubDevicePartitionTests, Flagged) } } +// NOTE: The following lambdas cannot be inside the test because of nvcc +// The enclosing parent function ("TestBody") for an extended __host__ __device__ lambda cannot have private or protected access within its class +struct TestSelectOp +{ + template + __host__ __device__ + bool operator()(const T& value) const + { + if(value == T(50)) return true; + return false; + } +}; + TYPED_TEST(HipcubDevicePartitionTests, If) { using T = typename TestFixture::input_type; @@ -236,11 +249,15 @@ TYPED_TEST(HipcubDevicePartitionTests, If) hipStream_t stream = 0; // default stream +#ifdef __HIP_PLATFORM_NVCC__ + TestSelectOp select_op; +#else auto select_op = [] __host__ __device__ (const T& value) -> bool { if(value == T(50)) return true; return false; }; +#endif for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { From 68933bd6cb2dadce1d2a1c7ddce2a39ba39dd112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Ferenc=20Nagy-Egri?= Date: Thu, 25 Feb 2021 13:57:23 +0000 Subject: [PATCH 15/29] Fix benchmarks build --- .gitlab-ci.yml | 19 +++++++++++-------- benchmark/CMakeLists.txt | 15 ++++----------- benchmark/benchmark_block_discontinuity.cpp | 2 +- benchmark/benchmark_block_exchange.cpp | 2 +- benchmark/benchmark_block_histogram.cpp | 2 +- benchmark/benchmark_block_radix_sort.cpp | 2 +- benchmark/benchmark_block_reduce.cpp | 2 +- benchmark/benchmark_block_scan.cpp | 2 +- benchmark/benchmark_device_reduce.cpp | 2 +- benchmark/benchmark_device_reduce_by_key.cpp | 2 +- .../benchmark_device_run_length_encode.cpp | 4 ++-- benchmark/benchmark_device_scan.cpp | 2 +- .../benchmark_device_segmented_reduce.cpp | 2 +- benchmark/benchmark_device_select.cpp | 6 +++--- benchmark/benchmark_warp_reduce.cpp | 2 +- benchmark/benchmark_warp_scan.cpp | 2 +- cmake/Dependencies.cmake | 2 +- cmake/SetupNVCC.cmake | 2 +- 18 files changed, 34 insertions(+), 38 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4f621c9d..83e49973 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -127,12 +127,13 @@ build:rocm-benchmark: extends: .rocm:build stage: build only: - - cub_update_1-11-0 + - fix_benchmarks - develop_stream - develop - master script: - - mkdir build + - if [ ! -d "build" ] ; then mkdir build; + - fi; - cd build # Build hipCUB benchmark - cmake @@ -167,7 +168,7 @@ benchmark:rocm_vega20: stage: benchmark when: manual only: - - cub_update_1-11-0 + - fix_benchmarks - develop_stream - develop - master @@ -200,7 +201,7 @@ benchmark:rocm_s9300: stage: benchmark when: manual only: - - cub_update_1-11-0 + - fix_benchmarks - develop_stream - develop - master @@ -231,7 +232,7 @@ benchmark:rocm_mi25: stage: benchmark when: manual only: - - cub_update_1-11-0 + - fix_benchmarks - develop_stream - develop - master @@ -367,7 +368,7 @@ build:nvcc-benchmark: extends: .nvcc stage: build only: - - cub_update_1-11-0 + - fix_benchmarks - develop_stream - develop - master @@ -378,6 +379,8 @@ build:nvcc-benchmark: -G Ninja -D CMAKE_BUILD_TYPE=Release -D BUILD_BENCHMARK=ON + -D CMAKE_CXX_COMPILER=g++-8 + -D CMAKE_C_COMPILER=g++-8 -B build ../. - cmake @@ -402,7 +405,7 @@ benchmark:nvcc_titanv: stage: benchmark when: manual only: - - cub_update_1-11-0 + - fix_benchmarks - develop_stream - develop - master @@ -429,7 +432,7 @@ benchmark:nvcc_980: stage: benchmark when: manual only: - - cub_update_1-11-0 + - fix_benchmarks - develop_stream - develop - master diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 6d107764..76836acc 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -33,15 +33,7 @@ function(add_hipcub_benchmark BENCHMARK_SOURCE) hipcub ) if((HIP_COMPILER STREQUAL "nvcc")) - set_target_properties(${BENCHMARK_TARGET} - PROPERTIES CUDA_SEPARABLE_COMPILATION ON - CUDA_STANDARD 14 - CUDA_STANDARD_REQUIRED ON - CUDA_EXTENSIONS OFF - CXX_STANDARD 14 - CXX_STANDARD_REQUIRED ON - CXX_EXTENSIONS ON - ) + set_property(TARGET ${BENCHMARK_TARGET} PROPERTY CUDA_STANDARD 14) set_source_files_properties(${BENCHMARK_SOURCE} PROPERTIES LANGUAGE CUDA) target_compile_options(${BENCHMARK_TARGET} PRIVATE @@ -76,5 +68,6 @@ add_hipcub_benchmark(benchmark_device_scan.cpp) add_hipcub_benchmark(benchmark_device_segmented_radix_sort.cpp) add_hipcub_benchmark(benchmark_device_segmented_reduce.cpp) add_hipcub_benchmark(benchmark_device_select.cpp) -add_hipcub_benchmark(benchmark_warp_reduce.cpp) -add_hipcub_benchmark(benchmark_warp_scan.cpp) +# TODO: Find a workaround for compile issue +#add_hipcub_benchmark(benchmark_warp_reduce.cpp) +#add_hipcub_benchmark(benchmark_warp_scan.cpp) diff --git a/benchmark/benchmark_block_discontinuity.cpp b/benchmark/benchmark_block_discontinuity.cpp index 0e757afd..f4b8b5df 100644 --- a/benchmark/benchmark_block_discontinuity.cpp +++ b/benchmark/benchmark_block_discontinuity.cpp @@ -241,7 +241,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N) #define CREATE_BENCHMARK(T, BS, IPT, WITH_TILE) \ benchmark::RegisterBenchmark( \ (std::string("block_discontinuity<" #T ", " #BS ">.") + name + ("<" #IPT ", " #WITH_TILE ">")).c_str(), \ - run_benchmark, \ + &run_benchmark, \ stream, size \ ) diff --git a/benchmark/benchmark_block_exchange.cpp b/benchmark/benchmark_block_exchange.cpp index d4b22261..3712c08a 100644 --- a/benchmark/benchmark_block_exchange.cpp +++ b/benchmark/benchmark_block_exchange.cpp @@ -296,7 +296,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N) #define CREATE_BENCHMARK(T, BS, IPT) \ benchmark::RegisterBenchmark( \ (std::string("block_exchange<" #T ", " #BS ", " #IPT ">.") + name).c_str(), \ - run_benchmark, \ + &run_benchmark, \ stream, size \ ) diff --git a/benchmark/benchmark_block_histogram.cpp b/benchmark/benchmark_block_histogram.cpp index 35e3ff76..93e32ded 100644 --- a/benchmark/benchmark_block_histogram.cpp +++ b/benchmark/benchmark_block_histogram.cpp @@ -146,7 +146,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N) #define CREATE_BENCHMARK(T, BS, IPT) \ benchmark::RegisterBenchmark( \ (std::string("block_histogram<"#T", "#BS", "#IPT", " + algorithm_name + ">.") + method_name).c_str(), \ - run_benchmark, \ + &run_benchmark, \ stream, size \ ) diff --git a/benchmark/benchmark_block_radix_sort.cpp b/benchmark/benchmark_block_radix_sort.cpp index dca72712..ff585328 100644 --- a/benchmark/benchmark_block_radix_sort.cpp +++ b/benchmark/benchmark_block_radix_sort.cpp @@ -176,7 +176,7 @@ void run_benchmark(benchmark::State& state, benchmark_kinds benchmark_kind, hipS #define CREATE_BENCHMARK(T, BS, IPT) \ benchmark::RegisterBenchmark( \ (std::string("block_radix_sort<" #T ", " #BS ", " #IPT ">.") + name).c_str(), \ - run_benchmark, \ + &run_benchmark, \ benchmark_kind, stream, size \ ) diff --git a/benchmark/benchmark_block_reduce.cpp b/benchmark/benchmark_block_reduce.cpp index 125259d2..595c1acf 100644 --- a/benchmark/benchmark_block_reduce.cpp +++ b/benchmark/benchmark_block_reduce.cpp @@ -138,7 +138,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N) #define CREATE_BENCHMARK(T, BS, IPT) \ benchmark::RegisterBenchmark( \ (std::string("block_reduce<"#T", "#BS", "#IPT", " + algorithm_name + ">.") + method_name).c_str(), \ - run_benchmark, \ + &run_benchmark, \ stream, size \ ) diff --git a/benchmark/benchmark_block_scan.cpp b/benchmark/benchmark_block_scan.cpp index 2f797b8e..a38bc97c 100644 --- a/benchmark/benchmark_block_scan.cpp +++ b/benchmark/benchmark_block_scan.cpp @@ -136,7 +136,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N) #define CREATE_BENCHMARK(T, BS, IPT) \ benchmark::RegisterBenchmark( \ (std::string("block_scan<"#T", "#BS", "#IPT", " + algorithm_name + ">.") + method_name).c_str(), \ - run_benchmark, \ + &run_benchmark, \ stream, size \ ) diff --git a/benchmark/benchmark_device_reduce.cpp b/benchmark/benchmark_device_reduce.cpp index 8ff28a79..aecd236d 100644 --- a/benchmark/benchmark_device_reduce.cpp +++ b/benchmark/benchmark_device_reduce.cpp @@ -117,7 +117,7 @@ void run_benchmark(benchmark::State& state, #define CREATE_BENCHMARK(T, REDUCE_OP) \ benchmark::RegisterBenchmark( \ ("reduce<" #T ", " #REDUCE_OP ">"), \ - run_benchmark, size, stream, REDUCE_OP() \ + &run_benchmark, size, stream, REDUCE_OP() \ ) int main(int argc, char *argv[]) diff --git a/benchmark/benchmark_device_reduce_by_key.cpp b/benchmark/benchmark_device_reduce_by_key.cpp index a5cd0133..ee4b0274 100644 --- a/benchmark/benchmark_device_reduce_by_key.cpp +++ b/benchmark/benchmark_device_reduce_by_key.cpp @@ -168,7 +168,7 @@ benchmark::RegisterBenchmark( \ (std::string("reduce_by_key") + "<" #Key ", " #Value ">" + \ "([1, " + std::to_string(max_length) + "])" \ ).c_str(), \ - run_benchmark, \ + &run_benchmark, \ max_length, stream, size \ ) diff --git a/benchmark/benchmark_device_run_length_encode.cpp b/benchmark/benchmark_device_run_length_encode.cpp index 0db78b73..658c30ec 100644 --- a/benchmark/benchmark_device_run_length_encode.cpp +++ b/benchmark/benchmark_device_run_length_encode.cpp @@ -248,7 +248,7 @@ benchmark::RegisterBenchmark( \ (std::string("run_length_encode") + "<" #T ">" + \ "([1, " + std::to_string(max_length) + "])" \ ).c_str(), \ - run_encode_benchmark, \ + &run_encode_benchmark, \ max_length, stream, size \ ) @@ -280,7 +280,7 @@ benchmark::RegisterBenchmark( \ (std::string("run_length_encode_non_trivial_runs") + "<" #T ">" + \ "([1, " + std::to_string(max_length) + "])" \ ).c_str(), \ - run_non_trivial_runs_benchmark, \ + &run_non_trivial_runs_benchmark, \ max_length, stream, size \ ) diff --git a/benchmark/benchmark_device_scan.cpp b/benchmark/benchmark_device_scan.cpp index bc929fb2..e414431c 100644 --- a/benchmark/benchmark_device_scan.cpp +++ b/benchmark/benchmark_device_scan.cpp @@ -169,7 +169,7 @@ void run_benchmark(benchmark::State& state, benchmark::RegisterBenchmark( \ (std::string(EXCL ? "exclusive_scan" : "inclusive_scan") + \ ("<" #T ", " #SCAN_OP ">")).c_str(), \ - run_benchmark, size, stream, SCAN_OP() \ + &run_benchmark, size, stream, SCAN_OP() \ ), diff --git a/benchmark/benchmark_device_segmented_reduce.cpp b/benchmark/benchmark_device_segmented_reduce.cpp index 294e7ac6..4fd10ca8 100644 --- a/benchmark/benchmark_device_segmented_reduce.cpp +++ b/benchmark/benchmark_device_segmented_reduce.cpp @@ -159,7 +159,7 @@ benchmark::RegisterBenchmark( \ (std::string("segmented_reduce") + "<" #T ">" + \ "(~" + std::to_string(SEGMENTS) + " segments)" \ ).c_str(), \ - run_benchmark, \ + &run_benchmark, \ SEGMENTS, stream, size \ ) diff --git a/benchmark/benchmark_device_select.cpp b/benchmark/benchmark_device_select.cpp index 83d60821..3e6c6f6d 100644 --- a/benchmark/benchmark_device_select.cpp +++ b/benchmark/benchmark_device_select.cpp @@ -371,19 +371,19 @@ void run_unique_benchmark(benchmark::State& state, #define CREATE_SELECT_FLAGGED_BENCHMARK(T, F, p) \ benchmark::RegisterBenchmark( \ ("select_flagged<" #T "," #F ", "#T", unsigned int>(p = " #p")"), \ - run_flagged_benchmark, size, stream, p \ + &run_flagged_benchmark, size, stream, p \ ) #define CREATE_SELECT_IF_BENCHMARK(T, p) \ benchmark::RegisterBenchmark( \ ("select_if<" #T ", "#T", unsigned int>(p = " #p")"), \ - run_selectop_benchmark, size, stream, p \ + &run_selectop_benchmark, size, stream, p \ ) #define CREATE_UNIQUE_BENCHMARK(T, p) \ benchmark::RegisterBenchmark( \ ("unique<" #T ", "#T", unsigned int>(p = " #p")"), \ - run_unique_benchmark, size, stream, p \ + &run_unique_benchmark, size, stream, p \ ) #define BENCHMARK_FLAGGED_TYPE(type, value) \ diff --git a/benchmark/benchmark_warp_reduce.cpp b/benchmark/benchmark_warp_reduce.cpp index 7d7e54e2..6064903d 100644 --- a/benchmark/benchmark_warp_reduce.cpp +++ b/benchmark/benchmark_warp_reduce.cpp @@ -184,7 +184,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N) #define CREATE_BENCHMARK(T, WS, BS) \ benchmark::RegisterBenchmark( \ (std::string("warp_reduce<" #T ", " #WS ", " #BS ">.") + name).c_str(), \ - run_benchmark, \ + &run_benchmark, \ stream, size \ ) diff --git a/benchmark/benchmark_warp_scan.cpp b/benchmark/benchmark_warp_scan.cpp index 41f99ec9..eb5e7362 100644 --- a/benchmark/benchmark_warp_scan.cpp +++ b/benchmark/benchmark_warp_scan.cpp @@ -134,7 +134,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t size) #define CREATE_BENCHMARK(T, BS, WS, INCLUSIVE) \ benchmark::RegisterBenchmark( \ (std::string("warp_scan<"#T", "#BS", "#WS">.") + method_name).c_str(), \ - run_benchmark, \ + &run_benchmark, \ stream, size \ ) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 5fa35507..5b827513 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -127,7 +127,7 @@ if(BUILD_BENCHMARK) download_project( PROJ googlebenchmark GIT_REPOSITORY https://github.com/google/benchmark.git - GIT_TAG v1.4.0 + GIT_TAG v1.5.2 INSTALL_DIR ${GOOGLEBENCHMARK_ROOT} CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_ENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX= ${COMPILER_OVERRIDE} LOG_DOWNLOAD TRUE diff --git a/cmake/SetupNVCC.cmake b/cmake/SetupNVCC.cmake index 679da57d..e161d092 100644 --- a/cmake/SetupNVCC.cmake +++ b/cmake/SetupNVCC.cmake @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal From 23166b4817417e030148907b37506ced78b67131 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Fri, 26 Feb 2021 00:25:21 +0000 Subject: [PATCH 16/29] Updating CHANGELOG and version for ROCm 4.2 --- CHANGELOG.md | 13 ++++++++++++- CMakeLists.txt | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32618d4d..69f3c884 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,18 @@ See README.md on how to build the hipCUB documentation using Doxygen. -## [Unreleased hipCUB-2.10.8 for ROCm 4.1.0] +## [Unreleased hipCUB-2.10.9 for ROCm 4.2.0] +### Added +- Support for TexObjInputIterator and TexRefInputIterator +- Support for DevicePartition +### Changed +- Minimum cmake version required is now 3.10.2 +- CUB backend has been updated to 1.11.0 +### Fixed +- Benchmark build fixed +- nvcc build fixed + +## [hipCUB-2.10.8 for ROCm 4.1.0] ### Added - Support for DiscardOutputIterator diff --git a/CMakeLists.txt b/CMakeLists.txt index 41dd338b..6d0e0c76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,7 +81,7 @@ option(BUILD_BENCHMARK "Build benchmarks" OFF) include(cmake/Dependencies.cmake) # Setup VERSION -set(VERSION_STRING "2.10.8") +set(VERSION_STRING "2.10.9") rocm_setup_version(VERSION ${VERSION_STRING}) # Print configuration summary From 90a426454584ac66f4f727be713828820dcfb63e Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Tue, 2 Mar 2021 18:42:56 +0000 Subject: [PATCH 17/29] Defaulting to cmake instead of cmake3 --- install | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/install b/install index 4c052e1e..0d2c8125 100755 --- a/install +++ b/install @@ -125,11 +125,7 @@ fi # compiler="hipcc" -if [ -e /etc/redhat-release ] ; then - cmake_executable="cmake3" -else - cmake_executable="cmake" -fi +cmake_executable="cmake" if [[ "${build_clients}" == true ]]; then build_tests="-DBUILD_TEST=ON" From 6465eef28f5b3db1ceff646ef2e7e1180f11216d Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Wed, 7 Apr 2021 19:31:04 +0000 Subject: [PATCH 18/29] Adding discard output iterator to backend header --- hipcub/include/hipcub/backend/rocprim/hipcub.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/hipcub/include/hipcub/backend/rocprim/hipcub.hpp b/hipcub/include/hipcub/backend/rocprim/hipcub.hpp index 87a9271a..c8e0b9e7 100644 --- a/hipcub/include/hipcub/backend/rocprim/hipcub.hpp +++ b/hipcub/include/hipcub/backend/rocprim/hipcub.hpp @@ -42,6 +42,7 @@ #include "iterator/counting_input_iterator.hpp" #include "iterator/tex_obj_input_iterator.hpp" #include "iterator/transform_input_iterator.hpp" +#include "iterator/discard_output_iterator.hpp" // Warp #include "warp/warp_reduce.hpp" From 410d8c30d7e87567d3f55ed3b36d17e1a9578c27 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Wed, 21 Apr 2021 23:07:01 +0000 Subject: [PATCH 19/29] Merge branch 'rocm-4.2.x' into develop --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69f3c884..b0c48e8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ See README.md on how to build the hipCUB documentation using Doxygen. -## [Unreleased hipCUB-2.10.9 for ROCm 4.2.0] +## [hipCUB-2.10.9 for ROCm 4.2.0] ### Added - Support for TexObjInputIterator and TexRefInputIterator - Support for DevicePartition From 78b9e872134ca97af3c9fba10ecd395b4abf4509 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Thu, 22 Apr 2021 22:19:06 +0000 Subject: [PATCH 20/29] Updating CHANGELOG for ROCm 4.3 --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0c48e8d..f241da99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ See README.md on how to build the hipCUB documentation using Doxygen. +## [Unreleased hipCUB-2.10.10 for ROCm 4.3.0] +### Added +- DiscardOutputIterator to backend header + ## [hipCUB-2.10.9 for ROCm 4.2.0] ### Added - Support for TexObjInputIterator and TexRefInputIterator From d91117fbff12fac015c20643df0771a198d767c2 Mon Sep 17 00:00:00 2001 From: Eiden Yoshida <47196116+eidenyoshida@users.noreply.github.com> Date: Fri, 23 Apr 2021 16:51:45 -0600 Subject: [PATCH 21/29] Cherry-pick PR #135 (Add gfx90a targets) * Add gfx90a targets * Check for valid compiler targets using rocm_check_target_ids --- .jenkins/common.groovy | 5 ++++- .jenkins/precheckin.groovy | 10 ++-------- CMakeLists.txt | 35 ++++++++++++++++++++++++++--------- cmake/Dependencies.cmake | 34 ---------------------------------- 4 files changed, 32 insertions(+), 52 deletions(-) diff --git a/.jenkins/common.groovy b/.jenkins/common.groovy index fbf0a347..028e204a 100644 --- a/.jenkins/common.groovy +++ b/.jenkins/common.groovy @@ -8,6 +8,8 @@ def runCompileCommand(platform, project, jobName, boolean debug=false, boolean s String buildTypeArg = debug ? '-DCMAKE_BUILD_TYPE=Debug' : '-DCMAKE_BUILD_TYPE=Release' String buildTypeDir = debug ? 'debug' : 'release' String cmake = platform.jenkinsLabel.contains('centos') ? 'cmake3' : 'cmake' + //Set CI node's gfx arch as target if PR, otherwise use default targets of the library + String amdgpuTargets = env.BRANCH_NAME.startsWith('PR-') ? '-DAMDGPU_TARGETS=\$gfx_arch' : '' def getRocPRIM = auxiliary.getLibrary('rocPRIM', platform.jenkinsLabel, 'develop', sameOrg) @@ -16,7 +18,8 @@ def runCompileCommand(platform, project, jobName, boolean debug=false, boolean s ${getRocPRIM} cd ${project.paths.project_build_prefix} mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir} - ${cmake} -DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc ${buildTypeArg} -DBUILD_TEST=ON -DBUILD_BENCHMARK=ON ../.. + ${auxiliary.gfxTargetParser()} + ${cmake} -DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc ${buildTypeArg} ${amdgpuTargets} -DBUILD_TEST=ON -DBUILD_BENCHMARK=ON ../.. make -j\$(nproc) """ diff --git a/.jenkins/precheckin.groovy b/.jenkins/precheckin.groovy index 0ea6bf54..573ac33a 100644 --- a/.jenkins/precheckin.groovy +++ b/.jenkins/precheckin.groovy @@ -50,16 +50,10 @@ def runCI = ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) - def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])], - "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], - "rocm-docker":[]] + def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]] propertyList = auxiliary.appendPropertyList(propertyList) - Set standardJobNameSet = ["compute-rocm-dkms-no-npi", "compute-rocm-dkms-no-npi-hipclang", "rocm-docker"] - - def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), - "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), - "rocm-docker":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']])] + def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],centos8:['gfx906'],sles15sp1:['gfx908']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d0e0c76..fb345986 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,21 +46,38 @@ endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath") -# Detect compiler support for target ID -if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) +# rocm-cmake contains common cmake code for rocm projects to help +# setup and install +find_package( ROCM CONFIG ) +include( ROCMSetupVersion ) +include( ROCMCreatePackage ) +include( ROCMInstallTargets ) +include( ROCMPackageConfigHelpers ) +include( ROCMInstallSymlinks ) +include( ROCMCheckTargetIds OPTIONAL ) + +#Set the AMDGPU_TARGETS with backward compatiblity +if(COMMAND rocm_check_target_ids) + rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS + TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx1030" + ) +else() + # Use target ID syntax if supported for AMDGPU_TARGETS + # This section is deprecated. Please use rocm_check_target_ids for future use. + if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help" OUTPUT_VARIABLE CXX_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_STRIP_TRAILING_WHITESPACE) string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) + endif() + if(TARGET_ID_SUPPORT) + set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030") + else() + set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908") + endif() endif() - -# Use target ID syntax if supported for AMDGPU_TARGETS -if(TARGET_ID_SUPPORT) - set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") -else() - set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") -endif() +set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") # Verify that hcc compiler is used on ROCM platform include(cmake/VerifyCompiler.cmake) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 5b827513..29f18fb2 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -140,37 +140,3 @@ if(BUILD_BENCHMARK) find_package(benchmark REQUIRED CONFIG PATHS ${GOOGLEBENCHMARK_ROOT}) endif() -# Find or download/install rocm-cmake project -find_package(ROCM QUIET CONFIG PATHS /opt/rocm) -if(NOT ROCM_FOUND) - set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download") - file( - DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip - ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip - STATUS rocm_cmake_download_status LOG rocm_cmake_download_log - ) - list(GET rocm_cmake_download_status 0 rocm_cmake_download_error_code) - if(rocm_cmake_download_error_code) - message(FATAL_ERROR "Error: downloading " - "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip failed " - "error_code: ${rocm_cmake_download_error_code} " - "log: ${rocm_cmake_download_log} " - ) - endif() - - execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE rocm_cmake_unpack_error_code - ) - if(rocm_cmake_unpack_error_code) - message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip failed") - endif() - find_package(ROCM REQUIRED CONFIG PATHS ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}) -endif() - -include(ROCMSetupVersion) -include(ROCMCreatePackage) -include(ROCMInstallTargets) -include(ROCMPackageConfigHelpers) -include(ROCMInstallSymlinks) From aeb4d11c080e960b7e4993c0527cb3733bdc877b Mon Sep 17 00:00:00 2001 From: Eiden Yoshida Date: Fri, 23 Apr 2021 18:07:03 -0600 Subject: [PATCH 22/29] Temporarily remove gfx1030 --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb345986..8bf5aef2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,7 +59,7 @@ include( ROCMCheckTargetIds OPTIONAL ) #Set the AMDGPU_TARGETS with backward compatiblity if(COMMAND rocm_check_target_ids) rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS - TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+;gfx1030" + TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+" ) else() # Use target ID syntax if supported for AMDGPU_TARGETS @@ -72,7 +72,7 @@ else() string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) endif() if(TARGET_ID_SUPPORT) - set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx1030") + set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-") else() set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908") endif() From 1b136fd4dc12283f497813b0383eb618d64e6dc7 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Sat, 24 Apr 2021 04:56:45 +0000 Subject: [PATCH 23/29] Revert "Adding discard output iterator to backend header" This reverts commit 6465eef28f5b3db1ceff646ef2e7e1180f11216d. --- hipcub/include/hipcub/backend/rocprim/hipcub.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/hipcub/include/hipcub/backend/rocprim/hipcub.hpp b/hipcub/include/hipcub/backend/rocprim/hipcub.hpp index c8e0b9e7..87a9271a 100644 --- a/hipcub/include/hipcub/backend/rocprim/hipcub.hpp +++ b/hipcub/include/hipcub/backend/rocprim/hipcub.hpp @@ -42,7 +42,6 @@ #include "iterator/counting_input_iterator.hpp" #include "iterator/tex_obj_input_iterator.hpp" #include "iterator/transform_input_iterator.hpp" -#include "iterator/discard_output_iterator.hpp" // Warp #include "warp/warp_reduce.hpp" From c494a019eba8a83082f3021cfa8938df6e46553c Mon Sep 17 00:00:00 2001 From: Eiden Yoshida Date: Mon, 26 Apr 2021 16:58:40 -0600 Subject: [PATCH 24/29] Add back rocm-cmake fallback download --- CMakeLists.txt | 2 +- cmake/RocmCmakeDependence.cmake | 50 +++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 cmake/RocmCmakeDependence.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bf5aef2..dec064ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,7 +48,7 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker searc # rocm-cmake contains common cmake code for rocm projects to help # setup and install -find_package( ROCM CONFIG ) +include(cmake/RocmCmakeDependence.cmake) include( ROCMSetupVersion ) include( ROCMCreatePackage ) include( ROCMInstallTargets ) diff --git a/cmake/RocmCmakeDependence.cmake b/cmake/RocmCmakeDependence.cmake new file mode 100644 index 00000000..dc770597 --- /dev/null +++ b/cmake/RocmCmakeDependence.cmake @@ -0,0 +1,50 @@ +# MIT License +# +# Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Find or download/install rocm-cmake project +find_package(ROCM QUIET CONFIG PATHS /opt/rocm) +if(NOT ROCM_FOUND) + set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download") + file( + DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip + ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip + STATUS rocm_cmake_download_status LOG rocm_cmake_download_log + ) + list(GET rocm_cmake_download_status 0 rocm_cmake_download_error_code) + if(rocm_cmake_download_error_code) + message(FATAL_ERROR "Error: downloading " + "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip failed " + "error_code: ${rocm_cmake_download_error_code} " + "log: ${rocm_cmake_download_log} " + ) + endif() + + execute_process( + COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + RESULT_VARIABLE rocm_cmake_unpack_error_code + ) + if(rocm_cmake_unpack_error_code) + message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip failed") + endif() + find_package(ROCM REQUIRED CONFIG PATHS ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}) +endif() From cfe162b82b31f3ce54201bd852cd41433b61c38f Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Tue, 27 Apr 2021 16:04:02 +0000 Subject: [PATCH 25/29] Revert "Merge pull request #138 from eidenyoshida/cherrypickgfx90a" This reverts commit c8a5525f6023856426215ad707476026aa2e8115, reversing changes made to 1b136fd4dc12283f497813b0383eb618d64e6dc7. --- .jenkins/common.groovy | 5 +--- .jenkins/precheckin.groovy | 10 +++++-- CMakeLists.txt | 35 ++++++----------------- cmake/Dependencies.cmake | 34 ++++++++++++++++++++++ cmake/RocmCmakeDependence.cmake | 50 --------------------------------- 5 files changed, 52 insertions(+), 82 deletions(-) delete mode 100644 cmake/RocmCmakeDependence.cmake diff --git a/.jenkins/common.groovy b/.jenkins/common.groovy index 028e204a..fbf0a347 100644 --- a/.jenkins/common.groovy +++ b/.jenkins/common.groovy @@ -8,8 +8,6 @@ def runCompileCommand(platform, project, jobName, boolean debug=false, boolean s String buildTypeArg = debug ? '-DCMAKE_BUILD_TYPE=Debug' : '-DCMAKE_BUILD_TYPE=Release' String buildTypeDir = debug ? 'debug' : 'release' String cmake = platform.jenkinsLabel.contains('centos') ? 'cmake3' : 'cmake' - //Set CI node's gfx arch as target if PR, otherwise use default targets of the library - String amdgpuTargets = env.BRANCH_NAME.startsWith('PR-') ? '-DAMDGPU_TARGETS=\$gfx_arch' : '' def getRocPRIM = auxiliary.getLibrary('rocPRIM', platform.jenkinsLabel, 'develop', sameOrg) @@ -18,8 +16,7 @@ def runCompileCommand(platform, project, jobName, boolean debug=false, boolean s ${getRocPRIM} cd ${project.paths.project_build_prefix} mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir} - ${auxiliary.gfxTargetParser()} - ${cmake} -DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc ${buildTypeArg} ${amdgpuTargets} -DBUILD_TEST=ON -DBUILD_BENCHMARK=ON ../.. + ${cmake} -DCMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc ${buildTypeArg} -DBUILD_TEST=ON -DBUILD_BENCHMARK=ON ../.. make -j\$(nproc) """ diff --git a/.jenkins/precheckin.groovy b/.jenkins/precheckin.groovy index 573ac33a..0ea6bf54 100644 --- a/.jenkins/precheckin.groovy +++ b/.jenkins/precheckin.groovy @@ -50,10 +50,16 @@ def runCI = ci: { String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) - def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]] + def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])], + "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], + "rocm-docker":[]] propertyList = auxiliary.appendPropertyList(propertyList) - def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],centos8:['gfx906'],sles15sp1:['gfx908']])] + Set standardJobNameSet = ["compute-rocm-dkms-no-npi", "compute-rocm-dkms-no-npi-hipclang", "rocm-docker"] + + def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), + "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), + "rocm-docker":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']])] jobNameList = auxiliary.appendJobNameList(jobNameList) propertyList.each diff --git a/CMakeLists.txt b/CMakeLists.txt index dec064ba..6d0e0c76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,38 +46,21 @@ endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath") -# rocm-cmake contains common cmake code for rocm projects to help -# setup and install -include(cmake/RocmCmakeDependence.cmake) -include( ROCMSetupVersion ) -include( ROCMCreatePackage ) -include( ROCMInstallTargets ) -include( ROCMPackageConfigHelpers ) -include( ROCMInstallSymlinks ) -include( ROCMCheckTargetIds OPTIONAL ) - -#Set the AMDGPU_TARGETS with backward compatiblity -if(COMMAND rocm_check_target_ids) - rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS - TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+" - ) -else() - # Use target ID syntax if supported for AMDGPU_TARGETS - # This section is deprecated. Please use rocm_check_target_ids for future use. - if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) +# Detect compiler support for target ID +if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help" OUTPUT_VARIABLE CXX_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_STRIP_TRAILING_WHITESPACE) string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) - endif() - if(TARGET_ID_SUPPORT) - set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-") - else() - set(DEFAULT_AMDGPU_TARGETS "gfx803;gfx900;gfx906;gfx908") - endif() endif() -set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") + +# Use target ID syntax if supported for AMDGPU_TARGETS +if(TARGET_ID_SUPPORT) + set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") +else() + set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") +endif() # Verify that hcc compiler is used on ROCM platform include(cmake/VerifyCompiler.cmake) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 29f18fb2..5b827513 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -140,3 +140,37 @@ if(BUILD_BENCHMARK) find_package(benchmark REQUIRED CONFIG PATHS ${GOOGLEBENCHMARK_ROOT}) endif() +# Find or download/install rocm-cmake project +find_package(ROCM QUIET CONFIG PATHS /opt/rocm) +if(NOT ROCM_FOUND) + set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download") + file( + DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip + ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip + STATUS rocm_cmake_download_status LOG rocm_cmake_download_log + ) + list(GET rocm_cmake_download_status 0 rocm_cmake_download_error_code) + if(rocm_cmake_download_error_code) + message(FATAL_ERROR "Error: downloading " + "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip failed " + "error_code: ${rocm_cmake_download_error_code} " + "log: ${rocm_cmake_download_log} " + ) + endif() + + execute_process( + COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + RESULT_VARIABLE rocm_cmake_unpack_error_code + ) + if(rocm_cmake_unpack_error_code) + message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip failed") + endif() + find_package(ROCM REQUIRED CONFIG PATHS ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}) +endif() + +include(ROCMSetupVersion) +include(ROCMCreatePackage) +include(ROCMInstallTargets) +include(ROCMPackageConfigHelpers) +include(ROCMInstallSymlinks) diff --git a/cmake/RocmCmakeDependence.cmake b/cmake/RocmCmakeDependence.cmake deleted file mode 100644 index dc770597..00000000 --- a/cmake/RocmCmakeDependence.cmake +++ /dev/null @@ -1,50 +0,0 @@ -# MIT License -# -# Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# Find or download/install rocm-cmake project -find_package(ROCM QUIET CONFIG PATHS /opt/rocm) -if(NOT ROCM_FOUND) - set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download") - file( - DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip - ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip - STATUS rocm_cmake_download_status LOG rocm_cmake_download_log - ) - list(GET rocm_cmake_download_status 0 rocm_cmake_download_error_code) - if(rocm_cmake_download_error_code) - message(FATAL_ERROR "Error: downloading " - "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip failed " - "error_code: ${rocm_cmake_download_error_code} " - "log: ${rocm_cmake_download_log} " - ) - endif() - - execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE rocm_cmake_unpack_error_code - ) - if(rocm_cmake_unpack_error_code) - message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip failed") - endif() - find_package(ROCM REQUIRED CONFIG PATHS ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}) -endif() From f30bdf3e2d2e3ad374642faf6c17e7c5d9b85dd2 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Mon, 26 Apr 2021 17:01:08 -0600 Subject: [PATCH 26/29] Cherry picking update iterators commit to fix build error in frameworks --- .gitlab-ci.yml | 2 +- CMakeLists.txt | 36 +++- cmake/Dependencies.cmake | 37 +--- cmake/RocmCmakeDependence.cmake | 50 +++++ .../iterator/arg_index_input_iterator.hpp | 11 +- .../cache_modified_input_iterator.hpp | 168 ++++++++++++++++ .../cache_modified_output_iterator.hpp | 186 ++++++++++++++++++ .../iterator/constant_input_iterator.hpp | 9 + .../iterator/counting_input_iterator.hpp | 9 + .../iterator/discard_output_iterator.hpp | 11 +- .../iterator/tex_obj_input_iterator.hpp | 10 + .../iterator/tex_ref_input_iterator.hpp | 11 +- .../iterator/transform_input_iterator.hpp | 10 + 13 files changed, 498 insertions(+), 52 deletions(-) create mode 100644 cmake/RocmCmakeDependence.cmake create mode 100644 hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_input_iterator.hpp create mode 100644 hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_output_iterator.hpp diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 83e49973..f172fcee 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -321,7 +321,7 @@ test:rocm_install: - $SUDO_CMD apt-get install -y hip-base # Install hip-nvcc ignoring dependencies because it depends on cuda metapackage # (with heavy libraries, tools etc. that also require GUI and other packages) - - apt-get download hip-nvcc + - apt-get download hip-nvcc rocm-cmake - $SUDO_CMD dpkg -i --ignore-depends=cuda hip*.deb - $SUDO_CMD ls -d /opt/* - $SUDO_CMD ln -s $ROCM_LATEST_PATH /opt/rocm diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d0e0c76..9b655a05 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,21 +46,39 @@ endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath") -# Detect compiler support for target ID -if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) +# rocm-cmake contains common cmake code for rocm projects to help +# setup and install +include(cmake/RocmCmakeDependence.cmake) +include( ROCMSetupVersion ) +include( ROCMCreatePackage ) +include( ROCMInstallTargets ) +include( ROCMPackageConfigHelpers ) +include( ROCMInstallSymlinks ) +include( ROCMCheckTargetIds OPTIONAL ) + +#Set the AMDGPU_TARGETS with backward compatiblity +if(COMMAND rocm_check_target_ids) + rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS + TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-" + ) +else() + # Use target ID syntax if supported for AMDGPU_TARGETS + # This section is deprecated. Please use rocm_check_target_ids for future use. + if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" ) execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help" OUTPUT_VARIABLE CXX_OUTPUT OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_STRIP_TRAILING_WHITESPACE) string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT}) + endif() + # Use target ID syntax if supported for AMDGPU_TARGETS + if(TARGET_ID_SUPPORT) + set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") + else() + set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") + endif() endif() - -# Use target ID syntax if supported for AMDGPU_TARGETS -if(TARGET_ID_SUPPORT) - set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target") -else() - set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") -endif() +set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target") # Verify that hcc compiler is used on ROCM platform include(cmake/VerifyCompiler.cmake) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 5b827513..8cadd097 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -138,39 +138,4 @@ if(BUILD_BENCHMARK) ${UPDATE_DISCONNECTED_IF_AVAILABLE} ) find_package(benchmark REQUIRED CONFIG PATHS ${GOOGLEBENCHMARK_ROOT}) -endif() - -# Find or download/install rocm-cmake project -find_package(ROCM QUIET CONFIG PATHS /opt/rocm) -if(NOT ROCM_FOUND) - set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download") - file( - DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip - ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip - STATUS rocm_cmake_download_status LOG rocm_cmake_download_log - ) - list(GET rocm_cmake_download_status 0 rocm_cmake_download_error_code) - if(rocm_cmake_download_error_code) - message(FATAL_ERROR "Error: downloading " - "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip failed " - "error_code: ${rocm_cmake_download_error_code} " - "log: ${rocm_cmake_download_log} " - ) - endif() - - execute_process( - COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - RESULT_VARIABLE rocm_cmake_unpack_error_code - ) - if(rocm_cmake_unpack_error_code) - message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip failed") - endif() - find_package(ROCM REQUIRED CONFIG PATHS ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}) -endif() - -include(ROCMSetupVersion) -include(ROCMCreatePackage) -include(ROCMInstallTargets) -include(ROCMPackageConfigHelpers) -include(ROCMInstallSymlinks) +endif() \ No newline at end of file diff --git a/cmake/RocmCmakeDependence.cmake b/cmake/RocmCmakeDependence.cmake new file mode 100644 index 00000000..dc770597 --- /dev/null +++ b/cmake/RocmCmakeDependence.cmake @@ -0,0 +1,50 @@ +# MIT License +# +# Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Find or download/install rocm-cmake project +find_package(ROCM QUIET CONFIG PATHS /opt/rocm) +if(NOT ROCM_FOUND) + set(rocm_cmake_tag "master" CACHE STRING "rocm-cmake tag to download") + file( + DOWNLOAD https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip + ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip + STATUS rocm_cmake_download_status LOG rocm_cmake_download_log + ) + list(GET rocm_cmake_download_status 0 rocm_cmake_download_error_code) + if(rocm_cmake_download_error_code) + message(FATAL_ERROR "Error: downloading " + "https://github.com/RadeonOpenCompute/rocm-cmake/archive/${rocm_cmake_tag}.zip failed " + "error_code: ${rocm_cmake_download_error_code} " + "log: ${rocm_cmake_download_log} " + ) + endif() + + execute_process( + COMMAND ${CMAKE_COMMAND} -E tar xzf ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + RESULT_VARIABLE rocm_cmake_unpack_error_code + ) + if(rocm_cmake_unpack_error_code) + message(FATAL_ERROR "Error: unpacking ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}.zip failed") + endif() + find_package(ROCM REQUIRED CONFIG PATHS ${CMAKE_CURRENT_BINARY_DIR}/rocm-cmake-${rocm_cmake_tag}) +endif() diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/arg_index_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/arg_index_input_iterator.hpp index 7f021abf..b4d96819 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/arg_index_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/arg_index_input_iterator.hpp @@ -1,5 +1,5 @@ /****************************************************************************** - * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. * @@ -30,10 +30,19 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_ARG_INDEX_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_ARG_INDEX_INPUT_ITERATOR_HPP_ +#include +#include + #include "../../../config.hpp" #include +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + BEGIN_HIPCUB_NAMESPACE template< diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_input_iterator.hpp new file mode 100644 index 00000000..59d35924 --- /dev/null +++ b/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_input_iterator.hpp @@ -0,0 +1,168 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_ROCPRIM_ITERATOR_CACHE_MODIFIED_INPUT_ITERATOR_HPP_ +#define HIPCUB_ROCPRIM_ITERATOR_CACHE_MODIFIED_INPUT_ITERATOR_HPP_ + +#include +#include + +#include "../thread/thread_load.hpp" +#include "../util_type.hpp" + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + +BEGIN_HIPCUB_NAMESPACE + +template < + CacheLoadModifier MODIFIER, + typename ValueType, + typename OffsetT = ptrdiff_t> +class CacheModifiedInputIterator +{ +public: + + // Required iterator traits + typedef CacheModifiedInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef ValueType value_type; ///< The type of the element the iterator can point to + typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to + typedef ValueType reference; ///< The type of a reference to an element the iterator can point to + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category + +public: + + /// Wrapped native pointer + ValueType* ptr; + + /// Constructor + __host__ __device__ __forceinline__ CacheModifiedInputIterator( + ValueType* ptr) ///< Native pointer to wrap + : + ptr(const_cast::Type *>(ptr)) + {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + ptr++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + ptr++; + return *this; + } + + /// Indirection + __device__ __forceinline__ reference operator*() const + { + return ThreadLoad(ptr); + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval(ptr + n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + ptr += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval(ptr - n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + ptr -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return ptr - other.ptr; + } + + /// Array subscript + template + __device__ __forceinline__ reference operator[](Distance n) const + { + return ThreadLoad(ptr + n); + } + + /// Structure dereference + __device__ __forceinline__ pointer operator->() + { + return &ThreadLoad(ptr); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return (ptr == rhs.ptr); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return (ptr != rhs.ptr); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& /*itr*/) + { + return os; + } +}; + +END_HIPCUB_NAMESPACE + +#endif // HIPCUB_ROCPRIM_ITERATOR_CACHE_MODIFIED_INPUT_ITERATOR_HPP_ diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_output_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_output_iterator.hpp new file mode 100644 index 00000000..85acd671 --- /dev/null +++ b/hipcub/include/hipcub/backend/rocprim/iterator/cache_modified_output_iterator.hpp @@ -0,0 +1,186 @@ +/****************************************************************************** + * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2017-2020, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#ifndef HIPCUB_ROCPRIM_ITERATOR_CACHE_MODIFIED_OUTPUT_ITERATOR_HPP_ +#define HIPCUB_ROCPRIM_ITERATOR_CACHE_MODIFIED_OUTPUT_ITERATOR_HPP_ + +#include +#include + +#include "../thread/thread_load.hpp" +#include "../thread/thread_store.hpp" +#include "../util_type.hpp" + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + +BEGIN_HIPCUB_NAMESPACE + +template < + CacheStoreModifier MODIFIER, + typename ValueType, + typename OffsetT = ptrdiff_t> +class CacheModifiedOutputIterator +{ +private: + + // Proxy object + struct Reference + { + ValueType* ptr; + + /// Constructor + __host__ __device__ __forceinline__ Reference(ValueType* ptr) : ptr(ptr) {} + + /// Assignment + __device__ __forceinline__ ValueType operator =(ValueType val) + { + ThreadStore(ptr, val); + return val; + } + }; + +public: + + // Required iterator traits + typedef CacheModifiedOutputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef void value_type; ///< The type of the element the iterator can point to + typedef void pointer; ///< The type of a pointer to an element the iterator can point to + typedef Reference reference; ///< The type of a reference to an element the iterator can point to + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category + +private: + + ValueType* ptr; + +public: + + /// Constructor + template + __host__ __device__ __forceinline__ CacheModifiedOutputIterator( + QualifiedValueType* ptr) ///< Native pointer to wrap + : + ptr(const_cast::Type *>(ptr)) + {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + ptr++; + return retval; + } + + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + ptr++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const + { + return Reference(ptr); + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval(ptr + n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + ptr += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval(ptr - n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + ptr -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return ptr - other.ptr; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const + { + return Reference(ptr + n); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return (ptr == rhs.ptr); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return (ptr != rhs.ptr); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) + { + (void)itr; + return os; + } +}; + +END_HIPCUB_NAMESPACE + +#endif diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/constant_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/constant_input_iterator.hpp index 5b1b7e61..f9b4ed10 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/constant_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/constant_input_iterator.hpp @@ -30,10 +30,19 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_CONSTANT_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_CONSTANT_INPUT_ITERATOR_HPP_ +#include +#include + #include "../../../config.hpp" #include +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + BEGIN_HIPCUB_NAMESPACE template< diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/counting_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/counting_input_iterator.hpp index e7741a53..ae074f6e 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/counting_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/counting_input_iterator.hpp @@ -30,10 +30,19 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_COUNTING_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_COUNTING_INPUT_ITERATOR_HPP_ +#include +#include + #include "../../../config.hpp" #include +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + BEGIN_HIPCUB_NAMESPACE template< diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/discard_output_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/discard_output_iterator.hpp index d49f4bd6..b8bd7713 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/discard_output_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/discard_output_iterator.hpp @@ -30,12 +30,17 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_DISCARD_OUTPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_DISCARD_OUTPUT_ITERATOR_HPP_ -#include "../../../config.hpp" +#include +#include -// TODO: Check, if we can update rocPRIM, to use the rocPRIM discard iterator. -//#include +#include "../../../config.hpp" BEGIN_HIPCUB_NAMESPACE +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION /** * \addtogroup UtilIterator diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp index 0fd76661..7a2ea482 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/tex_obj_input_iterator.hpp @@ -30,8 +30,18 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_TEX_OBJ_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_TEX_OBJ_INPUT_ITERATOR_HPP_ +#include +#include + #include "../../../config.hpp" +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + #include BEGIN_HIPCUB_NAMESPACE diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp index e5386e9e..016d866f 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/tex_ref_input_iterator.hpp @@ -1,5 +1,5 @@ /****************************************************************************** - * Copyright (c) 2010-2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011, Duane Merrill. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. * Modifications Copyright (c) 2017-2021, Advanced Micro Devices, Inc. All rights reserved. * @@ -30,8 +30,16 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_TEX_REF_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_TEX_REF_INPUT_ITERATOR_HPP_ +#include +#include + #include "../../../config.hpp" +#if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + #include BEGIN_HIPCUB_NAMESPACE @@ -77,4 +85,3 @@ class TexRefInputIterator : public ::rocprim::texture_cache_iterator END_HIPCUB_NAMESPACE #endif // HIPCUB_ROCPRIM_ITERATOR_TEX_OBJ_INPUT_ITERATOR_HPP_ - diff --git a/hipcub/include/hipcub/backend/rocprim/iterator/transform_input_iterator.hpp b/hipcub/include/hipcub/backend/rocprim/iterator/transform_input_iterator.hpp index 50e24b18..cb7e2c72 100644 --- a/hipcub/include/hipcub/backend/rocprim/iterator/transform_input_iterator.hpp +++ b/hipcub/include/hipcub/backend/rocprim/iterator/transform_input_iterator.hpp @@ -30,10 +30,20 @@ #ifndef HIPCUB_ROCPRIM_ITERATOR_TRANSFORM_INPUT_ITERATOR_HPP_ #define HIPCUB_ROCPRIM_ITERATOR_TRANSFORM_INPUT_ITERATOR_HPP_ +#include +#include + #include "../../../config.hpp" #include +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + BEGIN_HIPCUB_NAMESPACE template< From 05db89d7518516f79f8c97baee1e54981988bb71 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Tue, 27 Apr 2021 21:17:32 +0000 Subject: [PATCH 27/29] Revert "Revert "Adding discard output iterator to backend header"" This reverts commit 1b136fd4dc12283f497813b0383eb618d64e6dc7. --- hipcub/include/hipcub/backend/rocprim/hipcub.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/hipcub/include/hipcub/backend/rocprim/hipcub.hpp b/hipcub/include/hipcub/backend/rocprim/hipcub.hpp index 87a9271a..c8e0b9e7 100644 --- a/hipcub/include/hipcub/backend/rocprim/hipcub.hpp +++ b/hipcub/include/hipcub/backend/rocprim/hipcub.hpp @@ -42,6 +42,7 @@ #include "iterator/counting_input_iterator.hpp" #include "iterator/tex_obj_input_iterator.hpp" #include "iterator/transform_input_iterator.hpp" +#include "iterator/discard_output_iterator.hpp" // Warp #include "warp/warp_reduce.hpp" From f2aa7711e9692efaa164ecf34aef2edd574ec01a Mon Sep 17 00:00:00 2001 From: Eiden Yoshida <47196116+eidenyoshida@users.noreply.github.com> Date: Thu, 29 Apr 2021 20:10:09 -0600 Subject: [PATCH 28/29] Add gfx90a targets (#142) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b655a05..e1f0a0e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,7 +59,7 @@ include( ROCMCheckTargetIds OPTIONAL ) #Set the AMDGPU_TARGETS with backward compatiblity if(COMMAND rocm_check_target_ids) rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS - TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-" + TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+" ) else() # Use target ID syntax if supported for AMDGPU_TARGETS From 8c41e8bf871b8d4ecd7c656c48cf1603b112732f Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Thu, 27 May 2021 20:37:04 +0000 Subject: [PATCH 29/29] Updating CHANGELOG for ROCm 4.3 --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f241da99..a68d5d23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ See README.md on how to build the hipCUB documentation using Doxygen. -## [Unreleased hipCUB-2.10.10 for ROCm 4.3.0] +## [hipCUB-2.10.10 for ROCm 4.3.0] ### Added - DiscardOutputIterator to backend header @@ -60,4 +60,4 @@ See README.md on how to build the hipCUB documentation using Doxygen. - BlockHistogram - BlockRadixSort - BlockReduce - - BlockScan \ No newline at end of file + - BlockScan