Skip to content

Commit

Permalink
Merge branch 'release/rocm-rel-4.3' into xnack_on_hmm
Browse files Browse the repository at this point in the history
  • Loading branch information
stanleytsang-amd authored Jun 3, 2021
2 parents d41a5d6 + 8c41e8b commit aaaf755
Show file tree
Hide file tree
Showing 48 changed files with 2,264 additions and 241 deletions.
40 changes: 21 additions & 19 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ stages:
variables:
SUDO_CMD: "" # Must be "sudo" on images which don't use root user
DEPS_DIR: "$CI_PROJECT_DIR/__dependencies"
CMAKE_URL: "https://cmake.org/files/v3.5/cmake-3.5.1-Linux-x86_64.tar.gz"
CMAKE_MINIMUM_URL: "https://cmake.org/files/v3.10/cmake-3.10.2-Linux-x86_64.tar.gz"
# General build flags
CXXFLAGS: ""
CMAKE_OPTIONS: ""
# Local build options
LOCAL_CXXFLAGS: ""
LOCAL_CMAKE_OPTIONS: ""
ROCM_LATEST_PATH: "/opt/rocm-3.7.0/"
ROCM_LATEST_PATH: "/opt/rocm-4.0.0/"

# hipCUB with rocPRIM backend
.rocm:
Expand All @@ -50,7 +50,7 @@ variables:
- hipconfig
# cmake
- mkdir -p $DEPS_DIR/cmake
- wget --no-check-certificate --quiet -O - $CMAKE_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake
- wget --no-check-certificate --quiet -O - $CMAKE_MINIMUM_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake
- export PATH=$DEPS_DIR/cmake/bin:$PATH
# Combine global build options with local options
- export CXXFLAGS=$CXXFLAGS" "$LOCAL_CXXFLAGS
Expand All @@ -65,7 +65,7 @@ variables:
- hipconfig
# cmake
- mkdir -p $DEPS_DIR/cmake
- wget --no-check-certificate --quiet -O - $CMAKE_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake
- wget --no-check-certificate --quiet -O - $CMAKE_MINIMUM_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake
- export PATH=$DEPS_DIR/cmake/bin:$PATH
# Combine global build options with local options
- export CXXFLAGS=$CXXFLAGS" "$LOCAL_CXXFLAGS
Expand Down Expand Up @@ -95,7 +95,8 @@ build:rocm:
extends: .rocm:build
stage: build
script:
- mkdir build
- if [ ! -d "build" ] ; then mkdir build;
- fi;
- cd build
- cmake
-G Ninja
Expand Down Expand Up @@ -126,12 +127,13 @@ build:rocm-benchmark:
extends: .rocm:build
stage: build
only:
- internal_benchmark
- fix_benchmarks
- develop_stream
- develop
- master
script:
- mkdir build
- if [ ! -d "build" ] ; then mkdir build;
- fi;
- cd build
# Build hipCUB benchmark
- cmake
Expand Down Expand Up @@ -166,7 +168,7 @@ benchmark:rocm_vega20:
stage: benchmark
when: manual
only:
- internal_benchmark
- fix_benchmarks
- develop_stream
- develop
- master
Expand Down Expand Up @@ -199,7 +201,7 @@ benchmark:rocm_s9300:
stage: benchmark
when: manual
only:
- internal_benchmark
- fix_benchmarks
- develop_stream
- develop
- master
Expand Down Expand Up @@ -230,7 +232,7 @@ benchmark:rocm_mi25:
stage: benchmark
when: manual
only:
- internal_benchmark
- fix_benchmarks
- develop_stream
- develop
- master
Expand Down Expand Up @@ -265,7 +267,7 @@ test:rocm_package:
- cd ../..
# hipCUB
- cd build
- $SUDO_CMD dpkg -i hipcub-*.deb
- $SUDO_CMD dpkg -i hipcub*.deb
- mkdir ../package_test && cd ../package_test
- CXX=hipcc cmake ../test/extra/. -Drocprim_DIR="/opt/rocm/rocprim"
- make VERBOSE=1
Expand Down Expand Up @@ -319,15 +321,15 @@ test:rocm_install:
- $SUDO_CMD apt-get install -y hip-base
# Install hip-nvcc ignoring dependencies because it depends on cuda metapackage
# (with heavy libraries, tools etc. that also require GUI and other packages)
- apt-get download hip-nvcc
- apt-get download hip-nvcc rocm-cmake
- $SUDO_CMD dpkg -i --ignore-depends=cuda hip*.deb
- $SUDO_CMD ls -d /opt/*
- $SUDO_CMD ln -s $ROCM_LATEST_PATH /opt/rocm
- export PATH=$PATH:/opt/rocm/bin
- hipconfig
# cmake
- mkdir -p $DEPS_DIR/cmake
- wget --no-check-certificate --quiet -O - $CMAKE_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake
- wget --no-check-certificate --quiet -O - $CMAKE_MINIMUM_URL | tar --strip-components=1 -xz -C $DEPS_DIR/cmake
- export PATH=$DEPS_DIR/cmake/bin:$PATH
# Combine global build options with local options
- export CXXFLAGS=$CXXFLAGS" "$LOCAL_CXXFLAGS
Expand All @@ -342,7 +344,6 @@ build:nvcc:
- cmake
-G Ninja
-D CMAKE_BUILD_TYPE=Release
-D CMAKE_CXX_COMPILER=nvcc
-D BUILD_TEST=ON
-D BUILD_EXAMPLE=ON
-B build
Expand All @@ -367,7 +368,7 @@ build:nvcc-benchmark:
extends: .nvcc
stage: build
only:
- internal_benchmark
- fix_benchmarks
- develop_stream
- develop
- master
Expand All @@ -377,8 +378,9 @@ build:nvcc-benchmark:
- cmake
-G Ninja
-D CMAKE_BUILD_TYPE=Release
-D CMAKE_CXX_COMPILER=nvcc
-D BUILD_BENCHMARK=ON
-D CMAKE_CXX_COMPILER=g++-8
-D CMAKE_C_COMPILER=g++-8
-B build
../.
- cmake
Expand All @@ -403,7 +405,7 @@ benchmark:nvcc_titanv:
stage: benchmark
when: manual
only:
- internal_benchmark
- fix_benchmarks
- develop_stream
- develop
- master
Expand All @@ -430,7 +432,7 @@ benchmark:nvcc_980:
stage: benchmark
when: manual
only:
- internal_benchmark
- fix_benchmarks
- develop_stream
- develop
- master
Expand All @@ -450,7 +452,7 @@ test:nvcc_package:
- build:nvcc
script:
- cd build
- $SUDO_CMD dpkg -i hipcub_nvcc-*.deb
- $SUDO_CMD dpkg -i hipcub_nvcc*.deb
- mkdir ../package_test && cd ../package_test
- cmake ../test/extra/.
- make VERBOSE=1
Expand Down
23 changes: 21 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,26 @@

See README.md on how to build the hipCUB documentation using Doxygen.

## [Unreleased hipCUB-2.10.7 for ROCm 4.0.0]
## [hipCUB-2.10.10 for ROCm 4.3.0]
### Added
- DiscardOutputIterator to backend header

## [hipCUB-2.10.9 for ROCm 4.2.0]
### Added
- Support for TexObjInputIterator and TexRefInputIterator
- Support for DevicePartition
### Changed
- Minimum cmake version required is now 3.10.2
- CUB backend has been updated to 1.11.0
### Fixed
- Benchmark build fixed
- nvcc build fixed

## [hipCUB-2.10.8 for ROCm 4.1.0]
### Added
- Support for DiscardOutputIterator

## [hipCUB-2.10.7 for ROCm 4.0.0]
### Added
- No new features

Expand Down Expand Up @@ -41,4 +60,4 @@ See README.md on how to build the hipCUB documentation using Doxygen.
- BlockHistogram
- BlockRadixSort
- BlockReduce
- BlockScan
- BlockScan
45 changes: 38 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand All @@ -20,7 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

cmake_minimum_required(VERSION 3.5.1 FATAL_ERROR)
cmake_minimum_required(VERSION 3.10.2 FATAL_ERROR)

# Install prefix
set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories")
Expand All @@ -46,11 +46,45 @@ endif()

set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOL "Add paths to linker search and installed rpath")

# rocm-cmake contains common cmake code for rocm projects to help
# setup and install
include(cmake/RocmCmakeDependence.cmake)
include( ROCMSetupVersion )
include( ROCMCreatePackage )
include( ROCMInstallTargets )
include( ROCMPackageConfigHelpers )
include( ROCMInstallSymlinks )
include( ROCMCheckTargetIds OPTIONAL )

# Set the AMDGPU_TARGETS with backward compatibility
if(COMMAND rocm_check_target_ids)
rocm_check_target_ids(DEFAULT_AMDGPU_TARGETS
TARGETS "gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack-;gfx90a:xnack+"
)
else()
# Use target ID syntax if supported for AMDGPU_TARGETS
# This section is deprecated. Please use rocm_check_target_ids for future use.
if( CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" )
execute_process(COMMAND ${CMAKE_CXX_COMPILER} "--help"
OUTPUT_VARIABLE CXX_OUTPUT
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_STRIP_TRAILING_WHITESPACE)
string(REGEX MATCH ".mcode\-object\-version" TARGET_ID_SUPPORT ${CXX_OUTPUT})
endif()
# Use target ID syntax if supported for AMDGPU_TARGETS
if(TARGET_ID_SUPPORT)
set(AMDGPU_TARGETS gfx803;gfx900:xnack-;gfx906:xnack-;gfx908:xnack- CACHE STRING "List of specific machine types for library to target")
else()
set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target")
endif()
endif()
set(AMDGPU_TARGETS "${DEFAULT_AMDGPU_TARGETS}" CACHE STRING "List of specific machine types for library to target")

# Verify that hcc compiler is used on ROCM platform
include(cmake/VerifyCompiler.cmake)

# Set CXX flags
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

Expand All @@ -65,12 +99,9 @@ option(BUILD_BENCHMARK "Build benchmarks" OFF)
include(cmake/Dependencies.cmake)

# Setup VERSION
set(VERSION_STRING "2.10.7")
set(VERSION_STRING "2.10.9")
rocm_setup_version(VERSION ${VERSION_STRING})

# AMD targets
set(AMDGPU_TARGETS gfx900:xnack-;gfx906:xnack-;gfx908:xnack-;gfx908:xnack+ CACHE STRING "List of specific machine types for library to target")

# Print configuration summary
include(cmake/Summary.cmake)
print_configuration_summary()
Expand Down
57 changes: 19 additions & 38 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,47 +22,27 @@

function(add_hipcub_benchmark BENCHMARK_SOURCE)
get_filename_component(BENCHMARK_TARGET ${BENCHMARK_SOURCE} NAME_WE)
if((HIP_COMPILER STREQUAL "nvcc"))
if((CMAKE_VERSION VERSION_LESS "3.10"))
CUDA_INCLUDE_DIRECTORIES(
"${PROJECT_BINARY_DIR}/hipcub/include/hipcub"
"${PROJECT_BINARY_DIR}/hipcub/include"
"${PROJECT_SOURCE_DIR}/hipcub/include"
"${GOOGLEBENCHMARK_ROOT}/include"
${GTEST_INCLUDE_DIRS}
${CUB_INCLUDE_DIR}
)
endif()
set_source_files_properties(${BENCHMARK_SOURCE}
PROPERTIES
CUDA_SOURCE_PROPERTY_FORMAT OBJ
)
CUDA_ADD_EXECUTABLE(${BENCHMARK_TARGET}
${BENCHMARK_SOURCE}
OPTIONS
--expt-extended-lambda
)
target_include_directories(${BENCHMARK_TARGET} SYSTEM BEFORE
PUBLIC
"${GOOGLEBENCHMARK_ROOT}/include"
)
target_link_libraries(${BENCHMARK_TARGET}
hipcub_cub
add_executable(${BENCHMARK_TARGET} ${BENCHMARK_SOURCE})
target_include_directories(${BENCHMARK_TARGET} SYSTEM BEFORE
PUBLIC
"${GOOGLEBENCHMARK_ROOT}/include"
)
target_link_libraries(${BENCHMARK_TARGET}
PRIVATE
benchmark::benchmark
hipcub
)
if((HIP_COMPILER STREQUAL "nvcc"))
set_property(TARGET ${BENCHMARK_TARGET} PROPERTY CUDA_STANDARD 14)
set_source_files_properties(${BENCHMARK_SOURCE} PROPERTIES LANGUAGE CUDA)
target_compile_options(${BENCHMARK_TARGET}
PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:--expt-extended-lambda>
)
else()
add_executable(${BENCHMARK_TARGET} ${BENCHMARK_SOURCE})
target_link_libraries(${BENCHMARK_TARGET}
PRIVATE
hipcub
benchmark::benchmark
hipcub_cub
)
foreach(amdgpu_target ${AMDGPU_TARGETS})
target_link_libraries(${BENCHMARK_TARGET}
PRIVATE
--amdgpu-target=${amdgpu_target}
)
endforeach()
endif()
set_target_properties(${BENCHMARK_TARGET}
PROPERTIES
Expand All @@ -88,5 +68,6 @@ add_hipcub_benchmark(benchmark_device_scan.cpp)
add_hipcub_benchmark(benchmark_device_segmented_radix_sort.cpp)
add_hipcub_benchmark(benchmark_device_segmented_reduce.cpp)
add_hipcub_benchmark(benchmark_device_select.cpp)
add_hipcub_benchmark(benchmark_warp_reduce.cpp)
add_hipcub_benchmark(benchmark_warp_scan.cpp)
# TODO: Find a workaround for compile issue
#add_hipcub_benchmark(benchmark_warp_reduce.cpp)
#add_hipcub_benchmark(benchmark_warp_scan.cpp)
2 changes: 1 addition & 1 deletion benchmark/benchmark_block_discontinuity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N)
#define CREATE_BENCHMARK(T, BS, IPT, WITH_TILE) \
benchmark::RegisterBenchmark( \
(std::string("block_discontinuity<" #T ", " #BS ">.") + name + ("<" #IPT ", " #WITH_TILE ">")).c_str(), \
run_benchmark<Benchmark, T, BS, IPT, WITH_TILE>, \
&run_benchmark<Benchmark, T, BS, IPT, WITH_TILE>, \
stream, size \
)

Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_block_exchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N)
#define CREATE_BENCHMARK(T, BS, IPT) \
benchmark::RegisterBenchmark( \
(std::string("block_exchange<" #T ", " #BS ", " #IPT ">.") + name).c_str(), \
run_benchmark<Benchmark, T, BS, IPT>, \
&run_benchmark<Benchmark, T, BS, IPT>, \
stream, size \
)

Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_block_histogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N)
#define CREATE_BENCHMARK(T, BS, IPT) \
benchmark::RegisterBenchmark( \
(std::string("block_histogram<"#T", "#BS", "#IPT", " + algorithm_name + ">.") + method_name).c_str(), \
run_benchmark<Benchmark, T, BS, IPT>, \
&run_benchmark<Benchmark, T, BS, IPT>, \
stream, size \
)

Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_block_radix_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ void run_benchmark(benchmark::State& state, benchmark_kinds benchmark_kind, hipS
#define CREATE_BENCHMARK(T, BS, IPT) \
benchmark::RegisterBenchmark( \
(std::string("block_radix_sort<" #T ", " #BS ", " #IPT ">.") + name).c_str(), \
run_benchmark<T, BS, IPT>, \
&run_benchmark<T, BS, IPT>, \
benchmark_kind, stream, size \
)

Expand Down
Loading

0 comments on commit aaaf755

Please sign in to comment.