From a985da4737b586feeca6d5cd74ab073f45217570 Mon Sep 17 00:00:00 2001
From: Maneesh Gupta <Maneesh.Gupta@amd.com>
Date: Wed, 30 Jun 2021 04:17:44 -0400
Subject: [PATCH 1/9] Revert "Merge branch 'amd-staging' into amd-master-next"

This reverts commit b2d6259d40e19e7326d3b8e311b91e8842fe1f9f.

Reason for revert: Root cause for SWDEV-293424

Change-Id: Ib91024a75e26314f3a9af2cad421d0aaf5e47f08
---
 catch/CMakeLists.txt                          |  22 +-
 catch/README.md                               |   7 +-
 catch/hipTestMain/CMakeLists.txt              |  34 +-
 catch/hipTestMain/hip_test_context.cc         |  66 ++-
 catch/hipTestMain/main.cc                     |   2 +-
 catch/include/hip_test_checkers.hh            | 164 ------
 catch/include/hip_test_common.hh              |  14 -
 catch/include/hip_test_context.hh             |   5 +
 catch/include/hip_test_kernels.hh             |  62 ---
 catch/multiproc/CMakeLists.txt                |  13 -
 catch/multiproc/childMalloc.cc                |  62 ---
 catch/multiproc/hipMallocConcurrency.cc       | 188 -------
 catch/unit/CMakeLists.txt                     |   3 +-
 catch/unit/deviceLib/CMakeLists.txt           |   9 -
 catch/unit/kernels/add.cc                     |   9 +-
 catch/unit/rtc/CMakeLists.txt                 |  18 +-
 catch/unit/rtc/saxpy.cc                       |  81 +--
 catch/unit/rtc/test.cc                        |   6 +
 perftests/memory/hipPerfMemFill.cpp           | 526 ------------------
 .../15_static_library/host_functions/Makefile |   2 +-
 20 files changed, 140 insertions(+), 1153 deletions(-)
 delete mode 100644 catch/include/hip_test_checkers.hh
 delete mode 100644 catch/include/hip_test_kernels.hh
 delete mode 100644 catch/multiproc/CMakeLists.txt
 delete mode 100644 catch/multiproc/childMalloc.cc
 delete mode 100644 catch/multiproc/hipMallocConcurrency.cc
 create mode 100644 catch/unit/rtc/test.cc
 delete mode 100644 perftests/memory/hipPerfMemFill.cpp

diff --git a/catch/CMakeLists.txt b/catch/CMakeLists.txt
index d3b3f028e..53122d59f 100644
--- a/catch/CMakeLists.txt
+++ b/catch/CMakeLists.txt
@@ -44,33 +44,15 @@ include_directories(
     ${HIP_PATH}/include
     ${JSON_PARSER}
 )
-
-if(HIP_PLATFORM MATCHES "amd" AND HIP_COMPILER MATCHES "clang")
-    add_compile_options(-Wall -Wextra -pedantic -Werror)
-endif()
-
 cmake_policy(PUSH)
 if(POLICY CMP0037)
     cmake_policy(SET CMP0037 OLD)
 endif()
-
-# Use clang as host compiler with nvcc
-if(HIP_COMPILER MATCHES "nvcc")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ccbin clang")
-endif()
-
-# Disable CXX extensions (gnu++11 etc)
-set(CMAKE_CXX_EXTENSIONS OFF)
-
 add_custom_target(build_tests)
+add_custom_target(test COMMAND ${CMAKE_CTEST_COMMAND})
+add_dependencies(test build_tests)
 
-# Tests folder
 add_subdirectory(unit)
 add_subdirectory(hipTestMain)
 add_subdirectory(stress)
-
-if(UNIX AND HIP_PLATFORM MATCHES "amd")
-    add_subdirectory(multiproc)
-endif()
-
 cmake_policy(POP)
diff --git a/catch/README.md b/catch/README.md
index 9d57c8a59..38d507c9c 100644
--- a/catch/README.md
+++ b/catch/README.md
@@ -12,7 +12,7 @@ Tests in Catch2 are declared via ```TEST_CASE```.
 
 ## Taking care of existing features
 - Don’t build on platform: EXCLUDE_(HIP_PLATFORM/HIP_RUNTIME), can be done via CMAKE. Adding source in if(HIP_PLATFORM == amd/nvidia).
-- HIPCC_OPTIONS/CLANG Options: Can be done via: set_source_files_properties(src.cc PROPERTIES COMPILE_FLAGS “…”).
+- HIPCC_OPTIONS/CLANG Options: Can be done via: set_source_files_properties(src.cc PROPERTIES COMPILE_FLAGS “…”).
 - Additional libraries: Can be done via target_link_libraries()
 - Multiple runs with different args: This can be done by Catch’s Feature: GENERATE(…)
 Running Subtest: ctest –R “...” (Regex to match the subtest name)
@@ -31,6 +31,8 @@ Some useful functions are:
 - `bool isLinux()` : true if os is linux
 - `bool isAmd()` : true if platform is AMD
 - `bool isNvidia()` : true if platform is NVIDIA
+- `std::vector<std::string> getDevices()` : returns a vector of strings that contains device names (eg: For AMD: gfx906, gfx908 etc / For NVIDIA: RTX 2070 Super)
+- `std::vector<std::string> getTargetId()` : (AMD Only) returns target id for gpus (eg: gfx906:sramecc+:xnack- etc)
 
 This information can be accessed in any test via using: `TestContext::get().isAmd()`.
 
@@ -70,4 +72,5 @@ Catch2 allows multiple ways in which you can debug the test case.
 
 ## External Libs being used
 - [Catch2](https://github.com/catchorg/Catch2) - Testing framework
-- [picojson](https://github.com/kazuho/picojson) - For config file parsing
+- [taocpp/json](https://github.com/taocpp/json) - For config file parsing
+- [taocpp/PEGTL](https://github.com/taocpp/PEGTL) - Helper lib for taojson
diff --git a/catch/hipTestMain/CMakeLists.txt b/catch/hipTestMain/CMakeLists.txt
index 3ca07fb7c..e0a7dfb0b 100644
--- a/catch/hipTestMain/CMakeLists.txt
+++ b/catch/hipTestMain/CMakeLists.txt
@@ -1,33 +1,11 @@
 if(CMAKE_BUILD_TYPE MATCHES "^Debug$")
     add_definitions(-DHT_LOG_ENABLE)
 endif()
-
-add_executable(UnitTests EXCLUDE_FROM_ALL main.cc hip_test_context.cc)
-if(HIP_PLATFORM MATCHES "amd")
-    set_property(TARGET UnitTests PROPERTY CXX_STANDARD 17)
-else()
-    target_compile_options(UnitTests PUBLIC -std=c++17)
-endif()
-
-target_link_libraries(UnitTests PRIVATE DeviceLibs
-                                        MemoryTest
-                                        Kernels
-                                        stdc++fs)
-
-# Add AMD Only Tests
-if(HIP_PLATFORM MATCHES "amd")
-    # target_link_libraries(UnitTests PRIVATE RTC)
-endif()
-
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTAO_PEGTL_STD_EXPERIMENTAL_FILESYSTEM=1")
+add_library(ht_context SHARED EXCLUDE_FROM_ALL hip_test_context.cc)
+set_property(TARGET ht_context PROPERTY CXX_STANDARD 17)
+target_link_libraries(ht_context PRIVATE stdc++fs)
+add_executable(UnitTests EXCLUDE_FROM_ALL main.cc)
+target_link_libraries(UnitTests PRIVATE ht_context DeviceLibs MemoryTest Kernels stdc++fs)
 catch_discover_tests(UnitTests PROPERTIES  SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")
 add_dependencies(build_tests UnitTests)
-
-# Add Multiproc tests as seperate binary
-if(UNIX AND HIP_PLATFORM MATCHES "amd")
-    add_executable(MultiProcTests EXCLUDE_FROM_ALL main.cc hip_test_context.cc)
-    set_property(TARGET MultiProcTests PROPERTY CXX_STANDARD 17)
-    target_link_libraries(MultiProcTests PRIVATE MultiProc
-                                          stdc++fs)
-    catch_discover_tests(MultiProcTests PROPERTIES  SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")
-    add_dependencies(build_tests MultiProcTests)
-endif()
diff --git a/catch/hipTestMain/hip_test_context.cc b/catch/hipTestMain/hip_test_context.cc
index 71c2a9bc0..4fb6b7031 100644
--- a/catch/hipTestMain/hip_test_context.cc
+++ b/catch/hipTestMain/hip_test_context.cc
@@ -15,6 +15,26 @@ namespace fs = std::experimental::filesystem;
 
 #include <regex>
 
+static std::string getValue(std::string option, const std::string& opt_str) {
+  std::string s_opt = opt_str;
+  return s_opt.erase(0, option.size());
+}
+
+static std::string trimName(std::string input, char trim) {
+  auto pos_ = input.find(trim);
+  auto res = input;
+  if (pos_ == std::string::npos) {
+    input = "";
+  } else {
+    res = input.substr(0, pos_);
+    input = input.substr(pos_);
+  }
+  return res;
+}
+
+const std::vector<std::string>& TestContext::getDevices() const { return config_.devices; }
+const std::vector<std::string>& TestContext::getTargetId() const { return config_.targetId; }
+
 void TestContext::detectOS() {
 #if (HT_WIN == 1)
   p_windows = true;
@@ -37,21 +57,18 @@ void TestContext::fillConfig() {
             (env_config != nullptr) ? env_config : "Not found, using default config");
 
   // Check if path has been provided
-  std::string def_config_json = "config.json";
   std::string config_str;
   if (env_config != nullptr) {
     config_str = env_config;
   } else {
-    config_str = def_config_json;
+    config_str = "config.json";
   }
 
   fs::path config_path = config_str;
-  if (config_path.has_parent_path() && config_path.has_filename()) {
+  if (config_path.has_parent_path()) {
     config_.json_file = config_str;
-  } else if (config_path.has_parent_path()) {
-    config_.json_file = config_path / def_config_json;
   } else {
-    config_.json_file = exe_path + def_config_json;
+    config_.json_file = exe_path + config_str;
   }
   LogPrintf("Config file path: %s", config_.json_file.c_str());
 
@@ -62,6 +79,37 @@ void TestContext::fillConfig() {
     LogPrintf("%s", "Either Config or Os is unknown, this wont end well");
     abort();
   }
+
+  int deviceCount = 0;
+  auto res = hipGetDeviceCount(&deviceCount);
+  if (res != hipSuccess) {
+    LogPrintf("HIP Device Count query failed with: %s", hipGetErrorString(res));
+    abort();
+  }
+  if (deviceCount == 0) {
+    LogPrintf("%s", "No hip devices found");
+    abort();
+  }
+  config_.devices.reserve(deviceCount);
+  for (int i = 0; i < deviceCount; i++) {
+    hipDeviceProp_t props;
+    res = hipGetDeviceProperties(&props, i);
+    if (res != hipSuccess) {
+      LogPrintf("HIP Device Count query failed with: %s", hipGetErrorString(res));
+      abort();
+    }
+    if (amd) {
+      std::string tid = std::string(props.gcnArchName);
+      config_.targetId.push_back(tid);
+      std::string dev = trimName(tid, ':');
+      config_.devices.push_back(dev);
+    } else if (nvidia) {
+      config_.devices.push_back(std::string(props.name));
+    } else {
+      LogPrintf("%s", "Unknown platform");
+      abort();
+    }
+  }
 }
 
 TestContext::TestContext(int argc, char** argv) {
@@ -74,7 +122,6 @@ TestContext::TestContext(int argc, char** argv) {
 }
 
 void TestContext::setExePath(int argc, char** argv) {
-  if (argc == 0) return;
   fs::path p = std::string(argv[0]);
   if (p.has_filename()) p.remove_filename();
   exe_path = p.string();
@@ -120,7 +167,9 @@ bool TestContext::parseJsonFile() {
   LogPrintf("Json contents:: %s", json_str.data());
 
   picojson::value v;
-  std::string err = picojson::parse(v, json_str);
+  std::string err;
+  const char* json_end =
+      picojson::parse(v, json_str.data(), json_str.data() + json_str.size(), &err);
   if (err.size() > 1) {
     LogPrintf("Error from PicoJson: %s", err.data());
     return false;
@@ -130,7 +179,6 @@ bool TestContext::parseJsonFile() {
     LogPrintf("%s", "Data in json is not in correct format, it should be an object");
     return false;
   }
-
   const picojson::object &o = v.get<picojson::object>();
   for (picojson::object::const_iterator i = o.begin(); i != o.end(); ++i) {
     // Processing for DisabledTests
diff --git a/catch/hipTestMain/main.cc b/catch/hipTestMain/main.cc
index 886aa7a8d..c70bf755c 100644
--- a/catch/hipTestMain/main.cc
+++ b/catch/hipTestMain/main.cc
@@ -6,7 +6,7 @@ int main(int argc, char** argv) {
   auto& context = TestContext::get(argc, argv);
   if (context.skipTest()) {
     // CTest uses this regex to figure out if the test has been skipped
-    std::cout << "HIP_SKIP_THIS_TEST" << std::endl;
+    std::cout << "HIP_SKIP_THIS_TEST" << context.getCurrentTest() << std::endl;
     return 0;
   }
   return Catch::Session().run(argc, argv);
diff --git a/catch/include/hip_test_checkers.hh b/catch/include/hip_test_checkers.hh
deleted file mode 100644
index d0c180c25..000000000
--- a/catch/include/hip_test_checkers.hh
+++ /dev/null
@@ -1,164 +0,0 @@
-#pragma once
-#include "hip_test_common.hh"
-
-namespace HipTest {
-template <typename T>
-size_t checkVectors(T* A, T* B, T* Out, size_t N, T (*F)(T a, T b), bool expectMatch = true,
-                    bool reportMismatch = true) {
-  size_t mismatchCount = 0;
-  size_t firstMismatch = 0;
-  size_t mismatchesToPrint = 10;
-  for (size_t i = 0; i < N; i++) {
-    T expected = F(A[i], B[i]);
-    if (Out[i] != expected) {
-      if (mismatchCount == 0) {
-        firstMismatch = i;
-      }
-      mismatchCount++;
-      if ((mismatchCount <= mismatchesToPrint) && expectMatch) {
-        INFO("Mismatch at " << i << " Computed: " << Out[i] << " Expeted: " << expected);
-        CHECK(false);
-      }
-    }
-  }
-
-  if (reportMismatch) {
-    if (expectMatch) {
-      if (mismatchCount) {
-        INFO(mismatchCount << " Mismatches  First Mismatch at index : " << firstMismatch);
-        REQUIRE(false);
-      }
-    } else {
-      if (mismatchCount == 0) {
-        INFO("Expected Mismatch but not found any");
-        REQUIRE(false);
-      }
-    }
-  }
-
-  return mismatchCount;
-}
-
-template <typename T>
-size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true,
-                      bool reportMismatch = true) {
-  return checkVectors<T>(
-      A_h, B_h, result_H, N, [](T a, T b) { return a + b; }, expectMatch, reportMismatch);
-}
-
-template <typename T>
-void checkTest(T* expected_H, T* result_H, size_t N, bool expectMatch = true) {
-  checkVectors<T>(
-      expected_H, expected_H, result_H, N,
-      [](T a, T b) {
-        guarantee(a == b, "Both values should be equal");
-        return a;
-      },
-      expectMatch);
-}
-
-
-// Setters and Memory Management
-
-template <typename T> void setDefaultData(size_t numElements, T* A_h, T* B_h, T* C_h) {
-  // Initialize the host data:
-  for (size_t i = 0; i < numElements; i++) {
-    if (A_h) (A_h)[i] = 3.146f + i;  // Pi
-    if (B_h) (B_h)[i] = 1.618f + i;  // Phi
-    if (C_h) (C_h)[i] = 0.0f + i;
-  }
-}
-
-template <typename T>
-bool initArraysForHost(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) {
-  size_t Nbytes = N * sizeof(T);
-
-  if (usePinnedHost) {
-    if (A_h) {
-      HIPCHECK(hipHostMalloc((void**)A_h, Nbytes));
-    }
-    if (B_h) {
-      HIPCHECK(hipHostMalloc((void**)B_h, Nbytes));
-    }
-    if (C_h) {
-      HIPCHECK(hipHostMalloc((void**)C_h, Nbytes));
-    }
-  } else {
-    if (A_h) {
-      *A_h = (T*)malloc(Nbytes);
-      REQUIRE(*A_h != NULL);
-    }
-
-    if (B_h) {
-      *B_h = (T*)malloc(Nbytes);
-      REQUIRE(*B_h != NULL);
-    }
-
-    if (C_h) {
-      *C_h = (T*)malloc(Nbytes);
-      REQUIRE(*C_h != NULL);
-    }
-  }
-
-  setDefaultData(N, A_h ? *A_h : NULL, B_h ? *B_h : NULL, C_h ? *C_h : NULL);
-  return true;
-}
-
-template <typename T>
-bool initArrays(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N,
-                bool usePinnedHost = false) {
-  size_t Nbytes = N * sizeof(T);
-
-  if (A_d) {
-    HIPCHECK(hipMalloc(A_d, Nbytes));
-  }
-  if (B_d) {
-    HIPCHECK(hipMalloc(B_d, Nbytes));
-  }
-  if (C_d) {
-    HIPCHECK(hipMalloc(C_d, Nbytes));
-  }
-
-  return initArraysForHost(A_h, B_h, C_h, N, usePinnedHost);
-}
-
-template <typename T> bool freeArraysForHost(T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
-  if (usePinnedHost) {
-    if (A_h) {
-      HIPCHECK(hipHostFree(A_h));
-    }
-    if (B_h) {
-      HIPCHECK(hipHostFree(B_h));
-    }
-    if (C_h) {
-      HIPCHECK(hipHostFree(C_h));
-    }
-  } else {
-    if (A_h) {
-      free(A_h);
-    }
-    if (B_h) {
-      free(B_h);
-    }
-    if (C_h) {
-      free(C_h);
-    }
-  }
-  return true;
-}
-
-template <typename T>
-bool freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
-  if (A_d) {
-    HIPCHECK(hipFree(A_d));
-  }
-  if (B_d) {
-    HIPCHECK(hipFree(B_d));
-  }
-  if (C_d) {
-    HIPCHECK(hipFree(C_d));
-  }
-
-  return freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
-}
-}  // namespace HipTest
diff --git a/catch/include/hip_test_common.hh b/catch/include/hip_test_common.hh
index a6e07973f..b65444548 100644
--- a/catch/include/hip_test_common.hh
+++ b/catch/include/hip_test_common.hh
@@ -1,16 +1,2 @@
-#pragma once
 #include "hip_test_context.hh"
 #include <catch.hpp>
-
-#define HIP_PRINT_STATUS(status) INFO(hipGetErrorName(status) << " at line: " << __LINE__);
-
-#define HIPCHECK(error)                                                                            \
-  {                                                                                                \
-    hipError_t localError = error;                                                                 \
-    if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) {          \
-      INFO("Error: " << hipGetErrorString(localError) << " Code: " << localError << " Str: "       \
-                     << #error << " In File: " << __FILE__ << " At line: " << __LINE__);           \
-      REQUIRE(false);                                                                              \
-    }                                                                                              \
-  }
-
diff --git a/catch/include/hip_test_context.hh b/catch/include/hip_test_context.hh
index cd81024aa..6b0100fe1 100644
--- a/catch/include/hip_test_context.hh
+++ b/catch/include/hip_test_context.hh
@@ -34,9 +34,12 @@ static int _log_enable = (std::getenv("HT_LOG_ENABLE") ? 1 : 0);
     }                                                                                              \
   }
 
+
 typedef struct Config_ {
   std::string json_file;              // Json file
   std::string platform;               // amd/nvidia
+  std::vector<std::string> devices;   // gfx906, etc
+  std::vector<std::string> targetId;  // Target Ids, only for AMD, gfx906:sramecc+:xnack-
   std::string os;                     // windows/linux
 } Config;
 
@@ -70,6 +73,8 @@ class TestContext {
   bool isNvidia() const;
   bool isAmd() const;
   bool skipTest() const;
+  const std::vector<std::string>& getDevices() const;
+  const std::vector<std::string>& getTargetId() const;
 
   const std::string& getCurrentTest() const { return current_test; }
   std::string currentPath();
diff --git a/catch/include/hip_test_kernels.hh b/catch/include/hip_test_kernels.hh
deleted file mode 100644
index 7196accd9..000000000
--- a/catch/include/hip_test_kernels.hh
+++ /dev/null
@@ -1,62 +0,0 @@
-#pragma once
-
-#include <hip/hip_runtime.h>
-
-namespace HipTest {
-template <typename T> __global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
-  size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-  size_t stride = blockDim.x * gridDim.x;
-
-  for (size_t i = offset; i < NELEM; i += stride) {
-    C_d[i] = A_d[i] + B_d[i];
-  }
-}
-
-
-template <typename T>
-__global__ void vectorADDReverse(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
-  size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-  size_t stride = blockDim.x * gridDim.x;
-
-  for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
-    C_d[i] = A_d[i] + B_d[i];
-  }
-}
-
-
-template <typename T> __global__ void addCount(const T* A_d, T* C_d, size_t NELEM, int count) {
-  size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-  size_t stride = blockDim.x * gridDim.x;
-
-  // Deliberately do this in an inefficient way to increase kernel runtime
-  for (int i = 0; i < count; i++) {
-    for (size_t i = offset; i < NELEM; i += stride) {
-      C_d[i] = A_d[i] + (T)count;
-    }
-  }
-}
-
-
-template <typename T>
-__global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) {
-  size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-  size_t stride = blockDim.x * gridDim.x;
-
-  // Deliberately do this in an inefficient way to increase kernel runtime
-  for (int i = 0; i < count; i++) {
-    for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
-      C_d[i] = A_d[i] + (T)count;
-    }
-  }
-}
-
-
-template <typename T> __global__ void memsetReverse(T* C_d, T val, int64_t NELEM) {
-  size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-  size_t stride = blockDim.x * gridDim.x;
-
-  for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
-    C_d[i] = val;
-  }
-}
-}  // namespace HipTest
\ No newline at end of file
diff --git a/catch/multiproc/CMakeLists.txt b/catch/multiproc/CMakeLists.txt
deleted file mode 100644
index a78226238..000000000
--- a/catch/multiproc/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-# AMD Tests
-set(LINUX_TEST_SRC
-    hipMallocConcurrency.cc
-    childMalloc.cc
-)
-
-if(UNIX)
-    # Create shared lib of all tests
-    add_library(MultiProc SHARED EXCLUDE_FROM_ALL ${LINUX_TEST_SRC})
-
-    # Add dependency on build_tests to build it on this custom target
-    add_dependencies(build_tests MultiProc)
-endif()
diff --git a/catch/multiproc/childMalloc.cc b/catch/multiproc/childMalloc.cc
deleted file mode 100644
index 858fd0878..000000000
--- a/catch/multiproc/childMalloc.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-#include <hip_test_common.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_kernels.hh>
-
-#ifdef __linux__
-#include <unistd.h>
-#include <stdlib.h>
-#include <sys/wait.h>
-#include <dlfcn.h>
-#endif
-
-
-bool testMallocFromChild() {
-  int fd[2];
-  pid_t childpid;
-  bool testResult = false;
-
-  // create pipe descriptors
-  pipe(fd);
-
-  childpid = fork();
-  if (childpid > 0) {  // Parent
-    close(fd[1]);
-    // parent will wait to read the device cnt
-    read(fd[0], &testResult, sizeof(testResult));
-
-    // close the read-descriptor
-    close(fd[0]);
-
-    // wait for child exit
-    wait(NULL);
-
-    return testResult;
-
-  } else if (!childpid) {  // Child
-    // writing only, no need for read-descriptor
-    close(fd[0]);
-
-    char* A_d = nullptr;
-    hipError_t ret = hipMalloc(&A_d, 1024);
-
-    printf("hipMalloc returned : %s\n", hipGetErrorString(ret));
-    if (ret == hipSuccess)
-      testResult = true;
-    else
-      testResult = false;
-
-    // send the value on the write-descriptor:
-    write(fd[1], &testResult, sizeof(testResult));
-
-    // close the write descriptor:
-    close(fd[1]);
-    exit(0);
-  }
-  return false;
-}
-
-
-TEST_CASE("ChildMalloc") {
-  auto res = testMallocFromChild();
-  REQUIRE(res == true);
-}
diff --git a/catch/multiproc/hipMallocConcurrency.cc b/catch/multiproc/hipMallocConcurrency.cc
deleted file mode 100644
index 72d17c26a..000000000
--- a/catch/multiproc/hipMallocConcurrency.cc
+++ /dev/null
@@ -1,188 +0,0 @@
-#include <sys/types.h>
-#include <hip_test_checkers.hh>
-#include <hip_test_kernels.hh>
-#ifdef __linux__
-#include <sys/wait.h>
-#include <unistd.h>
-#endif
-#include <iostream>
-#include <vector>
-#include <limits>
-#include <atomic>
-
-
-#include <hip_test_common.hh>
-
-size_t N = 4 * 1024 * 1024;
-unsigned blocksPerCU = 6;  // to hide latency
-unsigned threadsPerBlock = 256;
-
-
-unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) {
-  int device;
-  HIPCHECK(hipGetDevice(&device));
-  hipDeviceProp_t props;
-  HIPCHECK(hipGetDeviceProperties(&props, device));
-
-  unsigned blocks = props.multiProcessorCount * blocksPerCU;
-  if (blocks * threadsPerBlock > N) {
-    blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
-  }
-
-  return blocks;
-}
-
-
-/**
- * Validates data consitency on supplied gpu
- */
-bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) {
-  size_t Nbytes = N * sizeof(int);
-  int *A_d, *B_d, *C_d;
-  int *A_h, *B_h, *C_h;
-  size_t prevAvl, prevTot, curAvl, curTot;
-  bool TestPassed = true;
-
-  HIPCHECK(hipSetDevice(gpu));
-  HIPCHECK(hipMemGetInfo(&prevAvl, &prevTot));
-  printf("tgs allocating..\n");
-  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
-
-  unsigned blocks = setNumBlocks(blocksPerCU, threadsPerBlock, N);
-
-  HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
-  HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
-
-  hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
-                     static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
-
-  HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
-
-  if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
-    printf("Validation PASSED for gpu %d from pid %d\n", gpu, getpid());
-  } else {
-    printf("%s : Validation FAILED for gpu %d from pid %d\n", __func__, gpu, getpid());
-    TestPassed &= false;
-  }
-
-  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
-  HIPCHECK(hipMemGetInfo(&curAvl, &curTot));
-
-  if (!concurOnOneGPU && (prevAvl != curAvl || prevTot != curTot)) {
-    // In concurrent calls on one GPU, we cannot verify leaking in this way
-    printf(
-        "%s : Memory allocation mismatch observed."
-        "Possible memory leak.\n",
-        __func__);
-    TestPassed &= false;
-  }
-
-  return TestPassed;
-}
-
-
-#if 1
-/**
- * Fetches Gpu device count
- */
-void getDeviceCount1(int* pdevCnt) {
-#ifdef __linux__
-  int fd[2], val = 0;
-  pid_t childpid;
-
-  // create pipe descriptors
-  pipe(fd);
-
-  // disable visible_devices env from shell
-  unsetenv("ROCR_VISIBLE_DEVICES");
-  unsetenv("HIP_VISIBLE_DEVICES");
-
-  childpid = fork();
-
-  if (childpid > 0) {  // Parent
-    close(fd[1]);
-    // parent will wait to read the device cnt
-    read(fd[0], &val, sizeof(val));
-
-    // close the read-descriptor
-    close(fd[0]);
-
-    // wait for child exit
-    wait(NULL);
-
-    *pdevCnt = val;
-  } else if (!childpid) {  // Child
-    int devCnt = 1;
-    // writing only, no need for read-descriptor
-    close(fd[0]);
-
-    HIPCHECK(hipGetDeviceCount(&devCnt));
-    // send the value on the write-descriptor:
-    write(fd[1], &devCnt, sizeof(devCnt));
-
-    // close the write descriptor:
-    close(fd[1]);
-    exit(0);
-  } else {  // failure
-    *pdevCnt = 1;
-    return;
-  }
-
-#else
-  HIPCHECK(hipGetDeviceCount(pdevCnt));
-#endif
-}
-#endif
-
-
-TEST_CASE("hipMallocChild_Concurrency_MultiGpu") {
-  bool TestPassed = false;
-#ifdef __linux__
-  // Parallel execution on multiple gpus from different child processes
-  int devCnt = 1, pid = 0;
-
-  // Get GPU count
-  getDeviceCount1(&devCnt);
-
-  // Spawn child for each GPU
-  for (int gpu = 0; gpu < devCnt; gpu++) {
-    if ((pid = fork()) < 0) {
-      INFO("Child_Concurrency_MultiGpu : fork() returned error" << pid);
-      REQUIRE(false);
-
-    } else if (!pid) {  // Child process
-      bool TestPassedChild = false;
-      TestPassedChild = validateMemoryOnGPU(gpu);
-
-      if (TestPassedChild) {
-        printf("returning exit(1) for success\n");
-        exit(1);  // child exit with success status
-      } else {
-        printf("Child_Concurrency_MultiGpu : childpid %d failed\n", getpid());
-        exit(2);  // child exit with failure status
-      }
-    }
-  }
-
-  // Parent shall wait for child to complete
-  int cnt = 0;
-
-  for (int i = 0; i < devCnt; i++) {
-    int pidwait = 0, exitStatus;
-    pidwait = wait(&exitStatus);
-
-    printf("exitStatus for iter:%d is %d\n", i, exitStatus);
-    if (pidwait < 0) {
-      break;
-    }
-
-    if (WEXITSTATUS(exitStatus) == 1) cnt++;
-  }
-
-  if (cnt && (cnt == devCnt)) TestPassed = true;
-
-#else
-  INFO("Test hipMallocChild_Concurrency_MultiGpu skipped on non-linux");
-#endif
-  REQUIRE(TestPassed == true);
-}
diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt
index cff55f37a..d913d25a5 100644
--- a/catch/unit/CMakeLists.txt
+++ b/catch/unit/CMakeLists.txt
@@ -1,5 +1,4 @@
 add_subdirectory(memory)
 add_subdirectory(deviceLib)
 add_subdirectory(kernels)
-# Disable Saxpy test temporarily to see if CI Passes
-# add_subdirectory(rtc)
+add_subdirectory(rtc)
diff --git a/catch/unit/deviceLib/CMakeLists.txt b/catch/unit/deviceLib/CMakeLists.txt
index 22de79c68..421261b51 100644
--- a/catch/unit/deviceLib/CMakeLists.txt
+++ b/catch/unit/deviceLib/CMakeLists.txt
@@ -1,18 +1,9 @@
 # Common Tests - Test independent of all platforms
 set(TEST_SRC
     floatMath.cc
-)
-
-# AMD only tests
-set(AMD_TEST_SRC
     vectorTypesDevice.cc
 )
 
-if(HIP_PLATFORM MATCHES "amd")
-    set(TEST_SRC ${TEST_SRC} ${AMD_TEST_SRC})
-endif()
-
-
 # Create shared lib of all tests
 add_library(DeviceLibs SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
 
diff --git a/catch/unit/kernels/add.cc b/catch/unit/kernels/add.cc
index 4f70ffef7..7adfde51d 100644
--- a/catch/unit/kernels/add.cc
+++ b/catch/unit/kernels/add.cc
@@ -2,11 +2,12 @@
 #include <iostream>
 
 template <typename T> __global__ void add(T* a, T* b, T* c, size_t size) {
-  size_t i = threadIdx.x;
-  if (i < size) c[i] = a[i] + b[i];
+  int i = threadIdx.x;
+  c[i] = a[i] + b[i];
 }
 
 TEMPLATE_TEST_CASE("Add Kernel", "[kernel][add]", int, long, float, long long, double) {
+  auto addKernel = add<TestType>;
   auto size = GENERATE(as<size_t>{}, 100, 500, 1000);
   TestType *d_a, *d_b, *d_c;
   auto res = hipMalloc(&d_a, sizeof(TestType) * size);
@@ -17,7 +18,7 @@ TEMPLATE_TEST_CASE("Add Kernel", "[kernel][add]", int, long, float, long long, d
   REQUIRE(res == hipSuccess);
 
   std::vector<TestType> a, b, c;
-  for (size_t i = 0; i < size; i++) {
+  for (int i = 0; i < size; i++) {
     a.push_back(i + 1);
     b.push_back(i + 1);
     c.push_back(2 * (i + 1));
@@ -28,7 +29,7 @@ TEMPLATE_TEST_CASE("Add Kernel", "[kernel][add]", int, long, float, long long, d
   res = hipMemcpy(d_b, b.data(), sizeof(TestType) * size, hipMemcpyHostToDevice);
   REQUIRE(res == hipSuccess);
 
-  hipLaunchKernelGGL(add<TestType>, 1, size, 0, 0, d_a, d_b, d_c, size);
+  hipLaunchKernelGGL(addKernel, 1, size, 0, 0, d_a, d_b, d_c, size);
 
   res = hipMemcpy(a.data(), d_c, sizeof(TestType) * size, hipMemcpyDeviceToHost);
   REQUIRE(res == hipSuccess);
diff --git a/catch/unit/rtc/CMakeLists.txt b/catch/unit/rtc/CMakeLists.txt
index 062e4153c..435d372fb 100644
--- a/catch/unit/rtc/CMakeLists.txt
+++ b/catch/unit/rtc/CMakeLists.txt
@@ -1,12 +1,14 @@
-# AMD Tests
-set(AMD_TEST_SRC
+# Common Tests - Test independent of all platforms
+set(TEST_SRC
     saxpy.cc
 )
 
-if(HIP_PLATFORM MATCHES "amd")
-    # Create shared lib of all tests
-    add_library(RTC SHARED EXCLUDE_FROM_ALL ${AMD_TEST_SRC})
+# Set source File properties
+set_source_files_properties(saxpy.cc PROPERTIES COMPILE_FLAGS " -std=c++14 ")
+set_source_files_properties(test.cc PROPERTIES COMPILE_FLAGS " -std=c++17 ")
 
-    # Add dependency on build_tests to build it on this custom target
-    add_dependencies(build_tests RTC)
-endif()
+# Create shared lib of all tests
+add_library(RTC SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
+
+# Add dependency on build_tests to build it on this custom target
+add_dependencies(build_tests RTC)
diff --git a/catch/unit/rtc/saxpy.cc b/catch/unit/rtc/saxpy.cc
index af7ca24a2..186349ae0 100644
--- a/catch/unit/rtc/saxpy.cc
+++ b/catch/unit/rtc/saxpy.cc
@@ -15,7 +15,7 @@ static constexpr auto NUM_THREADS{128};
 static constexpr auto NUM_BLOCKS{32};
 
 static constexpr auto saxpy{
-R"(
+    R"(
 #include <hip/hip_runtime.h>
 extern "C"
 __global__
@@ -23,7 +23,8 @@ void saxpy(float a, float* x, float* y, float* out, size_t n)
 {
     size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
         if (tid < n) {
-               out[tid] = a * x[tid] + y[tid];
+               out[tid] = a * x[tid] + y[tid] ;
+                   
         }
         
 }
@@ -71,42 +72,42 @@ TEST_CASE("saxpy", "[hiprtc][saxpy]") {
   unique_ptr<float[]> hX{new float[n]};
   unique_ptr<float[]> hY{new float[n]};
   unique_ptr<float[]> hOut{new float[n]};
-  for (size_t i = 0; i < n; ++i) {
-    hX[i] = static_cast<float>(i);
-    hY[i] = static_cast<float>(i * 2);
-  }
-
-  hipDeviceptr_t dX, dY, dOut;
-  hipMalloc(&dX, bufferSize);
-  hipMalloc(&dY, bufferSize);
-  hipMalloc(&dOut, bufferSize);
-  hipMemcpyHtoD(dX, hX.get(), bufferSize);
-  hipMemcpyHtoD(dY, hY.get(), bufferSize);
-
-  struct {
-    float a_;
-    hipDeviceptr_t b_;
-    hipDeviceptr_t c_;
-    hipDeviceptr_t d_;
-    size_t e_;
-  } args{a, dX, dY, dOut, n};
-
-  auto size = sizeof(args);
-  void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                    HIP_LAUNCH_PARAM_END};
-
-  hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1, 0, nullptr, nullptr, config);
-  hipMemcpyDtoH(hOut.get(), dOut, bufferSize);
-
-  for (size_t i = 0; i < n; ++i) {
-    INFO("For " << i << " Value: " << fabs(a * hX[i] + hY[i] - hOut[i])
-                << " with: " << (fabs(hOut[i] * 1.0f) * 1e-6));
-    REQUIRE(fabs(a * hX[i] + hY[i] - hOut[i]) <= fabs(hOut[i]) * 1e-6);
-  }
-
-  hipFree(dX);
-  hipFree(dY);
-  hipFree(dOut);
-
-  hipModuleUnload(module);
+for (size_t i = 0; i < n; ++i) {
+        hX[i] = static_cast<float>(i);
+        hY[i] = static_cast<float>(i * 2);
+    }
+
+    hipDeviceptr_t dX, dY, dOut;
+    hipMalloc(&dX, bufferSize);
+    hipMalloc(&dY, bufferSize);
+    hipMalloc(&dOut, bufferSize);
+    hipMemcpyHtoD(dX, hX.get(), bufferSize);
+    hipMemcpyHtoD(dY, hY.get(), bufferSize);
+
+    struct {
+        float a_;
+        hipDeviceptr_t b_;
+        hipDeviceptr_t c_;
+        hipDeviceptr_t d_;
+        size_t e_;
+    } args{a, dX, dY, dOut, n};
+
+    auto size = sizeof(args);
+    void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
+                      HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                      HIP_LAUNCH_PARAM_END};
+
+    hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1,
+                          0, nullptr, nullptr, config);
+    hipMemcpyDtoH(hOut.get(), dOut, bufferSize);
+
+    for (size_t i = 0; i < n; ++i) {
+      REQUIRE(fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i]) * 1e-6);
+    }
+
+    hipFree(dX);
+    hipFree(dY);
+    hipFree(dOut);
+
+    hipModuleUnload(module);
 }
diff --git a/catch/unit/rtc/test.cc b/catch/unit/rtc/test.cc
new file mode 100644
index 000000000..3b1261045
--- /dev/null
+++ b/catch/unit/rtc/test.cc
@@ -0,0 +1,6 @@
+#include <hip_test_common.hh>
+
+TEST_CASE("cpp17 test") {
+  constexpr auto l = []() { return 2 * 10 * 30; };
+  REQUIRE(l() == 600);
+}
diff --git a/perftests/memory/hipPerfMemFill.cpp b/perftests/memory/hipPerfMemFill.cpp
deleted file mode 100644
index 1570c8430..000000000
--- a/perftests/memory/hipPerfMemFill.cpp
+++ /dev/null
@@ -1,526 +0,0 @@
-/*
- Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
-
-/* HIT_START
- * BUILD: %t %s ../../src/test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
- * TEST: %t
- * HIT_END
- */
-
-#include "test_common.h"
-#include <printf/printf_common.h>
-#include <iostream>
-#include <chrono>
-#include <sys/time.h>
-
-#define SIMPLY_ASSIGN 0
-#define USE_HIPTEST_SETNUMBLOCKS 0
-
-using namespace std;
-
-template<class T>
-__global__ void vec_fill(T *x, T coef, int N) {
-  const int istart = threadIdx.x + blockIdx.x * blockDim.x;
-  const int ishift = blockDim.x * gridDim.x;
-  for (int i = istart; i < N; i += ishift) {
-#if SIMPLY_ASSIGN
-    x[i] = coef;
-#else
-    x[i] = coef * i;
-#endif
-  }
-}
-
-__device__ void print_log(int i, double value, double expected) {
-  printf("failed at %d: val=%g, expected=%g\n", i, value, expected);
-}
-
-__device__ void print_log(int i, int value, int expected) {
-  printf("failed at %d: val=%d, expected=%d\n", i, value, expected);
-}
-
-template<class T>
-__global__ void vec_verify(T *x, T coef, int N) {
-  const int istart = threadIdx.x + blockIdx.x * blockDim.x;
-  const int ishift = blockDim.x * gridDim.x;
-  for (int i = istart; i < N; i += ishift) {
-#if SIMPLY_ASSIGN
-    if(x[i] != coef) {
-      print_log(i, x[i], coef);
-    }
-#else
-    if(x[i] != coef * i) {
-      print_log(i, x[i], coef * i);
-    }
-#endif
-  }
-}
-
-template<class T>
-__global__ void daxpy(T *__restrict__ x, T *__restrict__ y,
-    const T coef, int Niter, int N) {
-  const int istart = threadIdx.x + blockIdx.x * blockDim.x;
-  const int ishift = blockDim.x * gridDim.x;
-  for (int iter = 0; iter < Niter; ++iter) {
-    T iv = coef * iter;
-    for (int i = istart; i < N; i += ishift)
-    y[i] = iv * x[i] + y[i];
-  }
-}
-
-template<class T>
-class hipPerfMemFill {
- private:
-  static constexpr int NUM_START = 27;
-  static constexpr int NUM_SIZE = 5;
-  static constexpr int NUM_ITER = 10;
-  size_t totalSizes_[NUM_SIZE];
-  hipDeviceProp_t props_;
-  const T coef_ = getCoefficient(3.14159);
-  const unsigned int blocksPerCU_;
-  const unsigned int threadsPerBlock_;
-
- public:
-  hipPerfMemFill(unsigned int blocksPerCU, unsigned int threadsPerBlock) :
-    blocksPerCU_(blocksPerCU), threadsPerBlock_(threadsPerBlock) {
-    for (int i = 0; i < NUM_SIZE; i++) {
-      totalSizes_[i] = 1ull << (i + NUM_START); // 128M, 256M, 512M, 1024M, 2048M
-    }
-  }
-
-  ~hipPerfMemFill() {
-  }
-
-  bool supportLargeBar() {
-    return props_.isLargeBar != 0;
-  }
-
-  bool supportManagedMemory() {
-    return props_.managedMemory != 0;
-  }
-
-  const T getCoefficient(double val) {
-    return static_cast<T>(val);
-  }
-
-  void setHostBuffer(T *A, T val, size_t size) {
-    size_t len = size / sizeof(T);
-    for (int i = 0; i < len; i++) {
-      A[i] = val;
-    }
-  }
-
-  void open(int deviceId) {
-    int nGpu = 0;
-    HIPCHECK(hipGetDeviceCount(&nGpu));
-    if (nGpu < 1) {
-      cout << "Info: didn't find any GPU! skipping the test!\n";
-      passed();
-    } else if (deviceId >= nGpu) {
-      failed("Info: wrong GPU Id %d\n", deviceId);
-    }
-
-    HIPCHECK(hipSetDevice(deviceId));
-    HIPCHECK(hipGetDeviceProperties(&props_, deviceId));
-    std::cout << "Info: running on device: id: " << deviceId << ", bus: 0x"
-        << props_.pciBusID << " " << props_.name << " with "
-        << props_.multiProcessorCount << " CUs, large bar: "
-        << supportLargeBar() << ", managed memory: " << supportManagedMemory()
-        << ", DeviceMallocFinegrained: " << supportDeviceMallocFinegrained()
-        << std::endl;
-  }
-
-  void log_host(const char* title, double GBytes, double sec) {
-    cout << title << " [" << setw(7) << GBytes << " GB]: cost " << setw(10) << sec
-        << " s in bandwidth " << setw(10) << GBytes / sec << " [GB/s]" << endl;
-  }
-
-  void log_kernel(const char* title, double GBytes, double sec, double sec_hv, double sec_kv) {
-    cout << title << " [" << setw(7) << GBytes << " GB]: cost " << setw(10) << sec
-        << " s in bandwidth " << setw(10) << GBytes / sec << " [GB/s]" << ", hostVerify cost "
-        << setw(10) << sec_hv << " s in bandwidth " << setw(10) << GBytes / sec_hv << " [GB/s]"
-        << ", kernelVerify cost "<< setw(10) << sec_kv << " s in bandwidth " << setw(10)
-        << GBytes / sec_kv << " [GB/s]" << endl;
-  }
-
-  void hostFill(size_t size, T *data, T coef, double &sec) {
-    size_t num = size / sizeof(T);  // Size of elements
-    auto start = chrono::steady_clock::now();
-    for (int i = 0; i < num; ++i) {
-#if SIMPLY_ASSIGN
-      data[i] = coef;
-#else
-      data[i] = coef * i;
-#endif
-    }
-    auto end = chrono::steady_clock::now();
-    chrono::duration<double> diff = end - start;  // in second
-    sec = diff.count();
-  }
-
-  void kernelFill(size_t size, T *data, T coef, double &sec) {
-    size_t num = size / sizeof(T);  // Size of elements
-    unsigned blocks = setNumBlocks(num);
-
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(vec_fill<T>), dim3(blocks),
-                           dim3(threadsPerBlock), 0, 0, data, 0, num);  // kernel will be loaded first time
-    HIPCHECK(hipDeviceSynchronize());
-
-    auto start = chrono::steady_clock::now();
-
-    for (int iter = 0; iter < NUM_ITER; ++iter) {
-      hipLaunchKernelGGL(HIP_KERNEL_NAME(vec_fill<T>), dim3(blocks),
-                             dim3(threadsPerBlock), 0, 0, data, coef, num);
-    }
-    HIPCHECK(hipDeviceSynchronize());
-
-    auto end = chrono::steady_clock::now();
-    chrono::duration<double> diff = end - start;  // in second
-    sec = diff.count() / NUM_ITER;  // in second
-  }
-
-  void hostVerify(size_t size, T *data, T coef, double &sec) {
-    size_t num = size / sizeof(T);  // Size of elements
-    auto start = chrono::steady_clock::now();
-    for (int i = 0; i < num; ++i) {
-#if SIMPLY_ASSIGN
-      if(data[i] != coef) {
-        cout << "hostVerify failed: i=" << i << ", data[i]=" << data[i] << ", expected=" << coef << endl;
-        failed("failed\n");
-      }
-#else
-      if(data[i] != coef * i) {
-        cout << "hostVerify failed: i=" << i << ", data[i]=" << data[i] << ", expected=" << coef * i << endl;
-        failed("failed\n");
-      }
-#endif
-    }
-    auto end = chrono::steady_clock::now();
-    chrono::duration<double> diff = end - start;  // in second
-    sec = diff.count();
-  }
-
-  void kernelVerify(size_t size, T *data, T coef, double &sec) {
-    size_t num = size / sizeof(T);  // Size of elements
-    unsigned blocks = setNumBlocks(num);
-
-    CaptureStream *capture = new CaptureStream(stdout);
-    capture->Begin();
-
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(vec_verify<T>), dim3(blocks),
-                       dim3(threadsPerBlock), 0, 0, data, coef, num);  // kernel will be loaded first time
-    HIPCHECK(hipDeviceSynchronize());
-
-    capture->End();
-    capture->Truncate(1000); // Don't want too long log if existing
-    std::string device_output = capture->getData();
-    delete capture;
-    if (device_output.length() > 0) {
-      failed("kernelVerify failed:\n%s\n", device_output.c_str());
-    }
-
-    // Now all data verified. The following is to test bandwidth.
-    auto start = chrono::steady_clock::now();
-
-    for (int iter = 0; iter < NUM_ITER; ++iter) {
-      hipLaunchKernelGGL(HIP_KERNEL_NAME(vec_verify<T>), dim3(blocks),
-                             dim3(threadsPerBlock), 0, 0, data, coef, num);
-    }
-    HIPCHECK(hipDeviceSynchronize());
-
-    auto end = chrono::steady_clock::now();
-    chrono::duration<double> diff = end - start;  // in second
-    sec = diff.count() / NUM_ITER;  // in second
-  }
-
-  bool testLargeBarDeviceMemoryHostFill(size_t size) {
-    if (!supportLargeBar()) {
-      return false;
-    }
-
-    double GBytes = (double) size / (1024.0 * 1024.0 * 1024.0);
-
-    T *A;
-    HIPCHECK(hipMalloc(&A, size));
-    double sec = 0;
-    hostFill(size, A, coef_, sec);  // Cpu can access device mem in LB
-    HIPCHECK(hipFree(A));
-
-    log_host("Largebar: host   fill", GBytes, sec);
-    return true;
-  }
-
-  bool testLargeBar() {
-    if (!supportLargeBar()) {
-      return false;
-    }
-
-    cout << "Test large bar device memory host filling" << endl;
-    for (int i = 0; i < NUM_SIZE; i++) {
-      if (!testLargeBarDeviceMemoryHostFill(totalSizes_[i])) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  bool testManagedMemoryHostFill(size_t size) {
-    if (!supportManagedMemory()) {
-      return false;
-    }
-    double GBytes = (double) size / (1024.0 * 1024.0 * 1024.0);
-
-    T *A;
-    HIPCHECK(hipMallocManaged(&A, size));
-    double sec = 0;
-    hostFill(size, A, coef_, sec);  // Cpu can access HMM mem
-    HIPCHECK(hipFree(A));
-
-    log_host("Managed: host   fill", GBytes, sec);
-    return true;
-  }
-
-  bool testManagedMemoryKernelFill(size_t size) {
-    if (!supportManagedMemory()) {
-      return false;
-    }
-    double GBytes = (double) size / (1024.0 * 1024.0 * 1024.0);
-
-    T *A;
-    HIPCHECK(hipMallocManaged(&A, size));
-
-    double sec = 0, sec_hv = 0, sec_kv = 0;
-    kernelFill(size, A, coef_, sec);
-    hostVerify(size, A, coef_, sec_hv);  // Managed memory can be verified by host
-    kernelVerify(size, A, coef_, sec_kv);
-    HIPCHECK(hipFree(A));
-
-    log_kernel("Managed: kernel fill", GBytes, sec, sec_hv, sec_kv);
-
-    return true;
-  }
-
-  bool testManagedMemory() {
-    if (!supportManagedMemory()) {
-      return false;
-    }
-
-    cout << "Test managed memory host filling" << endl;
-    for (int i = 0; i < NUM_SIZE; i++) {
-      if (!testManagedMemoryHostFill(totalSizes_[i])) {
-        return false;
-      }
-    }
-
-    cout << "Test managed memory kernel filling" << endl;
-    for (int i = 0; i < NUM_SIZE; i++) {
-      if (!testManagedMemoryKernelFill(totalSizes_[i])) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  bool testHostMemoryHostFill(size_t size, unsigned int flags) {
-    double GBytes = (double) size / (1024.0 * 1024.0 * 1024.0);
-    T *A;
-    HIPCHECK(hipHostMalloc(&A, size, flags));
-    double sec = 0;
-    hostFill(size, A, coef_, sec);
-    HIPCHECK(hipHostFree(A));
-
-    log_host("Host: host   fill", GBytes, sec);
-    return true;
-  }
-
-  bool testHostMemoryKernelFill(size_t size, unsigned int flags) {
-    double GBytes = (double) size / (1024.0 * 1024.0 * 1024.0);
-
-    T *A;
-    HIPCHECK(hipHostMalloc((void** ) &A, size, flags));
-    double sec = 0, sec_hv = 0, sec_kv = 0;
-    kernelFill(size, A, coef_, sec);
-    hostVerify(size, A, coef_, sec_hv);
-    kernelVerify(size, A, coef_, sec_kv);
-    HIPCHECK(hipHostFree(A));
-
-    log_kernel("Host: kernel fill", GBytes, sec, sec_hv, sec_kv);
-    return true;
-  }
-
-  bool testHostMemory() {
-    cout << "Test coherent host memory host filling" << endl;
-    for (int i = 0; i < NUM_SIZE; i++) {
-      if (!testHostMemoryHostFill(totalSizes_[i], hipHostMallocCoherent)) {
-        return false;
-      }
-    }
-
-    cout << "Test non-coherent host memory host filling" << endl;
-    for (int i = 0; i < NUM_SIZE; i++) {
-      if (!testHostMemoryHostFill(totalSizes_[i], hipHostMallocNonCoherent)) {
-        return false;
-      }
-    }
-
-    cout << "Test coherent host memory kernel filling" << endl;
-    for (int i = 0; i < NUM_SIZE; i++) {
-      if (!testHostMemoryKernelFill(totalSizes_[i], hipHostMallocCoherent)) {
-        return false;
-      }
-    }
-
-    cout << "Test non-coherent host memory kernel filling" << endl;
-    for (int i = 0; i < NUM_SIZE; i++) {
-      if (!testHostMemoryKernelFill(totalSizes_[i], hipHostMallocNonCoherent)) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  /* This fuction should be via device attribute query*/
-  bool supportDeviceMallocFinegrained() {
-    T *A = nullptr;
-    hipExtMallocWithFlags((void **)&A, sizeof(T), hipDeviceMallocFinegrained);
-    if (!A) {
-      return false;
-    }
-    HIPCHECK(hipFree(A));
-    return true;
-  }
-
-  unsigned int setNumBlocks(size_t size) {
-    size_t num = size/sizeof(T);
-
-#if USE_HIPTEST_SETNUMBLOCKS
-    return HipTest::setNumBlocks(blocksPerCU_, threadsPerBlock_,
-                                 num);
-#else
-    return (num + threadsPerBlock_ - 1) / threadsPerBlock_;
-#endif
-  }
-
-  bool testExtDeviceMemoryHostFill(size_t size, unsigned int flags) {
-    double GBytes = (double) size / (1024.0 * 1024.0 * 1024.0);
-
-    T *A = nullptr;
-    HIPCHECK(hipExtMallocWithFlags((void **)&A, size, flags));
-    if (!A) {
-      cout << "failed hipExtMallocWithFlags() with size =" << size << " flags="
-           << std::hex << flags << endl;
-      return false;
-    }
-
-    double sec = 0;
-    hostFill(size, A, coef_, sec);  // Cpu can access this mem
-    HIPCHECK(hipFree(A));
-
-    log_host("ExtDevice: host   fill", GBytes, sec);
-    return true;
-  }
-
-  bool testExtDeviceMemoryKernelFill(size_t size, unsigned int flags) {
-    double GBytes = (double) size / (1024.0 * 1024.0 * 1024.0);
-
-    T *A = nullptr;
-    HIPCHECK(hipExtMallocWithFlags((void **)&A, size, flags));
-    if (!A) {
-      cout << "failed hipExtMallocWithFlags() with size =" << size << " flags="
-           << std::hex << flags << endl;
-      return false;
-    }
-
-    double sec = 0, sec_hv = 0, sec_kv = 0;
-    kernelFill(size, A, coef_, sec);
-    hostVerify(size, A, coef_, sec_hv);  // Fine grained device memory can be verified by host
-    kernelVerify(size, A, coef_, sec_kv);
-    HIPCHECK(hipFree(A));
-
-    log_kernel("ExtDevice: kernel fill", GBytes, sec, sec_hv, sec_kv);
-
-    return true;
-  }
-
-  bool testExtDeviceMemory() {
-    cout << "Test fine grained device memory host filling"
-        << endl;
-    for (int i = 0; i < NUM_SIZE; i++) {
-      if (!testExtDeviceMemoryHostFill(totalSizes_[i],
-                                       hipDeviceMallocFinegrained)) {
-        return false;
-      }
-    }
-
-    cout << "Test fine grained device memory kernel filling"
-        << endl;
-    for (int i = 0; i < NUM_SIZE; i++) {
-      if (!testExtDeviceMemoryKernelFill(totalSizes_[i],
-                                         hipDeviceMallocFinegrained)) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  bool run() {
-    if (supportLargeBar()) {
-      if (!testLargeBar()) {
-        return false;
-      }
-    }
-
-    if (supportManagedMemory()) {
-      if (!testManagedMemory()) {
-        return false;
-      }
-    }
-
-    if (!testHostMemory()) {
-      return false;
-    }
-
-    if (supportDeviceMallocFinegrained()) {
-      if (!testExtDeviceMemory()) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-};
-
-int main(int argc, char *argv[]) {
-  HipTest::parseStandardArguments(argc, argv, true); // For ::p_gpuDevice, ::blocksPerCU, ::threadsPerBlock
-  cout << "Test int" << endl;
-  hipPerfMemFill<int> hipPerfMemFillInt(::blocksPerCU, ::threadsPerBlock);
-  hipPerfMemFillInt.open(::p_gpuDevice);
-  HIPASSERT(hipPerfMemFillInt.run());
-
-  cout << "Test double" << endl;
-  hipPerfMemFill<double> hipPerfMemFillDouble(::blocksPerCU, ::threadsPerBlock);
-  hipPerfMemFillDouble.open(::p_gpuDevice);
-  HIPASSERT(hipPerfMemFillDouble.run());
-
-  passed();
-}
diff --git a/samples/2_Cookbook/15_static_library/host_functions/Makefile b/samples/2_Cookbook/15_static_library/host_functions/Makefile
index 4945075ca..2bbc26727 100644
--- a/samples/2_Cookbook/15_static_library/host_functions/Makefile
+++ b/samples/2_Cookbook/15_static_library/host_functions/Makefile
@@ -25,7 +25,7 @@ $(HIPCC_EXE): $(EMIT_STATIC_LIB)
 
 # Compiles hipMain1 with g++ and links with libHipOptLibrary.a which contains host function.
 $(HOST_EXE): $(EMIT_STATIC_LIB)
-	$(GXX) $(EMIT_STATIC_MAIN_SRC) -L. -lHipOptLibrary -L$(HIP_PATH)/lib -lamdhip64 -Wl,-rpath=$(HIP_PATH)/lib -o $@
+	$(GXX) $(EMIT_STATIC_MAIN_SRC) -L. -lHipOptLibrary -L$(HIP_PATH)/lib -lamdhip64 -o $@
 
 test: $(HIPCC_EXE) $(HOST_EXE)
 	$(HIPCC_EXE)

From 33248ef08a6351aa4a5115e77fa8ebd5f703e45a Mon Sep 17 00:00:00 2001
From: Rahul Garg <Rahul.Garg@amd.com>
Date: Wed, 12 Oct 2022 06:06:15 +0000
Subject: [PATCH 2/9] SWDEV-355313 - Move catch tests and samples

Move catch tests and samples from hip to hip-tests

Change-Id: Id7bd6dd708936a33d3901bada1c3577325f0a955
---
 LICENSE.txt | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 LICENSE.txt

diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 000000000..4cbb63923
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,20 @@
+Copyright (c) 2008 - 2022 Advanced Micro Devices, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+

From 1cd34206e66c5cb936daba64160a09647ff69b5f Mon Sep 17 00:00:00 2001
From: Rahul Garg <Rahul.Garg@amd.com>
Date: Fri, 9 Dec 2022 21:11:43 +0000
Subject: [PATCH 3/9] SWDEV-355313 - Merge remote-tracking branch
 'hip/amd-master-next' into amd-mainline

Change-Id: I96c31a18708f7aa175117e627dd2e054c3580f4c
---
 .clang-format | 10 ++++++++++
 .gitignore    | 17 +++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 .clang-format
 create mode 100644 .gitignore

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..5572a72cd
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,10 @@
+Language: Cpp
+BasedOnStyle: Google
+AlignEscapedNewlinesLeft: false
+AlignOperands: false
+ColumnLimit: 100
+AlwaysBreakTemplateDeclarations: false
+DerivePointerAlignment: false
+IndentFunctionDeclarationAfterType: false
+MaxEmptyLinesToKeep: 2
+SortIncludes: false
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..f3f605803
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+.*
+!.gitignore
+*.o
+*.exe
+*.swp
+lib
+packages
+build
+tags
+samples/0_Intro/module_api/runKernel.hip.out
+samples/0_Intro/module_api/vcpy_isa.code
+samples/0_Intro/module_api/vcpy_isa.hsaco
+samples/0_Intro/module_api/vcpy_kernel.co
+samples/0_Intro/module_api/vcpy_kernel.code
+samples/1_Utils/hipInfo/hipInfo
+samples/1_Utils/hipBusBandwidth/hipBusBandwidth
+samples/1_Utils/hipDispatchLatency/hipDispatchLatency

From bc9d57895a4dbdfa2feb841465352b644d21e1f5 Mon Sep 17 00:00:00 2001
From: Siu Chi Chan <siuchi.chan@amd.com>
Date: Fri, 20 Jan 2023 20:22:13 +0000
Subject: [PATCH 4/9] SWDEV-384035 - Link tests to required libraries (#124)

A lot of the unit tests make use of the C++ thread library but the
cmakefile doesn't explicitly link them to the pthread library but
instead rely on hipcc to implicitly link in the pthread library.
Some tests that rely on librt have a similar issue.  The tests
break when we are cleaning up hipcc by removing the implcit linking
to those libraries.

Change-Id: Ic69287273a147fdcd13cf18f09a5fa9818bf221b
---
 catch/external/Catch2/cmake/Catch2/Catch.cmake | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/catch/external/Catch2/cmake/Catch2/Catch.cmake b/catch/external/Catch2/cmake/Catch2/Catch.cmake
index 868ccfa73..8aab30f44 100644
--- a/catch/external/Catch2/cmake/Catch2/Catch.cmake
+++ b/catch/external/Catch2/cmake/Catch2/Catch.cmake
@@ -217,6 +217,8 @@ function(hip_add_exe_to_target)
   if(UNIX)
     set(_LINKER_LIBS ${_LINKER_LIBS} stdc++fs)
     set(_LINKER_LIBS ${_LINKER_LIBS} -ldl)
+    set(_LINKER_LIBS ${_LINKER_LIBS} pthread)
+    set(_LINKER_LIBS ${_LINKER_LIBS} rt)
   else()
     # res files are built resource files using rc files.
     # use llvm-rc exe to build the res files

From 5e3ed90344d00448dd895d09ba8a2afaa977ab4e Mon Sep 17 00:00:00 2001
From: Maneesh Gupta <maneesh.gupta@amd.com>
Date: Tue, 21 Feb 2023 07:05:32 +0000
Subject: [PATCH 5/9] SWDEV-383703 - Fix hipDeviceSetSharedMemConfig tests

Change-Id: I9da97d15eaa55266dee2d25cbfa8410e617396ae
---
 catch/unit/device/hipDeviceSetGetSharedMemConfig.cc | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/catch/unit/device/hipDeviceSetGetSharedMemConfig.cc b/catch/unit/device/hipDeviceSetGetSharedMemConfig.cc
index 045f40092..572f4b06d 100644
--- a/catch/unit/device/hipDeviceSetGetSharedMemConfig.cc
+++ b/catch/unit/device/hipDeviceSetGetSharedMemConfig.cc
@@ -35,17 +35,13 @@ TEST_CASE("Unit_hipDeviceSetSharedMemConfig_Positive_Basic") {
   HIP_CHECK(hipSetDevice(device));
   INFO("Current device is " << device);
 
-#if HT_AMD
-  HIP_CHECK_ERROR(hipDeviceSetSharedMemConfig(mem_config), hipErrorNotSupported);
-#elif HT_NVIDIA
   HIP_CHECK(hipDeviceSetSharedMemConfig(mem_config));
-#endif
 }
 
 TEST_CASE("Unit_hipDeviceSetSharedMemConfig_Negative_Parameters") {
 #if HT_AMD
   HIP_CHECK_ERROR(hipDeviceSetSharedMemConfig(static_cast<hipSharedMemConfig>(-1)),
-                  hipErrorNotSupported);
+                  hipSuccess);
 #elif HT_NVIDIA
   HIP_CHECK_ERROR(hipDeviceSetSharedMemConfig(static_cast<hipSharedMemConfig>(-1)),
                   hipErrorInvalidValue);
@@ -118,4 +114,4 @@ TEST_CASE("Unit_hipDeviceGetSharedMemConfig_Positive_Threaded") {
 
 TEST_CASE("Unit_hipDeviceGetSharedMemConfig_Negative_Parameters") {
   HIP_CHECK_ERROR(hipDeviceGetSharedMemConfig(nullptr), hipErrorInvalidValue);
-}
\ No newline at end of file
+}

From 5ecbdaf9849e4491cd0584c64bc0652113b7e981 Mon Sep 17 00:00:00 2001
From: Anusha GodavarthySurya <anusha.godavarthysurya@amd.com>
Date: Mon, 20 Feb 2023 13:15:07 +0000
Subject: [PATCH 6/9] SWDEV-366636 - Fix test timing issue

Change-Id: I7de327adc59516f5d32a8861c7810f28a5c3d22b
---
 catch/unit/event/hipEventSynchronize.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/catch/unit/event/hipEventSynchronize.cc b/catch/unit/event/hipEventSynchronize.cc
index a347badb2..dc70323b1 100644
--- a/catch/unit/event/hipEventSynchronize.cc
+++ b/catch/unit/event/hipEventSynchronize.cc
@@ -110,8 +110,6 @@ TEST_CASE("Unit_hipEventSynchronize_NoEventRecord_Positive") {
 
   // Record the end_event
   HIP_CHECK(hipEventRecord(end_event, NULL));
-  // End event has not been completed
-  HIP_CHECK_ERROR(hipEventQuery(end_event), hipErrorNotReady);
 
   // When hipEventSynchronized is called on event that has not been recorded,
   // the function returns immediately

From 0e7e528cdc09a069346445a00562c10de413f25d Mon Sep 17 00:00:00 2001
From: Satyanvesh Dittakavi <Satyanvesh.Dittakavi@amd.com>
Date: Thu, 24 Nov 2022 12:00:53 +0000
Subject: [PATCH 7/9] SWDEV-380913, SWDEV-369555 - Update and enable
 hipMemAdvise test

Change-Id: If291514eeacef21dea99bc4b4a78bc98fb0bc1ca
---
 catch/unit/memory/hipMemAdvise.cc | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/catch/unit/memory/hipMemAdvise.cc b/catch/unit/memory/hipMemAdvise.cc
index e5412ddfc..5e3787148 100644
--- a/catch/unit/memory/hipMemAdvise.cc
+++ b/catch/unit/memory/hipMemAdvise.cc
@@ -224,8 +224,6 @@ TEST_CASE("Unit_hipMemAdvise_TstFlags") {
 }
 
 TEST_CASE("Unit_hipMemAdvise_NegtveTsts") {
-  HipTest::HIP_SKIP_TEST("Fixed few issues to match with Nvidia, Skip now to avoid CI failures");
-  return;
   int MangdMem = HmmAttrPrint();
   if (MangdMem == 1) {
     bool IfTestPassed = true;
@@ -234,15 +232,6 @@ TEST_CASE("Unit_hipMemAdvise_NegtveTsts") {
     std::string str;
     HIP_CHECK(hipGetDeviceCount(&NumDevs));
     HIP_CHECK(hipMallocManaged(&Hmm, MEM_SIZE * 2, hipMemAttachGlobal));
-#if HT_AMD
-    // Passing invalid value(99) device param
-    IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2,
-                               hipMemAdviseSetReadMostly, 99), __LINE__);
-
-    // Passing invalid value(-12) device param
-    IfTestPassed &= CheckError(hipMemAdvise(Hmm, MEM_SIZE * 2,
-                               hipMemAdviseSetReadMostly, -12), __LINE__);
-#endif
     // Passing NULL as first parameter instead of valid pointer to a memory
     IfTestPassed &= CheckError(hipMemAdvise(NULL, MEM_SIZE * 2,
                                hipMemAdviseSetReadMostly, 0), __LINE__);
@@ -380,6 +369,12 @@ TEST_CASE("Unit_hipMemAdvise_ReadMostly") {
       WARN("out value: " << out);
       IfTestPassed = false;
     }
+    // hipMemAdvise should succeed for SetReadMostly and UnsetReadMostly
+    // irrespective of the device
+    HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseSetReadMostly, 99));
+
+    HIP_CHECK(hipMemAdvise(Hmm, MEM_SIZE, hipMemAdviseUnsetReadMostly, -12));
+
     HIP_CHECK(hipFree(Hmm));
     REQUIRE(IfTestPassed);
   } else {

From 15d0fb71737995f314a84ad65fc1bfd599347fc5 Mon Sep 17 00:00:00 2001
From: Maneesh Gupta <maneesh.gupta@amd.com>
Date: Mon, 24 Apr 2023 10:07:51 +0000
Subject: [PATCH 8/9] SWDEV-379572 - Fix hipDeviceSetCacheConfig tests

Change-Id: I109bcaa2b1dce15f3b6d3f1dda82c964c95ff9a6
---
 catch/unit/device/hipDeviceSetGetCacheConfig.cc | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/catch/unit/device/hipDeviceSetGetCacheConfig.cc b/catch/unit/device/hipDeviceSetGetCacheConfig.cc
index b093336d6..dc82a85fc 100644
--- a/catch/unit/device/hipDeviceSetGetCacheConfig.cc
+++ b/catch/unit/device/hipDeviceSetGetCacheConfig.cc
@@ -38,16 +38,12 @@ TEST_CASE("Unit_hipDeviceSetCacheConfig_Positive_Basic") {
 
   const auto cache_config =
       GENERATE(from_range(std::begin(kCacheConfigs), std::end(kCacheConfigs)));
-#if HT_AMD
-  HIP_CHECK_ERROR(hipDeviceSetCacheConfig(cache_config), hipErrorNotSupported);
-#elif HT_NVIDIA
   HIP_CHECK(hipDeviceSetCacheConfig(cache_config));
-#endif
 }
 
 TEST_CASE("Unit_hipDeviceSetCacheConfig_Negative_Parameters") {
 #if HT_AMD
-  HIP_CHECK_ERROR(hipDeviceSetCacheConfig(static_cast<hipFuncCache_t>(-1)), hipErrorNotSupported);
+  HIP_CHECK_ERROR(hipDeviceSetCacheConfig(static_cast<hipFuncCache_t>(-1)), hipSuccess);
 #elif HT_NVIDIA
   HIP_CHECK_ERROR(hipDeviceSetCacheConfig(static_cast<hipFuncCache_t>(-1)), hipErrorInvalidValue);
 #endif
@@ -105,4 +101,4 @@ TEST_CASE("Unit_hipDeviceGetCacheConfig_Positive_Threaded") {
 
 TEST_CASE("Unit_HipDeviceGetCacheConfig_Negative_Parameters") {
   HIP_CHECK_ERROR(hipDeviceGetCacheConfig(nullptr), hipErrorInvalidValue);
-}
\ No newline at end of file
+}

From c84c7dfb1387119cca0dfa631646509e1d65df60 Mon Sep 17 00:00:00 2001
From: Ajay <ajay.gunashekar@amd.com>
Date: Thu, 4 May 2023 23:23:46 +0000
Subject: [PATCH 9/9] SWDEV-398541/SWDEV-390170 - skip building failing texture
 tests for CUDA 12.0

Change-Id: I109b2c2b67d489d13a2117f53a8e77eca7315b46
---
 catch/unit/texture/hipBindTex2DPitch.cc          | 4 ++++
 catch/unit/texture/hipBindTexRef1DFetch.cc       | 4 ++++
 catch/unit/texture/hipNormalizedFloatValueTex.cc | 4 ++++
 catch/unit/texture/hipSimpleTexture2DLayered.cc  | 4 ++++
 catch/unit/texture/hipSimpleTexture3D.cc         | 4 ++++
 catch/unit/texture/hipTextureRef2D.cc            | 4 ++++
 6 files changed, 24 insertions(+)

diff --git a/catch/unit/texture/hipBindTex2DPitch.cc b/catch/unit/texture/hipBindTex2DPitch.cc
index 4bd671af0..4cf59d1ad 100644
--- a/catch/unit/texture/hipBindTex2DPitch.cc
+++ b/catch/unit/texture/hipBindTex2DPitch.cc
@@ -20,6 +20,8 @@ THE SOFTWARE.
 #include <hip_test_common.hh>
 #include <hip_test_checkers.hh>
 
+#if CUDA_VERSION < CUDA_12000
+
 #define SIZE_H 8
 #define SIZE_W 12
 #define TYPE_t float
@@ -77,3 +79,5 @@ TEST_CASE("Unit_hipBindTexture2D_Pitch") {
   HIP_CHECK(hipFree(devPtrA));
   HIP_CHECK(hipFree(devPtrB));
 }
+
+#endif // CUDA_VERSION < CUDA_12000
\ No newline at end of file
diff --git a/catch/unit/texture/hipBindTexRef1DFetch.cc b/catch/unit/texture/hipBindTexRef1DFetch.cc
index 7291b786b..58e3dae0a 100644
--- a/catch/unit/texture/hipBindTexRef1DFetch.cc
+++ b/catch/unit/texture/hipBindTexRef1DFetch.cc
@@ -19,6 +19,8 @@ THE SOFTWARE.
 
 #include <hip_test_common.hh>
 
+#if CUDA_VERSION < CUDA_12000
+
 #define N 512
 
 texture<float, 1, hipReadModeElementType> tex;
@@ -79,3 +81,5 @@ TEST_CASE("Unit_hipBindTexture_tex1DfetchVerification") {
   HIP_CHECK(hipFree(texBuf));
   HIP_CHECK(hipFree(devBuf));
 }
+
+#endif // CUDA_VERSION < CUDA_12000
\ No newline at end of file
diff --git a/catch/unit/texture/hipNormalizedFloatValueTex.cc b/catch/unit/texture/hipNormalizedFloatValueTex.cc
index 8e660c41c..13edd8ed8 100644
--- a/catch/unit/texture/hipNormalizedFloatValueTex.cc
+++ b/catch/unit/texture/hipNormalizedFloatValueTex.cc
@@ -19,6 +19,8 @@ THE SOFTWARE.
 
 #include <hip_test_common.hh>
 
+#if CUDA_VERSION < CUDA_12000
+
 #define SIZE          10
 #define EPSILON       0.00001
 #define THRESH_HOLD   0.01  // For filter mode
@@ -159,3 +161,5 @@ TEST_CASE("Unit_hipNormalizedFloatValueTex_CheckModes") {
     runTest_hipTextureFilterMode<hipFilterModeLinear>();
   }
 }
+
+#endif  // CUDA_VERSION < CUDA_12000
\ No newline at end of file
diff --git a/catch/unit/texture/hipSimpleTexture2DLayered.cc b/catch/unit/texture/hipSimpleTexture2DLayered.cc
index 1bf6ea3af..a51a683f2 100644
--- a/catch/unit/texture/hipSimpleTexture2DLayered.cc
+++ b/catch/unit/texture/hipSimpleTexture2DLayered.cc
@@ -20,6 +20,8 @@ THE SOFTWARE.
 #include <hip_test_common.hh>
 #include <hip_test_checkers.hh>
 
+#if CUDA_VERSION < CUDA_12000
+
 typedef float T;
 
 // Texture reference for 2D Layered texture
@@ -107,3 +109,5 @@ TEST_CASE("Unit_hipSimpleTexture2DLayered_Check") {
   free(hData);
   free(hOutputData);
 }
+
+#endif // CUDA_VERSION < CUDA_12000
\ No newline at end of file
diff --git a/catch/unit/texture/hipSimpleTexture3D.cc b/catch/unit/texture/hipSimpleTexture3D.cc
index 0de3aec09..44509f34c 100644
--- a/catch/unit/texture/hipSimpleTexture3D.cc
+++ b/catch/unit/texture/hipSimpleTexture3D.cc
@@ -20,6 +20,8 @@ THE SOFTWARE.
 #include <hip_test_common.hh>
 #include <hip_test_checkers.hh>
 
+#if CUDA_VERSION < CUDA_12000
+
 // Texture reference for 3D texture
 texture<float, hipTextureType3D, hipReadModeElementType> texf;
 texture<int, hipTextureType3D, hipReadModeElementType>   texi;
@@ -119,3 +121,5 @@ TEST_CASE("Unit_hipSimpleTexture3D_Check_DataTypes") {
     runSimpleTexture3D_Check<char>(i, i+1, i, &texc);
   }
 }
+
+#endif // CUDA_VERSION < CUDA_12000
\ No newline at end of file
diff --git a/catch/unit/texture/hipTextureRef2D.cc b/catch/unit/texture/hipTextureRef2D.cc
index d0a31644b..cbf14f0c2 100644
--- a/catch/unit/texture/hipTextureRef2D.cc
+++ b/catch/unit/texture/hipTextureRef2D.cc
@@ -19,6 +19,8 @@ THE SOFTWARE.
 
 #include <hip_test_common.hh>
 
+#if CUDA_VERSION < CUDA_12000
+
 texture<float, 2, hipReadModeElementType> tex;
 
 __global__ void tex2DKernel(float* outputData, int width) {
@@ -90,3 +92,5 @@ TEST_CASE("Unit_hipTextureRef2D_Check") {
   HIP_CHECK(hipFree(dData));
   HIP_CHECK(hipFreeArray(hipArray));
 }
+
+#endif // CUDA_VERSION < CUDA_12000
\ No newline at end of file