From 4f3203749123b33c8fe99f551961c728d1454c70 Mon Sep 17 00:00:00 2001
From: Arturo Vargas
Date: Tue, 30 Jul 2024 14:59:56 -0700
Subject: [PATCH 1/7] minor fixes

---
 .../fractal-ex2-RAJA-CUDA-solution.cpp        | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp b/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp
index dc4a057..1f6ef06 100644
--- a/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp
+++ b/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp
@@ -9,6 +9,8 @@
 #include
 #include "RAJA/RAJA.hpp"
+#include "umpire/Umpire.hpp"
+#include "umpire/strategy/QuickPool.hpp"
 #include "../../tpl/writeBMP.hpp"

 #define xMin 0.74395
@@ -17,7 +19,7 @@
 #define yMax 0.11899

 /* TODO: create a variable called "THREADS" to be used when calling the kernel*/
-#define THREADS 512
+#define THREADS 512

 int main(int argc, char *argv[])
 {
@@ -41,23 +43,23 @@ int main(int argc, char *argv[])
   //pixels of the fractal image.
   auto& rm = umpire::ResourceManager::getInstance();
   unsigned char *cnt{nullptr};
-  auto allocator = rm.getAllocator("PINNED");
+  auto allocator = rm.getAllocator("UVM");
   auto pool = rm.makeAllocator("qpool", allocator);
   cnt = static_cast(pool.allocate(width * width * sizeof(unsigned char)));

-  /* TODO: Set up a RAJA::KernelPolicy. The Policy should describe a cuda kernel with one outer loop
-   * and one inner loop. Only the inner for loop will be calculating pixels.
+  /* TODO: Set up a RAJA::KernelPolicy. The Policy should describe a cuda kernel with one outer loop
+   * and one inner loop. Only the inner for loop will be calculating pixels.
   */
   using KERNEL_POLICY = RAJA::KernelPolicy<
     RAJA::statement::CudaKernel<
-      RAJA::statement::For<1, RAJA::cuda_block_x_loop,
-        RAJA::statement::For<0, RAJA::cuda_thread_x_loop,
+      RAJA::statement::For<1, RAJA::cuda_global_thread_y,
+        RAJA::statement::For<0, RAJA::cuda_global_thread_x,
           RAJA::statement::Lambda<0>
         >
-      >
+      >
     >
   >;
-
+
   /* compute fractal */
   gettimeofday(&start, NULL);
   /* TODO: Add a RAJA::Kernel which takes the KERNEL_POLICY you just created above.
@@ -85,7 +87,7 @@ int main(int argc, char *argv[])
         x = x2 - y2 - cx;
         depth--;
       } while ((depth > 0) && ((x2 + y2) <= 5.0));
-      d_cnt[row * width + col] = depth & 255;  //Remember to index the image like normal
+      cnt[row * width + col] = depth & 255;  //Remember to index the image like normal
     });
   gettimeofday(&end, NULL);  //By the time we exit the RAJA::Kernel, host and device are synchronized for us.
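For readers following the series without the repository checked out, the allocation-plus-kernel structure that PATCH 1 moves the CUDA solution toward is sketched below. This is an illustrative reduction rather than the repository file: the fractal iteration body is elided, `width` is hard-coded, and the templated `rm.makeAllocator<umpire::strategy::QuickPool>(...)` overload is an assumption, since the exact call is not visible in the patch text.

    // Sketch only: Umpire QuickPool on a unified-memory allocator feeding a
    // RAJA::kernel launch; the real solution computes a fractal depth per pixel.
    #include "RAJA/RAJA.hpp"
    #include "umpire/Umpire.hpp"
    #include "umpire/strategy/QuickPool.hpp"

    int main()
    {
      const int width = 1024;  // placeholder; the tutorial reads this from argv

      auto& rm = umpire::ResourceManager::getInstance();
      auto allocator = rm.getAllocator("UM");  // unified memory, visible to host and device
      auto pool = rm.makeAllocator<umpire::strategy::QuickPool>("qpool", allocator);
      unsigned char* cnt = static_cast<unsigned char*>(
          pool.allocate(width * width * sizeof(unsigned char)));

      // One CUDA kernel: index 1 (rows) on the outer loop, index 0 (columns) inside.
      using KERNEL_POLICY = RAJA::KernelPolicy<
        RAJA::statement::CudaKernel<
          RAJA::statement::For<1, RAJA::cuda_global_thread_y,
            RAJA::statement::For<0, RAJA::cuda_global_thread_x,
              RAJA::statement::Lambda<0>
            >
          >
        >
      >;

      RAJA::kernel<KERNEL_POLICY>(
          RAJA::make_tuple(RAJA::RangeSegment(0, width),   // index 0: col
                           RAJA::RangeSegment(0, width)),  // index 1: row
          [=] RAJA_DEVICE (int col, int row) {
            int depth = 0;  // the fractal recurrence that sets depth is elided here
            cnt[row * width + col] = depth & 255;  // unified memory: no copy back needed
          });

      pool.deallocate(cnt);
      return 0;
    }

Because `cnt` lives in unified memory, the host can write the BMP from the same pointer the device wrote to, which is why the later patches also drop the separate `d_cnt` device buffer and the explicit `hipMemcpyAsync` copy.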
From fad972a88a1dbc3c5522dadd1cdbb55f819d0aaa Mon Sep 17 00:00:00 2001
From: Arturo Vargas
Date: Tue, 30 Jul 2024 15:48:27 -0700
Subject: [PATCH 2/7] minor bug fixes

---
 .../fractal-ex2-RAJA-CUDA-solution.cpp        | 10 ++++-----
 .../03-HIP/fractal-ex3-RAJA-HIP.cpp           |  2 ++
 .../fractal-ex3-RAJA-HIP-solution.cpp         | 21 +++++++++----------
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp b/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp
index 1f6ef06..ea21672 100644
--- a/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp
+++ b/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp
@@ -19,7 +19,7 @@
 #define yMax 0.11899

 /* TODO: create a variable called "THREADS" to be used when calling the kernel*/
-#define THREADS 512
+#define THREADS 256

 int main(int argc, char *argv[])
 {
@@ -43,7 +43,7 @@ int main(int argc, char *argv[])
   //pixels of the fractal image.
   auto& rm = umpire::ResourceManager::getInstance();
   unsigned char *cnt{nullptr};
-  auto allocator = rm.getAllocator("UVM");
+  auto allocator = rm.getAllocator("UM");
   auto pool = rm.makeAllocator("qpool", allocator);
   cnt = static_cast(pool.allocate(width * width * sizeof(unsigned char)));

@@ -51,9 +51,9 @@ int main(int argc, char *argv[])
    * and one inner loop. Only the inner for loop will be calculating pixels.
   */
   using KERNEL_POLICY = RAJA::KernelPolicy<
-    RAJA::statement::CudaKernel<
-      RAJA::statement::For<1, RAJA::cuda_global_thread_y,
-        RAJA::statement::For<0, RAJA::cuda_global_thread_x,
+    RAJA::statement::CudaKernelFixed,
+        RAJA::statement::For<0, RAJA::cuda_global_size_x_direct<16>,
           RAJA::statement::Lambda<0>
         >
       >
diff --git a/Intermediate_Tutorial/03-HIP/fractal-ex3-RAJA-HIP.cpp b/Intermediate_Tutorial/03-HIP/fractal-ex3-RAJA-HIP.cpp
index 478b643..a780ab5 100644
--- a/Intermediate_Tutorial/03-HIP/fractal-ex3-RAJA-HIP.cpp
+++ b/Intermediate_Tutorial/03-HIP/fractal-ex3-RAJA-HIP.cpp
@@ -9,6 +9,8 @@
 #include
 #include "RAJA/RAJA.hpp"
+#include "umpire/Umpire.hpp"
+#include "umpire/strategy/QuickPool.hpp"
 #include "../../tpl/writeBMP.hpp"

 #define xMin 0.74395
diff --git a/Intermediate_Tutorial/03-HIP/solutions/fractal-ex3-RAJA-HIP-solution.cpp b/Intermediate_Tutorial/03-HIP/solutions/fractal-ex3-RAJA-HIP-solution.cpp
index 17fd01f..983705a 100644
--- a/Intermediate_Tutorial/03-HIP/solutions/fractal-ex3-RAJA-HIP-solution.cpp
+++ b/Intermediate_Tutorial/03-HIP/solutions/fractal-ex3-RAJA-HIP-solution.cpp
@@ -9,6 +9,8 @@
 #include
 #include "RAJA/RAJA.hpp"
+#include "umpire/Umpire.hpp"
+#include "umpire/strategy/QuickPool.hpp"
 #include "../../tpl/writeBMP.hpp"

 #define xMin 0.74395
@@ -17,7 +19,7 @@
 #define yMax 0.11899

 /* TODO: create a variable called "THREADS" to be used when calling the kernel*/
-#define THREADS 512
+#define THREADS 256

 int main(int argc, char *argv[])
 {
@@ -37,21 +39,21 @@ int main(int argc, char *argv[])
   printf("computing %d by %d fractal with a maximum depth of %d\n", width, width, maxdepth);

-  //TODO: Create an Umpire QuickPool allocator with pinned memory that will hold the
+  //TODO: Create an Umpire QuickPool allocator with unified memory that will hold the
   //pixels of the fractal image.
   auto& rm = umpire::ResourceManager::getInstance();
   unsigned char *cnt{nullptr};
-  auto allocator = rm.getAllocator("PINNED");
+  auto allocator = rm.getAllocator("UM");
   auto pool = rm.makeAllocator("qpool", allocator);
   cnt = static_cast(pool.allocate(width * width * sizeof(unsigned char)));

   /* TODO: Set up a RAJA::KernelPolicy. The Policy should describe a hip kernel with one outer loop
    * and one inner loop. Only the inner for loop will be calculating pixels.
   */
-  using KERNEL_POLICY = RAJA::KernelPolicy<
-    RAJA::statement::HipKernel<
-      RAJA::statement::For<1, RAJA::hip_block_x_loop,
-        RAJA::statement::For<0, RAJA::hip_thread_x_loop,
+  using KERNEL_POLICY = RAJA::KernelPolicyFixed<
+    RAJA::statement::HipKernel,
+        RAJA::statement::For<0, RAJA::hip_global_size_x_direct<16>,
           RAJA::statement::Lambda<0>
         >
       >
@@ -84,15 +86,12 @@ int main(int argc, char *argv[])
         x = x2 - y2 - cx;
         depth--;
       } while ((depth > 0) && ((x2 + y2) <= 5.0));
-      d_cnt[row * width + col] = depth & 255;  //Remember to index the image like normal
+      cnt[row * width + col] = depth & 255;  //Remember to index the image like normal
     });
   gettimeofday(&end, NULL);  //By the time we exit the RAJA::Kernel, host and device are synchronized for us.
   printf("compute time: %.8f s\n", end.tv_sec + end.tv_usec / 1000000.0 - start.tv_sec - start.tv_usec / 1000000.0);

-  /* TODO: In order to create a bmp image, we need to copy the completed fractal to the Host memory space */
-  hipMemcpyAsync(cnt, d_cnt, width * width * sizeof(unsigned char), hipMemcpyDeviceToHost);
-
   /* verify result by writing it to a file */
   if (width <= 2048) {
     wbmp.WriteBMP(width, width, cnt, "fractal.bmp");

From 5738783ebb9e90e3f92e7792b540972abcef25a6 Mon Sep 17 00:00:00 2001
From: Arturo Vargas
Date: Tue, 30 Jul 2024 15:51:03 -0700
Subject: [PATCH 3/7] clean up hip code

---
 Intermediate_Tutorial/03-HIP/fractal-ex3-RAJA-HIP.cpp     | 8 ++++----
 .../03-HIP/solutions/fractal-ex3-RAJA-HIP-solution.cpp    | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/Intermediate_Tutorial/03-HIP/fractal-ex3-RAJA-HIP.cpp b/Intermediate_Tutorial/03-HIP/fractal-ex3-RAJA-HIP.cpp
index a780ab5..ba5b9ed 100644
--- a/Intermediate_Tutorial/03-HIP/fractal-ex3-RAJA-HIP.cpp
+++ b/Intermediate_Tutorial/03-HIP/fractal-ex3-RAJA-HIP.cpp
@@ -18,7 +18,7 @@
 #define yMin 0.11321
 #define yMax 0.11899

-#define THREADS 512
+#define THREADS 256

 //TODO: uncomment this out in order to build!
 // #define COMPILE
@@ -43,11 +43,11 @@ int main(int argc, char *argv[])
   printf("computing %d by %d fractal with a maximum depth of %d\n", width, width, maxdepth);

-  //TODO: Create an Umpire QuickPool allocator with pinned memory that will hold the
+  //TODO: Create an Umpire QuickPool allocator with unified memory that will hold the
   //pixels of the fractal image.
   auto& rm = umpire::ResourceManager::getInstance();
   unsigned char *cnt{nullptr};
-  auto allocator = rm.getAllocator("PINNED");
+  auto allocator = rm.getAllocator("UM");
   auto pool = rm.makeAllocator("qpool", allocator);
   cnt = static_cast(pool.allocate(width * width * sizeof(unsigned char)));

@@ -87,7 +87,7 @@ int main(int argc, char *argv[])
         x = x2 - y2 - cx;
         depth--;
       } while ((depth > 0) && ((x2 + y2) <= 5.0));
-      d_cnt[row * width + col] = depth & 255;  //Remember to index the image like normal
+      cnt[row * width + col] = depth & 255;  //Remember to index the image like normal
     });
   gettimeofday(&end, NULL);  //By the time we exit the RAJA::Kernel, host and device are synchronized for us.
diff --git a/Intermediate_Tutorial/03-HIP/solutions/fractal-ex3-RAJA-HIP-solution.cpp b/Intermediate_Tutorial/03-HIP/solutions/fractal-ex3-RAJA-HIP-solution.cpp
index 983705a..de0408c 100644
--- a/Intermediate_Tutorial/03-HIP/solutions/fractal-ex3-RAJA-HIP-solution.cpp
+++ b/Intermediate_Tutorial/03-HIP/solutions/fractal-ex3-RAJA-HIP-solution.cpp
@@ -18,7 +18,6 @@
 #define yMin 0.11321
 #define yMax 0.11899

-/* TODO: create a variable called "THREADS" to be used when calling the kernel*/
 #define THREADS 256

 int main(int argc, char *argv[])

From 2814e06da3c453d91fddfd547b6bd09695fa454d Mon Sep 17 00:00:00 2001
From: Arturo Vargas
Date: Tue, 30 Jul 2024 15:53:23 -0700
Subject: [PATCH 4/7] clean up cuda version

---
 Intermediate_Tutorial/02-CUDA/fractal-ex2-RAJA-CUDA.cpp    | 8 +++++---
 .../02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp   | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/Intermediate_Tutorial/02-CUDA/fractal-ex2-RAJA-CUDA.cpp b/Intermediate_Tutorial/02-CUDA/fractal-ex2-RAJA-CUDA.cpp
index 8e35f91..ef38846 100644
--- a/Intermediate_Tutorial/02-CUDA/fractal-ex2-RAJA-CUDA.cpp
+++ b/Intermediate_Tutorial/02-CUDA/fractal-ex2-RAJA-CUDA.cpp
@@ -9,6 +9,8 @@
 #include
 #include "RAJA/RAJA.hpp"
+#include "umpire/Umpire.hpp"
+#include "umpire/strategy/QuickPool.hpp"
 #include "../../tpl/writeBMP.hpp"

 #define xMin 0.74395
@@ -16,7 +18,7 @@
 #define yMin 0.11321
 #define yMax 0.11899

-#define THREADS 512
+#define THREADS 256

 //TODO: uncomment this out in order to build!
 // #define COMPILE
@@ -41,7 +43,7 @@ int main(int argc, char *argv[])
   printf("computing %d by %d fractal with a maximum depth of %d\n", width, width, maxdepth);

-  //TODO: Create an Umpire QuickPool allocator with pinned memory that will hold the
+  //TODO: Create an Umpire QuickPool allocator with unified memory that will hold the
   //pixels of the fractal image.
   auto& rm = umpire::ResourceManager::getInstance();
   unsigned char *cnt{nullptr};
@@ -85,7 +87,7 @@ int main(int argc, char *argv[])
         x = x2 - y2 - cx;
         depth--;
       } while ((depth > 0) && ((x2 + y2) <= 5.0));
-      d_cnt[row * width + col] = depth & 255;  //Remember to index the image like normal
+      cnt[row * width + col] = depth & 255;  //Remember to index the image like normal
     });
   gettimeofday(&end, NULL);  //By the time we exit the RAJA::Kernel, host and device are synchronized for us.
diff --git a/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp b/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp
index ea21672..a844c23 100644
--- a/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp
+++ b/Intermediate_Tutorial/02-CUDA/solutions/fractal-ex2-RAJA-CUDA-solution.cpp
@@ -39,7 +39,7 @@ int main(int argc, char *argv[])
   printf("computing %d by %d fractal with a maximum depth of %d\n", width, width, maxdepth);

-  //TODO: Create an Umpire QuickPool allocator with pinned memory that will hold the
+  //TODO: Create an Umpire QuickPool allocator with unified memory that will hold the
   //pixels of the fractal image.
   auto& rm = umpire::ResourceManager::getInstance();
   unsigned char *cnt{nullptr};

From e09269912efbe9bcbf45328a7086543f31283b74 Mon Sep 17 00:00:00 2001
From: Arturo Vargas
Date: Tue, 30 Jul 2024 16:34:41 -0700
Subject: [PATCH 5/7] clean up pass

---
 .../fractal-ex4-RAJA-launch-solution.cpp      | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp b/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp
index 96aba17..d4f693d 100644
--- a/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp
+++ b/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp
@@ -42,45 +42,51 @@ int main(int argc, char *argv[])
   printf("computing %d by %d fractal with a maximum depth of %d\n", width, width, maxdepth);

-  //TODO: Create an Umpire QuickPool allocator with Unified Memory that will hold the
+  //TODO: Create an Umpire QuickPool allocator with unified memory that will hold the
   // pixels of the fractal image.
   auto& rm = umpire::ResourceManager::getInstance();
   unsigned char *cnt{nullptr};
-  auto allocator = rm.getAllocator("PINNED");
+  auto allocator = rm.getAllocator("UM");
   auto pool = rm.makeAllocator("qpool", allocator);
   cnt = static_cast(pool.allocate(width * width * sizeof(unsigned char)));

   //TODO: Create a RAJA Kernel Policy which uses the loop_exec policy. We want to start
   //with a normal serial nested loop first before continuing onward.
+  constexpr int block_dim = 16;
   using host_launch = RAJA::seq_launch_t;

 #if defined(RAJA_ENABLE_CUDA)
   using device_launch = RAJA::cuda_launch_t;
-#elif defined(RAJA_ENABLE_HIP)
-  using device_launch = RAJA::hip_launch_t;
 #endif

   using launch_policy = RAJA::LaunchPolicy<
     host_launch
-#if defined(RAJA_GPU_ACTIVE)
+#if defined(RAJA_ENABLE_CUDA)
     ,device_launch
 #endif
   >;

-  using col_loop = RAJA::LoopPolicy;
+  using col_loop = RAJA::LoopPolicy
+#endif
   >;

-  using row_loop = RAJA::LoopPolicy;
+  using row_loop = RAJA::LoopPolicy
+#endif
   >;

   /* start time */
   gettimeofday(&start, NULL);

-  constexpr int block_sz = 16;
-  int n_blocks = (width + block_sz-1) / block_sz + 1;
+  int n_blocks = (width + block_dim-1) / block_dim + 1;

   RAJA::launch
     (select_cpu_or_gpu, RAJA::LaunchParams(RAJA::Teams(n_blocks, n_blocks),
-                                           RAJA::Threads(block_sz, block_sz)),
+                                           RAJA::Threads(block_dim, block_dim)),
     [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
       RAJA::loop(ctx, RAJA::RangeSegment(0, width), [&] (int col) {

From 31f6ae6c135b096d68bbaa00afdb047739521d8b Mon Sep 17 00:00:00 2001
From: Arturo Vargas
Date: Tue, 30 Jul 2024 16:40:53 -0700
Subject: [PATCH 6/7] clean up pass

---
 .../04-LAUNCH/fractal-ex4-RAJA-launch.cpp     | 12 +++++++-----
 .../fractal-ex4-RAJA-launch-solution.cpp      | 18 +++++++-----------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/Intermediate_Tutorial/04-LAUNCH/fractal-ex4-RAJA-launch.cpp b/Intermediate_Tutorial/04-LAUNCH/fractal-ex4-RAJA-launch.cpp
index 7cd1c18..37db388 100644
--- a/Intermediate_Tutorial/04-LAUNCH/fractal-ex4-RAJA-launch.cpp
+++ b/Intermediate_Tutorial/04-LAUNCH/fractal-ex4-RAJA-launch.cpp
@@ -49,15 +49,18 @@ int main(int argc, char *argv[])
   auto& rm = umpire::ResourceManager::getInstance();
   unsigned char *cnt{nullptr};
-  auto allocator = rm.getAllocator("PINNED");
+  auto allocator = rm.getAllocator("UM");
   auto pool = rm.makeAllocator("qpool", allocator);
   cnt = static_cast(pool.allocate(width * width * sizeof(unsigned char)));

+  constexpr int team_dim = 16;
+  using host_launch = RAJA::seq_launch_t;
+  //TODO: Create a RAJA launch policy for the host and device
   using launch_policy = RAJA::LaunchPolicy;

-  //TODO: create RAJA loop policies for the host and device
+  //TODO: create RAJA global thread loop policies for the host and device
   using col_loop = RAJA::LoopPolicy;

   using row_loop = RAJA::LoopPolicy;
@@ -66,13 +69,12 @@ int main(int argc, char *argv[])
   /* start time */
   gettimeofday(&start, NULL);

   //Calculate number of blocks
-  constexpr int team_sz = 16;
-  int n_teams = (width + team_sz - 1) / team_sz + 1;
+  int n_teams = (width + team_dim - 1) / team_dim + 1;

   //Teams are akin to to CUDA/HIP blocks
   RAJA::launch
     (select_cpu_or_gpu, RAJA::LaunchParams(RAJA::Teams(n_teams, n_teams),
-                                           RAJA::Threads(team_sz, team_sz)),
+                                           RAJA::Threads(team_dim, team_dim)),
     [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
       RAJA::loop(ctx, RAJA::RangeSegment(0, width), [&] (int col) {
diff --git a/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp b/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp
index d4f693d..d315d5e 100644
--- a/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp
+++ b/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp
@@ -42,20 +42,16 @@ int main(int argc, char *argv[])
   printf("computing %d by %d fractal with a maximum depth of %d\n", width, width, maxdepth);

-  //TODO: Create an Umpire QuickPool allocator with unified memory that will hold the
-  // pixels of the fractal image.
   auto& rm = umpire::ResourceManager::getInstance();
   unsigned char *cnt{nullptr};
   auto allocator = rm.getAllocator("UM");
   auto pool = rm.makeAllocator("qpool", allocator);
   cnt = static_cast(pool.allocate(width * width * sizeof(unsigned char)));

-  //TODO: Create a RAJA Kernel Policy which uses the loop_exec policy. We want to start
-  //with a normal serial nested loop first before continuing onward.
-  constexpr int block_dim = 16;
+  constexpr int team_dim = 16;
   using host_launch = RAJA::seq_launch_t;
+  //TODO: create RAJA global thread loop policies for the host and device

 #if defined(RAJA_ENABLE_CUDA)
   using device_launch = RAJA::cuda_launch_t;
 #endif
@@ -69,24 +65,24 @@ int main(int argc, char *argv[])
   using col_loop = RAJA::LoopPolicy
+    ,RAJA::cuda_global_size_y_direct
 #endif
   >;

   using row_loop = RAJA::LoopPolicy
+    ,RAJA::cuda_global_size_x_direct
 #endif
   >;

   /* start time */
   gettimeofday(&start, NULL);

-  int n_blocks = (width + block_dim-1) / block_dim + 1;
+  int n_teams = (width + team_dim-1) / team_dim + 1;

   RAJA::launch
-    (select_cpu_or_gpu, RAJA::LaunchParams(RAJA::Teams(n_blocks, n_blocks),
-                                           RAJA::Threads(block_dim, block_dim)),
+    (select_cpu_or_gpu, RAJA::LaunchParams(RAJA::Teams(n_teams, n_teams),
+                                           RAJA::Threads(team_dim, team_dim)),
     [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
       RAJA::loop(ctx, RAJA::RangeSegment(0, width), [&] (int col) {

From da374e16b68a96abd94d0cde52ed979e94059db5 Mon Sep 17 00:00:00 2001
From: Arturo Vargas
Date: Wed, 31 Jul 2024 10:29:48 -0700
Subject: [PATCH 7/7] loop -> seq

---
 .../04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp b/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp
index d315d5e..c991070 100644
--- a/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp
+++ b/Intermediate_Tutorial/04-LAUNCH/solution/fractal-ex4-RAJA-launch-solution.cpp
@@ -51,7 +51,7 @@ int main(int argc, char *argv[])
   constexpr int team_dim = 16;
   using host_launch = RAJA::seq_launch_t;

-  //TODO: create RAJA global thread loop policies for the host and device
+  //TODO: create RAJA global thread loop policies for the host and device
 #if defined(RAJA_ENABLE_CUDA)
   using device_launch = RAJA::cuda_launch_t;
 #endif
@@ -63,13 +63,13 @@ int main(int argc, char *argv[])
   >;

-  using col_loop = RAJA::LoopPolicy
 #endif
   >;

-  using row_loop = RAJA::LoopPolicy
 #endif
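Taken together, PATCHes 5 through 7 converge on a host/device-portable RAJA::launch structure; a reduced sketch of that shape is below. It is not the repository file: only the CUDA branch is shown, the fractal body is elided, the execution place is hard-coded, and the `cuda_launch_t<false>` and `cuda_global_size_*_direct<team_dim>` template arguments are filled in as plausible values because they are not visible in the patch text above.

    // Sketch only: one code path that runs sequentially on the host or as a
    // CUDA kernel, selected at run time through RAJA::ExecPlace.
    #include "RAJA/RAJA.hpp"

    int main()
    {
      const int width        = 1024;   // placeholder
      constexpr int team_dim = 16;

      using host_launch = RAJA::seq_launch_t;
    #if defined(RAJA_ENABLE_CUDA)
      using device_launch = RAJA::cuda_launch_t<false>;  // assumed synchronous launch
    #endif

      using launch_policy = RAJA::LaunchPolicy<host_launch
    #if defined(RAJA_ENABLE_CUDA)
                                               , device_launch
    #endif
                                               >;

      // Rows map to the y dimension of the global thread grid, columns to x.
      using row_loop = RAJA::LoopPolicy<RAJA::seq_exec
    #if defined(RAJA_ENABLE_CUDA)
                                        , RAJA::cuda_global_size_y_direct<team_dim>
    #endif
                                        >;
      using col_loop = RAJA::LoopPolicy<RAJA::seq_exec
    #if defined(RAJA_ENABLE_CUDA)
                                        , RAJA::cuda_global_size_x_direct<team_dim>
    #endif
                                        >;

      RAJA::ExecPlace select_cpu_or_gpu = RAJA::ExecPlace::HOST;  // or DEVICE

      const int n_teams = (width + team_dim - 1) / team_dim;  // teams ~ CUDA/HIP blocks

      RAJA::launch<launch_policy>(select_cpu_or_gpu,
          RAJA::LaunchParams(RAJA::Teams(n_teams, n_teams),
                             RAJA::Threads(team_dim, team_dim)),
          [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
            RAJA::loop<row_loop>(ctx, RAJA::RangeSegment(0, width), [&] (int row) {
              RAJA::loop<col_loop>(ctx, RAJA::RangeSegment(0, width), [&] (int col) {
                (void)row; (void)col;  // per-pixel fractal computation elided
              });
            });
          });

      return 0;
    }

The same lambda runs in both places: with `ExecPlace::HOST` the loop policies fall back to `RAJA::seq_exec`, which matches the `loop -> seq` rename that PATCH 7 applies to the solution file.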