From f376a507edab38c58dd00ffe7b773a69827eecfe Mon Sep 17 00:00:00 2001 From: Liu Liu Date: Tue, 19 Nov 2024 19:42:08 -0500 Subject: [PATCH] Add basic use of cufile. --- WORKSPACE | 4 ++-- config/ccv.bzl | 4 ++-- lib/configure | 4 ++-- lib/configure.ac | 2 +- lib/nnc/ccv_nnc_tensor.c | 5 +---- lib/nnc/gpu/ccv_nnc_compat.cu | 16 ++++++++++++++++ lib/nnc/gpu/ccv_nnc_compat.h | 15 +++++++++++++++ 7 files changed, 39 insertions(+), 11 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 65ad3b377..45f8667db 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -13,8 +13,8 @@ git_repository( git_repository( name = "build_bazel_rules_cuda", remote = "https://github.com/liuliu/rules_cuda.git", - commit = "be346d4d12883469878edd693097f87723400c5b", - shallow_since = "1681409802 -0400" + commit = "7dcc4673fa487ad12fe3abe84d01edd9fe588e85", + shallow_since = "1732063237 -0500" ) http_archive( diff --git a/config/ccv.bzl b/config/ccv.bzl index fbc63a203..4ef7f703d 100644 --- a/config/ccv.bzl +++ b/config/ccv.bzl @@ -98,8 +98,8 @@ def ccv_deps(): git_repository, name = "build_bazel_rules_cuda", remote = "https://github.com/liuliu/rules_cuda.git", - commit = "be346d4d12883469878edd693097f87723400c5b", - shallow_since = "1681409802 -0400" + commit = "7dcc4673fa487ad12fe3abe84d01edd9fe588e85", + shallow_since = "1732063237 -0500" ) _maybe( http_archive, diff --git a/lib/configure b/lib/configure index 8329af1b5..81ae2a304 100755 --- a/lib/configure +++ b/lib/configure @@ -4506,7 +4506,7 @@ if [ -d "$cuda_prefix" ]; then DEFINE_MACROS="$DEFINE_MACROS-D HAVE_CUDA " # CUB requires stdc++, unfortunately. - MKLDFLAGS="$MKLDFLAGS-lcudart -lcublas -lcurand -lstdc++ " + MKLDFLAGS="$MKLDFLAGS-lcudart -lcublas -lcurand -lcufile -lstdc++ " CPPFLAGS="$CPPFLAGS-I$cuda_prefix/include " @@ -4760,7 +4760,7 @@ if test "$mps_support" = yes; then printf "%s\n" "yes" >&6; } DEFINE_MACROS="$DEFINE_MACROS-D HAVE_MPS " - MKLDFLAGS="$MKLDFLAGS-framework MetalPerformanceShaders -framework MetalPerformanceShadersGraph -framework Foundation -framework Metal -framework OpenCL -lc++ " + MKLDFLAGS="$MKLDFLAGS-framework MetalPerformanceShaders -framework MetalPerformanceShadersGraph -framework Foundation -framework Metal -lc++ " CUDA_SRCS="" diff --git a/lib/configure.ac b/lib/configure.ac index ba49af529..544e059e3 100644 --- a/lib/configure.ac +++ b/lib/configure.ac @@ -159,7 +159,7 @@ AC_ARG_WITH(cuda, [AS_HELP_STRING([--with-cuda], [CUDA installation [ARG=/usr/lo if [[ -d "$cuda_prefix" ]]; then AC_SUBST(DEFINE_MACROS, ["$DEFINE_MACROS-D HAVE_CUDA "]) # CUB requires stdc++, unfortunately. - AC_SUBST(MKLDFLAGS, ["$MKLDFLAGS-lcudart -lcublas -lcurand -lstdc++ "]) + AC_SUBST(MKLDFLAGS, ["$MKLDFLAGS-lcudart -lcublas -lcurand -lcufile -lstdc++ "]) AC_SUBST(CPPFLAGS, ["$CPPFLAGS-I$cuda_prefix/include "]) if [[ -d "$cuda_prefix/lib64" ]]; then AC_SUBST(MKLDFLAGS, ["$MKLDFLAGS-L$cuda_prefix/lib64 "]) diff --git a/lib/nnc/ccv_nnc_tensor.c b/lib/nnc/ccv_nnc_tensor.c index 497d55036..5869644b0 100644 --- a/lib/nnc/ccv_nnc_tensor.c +++ b/lib/nnc/ccv_nnc_tensor.c @@ -132,11 +132,8 @@ ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t para // This is not supported yet on CUDA. tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size); int fd = open(filename, O_RDONLY, 0); - void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset); + cufileread(fd, offset, tensor->data.u8, size); close(fd); - madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED); - cumemcpy(tensor->data.u8, CCV_TENSOR_GPU_MEMORY, bufptr, CCV_TENSOR_CPU_MEMORY, size); - munmap(bufptr, size); } else tensor->data.u8 = 0; } else { diff --git a/lib/nnc/gpu/ccv_nnc_compat.cu b/lib/nnc/gpu/ccv_nnc_compat.cu index e6d8c7a29..620eba428 100644 --- a/lib/nnc/gpu/ccv_nnc_compat.cu +++ b/lib/nnc/gpu/ccv_nnc_compat.cu @@ -1,4 +1,5 @@ #include "ccv_nnc_compat.h" +#include // For GPUDirect Storage extern "C" { #include #include @@ -305,6 +306,21 @@ void cuunregister(void* ptr) CUDA_ENFORCE(cudaHostUnregister(ptr)); } +void cufileread(const int fd, const off_t file_offset, void* const buf, const size_t size) +{ + CUfileDescr_t file_descr = { + .type = CU_FILE_HANDLE_TYPE_OPAQUE_FD, + .handle = { + .fd = fd, + }, + .fs_ops = 0, + }; + CUfileHandle_t file_handle; + CUFILE_ENFORCE(cuFileHandleRegister(&file_handle, &file_descr)); + cuFileRead(file_handle, buf, size, file_offset, 0); + cuFileHandleDeregister(file_handle); +} + typedef struct { cudaStream_t stream; cublasHandle_t cublas; diff --git a/lib/nnc/gpu/ccv_nnc_compat.h b/lib/nnc/gpu/ccv_nnc_compat.h index ecf6b94a3..e88f81eb1 100644 --- a/lib/nnc/gpu/ccv_nnc_compat.h +++ b/lib/nnc/gpu/ccv_nnc_compat.h @@ -34,6 +34,7 @@ int curegmp(int device_id, cump_f func, void* const context); // register memory void cuunregmp(const int id); // un-register memory pressure handler. void cusetprofiler(int state); void cusetdevicemap(const int* const device_map, const int size); +void cufileread(const int fd, const off_t file_offset, void* const buf, const size_t size); // Stream context CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_init_stream_context(ccv_nnc_stream_context_t* const stream_context); @@ -120,6 +121,20 @@ CCV_WARN_UNUSED(size_t) ccv_nnc_cublas_workspace_size_in_bytes(const ccv_nnc_ten } while (0) #endif +#ifdef NDEBUG +#define CUFILE_ENFORCE(status) status +#else +#define CUFILE_ENFORCE(status) do { \ + const CUfileError_t __status = status; \ + if (__status.err != CU_FILE_SUCCESS) { \ + printf("[%s:%d]:CUDA - Error: %s\n", \ + __FILE__, __LINE__, CUFILE_ERRSTR(__status.err)); \ + assert(0); \ + exit(EXIT_FAILURE); \ + } \ +} while (0) +#endif + #ifdef NDEBUG #define CUBLAS_ENFORCE(status) status #else