Commit f376a50

Add basic use of cufile.
liuliu committed Nov 20, 2024
1 parent 9a56808 commit f376a50
Showing 7 changed files with 39 additions and 11 deletions.
4 changes: 2 additions & 2 deletions WORKSPACE
@@ -13,8 +13,8 @@ git_repository(
 git_repository(
     name = "build_bazel_rules_cuda",
     remote = "https://github.com/liuliu/rules_cuda.git",
-    commit = "be346d4d12883469878edd693097f87723400c5b",
-    shallow_since = "1681409802 -0400"
+    commit = "7dcc4673fa487ad12fe3abe84d01edd9fe588e85",
+    shallow_since = "1732063237 -0500"
 )

 http_archive(
4 changes: 2 additions & 2 deletions config/ccv.bzl
@@ -98,8 +98,8 @@ def ccv_deps():
         git_repository,
         name = "build_bazel_rules_cuda",
         remote = "https://github.com/liuliu/rules_cuda.git",
-        commit = "be346d4d12883469878edd693097f87723400c5b",
-        shallow_since = "1681409802 -0400"
+        commit = "7dcc4673fa487ad12fe3abe84d01edd9fe588e85",
+        shallow_since = "1732063237 -0500"
     )
     _maybe(
         http_archive,
4 changes: 2 additions & 2 deletions lib/configure
@@ -4506,7 +4506,7 @@ if [ -d "$cuda_prefix" ]; then
 DEFINE_MACROS="$DEFINE_MACROS-D HAVE_CUDA "
 # CUB requires stdc++, unfortunately.
-MKLDFLAGS="$MKLDFLAGS-lcudart -lcublas -lcurand -lstdc++ "
+MKLDFLAGS="$MKLDFLAGS-lcudart -lcublas -lcurand -lcufile -lstdc++ "
 CPPFLAGS="$CPPFLAGS-I$cuda_prefix/include "
@@ -4760,7 +4760,7 @@ if test "$mps_support" = yes; then
 printf "%s\n" "yes" >&6; }
 DEFINE_MACROS="$DEFINE_MACROS-D HAVE_MPS "
-MKLDFLAGS="$MKLDFLAGS-framework MetalPerformanceShaders -framework MetalPerformanceShadersGraph -framework Foundation -framework Metal -framework OpenCL -lc++ "
+MKLDFLAGS="$MKLDFLAGS-framework MetalPerformanceShaders -framework MetalPerformanceShadersGraph -framework Foundation -framework Metal -lc++ "
 CUDA_SRCS=""
2 changes: 1 addition & 1 deletion lib/configure.ac
@@ -159,7 +159,7 @@ AC_ARG_WITH(cuda, [AS_HELP_STRING([--with-cuda], [CUDA installation [ARG=/usr/lo
 if [[ -d "$cuda_prefix" ]]; then
 AC_SUBST(DEFINE_MACROS, ["$DEFINE_MACROS-D HAVE_CUDA "])
 # CUB requires stdc++, unfortunately.
-AC_SUBST(MKLDFLAGS, ["$MKLDFLAGS-lcudart -lcublas -lcurand -lstdc++ "])
+AC_SUBST(MKLDFLAGS, ["$MKLDFLAGS-lcudart -lcublas -lcurand -lcufile -lstdc++ "])
 AC_SUBST(CPPFLAGS, ["$CPPFLAGS-I$cuda_prefix/include "])
 if [[ -d "$cuda_prefix/lib64" ]]; then
 AC_SUBST(MKLDFLAGS, ["$MKLDFLAGS-L$cuda_prefix/lib64 "])
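With -lcufile added to MKLDFLAGS, anything built against the CUDA backend now also links the GPUDirect Storage library. A minimal sanity check, not part of the commit, would be a throwaway file compiled against the flags configure emits (the file name and exact command are illustrative, e.g. cc check_cufile.c -I$cuda_prefix/include -L$cuda_prefix/lib64 -lcufile -lcudart):

/* check_cufile.c -- hypothetical link check for the cuFile library. */
#include <cufile.h>

int main(void)
{
	CUfileError_t err = cuFileDriverOpen(); /* explicitly open the cuFile driver */
	if (err.err != CU_FILE_SUCCESS)
		return 1;
	cuFileDriverClose(); /* release the driver before exiting */
	return 0;
}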
5 changes: 1 addition & 4 deletions lib/nnc/ccv_nnc_tensor.c
@@ -132,11 +132,8 @@ ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t para
 // This is not supported yet on CUDA.
 tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
 int fd = open(filename, O_RDONLY, 0);
-void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
+cufileread(fd, offset, tensor->data.u8, size);
 close(fd);
-madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
-cumemcpy(tensor->data.u8, CCV_TENSOR_GPU_MEMORY, bufptr, CCV_TENSOR_CPU_MEMORY, size);
-munmap(bufptr, size);
 } else
 tensor->data.u8 = 0;
 } else {
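In the GPU branch of ccv_nnc_tensor_new_from_file, the old path mapped the file read-only on the host, hinted the kernel with madvise, copied the pages to the device with cumemcpy, and unmapped; the new path hands the still-open file descriptor to cufileread so cuFile can move the bytes straight into the cumalloc'ed device buffer. A hypothetical caller is unchanged, only the I/O underneath differs. The hunk header above truncates the function's parameter list, so the arguments after params in this sketch (filename, offset, flags) and the GPU_TENSOR_NHWC helper are assumptions for illustration:

// Sketch only: load a tensor file directly into GPU 0 memory through this code path.
ccv_nnc_tensor_param_t params = GPU_TENSOR_NHWC(000, 32F, 1024, 1024);
ccv_nnc_tensor_t* const weights = ccv_nnc_tensor_new_from_file(params, "weights.bin", 0, 0);
// After this commit, the bytes end up in weights->data.u8 (device memory) via cuFileRead,
// with no intermediate host mapping or cumemcpy.
ccv_nnc_tensor_free(weights);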
16 changes: 16 additions & 0 deletions lib/nnc/gpu/ccv_nnc_compat.cu
@@ -1,4 +1,5 @@
 #include "ccv_nnc_compat.h"
+#include <cufile.h> // For GPUDirect Storage
 extern "C" {
 #include <nnc/ccv_nnc_easy.h>
 #include <nnc/_ccv_nnc_stream.h>
@@ -305,6 +306,21 @@ void cuunregister(void* ptr)
 CUDA_ENFORCE(cudaHostUnregister(ptr));
 }

+void cufileread(const int fd, const off_t file_offset, void* const buf, const size_t size)
+{
+	CUfileDescr_t file_descr = {
+		.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD,
+		.handle = {
+			.fd = fd,
+		},
+		.fs_ops = 0,
+	};
+	CUfileHandle_t file_handle;
+	CUFILE_ENFORCE(cuFileHandleRegister(&file_handle, &file_descr));
+	cuFileRead(file_handle, buf, size, file_offset, 0);
+	cuFileHandleDeregister(file_handle);
+}
+
 typedef struct {
 cudaStream_t stream;
 cublasHandle_t cublas;
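cufileread wraps the usual cuFile sequence: register the POSIX file descriptor as a CUfileHandle_t, issue a synchronous cuFileRead into the destination GPU pointer, then deregister the handle. A standalone sketch of the same sequence outside of nnc, with an explicit driver open/close and a check on cuFileRead's ssize_t return value (the file name and buffer size are made up; the helper above omits these extra steps), might look like:

// gds_read.cu -- illustrative only; mirrors the cufileread() helper added above.
#include <cufile.h>
#include <cuda_runtime.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	cuFileDriverOpen(); // optional explicit driver open
	const size_t size = 1 << 20;
	void* devptr = 0;
	cudaMalloc(&devptr, size); // destination buffer lives in GPU memory
	const int fd = open("weights.bin", O_RDONLY, 0); // hypothetical input file
	CUfileDescr_t descr = {};
	descr.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD;
	descr.handle.fd = fd;
	CUfileHandle_t handle;
	cuFileHandleRegister(&handle, &descr);
	// cuFileRead(handle, device buffer, size, file offset, device buffer offset)
	const ssize_t nread = cuFileRead(handle, devptr, size, 0, 0);
	if (nread < 0)
		fprintf(stderr, "cuFileRead failed: %zd\n", nread);
	cuFileHandleDeregister(handle);
	close(fd);
	cudaFree(devptr);
	cuFileDriverClose();
	return 0;
}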
15 changes: 15 additions & 0 deletions lib/nnc/gpu/ccv_nnc_compat.h
@@ -34,6 +34,7 @@ int curegmp(int device_id, cump_f func, void* const context); // register memory
 void cuunregmp(const int id); // un-register memory pressure handler.
 void cusetprofiler(int state);
 void cusetdevicemap(const int* const device_map, const int size);
+void cufileread(const int fd, const off_t file_offset, void* const buf, const size_t size);

 // Stream context
 CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_init_stream_context(ccv_nnc_stream_context_t* const stream_context);
@@ -120,6 +121,20 @@ CCV_WARN_UNUSED(size_t) ccv_nnc_cublas_workspace_size_in_bytes(const ccv_nnc_ten
 } while (0)
 #endif

+#ifdef NDEBUG
+#define CUFILE_ENFORCE(status) status
+#else
+#define CUFILE_ENFORCE(status) do { \
+	const CUfileError_t __status = status; \
+	if (__status.err != CU_FILE_SUCCESS) { \
+		printf("[%s:%d]:CUDA - Error: %s\n", \
+			__FILE__, __LINE__, CUFILE_ERRSTR(__status.err)); \
+		assert(0); \
+		exit(EXIT_FAILURE); \
+	} \
+} while (0)
+#endif
+
 #ifdef NDEBUG
 #define CUBLAS_ENFORCE(status) status
 #else
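CUFILE_ENFORCE mirrors the existing CUDA_ENFORCE/CUBLAS_ENFORCE pattern: in release builds (NDEBUG) it evaluates the expression and discards the result, while in debug builds it checks the returned CUfileError_t and aborts with the file and line on failure. It can wrap any call that returns CUfileError_t; for example, a hypothetical buffer registration (not something this commit does):

// Sketch: pin a device buffer for GPUDirect Storage and enforce the result.
void* devptr = 0;
cudaMalloc(&devptr, 1 << 20);
CUFILE_ENFORCE(cuFileBufRegister(devptr, 1 << 20, 0)); // aborts with file/line in debug builds on error
// ... cuFileRead into devptr ...
CUFILE_ENFORCE(cuFileBufDeregister(devptr));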
