Skip to content

Commit

Permalink
2.6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
IndeedMiners committed Nov 18, 2018
1 parent 87842ec commit 85c5bc3
Show file tree
Hide file tree
Showing 8 changed files with 705 additions and 657 deletions.
41 changes: 41 additions & 0 deletions doc/compile.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,44 @@ After the configuration you need to compile the miner, follow the guide for your
- `XMR-STAK_THREADS` give the compiler information which value for `threads` is used at runtime
- default is `0` (compile time optimization)
- if the miner is compiled and used at runtime with the same value it can increase the hash rate: `cmake .. -DXMR-STAK_THREADS=32`

#### CUDA Runtime versus CUDA SDK
nVidia packages the CUDA **runtime** with the GPU drivers, and the CUDA **SDK** used for compiling should match it.
While it is possible to compile with an older SDK and then run on a newer runtime/driver, in most cases it does not work well.

The SDK usually bundles a driver that supports its particular CUDA version, but it is always best to get the (usually newer)
drivers from the official site.

For example: a binary built with the 8.0 SDK crashes randomly on some GPUs when running on a 9.2 driver, yet worked fine on most 9.1
drivers. Backward compatibility "should" work, but in reality there are many cases where it does not (YMMV).

**NOTE**: In the inverse case, installing the CUDA 10.0 SDK on a system with an older driver
does not magically add CUDA 10.0 support to the old driver. You must either build with
the CUDA SDK that matches that driver's runtime (check the 'supported technologies' list within the
first several pages of the driver release notes PDF) - *OR* - upgrade the driver to at least `411.63` to get the CUDA 10.0 runtime
(except on Fermi GPUs: they can't use CUDA 9.x or 10.0, even though newer drivers still run their *graphics* parts).

Other gotchas based on GPU family:
* Anything less than Fermi will never work
* Fermi (arch 2x) was removed after CUDA 8.0
* Volta (arch 7x) was added in CUDA 9.0
* Turing (arch 75) was added in CUDA 10.0

Here is a rough table of driver revisions and CUDA runtime contained:

| CUDA | Driver min | Driver max | notes
| ----:| ----------:| ----------:| -----
| 10.0 | 411.63 | (current) |
| 9.2 | 397.93 | 399.24 |
| 9.1 | 388.71 | 397.64 |
| 9.0 | 387.92 | 388.59 | Fermi removed (must use CUDA == 8.0)
| 8.0 | 372.70 | 386.28 | except 372.95 has CUDA7.5
| 7.5 | | | *Don't bother, won't compile anymore*

nVidia generally uses the same version numbering on all operating systems; the table above was, however, based
on the Windows driver release notes.
nVidia always lists the CUDA version included in the runtime in the release notes PDF for each driver, so it doesn't hurt to
double check your specific one.

To make CUDA version matching easier, xmr-stak displays both version numbers during the CUDA detection phases,
for example `[9.2/10.0]`: the compiled (SDK) version followed by the current (driver) runtime version.
17 changes: 11 additions & 6 deletions xmrstak/backend/amd/amd_gpu/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,9 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)
const char *fastIntMathV2CL =
#include "./opencl/fast_int_math_v2.cl"
;
const char *fastDivHeavyCL =
#include "./opencl/fast_div_heavy.cl"
;
const char *cryptonightCL =
#include "./opencl/cryptonight.cl"
;
Expand All @@ -924,6 +927,7 @@ size_t InitOpenCL(GpuContext* ctx, size_t num_gpus, size_t platform_idx)

std::string source_code(cryptonightCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_FAST_INT_MATH_V2"), fastIntMathV2CL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_FAST_DIV_HEAVY"), fastDivHeavyCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_AES"), wolfAesCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_WOLF_SKEIN"), wolfSkeinCL);
source_code = std::regex_replace(source_code, std::regex("XMRSTAK_INCLUDE_JH"), jhCL);
Expand Down Expand Up @@ -965,7 +969,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
input[input_len] = 0x01;
memset(input + input_len + 1, 0, 88 - input_len - 1);

size_t numThreads = ctx->rawIntensity;
cl_uint numThreads = ctx->rawIntensity;

if((ret = clEnqueueWriteBuffer(ctx->CommandQueues, ctx->InputBuffer, CL_TRUE, 0, 88, input, 0, NULL, NULL)) != CL_SUCCESS)
{
Expand Down Expand Up @@ -994,7 +998,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
}

// Threads
if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 3, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS)
if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][0], 3, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 0, argument 3.", err_to_str(ret));
return(ERR_OCL_API);
Expand All @@ -1017,7 +1021,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
}

// Threads
if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 2, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS)
if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 2, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 1, argument 2.", err_to_str(ret));
return(ERR_OCL_API);
Expand Down Expand Up @@ -1077,7 +1081,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar
}

// Threads
if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 6, sizeof(cl_ulong), &numThreads)) != CL_SUCCESS)
if((ret = clSetKernelArg(ctx->Kernels[kernel_storage][2], 6, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel 2, argument 6.", err_to_str(ret));
return(ERR_OCL_API);
Expand Down Expand Up @@ -1156,7 +1160,7 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)

clFinish(ctx->CommandQueues);

size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { w_size, 8 };
size_t Nonce[2] = {ctx->Nonce, 1}, gthreads[2] = { g_thd, 8 }, lthreads[2] = { 8, 8 };
if((ret = clEnqueueNDRangeKernel(ctx->CommandQueues, ctx->Kernels[kernel_storage][0], 2, Nonce, gthreads, lthreads, 0, NULL, NULL)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 0);
Expand Down Expand Up @@ -1208,7 +1212,8 @@ size_t XMRRunJob(GpuContext* ctx, cl_uint* HashOutput, xmrstak_algo miner_algo)
if(BranchNonces[i])
{
// Threads
if((clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 4, sizeof(cl_ulong), BranchNonces + i)) != CL_SUCCESS)
cl_uint numThreads = BranchNonces[i];
if((clSetKernelArg(ctx->Kernels[kernel_storage][i + 3], 4, sizeof(cl_uint), &numThreads)) != CL_SUCCESS)
{
printer::inst()->print_msg(L1,"Error %s when calling clSetKernelArg for kernel %d, argument %d.", err_to_str(ret), i + 3, 4);
return(ERR_OCL_API);
Expand Down
Loading

0 comments on commit 85c5bc3

Please sign in to comment.