diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index 7aa44e89e..43740d295 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -352,7 +352,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti int sub = subv << 2; const int batchsize = MEMORY >> bfactor; - const int start = partidx * batchsize; + const int start = (partidx % (1 << bfactor)) * batchsize; const int end = start + batchsize; if ( thread >= threads ) @@ -365,15 +365,15 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti __syncthreads( ); #if( __CUDA_ARCH__ < 300 ) - extern __shared__ uint32_t shuffleMem[]; - volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFFC)); + extern __shared__ uint32_t shuffleMem[]; + volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFF8)); #else - volatile uint32_t* sPtr = NULL; + volatile uint32_t* sPtr = NULL; #endif for ( int i = start; i < end; i += 32 ) { -#pragma unroll + #pragma unroll for ( int j = 0; j < 4; ++j ) text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)]; @@ -381,25 +381,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti if(ALGO == cryptonight_heavy) { -#pragma unroll - for ( int j = 0; j < 4; ++j ) - text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7); - } - } - - if(ALGO == cryptonight_heavy) - { - __syncthreads( ); - - for ( int i = start; i < end; i += 32 ) - { -#pragma unroll - for ( int j = 0; j < 4; ++j ) - text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)]; - - cn_aes_pseudo_round_mut( sharedMemory, text, key ); - -#pragma unroll + #pragma unroll for ( int j = 0; j < 4; ++j ) text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7); } @@ -466,9 +448,21 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep ); } - for ( int i = 0; i < partcountOneThree; i++ ) + int roundsPhase3 = partcountOneThree; + + if(ALGO == cryptonight_heavy) + { + // cryptonight_heavy used two full rounds over the scratchpad memory + roundsPhase3 *= 2; + } + + for ( int i = 0; i < roundsPhase3; i++ ) { - CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<<< + grid, + block8, + block8.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 ) + >>>( ctx->device_blocks*ctx->device_threads, bfactorOneThree, i, ctx->d_long_state, ctx->d_ctx_state, ctx->d_ctx_key2 )); diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index e2f0b2da4..02c157ed7 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -483,7 +483,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) * with a sm_20 only compiled binary */ for(int i = 0; i < arch.size(); ++i) - if(minSupportedArch == 0 || (arch[i] >= 30 && arch[i] < minSupportedArch)) + if(arch[i] >= 30 && (minSupportedArch == 0 || arch[i] < minSupportedArch)) minSupportedArch = arch[i]; if(minSupportedArch < 30 || gpuArch < minSupportedArch) { diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index f99698a0e..fa55f09a8 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -97,16 +97,19 @@ struct xmrstak_coin_algo xmrstak_coin_algo coin_algos[] = { { "aeon7", cryptonight_aeon, cryptonight_lite, 7u, "mine.aeon-pool.com:5555" }, + { "croat", cryptonight, cryptonight, 0u, nullptr }, { "cryptonight", cryptonight, cryptonight, 0u, nullptr }, { "cryptonight_lite", cryptonight_lite, cryptonight_lite, 0u, nullptr }, { "edollar", cryptonight, cryptonight, 0u, nullptr }, { "electroneum", cryptonight, cryptonight, 0u, nullptr }, { "graft", cryptonight, cryptonight, 0u, nullptr }, + { "haven", cryptonight_heavy, cryptonight, 2u, nullptr }, { "intense", cryptonight, cryptonight, 0u, nullptr }, { "karbo", cryptonight, cryptonight, 0u, nullptr }, { "monero7", cryptonight_monero, cryptonight, 7u, "pool.usxmrpool.com:3333" }, { "stellite", cryptonight_monero, cryptonight, 3u, nullptr }, { "sumokoin", cryptonight_heavy, cryptonight, 3u, nullptr } + }; constexpr size_t coin_alogo_size = (sizeof(coin_algos)/sizeof(coin_algos[0])); diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp index 4c964b5ff..c6bed0b64 100644 --- a/xmrstak/misc/executor.cpp +++ b/xmrstak/misc/executor.cpp @@ -563,7 +563,7 @@ void executor::ex_main() else pools.emplace_front(0, "indeedminers.eu:3333", "", "", "", 0.0, true, false, "", true); break; - case cryptonight_aeon: + case cryptonight_lite: if(dev_tls) pools.emplace_front(0, "indeedminers.eu:2222", "", "", "", 0.0, true, false, "", true); diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl index f5afff6c5..7973f7c6c 100644 --- a/xmrstak/pools.tpl +++ b/xmrstak/pools.tpl @@ -21,11 +21,13 @@ POOLCONF], * Currency to mine. Supported values: * * aeon7 (use this for Aeon's new PoW) + * croat * cryptonight (try this if your coin is not listed) * cryptonight_lite * edollar * electroneum * graft + * haven * intense * karbo * monero7 (use this for Monero's new PoW) diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp index 031078be4..98adeb22a 100644 --- a/xmrstak/version.cpp +++ b/xmrstak/version.cpp @@ -3,7 +3,7 @@ //! git will put "#define GIT_ARCHIVE 1" on the next line inside archives. #define GIT_ARCHIVE 1 #if defined(GIT_ARCHIVE) && !defined(GIT_COMMIT_HASH) -#define GIT_COMMIT_HASH 5ce9892b +#define GIT_COMMIT_HASH 945524b3 #endif #ifndef GIT_COMMIT_HASH @@ -19,7 +19,7 @@ #endif #define XMR_STAK_NAME "xmr-stak" -#define XMR_STAK_VERSION "2.4.1" +#define XMR_STAK_VERSION "2.4.2" #if defined(_WIN32) #define OS_TYPE "win"