diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
index 7aa44e89e..43740d295 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
@@ -352,7 +352,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 	int sub = subv << 2;
 
 	const int batchsize = MEMORY >> bfactor;
-	const int start = partidx * batchsize;
+	const int start = (partidx % (1 << bfactor)) * batchsize;
 	const int end = start + batchsize;
 
 	if ( thread >= threads )
@@ -365,15 +365,15 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 	__syncthreads( );
 	
 #if( __CUDA_ARCH__ < 300 )
-        extern __shared__ uint32_t shuffleMem[];
-        volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFFC));
+	extern __shared__ uint32_t shuffleMem[];
+	volatile uint32_t* sPtr = (volatile uint32_t*)(shuffleMem + (threadIdx.x& 0xFFFFFFF8));
 #else
-        volatile uint32_t* sPtr = NULL;
+	volatile uint32_t* sPtr = NULL;
 #endif
 
 	for ( int i = start; i < end; i += 32 )
 	{
-#pragma unroll
+		#pragma unroll
 		for ( int j = 0; j < 4; ++j )
 			text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)];
 
@@ -381,25 +381,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti
 		
 		if(ALGO == cryptonight_heavy)
 		{
-#pragma unroll
-			for ( int j = 0; j < 4; ++j )
-				text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7);
-		}
-	}
-
-	if(ALGO == cryptonight_heavy)
-	{
-		__syncthreads( );
-		
-		for ( int i = start; i < end; i += 32 )
-		{
-#pragma unroll
-			for ( int j = 0; j < 4; ++j )
-				text[j] ^= long_state[((IndexType) thread * MEMORY) + ( sub + i + j)];
-
-			cn_aes_pseudo_round_mut( sharedMemory, text, key );
-
-#pragma unroll
+			#pragma unroll
 			for ( int j = 0; j < 4; ++j )
 				text[j] ^= shuffle<8>(sPtr, subv, text[j], (subv+1)&7);
 		}
@@ -466,9 +448,21 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce)
 		if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep );
 	}
 
-	for ( int i = 0; i < partcountOneThree; i++ )
+	int roundsPhase3 = partcountOneThree;
+
+	if(ALGO == cryptonight_heavy)
+	{
+		// cryptonight_heavy used two full rounds over the scratchpad memory
+		roundsPhase3 *= 2;
+	}
+
+	for ( int i = 0; i < roundsPhase3; i++ )
 	{
-		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads,
+		CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ITERATIONS,MEMORY, ALGO><<< 
+			grid,
+			block8,
+			block8.x * sizeof(uint32_t) * static_cast< int >( ctx->device_arch[0] < 3 )
+		>>>( ctx->device_blocks*ctx->device_threads,
 			bfactorOneThree, i,
 			ctx->d_long_state,
 			ctx->d_ctx_state, ctx->d_ctx_key2 ));
diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
index e2f0b2da4..02c157ed7 100644
--- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
+++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
@@ -483,7 +483,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 		 *   with a sm_20 only compiled binary
 		 */
 		for(int i = 0; i < arch.size(); ++i)
-			if(minSupportedArch == 0 || (arch[i] >= 30 && arch[i] < minSupportedArch))
+			if(arch[i] >= 30  && (minSupportedArch == 0 || arch[i] < minSupportedArch))
 				minSupportedArch = arch[i];
 		if(minSupportedArch < 30 || gpuArch < minSupportedArch)
 		{
diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp
index f99698a0e..fa55f09a8 100644
--- a/xmrstak/jconf.cpp
+++ b/xmrstak/jconf.cpp
@@ -97,16 +97,19 @@ struct xmrstak_coin_algo
 
 xmrstak_coin_algo coin_algos[] = { 
 	{ "aeon7", cryptonight_aeon, cryptonight_lite, 7u, "mine.aeon-pool.com:5555" },
+  { "croat", cryptonight, cryptonight, 0u, nullptr },
 	{ "cryptonight", cryptonight, cryptonight, 0u, nullptr },
 	{ "cryptonight_lite", cryptonight_lite, cryptonight_lite, 0u, nullptr },
 	{ "edollar", cryptonight, cryptonight, 0u, nullptr },
 	{ "electroneum", cryptonight, cryptonight, 0u, nullptr },
 	{ "graft", cryptonight, cryptonight, 0u, nullptr },
+  { "haven", cryptonight_heavy, cryptonight, 2u, nullptr },
 	{ "intense", cryptonight, cryptonight, 0u, nullptr },
 	{ "karbo", cryptonight, cryptonight, 0u, nullptr },
 	{ "monero7", cryptonight_monero, cryptonight, 7u, "pool.usxmrpool.com:3333" },
 	{ "stellite", cryptonight_monero, cryptonight, 3u, nullptr },
 	{ "sumokoin", cryptonight_heavy, cryptonight, 3u, nullptr }
+  
 };
 
 constexpr size_t coin_alogo_size = (sizeof(coin_algos)/sizeof(coin_algos[0]));
diff --git a/xmrstak/misc/executor.cpp b/xmrstak/misc/executor.cpp
index 4c964b5ff..c6bed0b64 100644
--- a/xmrstak/misc/executor.cpp
+++ b/xmrstak/misc/executor.cpp
@@ -563,7 +563,7 @@ void executor::ex_main()
 		else
 			pools.emplace_front(0, "indeedminers.eu:3333", "", "", "", 0.0, true, false, "", true);
 		break;
-	case cryptonight_aeon:
+	
 	case cryptonight_lite:
 		if(dev_tls)
 			pools.emplace_front(0, "indeedminers.eu:2222", "", "", "", 0.0, true, false, "", true);
diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl
index f5afff6c5..7973f7c6c 100644
--- a/xmrstak/pools.tpl
+++ b/xmrstak/pools.tpl
@@ -21,11 +21,13 @@ POOLCONF],
  * Currency to mine. Supported values:
  *
  *    aeon7 (use this for Aeon's new PoW)
+ *    croat
  *    cryptonight (try this if your coin is not listed)
  *    cryptonight_lite
  *    edollar
  *    electroneum
  *    graft
+ *    haven
  *    intense
  *    karbo
  *    monero7 (use this for Monero's new PoW)
diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp
index 031078be4..98adeb22a 100644
--- a/xmrstak/version.cpp
+++ b/xmrstak/version.cpp
@@ -3,7 +3,7 @@
 //! git will put "#define GIT_ARCHIVE 1" on the next line inside archives. 
 #define GIT_ARCHIVE 1
 #if defined(GIT_ARCHIVE) && !defined(GIT_COMMIT_HASH)
-#define GIT_COMMIT_HASH 5ce9892b
+#define GIT_COMMIT_HASH 945524b3
 #endif
 
 #ifndef GIT_COMMIT_HASH
@@ -19,7 +19,7 @@
 #endif
 
 #define XMR_STAK_NAME "xmr-stak"
-#define XMR_STAK_VERSION "2.4.1"
+#define XMR_STAK_VERSION "2.4.2"
 
 #if defined(_WIN32)
 #define OS_TYPE "win"