diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu
index 0cc43ec5..adaeb0f9 100644
--- a/Algo256/blake256.cu
+++ b/Algo256/blake256.cu
@@ -718,8 +718,9 @@ extern int scanhash_blake256(int thr_id, uint32_t *pdata, uint32_t *ptarget,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		CUDA_SAFE_CALL(cudaMallocHost(&h_data, 15 * sizeof(uint32_t)));
 		CUDA_SAFE_CALL(cudaMallocHost(&h_resNonce, NBN * sizeof(uint32_t)));
diff --git a/Algo256/cuda_fugue256.cu b/Algo256/cuda_fugue256.cu
index 7285e2cd..bc5a89ff 100644
--- a/Algo256/cuda_fugue256.cu
+++ b/Algo256/cuda_fugue256.cu
@@ -722,8 +722,9 @@ fugue256_gpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outp
 void fugue256_cpu_init(int thr_id, uint32_t threads)
 {
 	CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-	cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-	cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+	CUDA_SAFE_CALL(cudaDeviceReset());
+	CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+	CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 
 	// Kopiere die Hash-Tabellen in den GPU-Speicher
 	texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu
index cec9ded7..90ea840d 100644
--- a/Algo256/keccak256.cu
+++ b/Algo256/keccak256.cu
@@ -51,8 +51,9 @@ extern int scanhash_keccak256(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		CUDA_SAFE_CALL(cudaMallocHost(&h_nounce, 2 * sizeof(uint32_t)));
 		keccak256_cpu_init(thr_id, (int)throughputmax);
diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu
index 6e6ec3e9..85174fa7 100644
--- a/JHA/jackpotcoin.cu
+++ b/JHA/jackpotcoin.cu
@@ -96,8 +96,9 @@ extern int scanhash_jackpot(int thr_id, uint32_t *pdata,
 	if (!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash, 16 * sizeof(uint32_t) * throughputmax));
diff --git a/Sia/sia.cu b/Sia/sia.cu
index ad02c1d5..dc2ee8f3 100644
--- a/Sia/sia.cu
+++ b/Sia/sia.cu
@@ -221,8 +221,9 @@ int scanhash_sia(int thr_id, uint32_t *pdata, uint32_t *ptarget, uint32_t max_no
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		CUDA_SAFE_CALL(cudaMallocHost(&h_nounce, MAXRESULTS * sizeof(uint32_t)));
diff --git a/bitcoin.cu b/bitcoin.cu
index aecf49c3..31eb7710 100644
--- a/bitcoin.cu
+++ b/bitcoin.cu
@@ -138,8 +138,9 @@ int scanhash_bitcoin(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		bitcoin_cpu_init(thr_id);
diff --git a/ccminer.cpp b/ccminer.cpp
index 40c6507b..f1bea93b 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -2791,7 +2791,6 @@ int main(int argc, char *argv[])
 		fprintf(stderr, "%s: no URL supplied\n", argv[0]);
 		show_usage_and_exit(1);
 	}
-	cuda_devicereset();
 
 	if(!rpc_userpass)
 	{
diff --git a/cuda_myriadgroestl.cu b/cuda_myriadgroestl.cu
index 6316309f..8d5453c3 100644
--- a/cuda_myriadgroestl.cu
+++ b/cuda_myriadgroestl.cu
@@ -282,8 +282,9 @@ static THREAD cudaStream_t stream[3];
 __host__ void myriadgroestl_cpu_init(int thr_id, uint32_t threads)
 {
 	CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-	cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-	cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+	CUDA_SAFE_CALL(cudaDeviceReset());
+	CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+	CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 	CUDA_SAFE_CALL(cudaStreamCreate(&stream[0]));
 	CUDA_SAFE_CALL(cudaStreamCreate(&stream[1]));
 	CUDA_SAFE_CALL(cudaStreamCreate(&stream[2]));
diff --git a/cuda_nist5.cu b/cuda_nist5.cu
index 7508049c..ea216352 100644
--- a/cuda_nist5.cu
+++ b/cuda_nist5.cu
@@ -85,8 +85,9 @@ extern int scanhash_nist5(int thr_id, uint32_t *pdata,
 	{
 		oldthroughput = throughput;
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/groestlcoin.cpp b/groestlcoin.cpp
index 9897a668..dade6210 100644
--- a/groestlcoin.cpp
+++ b/groestlcoin.cpp
@@ -67,8 +67,9 @@ extern int scanhash_groestlcoin(int thr_id, uint32_t *pdata, uint32_t *ptarget,
 	if(!init)
     {
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 
 		groestlcoin_cpu_init(thr_id, throughputmax);
diff --git a/lyra2/lyra2REv2.cu b/lyra2/lyra2REv2.cu
index 076d4217..e2d9ea44 100644
--- a/lyra2/lyra2REv2.cu
+++ b/lyra2/lyra2REv2.cu
@@ -134,8 +134,9 @@ int scanhash_lyra2v2(int thr_id, uint32_t *pdata,
 	if (!init)
 	{ 
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/neoscrypt/neoscrypt.cu b/neoscrypt/neoscrypt.cu
index 4c56637a..ae290fca 100644
--- a/neoscrypt/neoscrypt.cu
+++ b/neoscrypt/neoscrypt.cu
@@ -36,6 +36,9 @@ int scanhash_neoscrypt(bool stratum, int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 
 		cudaDeviceProp props;
 		cudaGetDeviceProperties(&props, device_map[thr_id]);
@@ -99,7 +102,6 @@ int scanhash_neoscrypt(bool stratum, int thr_id, uint32_t *pdata,
 		}
 
 		throughputmax = device_intensity(device_map[thr_id], __func__, intensity) / 2;
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 		//		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);	
 		CUDA_SAFE_CALL(cudaMallocHost(&foundNonce, 2 * 4));
 
diff --git a/pentablake.cu b/pentablake.cu
index 0c791dad..9619a7da 100644
--- a/pentablake.cu
+++ b/pentablake.cu
@@ -454,8 +454,9 @@ extern int scanhash_pentablake(int thr_id, uint32_t *pdata, uint32_t *ptarget,
 	if (!init[thr_id]) 
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu
index 52d312c6..c9c39e79 100644
--- a/quark/quarkcoin.cu
+++ b/quark/quarkcoin.cu
@@ -148,8 +148,9 @@ extern int scanhash_quark(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 //		}
 
diff --git a/qubit/deep.cu b/qubit/deep.cu
index 11d6ad5d..cdd483f7 100644
--- a/qubit/deep.cu
+++ b/qubit/deep.cu
@@ -69,8 +69,9 @@ extern int scanhash_deep(int thr_id, uint32_t *pdata,
 	if (!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/qubit/doom.cu b/qubit/doom.cu
index 479e7b36..81d0ec48 100644
--- a/qubit/doom.cu
+++ b/qubit/doom.cu
@@ -48,8 +48,9 @@ extern int scanhash_doom(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/qubit/qubit.cu b/qubit/qubit.cu
index 0961eac3..451a1527 100644
--- a/qubit/qubit.cu
+++ b/qubit/qubit.cu
@@ -120,8 +120,9 @@ extern int scanhash_qubit(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 
 #if defined WIN32 && !defined _WIN64
diff --git a/skein.cu b/skein.cu
index 8336a91c..a0e8b6a7 100644
--- a/skein.cu
+++ b/skein.cu
@@ -62,8 +62,9 @@ int scanhash_skeincoin(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		CUDA_SAFE_CALL(cudaMallocHost(&foundnonces, 2 * 4));
 		mining_has_stopped[thr_id] = false;
diff --git a/x11/c11.cu b/x11/c11.cu
index c21bbfbf..77506d23 100644
--- a/x11/c11.cu
+++ b/x11/c11.cu
@@ -145,6 +145,7 @@ int scanhash_c11(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
+		CUDA_SAFE_CALL(cudaDeviceReset());
 		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
 		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
diff --git a/x11/fresh.cu b/x11/fresh.cu
index 959adc6e..0a0654e8 100644
--- a/x11/fresh.cu
+++ b/x11/fresh.cu
@@ -86,8 +86,9 @@ extern int scanhash_fresh(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/x11/s3.cu b/x11/s3.cu
index 7a391548..da1c7655 100644
--- a/x11/s3.cu
+++ b/x11/s3.cu
@@ -75,8 +75,9 @@ extern int scanhash_s3(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/x11/x11.cu b/x11/x11.cu
index 248c4e19..7bfccfa0 100644
--- a/x11/x11.cu
+++ b/x11/x11.cu
@@ -148,6 +148,7 @@ extern int scanhash_x11(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
+		CUDA_SAFE_CALL(cudaDeviceReset());
 		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
 		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
diff --git a/x13/x13.cu b/x13/x13.cu
index 4dd05174..a25e42ed 100644
--- a/x13/x13.cu
+++ b/x13/x13.cu
@@ -170,8 +170,9 @@ extern int scanhash_x13(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu
index 0f891407..c8e02781 100644
--- a/x15/whirlpool.cu
+++ b/x15/whirlpool.cu
@@ -64,8 +64,9 @@ extern int scanhash_whc(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/x15/whirlpoolx.cu b/x15/whirlpoolx.cu
index be0b18b3..6841d4cd 100644
--- a/x15/whirlpoolx.cu
+++ b/x15/whirlpoolx.cu
@@ -52,8 +52,9 @@ int scanhash_whirlpoolx(int thr_id, uint32_t *pdata, uint32_t *ptarget, uint32_t
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		whirlpoolx_cpu_init(thr_id, throughputmax);
 		mining_has_stopped[thr_id] = false;
diff --git a/x15/x14.cu b/x15/x14.cu
index b575ffc9..c6a101ac 100644
--- a/x15/x14.cu
+++ b/x15/x14.cu
@@ -169,8 +169,9 @@ extern int scanhash_x14(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/x15/x15.cu b/x15/x15.cu
index 1f7aa634..2b93ef94 100644
--- a/x15/x15.cu
+++ b/x15/x15.cu
@@ -180,8 +180,9 @@ extern int scanhash_x15(int thr_id, uint32_t *pdata,
 	if(!init)
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
diff --git a/x17/x17.cu b/x17/x17.cu
index 3d9a9932..97b85639 100644
--- a/x17/x17.cu
+++ b/x17/x17.cu
@@ -199,8 +199,9 @@ extern int scanhash_x17(int thr_id, uint32_t *pdata,
 	if (!init[thr_id])
 	{
 		CUDA_SAFE_CALL(cudaSetDevice(device_map[thr_id]));
-		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
+		CUDA_SAFE_CALL(cudaDeviceReset());
+		CUDA_SAFE_CALL(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
+		CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 #if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc