diff --git a/cuda_nist5.cu b/cuda_nist5.cu
index 57355d94..d0d3dd92 100644
--- a/cuda_nist5.cu
+++ b/cuda_nist5.cu
@@ -89,6 +89,16 @@ extern int scanhash_nist5(int thr_id, uint32_t *pdata,
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughput > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		// Konstanten kopieren, Speicher belegen
 		quark_groestl512_cpu_init(thr_id, throughput);
diff --git a/fuguecoin.cpp b/fuguecoin.cpp
index 13fd0fbf..24a0d9cc 100644
--- a/fuguecoin.cpp
+++ b/fuguecoin.cpp
@@ -41,6 +41,15 @@ extern int scanhash_fugue256(int thr_id, uint32_t *pdata, uint32_t *ptarget,
 	static THREAD volatile bool init = false;
 	if(!init)
 	{
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (8 * sizeof(uint32_t)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			proper_exit(2);
+		}
+#endif
 		fugue256_cpu_init(thr_id, throughputmax);
 		init = true;
 	}
diff --git a/lyra2/lyra2REv2.cu b/lyra2/lyra2REv2.cu
index 4e9072e3..ee084ef3 100644
--- a/lyra2/lyra2REv2.cu
+++ b/lyra2/lyra2REv2.cu
@@ -125,7 +125,7 @@ int scanhash_lyra2v2(int thr_id, uint32_t *pdata,
 		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
-#if !defined _WIN64
+#if defined WIN32 && !defined _WIN64
 		// 2GB limit for cudaMalloc
 		if(throughputmax > 0x7fffffffULL / (16 * 4 * 4 * sizeof(uint64_t)))
 		{
diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp
index 7375b1cd..0922a91d 100644
--- a/myriadgroestl.cpp
+++ b/myriadgroestl.cpp
@@ -57,6 +57,15 @@ extern int scanhash_myriad(int thr_id, uint32_t *pdata, uint32_t *ptarget,
 	{
 #if BIG_DEBUG
 #else
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			proper_exit(2);
+		}
+#endif
 		myriadgroestl_cpu_init(thr_id, throughputmax);
 #endif
 		cudaMallocHost(&h_found, 4 * sizeof(uint32_t));
diff --git a/neoscrypt/neoscrypt.cu b/neoscrypt/neoscrypt.cu
index 21d7cc15..9df9bea9 100644
--- a/neoscrypt/neoscrypt.cu
+++ b/neoscrypt/neoscrypt.cu
@@ -88,6 +88,17 @@ int scanhash_neoscrypt(bool stratum, int thr_id, uint32_t *pdata,
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		CUDA_SAFE_CALL(cudaMallocHost(&foundNonce, 2 * 4));
 
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (32 * 128 * sizeof(uint64_t)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
+
 		neoscrypt_cpu_init_2stream(thr_id, throughputmax);
 		init = true;
 	}
diff --git a/pentablake.cu b/pentablake.cu
index 761f96b3..6f531635 100644
--- a/pentablake.cu
+++ b/pentablake.cu
@@ -457,6 +457,16 @@ extern int scanhash_pentablake(int thr_id, uint32_t *pdata, uint32_t *ptarget,
 		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / 64)
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash, 64 * throughputmax));
 		CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
 		CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));
diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu
index 04ca5e48..4636e183 100644
--- a/quark/quarkcoin.cu
+++ b/quark/quarkcoin.cu
@@ -154,6 +154,17 @@ extern int scanhash_quark(int thr_id, uint32_t *pdata,
 		get_cuda_arch(&cuda_arch[thr_id]);
 //		}
 
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
+
 		// Konstanten kopieren, Speicher belegen
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash, 16 * sizeof(uint32_t) * throughputmax));
 		CUDA_SAFE_CALL(cudaMallocHost(&foundnonces, 4 * 4));
@@ -166,8 +177,6 @@ extern int scanhash_quark(int thr_id, uint32_t *pdata,
 		CUDA_SAFE_CALL(cudaMalloc(&d_branch2Nonces, sizeof(uint32_t)*noncebuffersize2));
 		CUDA_SAFE_CALL(cudaMalloc(&d_branch3Nonces, sizeof(uint32_t)*noncebuffersize));
 		quark_blake512_cpu_init(thr_id);
-		quark_groestl512_cpu_init(thr_id, throughputmax);
-		quark_bmw512_cpu_init(thr_id, throughputmax);
 		quark_compactTest_cpu_init(thr_id, throughputmax);
 		quark_keccak512_cpu_init(thr_id);
 		quark_jh512_cpu_init(thr_id);
diff --git a/qubit/deep.cu b/qubit/deep.cu
index afc019cd..c7d5a74e 100644
--- a/qubit/deep.cu
+++ b/qubit/deep.cu
@@ -72,6 +72,16 @@ extern int scanhash_deep(int thr_id, uint32_t *pdata,
 		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash, 16 * sizeof(uint32_t) * throughputmax));
 
diff --git a/qubit/doom.cu b/qubit/doom.cu
index 3028dafe..d00ba674 100644
--- a/qubit/doom.cu
+++ b/qubit/doom.cu
@@ -51,6 +51,16 @@ extern int scanhash_doom(int thr_id, uint32_t *pdata,
 		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash, 16 * sizeof(uint32_t) * throughputmax));
 
diff --git a/qubit/qubit.cu b/qubit/qubit.cu
index 01a37d14..c4669aac 100644
--- a/qubit/qubit.cu
+++ b/qubit/qubit.cu
@@ -124,6 +124,17 @@ extern int scanhash_qubit(int thr_id, uint32_t *pdata,
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
+
 		qubit_luffa512_cpu_init(thr_id, throughputmax);
 		x11_simd512_cpu_init(thr_id, throughputmax);
 		x11_echo512_cpu_init(thr_id, throughputmax);
diff --git a/x11/c11.cu b/x11/c11.cu
index f32b1dfe..2a29c22f 100644
--- a/x11/c11.cu
+++ b/x11/c11.cu
@@ -150,6 +150,16 @@ int scanhash_c11(int thr_id, uint32_t *pdata,
 		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		x11_echo512_cpu_init(thr_id, throughputmax);
 		if(x11_simd512_cpu_init(thr_id, throughputmax) != 0)
diff --git a/x11/fresh.cu b/x11/fresh.cu
index 1528bfd7..c5873878 100644
--- a/x11/fresh.cu
+++ b/x11/fresh.cu
@@ -89,6 +89,16 @@ extern int scanhash_fresh(int thr_id, uint32_t *pdata,
 		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		x11_simd512_cpu_init(thr_id, throughputmax);
 		x11_echo512_cpu_init(thr_id, throughputmax);
diff --git a/x11/s3.cu b/x11/s3.cu
index e384122b..b7bdb7dd 100644
--- a/x11/s3.cu
+++ b/x11/s3.cu
@@ -79,6 +79,16 @@ extern int scanhash_s3(int thr_id, uint32_t *pdata,
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		x11_simd512_cpu_init(thr_id, throughputmax);
 		quark_skein512_cpu_init(thr_id);
diff --git a/x11/x11.cu b/x11/x11.cu
index 39899044..15a9374e 100644
--- a/x11/x11.cu
+++ b/x11/x11.cu
@@ -161,6 +161,16 @@ extern int scanhash_x11(int thr_id, uint32_t *pdata,
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 		quark_groestl512_cpu_init(thr_id, throughputmax);
 		quark_bmw512_cpu_init(thr_id, throughputmax);
 		x11_echo512_cpu_init(thr_id, throughputmax);
diff --git a/x13/x13.cu b/x13/x13.cu
index 895e2749..4bbcf87c 100644
--- a/x13/x13.cu
+++ b/x13/x13.cu
@@ -174,6 +174,16 @@ extern int scanhash_x13(int thr_id, uint32_t *pdata,
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		quark_groestl512_cpu_init(thr_id, throughputmax);
 		quark_bmw512_cpu_init(thr_id, throughputmax);
diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu
index 52681dc3..3a818334 100644
--- a/x15/whirlpool.cu
+++ b/x15/whirlpool.cu
@@ -67,6 +67,16 @@ extern int scanhash_whc(int thr_id, uint32_t *pdata,
 		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (16 * sizeof(uint32_t)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash, 16 * sizeof(uint32_t) * throughputmax));
 		x15_whirlpool_cpu_init(thr_id, throughputmax, 1 /* old whirlpool */);
diff --git a/x15/x14.cu b/x15/x14.cu
index acabf2e9..2fdb4e21 100644
--- a/x15/x14.cu
+++ b/x15/x14.cu
@@ -173,6 +173,16 @@ extern int scanhash_x14(int thr_id, uint32_t *pdata,
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		quark_groestl512_cpu_init(thr_id, throughputmax);
 		quark_skein512_cpu_init(thr_id);
diff --git a/x15/x15.cu b/x15/x15.cu
index 1dad54e8..5528f30d 100644
--- a/x15/x15.cu
+++ b/x15/x15.cu
@@ -184,6 +184,16 @@ extern int scanhash_x15(int thr_id, uint32_t *pdata,
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		quark_groestl512_cpu_init(thr_id, throughputmax);
 		quark_skein512_cpu_init(thr_id);
diff --git a/x17/x17.cu b/x17/x17.cu
index c2837d81..028ed690 100644
--- a/x17/x17.cu
+++ b/x17/x17.cu
@@ -203,6 +203,16 @@ extern int scanhash_x17(int thr_id, uint32_t *pdata,
 		cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 		CUDA_SAFE_CALL(cudaStreamCreate(&gpustream[thr_id]));
 		get_cuda_arch(&cuda_arch[thr_id]);
+#if defined WIN32 && !defined _WIN64
+		// 2GB limit for cudaMalloc
+		if(throughputmax > 0x7fffffffULL / (64 * sizeof(uint4)))
+		{
+			applog(LOG_ERR, "intensity too high");
+			mining_has_stopped[thr_id] = true;
+			cudaStreamDestroy(gpustream[thr_id]);
+			proper_exit(2);
+		}
+#endif
 
 		quark_groestl512_cpu_init(thr_id, throughputmax);
 		quark_skein512_cpu_init(thr_id);