remove #if __CUDA_ARCH__ < 300

xiaolin1579 · May 29, 2015 · 2ea1ff6 · 2ea1ff6
1 parent 3c2b698
commit 2ea1ff6
Show file tree

Hide file tree

Showing 5 changed files with 0 additions and 52 deletions.
diff --git a/JHA/cuda_jha_compactionTest.cu b/JHA/cuda_jha_compactionTest.cu
@@ -48,14 +48,6 @@ __host__ void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads)
 	cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
 }
 
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 300
-/**
- * __shfl_up() calculates a source lane ID by subtracting delta from the caller's lane ID, and clamping to the range 0..width-1
- */
-#undef __shfl_up
-#define __shfl_up(var, delta, width) (0)
-#endif
-
 // Die Summenfunktion (vom NVIDIA SDK)
 __global__ void jackpot_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
 {

diff --git a/bitslice_transformations_quad.cu b/bitslice_transformations_quad.cu
@@ -1,11 +1,3 @@
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 300
-/**
-* __shfl() returns the value of var held by the thread whose ID is given by srcLane.
-* If srcLane is outside the range 0..width-1, the thread's own value of var is returned.
-*/
-#undef __shfl
-#define __shfl(var, srcLane, width) (uint32_t)(var)
-#endif
 
 #define merge8(z, x, y, b)\
 		z=__byte_perm(x, y, b); \

diff --git a/groestl_functions_quad.cu b/groestl_functions_quad.cu
@@ -240,15 +240,6 @@ __device__ __forceinline__ void G256_ShiftBytesQ_quad(uint32_t &x7, uint32_t &x6
     x7 = __byte_perm(t0, t1, 0x5410);
 }
 
-#if __CUDA_ARCH__ < 300
-/**
- * __shfl() returns the value of var held by the thread whose ID is given by srcLane.
- * If srcLane is outside the range 0..width-1, the thread’s own value of var is returned.
- */
-#undef __shfl
-#define __shfl(var, srcLane, width) (uint32_t)(var)
-#endif
-
 __device__ __forceinline__ void G256_MixFunction_quad(uint32_t *r)
 {
 #define SHIFT64_16(hi, lo)    __byte_perm(lo, hi, 0x5432)

diff --git a/pentablake.cu b/pentablake.cu
@@ -294,20 +294,10 @@ void pentablake_gpu_hash_80(uint32_t threads, const uint32_t startNounce, void *
 
 		pentablake_compress(h, buf, 640ULL);
 
-#if __CUDA_ARCH__ < 300
-		uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
-		#pragma unroll 8
-		for (uint32_t i=0; i < 8; i++) {
-			outHash[2*i]   = cuda_swab32( _HIWORD(h[i]) );
-			outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
-		}
-#else
 		uint64_t *outHash = (uint64_t *)outputHash + 8 * thread;
 		for (uint32_t i=0; i < 8; i++) {
 			outHash[i] = cuda_swab64( h[i] );
 		}
-#endif
-
 	}
 }
 
@@ -348,19 +338,10 @@ void pentablake_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_
 		// Ending round
 		pentablake_compress(h, buf, 512);
 
-#if __CUDA_ARCH__ < 300
-		uint32_t *outHash = (uint32_t*)&g_hash[thread<<3];
-		#pragma unroll 8
-		for (int i=0; i < 8; i++) {
-			outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
-			outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
-		}
-#else
 		uint64_t *outHash = &g_hash[thread<<3];
 		for (int i=0; i < 8; i++) {
 			outHash[i] = cuda_swab64(h[i]);
 		}
-#endif
 	}
 }
 

diff --git a/quark/cuda_quark_compactionTest.cu b/quark/cuda_quark_compactionTest.cu
@@ -46,14 +46,6 @@ __host__ void quark_compactTest_cpu_init(int thr_id, uint32_t threads)
 	cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
 }
 
-#if __CUDA_ARCH__ < 300
-/**
- * __shfl_up() calculates a source lane ID by subtracting delta from the caller's lane ID, and clamping to the range 0..width-1
- */
-#undef __shfl_up
-#define __shfl_up(var, delta, width) (0)
-#endif
-
 // Die Summenfunktion (vom NVIDIA SDK)
 __global__ void quark_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, uint32_t threads=0, uint32_t startNounce=0, const uint32_t *inpHashes=NULL, const uint32_t *d_validNonceTable=NULL)
 {