Skip to content

Commit

Permalink
remove #if __CUDA_ARCH__ < 300
Browse files Browse the repository at this point in the history
  • Loading branch information
KlausT committed May 29, 2015
1 parent 3c2b698 commit 2ea1ff6
Show file tree
Hide file tree
Showing 5 changed files with 0 additions and 52 deletions.
8 changes: 0 additions & 8 deletions JHA/cuda_jha_compactionTest.cu
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,6 @@ __host__ void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads)
cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
}

#ifdef __CUDA_ARCH__
#if __CUDA_ARCH__ < 300
/*
 * Device builds below compute capability 3.0: replace __shfl_up() with a
 * stub that ignores its arguments and always evaluates to 0.
 * The real intrinsic calculates a source lane ID by subtracting delta from
 * the caller's lane ID and clamping to the range 0..width-1; this stub does
 * not emulate that exchange.
 */
#undef __shfl_up
#define __shfl_up(var, delta, width) (0)
#endif
#endif

// The summation (scan) function (from the NVIDIA SDK)
__global__ void jackpot_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
{
Expand Down
8 changes: 0 additions & 8 deletions bitslice_transformations_quad.cu
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
#ifdef __CUDA_ARCH__
#if __CUDA_ARCH__ < 300
/*
 * Device builds below compute capability 3.0: replace __shfl() with a macro
 * that yields the calling thread's own var cast to uint32_t; srcLane and
 * width are ignored, so no value is exchanged between lanes.
 * The real intrinsic returns the value of var held by the thread whose ID is
 * given by srcLane, or the thread's own value of var when srcLane is outside
 * the range 0..width-1.
 */
#undef __shfl
#define __shfl(var, srcLane, width) (uint32_t)(var)
#endif
#endif

#define merge8(z, x, y, b)\
z=__byte_perm(x, y, b); \
Expand Down
9 changes: 0 additions & 9 deletions groestl_functions_quad.cu
Original file line number Diff line number Diff line change
Expand Up @@ -240,15 +240,6 @@ __device__ __forceinline__ void G256_ShiftBytesQ_quad(uint32_t &x7, uint32_t &x6
x7 = __byte_perm(t0, t1, 0x5410);
}

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 300
/**
 * Compile-only fallback for device targets below compute capability 3.0:
 * __shfl() is replaced by a macro that yields the calling thread's own var
 * cast to uint32_t. srcLane and width are ignored, so no cross-lane exchange
 * takes place.
 *
 * For reference, the real __shfl() returns the value of var held by the
 * thread whose ID is given by srcLane. If srcLane is outside the range
 * 0..width-1, the thread's own value of var is returned.
 *
 * The defined(__CUDA_ARCH__) check keeps this override out of the host
 * compilation pass: an undefined __CUDA_ARCH__ evaluates to 0 inside #if and
 * would otherwise satisfy "< 300".
 */
#undef __shfl
#define __shfl(var, srcLane, width) (uint32_t)(var)
#endif

__device__ __forceinline__ void G256_MixFunction_quad(uint32_t *r)
{
#define SHIFT64_16(hi, lo) __byte_perm(lo, hi, 0x5432)
Expand Down
19 changes: 0 additions & 19 deletions pentablake.cu
Original file line number Diff line number Diff line change
Expand Up @@ -294,20 +294,10 @@ void pentablake_gpu_hash_80(uint32_t threads, const uint32_t startNounce, void *

pentablake_compress(h, buf, 640ULL);

#if __CUDA_ARCH__ < 300
uint32_t *outHash = (uint32_t *)outputHash + 16 * thread;
#pragma unroll 8
for (uint32_t i=0; i < 8; i++) {
outHash[2*i] = cuda_swab32( _HIWORD(h[i]) );
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
}
#else
uint64_t *outHash = (uint64_t *)outputHash + 8 * thread;
for (uint32_t i=0; i < 8; i++) {
outHash[i] = cuda_swab64( h[i] );
}
#endif

}
}

Expand Down Expand Up @@ -348,19 +338,10 @@ void pentablake_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_
// Ending round
pentablake_compress(h, buf, 512);

#if __CUDA_ARCH__ < 300
uint32_t *outHash = (uint32_t*)&g_hash[thread<<3];
#pragma unroll 8
for (int i=0; i < 8; i++) {
outHash[2*i+0] = cuda_swab32( _HIWORD(h[i]) );
outHash[2*i+1] = cuda_swab32( _LOWORD(h[i]) );
}
#else
uint64_t *outHash = &g_hash[thread<<3];
for (int i=0; i < 8; i++) {
outHash[i] = cuda_swab64(h[i]);
}
#endif
}
}

Expand Down
8 changes: 0 additions & 8 deletions quark/cuda_quark_compactionTest.cu
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,6 @@ __host__ void quark_compactTest_cpu_init(int thr_id, uint32_t threads)
cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
}

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 300
/**
 * Compile-only fallback for device targets below compute capability 3.0:
 * __shfl_up() is replaced by a stub that ignores its arguments and always
 * evaluates to 0.
 *
 * For reference, the real __shfl_up() calculates a source lane ID by
 * subtracting delta from the caller's lane ID, and clamping to the range
 * 0..width-1. This stub does not emulate that exchange.
 *
 * The defined(__CUDA_ARCH__) check keeps this override out of the host
 * compilation pass: an undefined __CUDA_ARCH__ evaluates to 0 inside #if and
 * would otherwise satisfy "< 300".
 */
#undef __shfl_up
#define __shfl_up(var, delta, width) (0)
#endif

// The summation (scan) function (from the NVIDIA SDK)
__global__ void quark_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, uint32_t threads=0, uint32_t startNounce=0, const uint32_t *inpHashes=NULL, const uint32_t *d_validNonceTable=NULL)
{
Expand Down

0 comments on commit 2ea1ff6

Please sign in to comment.