Skip to content

Commit

Permalink
add CUDA_SAFE_CALL
Browse files: browse the repository at this point in the history
  • Loading branch information
KlausT committed Jun 15, 2016
1 parent 383e953 commit 31b27c7
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 9 deletions.
12 changes: 5 additions & 7 deletions neoscrypt/cuda_neoscrypt.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1448,7 +1448,7 @@ void neoscrypt_cpu_init_2stream(int thr_id, int threads)

__host__ void neoscrypt_cpu_hash_k4_2stream(bool stratum, int thr_id, int threads, uint32_t startNounce, uint32_t *result)
{
cudaMemsetAsync(d_NNonce[thr_id], 0xff, 2 * sizeof(uint32_t), stream[0]);
CUDA_SAFE_CALL(cudaMemsetAsync(d_NNonce[thr_id], 0xff, 2 * sizeof(uint32_t), stream[0]));

const int threadsperblock = TPB;

Expand All @@ -1458,24 +1458,22 @@ __host__ void neoscrypt_cpu_hash_k4_2stream(bool stratum, int thr_id, int thread
const int threadsperblock2 = TPB2;
dim3 grid2((threads + threadsperblock2 - 1) / threadsperblock2);
dim3 block2(threadsperblock2);

neoscrypt_gpu_hash_start << <grid2, block2, 0, stream[0] >> >(stratum, threads, startNounce); //fastkdf

cudaStreamSynchronize(stream[0]);
CUDA_SAFE_CALL(cudaStreamSynchronize(stream[0]));

neoscrypt_gpu_hash_salsa1_stream1 << <grid, block, 0, stream[0] >> >(threads, startNounce); //salsa
neoscrypt_gpu_hash_chacha1_stream1 << <grid, block, 0, stream[1] >> >(threads, startNounce); //chacha

neoscrypt_gpu_hash_salsa2_stream1 << <grid, block, 0, stream[0] >> >(threads, startNounce); //salsa
neoscrypt_gpu_hash_chacha2_stream1 << <grid, block, 0, stream[1] >> >(threads, startNounce); //chacha

cudaDeviceSynchronize();
CUDA_SAFE_CALL(cudaDeviceSynchronize());

neoscrypt_gpu_hash_ending << <grid2, block2 >> >(stratum, threads, startNounce, d_NNonce[thr_id]); //fastkdf+end

cudaMemcpy(result, d_NNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);

CUDA_SAFE_CALL(cudaGetLastError());
CUDA_SAFE_CALL(cudaMemcpy(result, d_NNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost));
}

__host__ void neoscrypt_setBlockTarget(uint32_t* pdata, const void *target)
Expand Down
4 changes: 2 additions & 2 deletions quark/cuda_skein512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2710,7 +2710,7 @@ void skein512_cpu_hash_80_52(int thr_id, uint32_t threads, uint32_t startNounce,
dim3 block(1024);
cudaMemsetAsync(d_nonce[thr_id], 0xff, 2 * sizeof(uint32_t), gpustream[thr_id]);
skein512_gpu_hash_80_52 << < grid, block, 0, gpustream[thr_id]>>> (threads, startNounce, d_nonce[thr_id], target, thr_id);
cudaMemcpyAsync(h_found, d_nonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost, gpustream[thr_id]);
CUDA_SAFE_CALL(cudaMemcpyAsync(h_found, d_nonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost, gpustream[thr_id]));
cudaStreamSynchronize(gpustream[thr_id]);
}
__host__
Expand All @@ -2720,6 +2720,6 @@ void skein512_cpu_hash_80_50(int thr_id, uint32_t threads, uint32_t startNounce,
dim3 block(256);
cudaMemsetAsync(d_nonce[thr_id], 0xff, 2 * sizeof(uint32_t), gpustream[thr_id]);
skein512_gpu_hash_80_50 << < grid, block, 0, gpustream[thr_id]>>> (threads, startNounce, d_nonce[thr_id], target, thr_id);
cudaMemcpyAsync(h_found, d_nonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost, gpustream[thr_id]);
CUDA_SAFE_CALL(cudaMemcpyAsync(h_found, d_nonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost, gpustream[thr_id]));
cudaStreamSynchronize(gpustream[thr_id]);
}

0 comments on commit 31b27c7

Please sign in to comment.