-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpi.cu
73 lines (59 loc) · 1.64 KB
/
pi.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/*
!nvcc pi.cu -o pi -Wno-deprecated-gpu-targets -lcurand
*/
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <curand.h>
#include <curand_kernel.h>
#include <time.h>
#define NUM_BLOCKS 1024
#define THREADS_PER_BLOCK 256
#define ITER_PER_THREAD 2048
#define PI 3.14159265359
__global__ void kernel(int *count)
{
double x, y, z;
// find the overall ID of the thread
int index = blockDim.x * blockIdx.x + threadIdx.x;
count[index] = 0;
curandState state;
curand_init((unsigned long long)clock() + index, 0, 0, &state);
for (int i = 0; i < ITER_PER_THREAD; i++)
{
x = curand_uniform_double(&state);
y = curand_uniform_double(&state);
z = x * x + y * y;
if (z <= 1)
count[index] += 1;
}
}
void CUDAErrorCheck()
{
cudaError_t error = cudaGetLastError();
if (error != cudaSuccess)
{
printf("CUDA error : %s (%d)\n", cudaGetErrorString(error), error);
exit(EXIT_FAILURE);
}
}
int main()
{
long unsigned int n = NUM_BLOCKS * THREADS_PER_BLOCK;
int *count;
cudaMallocManaged(&count, n * sizeof(int));
CUDAErrorCheck();
kernel<<<NUM_BLOCKS, THREADS_PER_BLOCK>>>(count);
cudaDeviceSynchronize();
CUDAErrorCheck();
long unsigned int reduced_count = 0;
for(int i = 0; i < n; i++)
reduced_count += count[i];
cudaFree(count);
// find the ratio
long unsigned int total_iter = n * ITER_PER_THREAD;
double pi = ((double)reduced_count / total_iter) * 4.0;
printf("PI [%lu iterations] = %.10g\n", total_iter, pi);
printf("Error = %.10g\n", pi - PI);
return 0;
}