forked from NVIDIA/CUDALibrarySamples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1d_r2c_example.cpp
125 lines (103 loc) · 4.67 KB
/
1d_r2c_example.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/*
* Copyright 2020 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO LICENSEE:
*
* This source code and/or documentation ("Licensed Deliverables") are
* subject to NVIDIA intellectual property rights under U.S. and
* international Copyright laws.
*
* These Licensed Deliverables contained herein is PROPRIETARY and
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
* conditions of a form of NVIDIA software license agreement by and
* between NVIDIA and Licensee ("License Agreement") or electronically
* accepted by Licensee. Notwithstanding any terms or conditions to
* the contrary in the License Agreement, reproduction or disclosure
* of the Licensed Deliverables to any third party without the express
* written consent of NVIDIA is prohibited.
*
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THESE LICENSED DELIVERABLES.
*
* U.S. Government End Users. These Licensed Deliverables are a
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
* 1995), consisting of "commercial computer software" and "commercial
* computer software documentation" as such terms are used in 48
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
* U.S. Government End Users acquire the Licensed Deliverables with
* only those rights set forth herein.
*
* Any use of the Licensed Deliverables in individual and commercial
* software must include, in the user documentation and internal
* comments to the code, the above Disclaimer and U.S. Government End
* Users Notice.
*/
#include <complex>
#include <iostream>
#include <vector>
#include <cuda_runtime.h>
#include <cufftXt.h>
#include "cufft_utils.h"
int main(int argc, char *argv[]) {
cufftHandle plan;
cudaStream_t stream = NULL;
int n = 8;
int batch_size = 2;
int fft_size = batch_size * n;
using scalar_type = float;
using input_type = scalar_type;
using output_type = std::complex<scalar_type>;
std::vector<input_type> input(fft_size, 0);
std::vector<output_type> output(static_cast<int>((fft_size / 2 + 1)));
for (int i = 0; i < fft_size; i++) {
input[i] = static_cast<input_type>(i);
}
std::printf("Input array:\n");
for (auto &i : input) {
std::printf("%f\n", i);
}
std::printf("=====\n");
input_type *d_input = nullptr;
cufftComplex *d_output = nullptr;
CUFFT_CALL(cufftCreate(&plan));
CUFFT_CALL(cufftPlan1d(&plan, input.size(), CUFFT_R2C, batch_size));
CUDA_RT_CALL(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
CUFFT_CALL(cufftSetStream(plan, stream));
// Create device arrays
CUDA_RT_CALL(
cudaMalloc(reinterpret_cast<void **>(&d_input), sizeof(input_type) * input.size()));
CUDA_RT_CALL(
cudaMalloc(reinterpret_cast<void **>(&d_output), sizeof(output_type) * output.size()));
CUDA_RT_CALL(cudaMemcpyAsync(d_input, input.data(), sizeof(input_type) * input.size(),
cudaMemcpyHostToDevice, stream));
CUFFT_CALL(cufftExecR2C(plan, d_input, d_output));
CUDA_RT_CALL(cudaMemcpyAsync(output.data(), d_output, sizeof(output_type) * output.size(),
cudaMemcpyDeviceToHost, stream));
CUDA_RT_CALL(cudaStreamSynchronize(stream));
std::printf("Output array:\n");
for (auto &i : output) {
std::printf("%f + %fj\n", i.real(), i.imag());
}
std::printf("=====\n");
/* free resources */
CUDA_RT_CALL(cudaFree(d_input));
CUDA_RT_CALL(cudaFree(d_output));
CUFFT_CALL(cufftDestroy(plan));
CUDA_RT_CALL(cudaStreamDestroy(stream));
CUDA_RT_CALL(cudaDeviceReset());
return EXIT_SUCCESS;
}