ViennaCL - The Vienna Computing Library  1.5.1
fft.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_FFT_HPP
3 
7 #include "viennacl/ocl/utils.hpp"
8 
11 namespace viennacl
12 {
13  namespace linalg
14  {
15  namespace opencl
16  {
17  namespace kernels
18  {
19 
21 
22 
/** @brief OpenCL kernel generation code for the postprocessing phase of the Bluestein algorithm.
 *
 * Appends the source of the kernel `bluestein_post` to @p source. For every index i < size the
 * generated kernel multiplies the complex entry Z[i] by (cos(angle), sin(angle)) with
 * angle = -pi * ((i*i) mod (2*size)) / size and stores the product in out[i].
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the real scalar type used in the kernel; the complex type is
 *                        formed by appending "2" (e.g. "float" -> "float2").
 */
template <typename StringType>
void generate_fft_bluestein_post(StringType & source, std::string const & numeric_string)
{
  source.append("__kernel void bluestein_post(__global "); source.append(numeric_string); source.append("2 *Z, \n");
  source.append(" __global "); source.append(numeric_string); source.append("2 *out, \n");
  source.append(" unsigned int size) \n");
  source.append("{ \n");
  source.append(" unsigned int glb_id = get_global_id(0); \n");
  source.append(" unsigned int glb_sz = get_global_size(0); \n");

  source.append(" unsigned int double_size = size << 1; \n");
  source.append(" "); source.append(numeric_string); source.append(" sn_a, cs_a; \n");
  source.append(" const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n");

  source.append(" for(unsigned int i = glb_id; i < size; i += glb_sz) { \n");
  // A 32-bit i*i wraps around once i >= 65536, which corrupts the remainder whenever
  // 2*size is not a power of two. Form the product in 64 bits before reducing it;
  // the remainder is < double_size and therefore fits back into an unsigned int.
  // NOTE(review): relies on 64-bit integer support (ulong) on the target device.
  source.append(" unsigned int rm = (unsigned int)(((ulong)i * i) % double_size); \n");
  source.append(" "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * (-NUM_PI); \n");

  source.append(" sn_a = sincos(angle, &cs_a); \n");

  source.append(" "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n");
  // complex product Z[i] * b_i
  source.append(" out[i] = ("); source.append(numeric_string); source.append("2)(Z[i].x * b_i.x - Z[i].y * b_i.y, Z[i].x * b_i.y + Z[i].y * b_i.x); \n");
  source.append(" } \n");
  source.append("} \n");
}
49 
/** @brief OpenCL kernel generation code for the preprocessing phase of the Bluestein algorithm.
 *
 * Appends the source of the kernel `bluestein_pre` to @p source. With
 * angle = pi * ((i*i) mod (2*size)) / size the generated kernel computes, for every i < size,
 *   a_i = (cos(angle), -sin(angle)),  b_i = (cos(angle), sin(angle)),
 * writes A[i] = input[i] * a_i (complex product) and B[i] = b_i, and for i != 0 additionally
 * mirrors b_i into B[ext_size - i].
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the real scalar type used in the kernel; the complex type is
 *                        formed by appending "2" (e.g. "float" -> "float2").
 */
template <typename StringType>
void generate_fft_bluestein_pre(StringType & source, std::string const & numeric_string)
{
  source.append("__kernel void bluestein_pre(__global "); source.append(numeric_string); source.append("2 *input, \n");
  source.append(" __global "); source.append(numeric_string); source.append("2 *A, \n");
  source.append(" __global "); source.append(numeric_string); source.append("2 *B, \n");
  source.append(" unsigned int size, \n");
  source.append(" unsigned int ext_size \n");
  source.append(" ) { \n");
  source.append(" unsigned int glb_id = get_global_id(0); \n");
  source.append(" unsigned int glb_sz = get_global_size(0); \n");

  source.append(" unsigned int double_size = size << 1; \n");

  source.append(" "); source.append(numeric_string); source.append(" sn_a, cs_a; \n");
  source.append(" const "); source.append(numeric_string); source.append(" NUM_PI = 3.14159265358979323846; \n");

  source.append(" for(unsigned int i = glb_id; i < size; i += glb_sz) { \n");
  // A 32-bit i*i wraps around once i >= 65536, which corrupts the remainder whenever
  // 2*size is not a power of two. Form the product in 64 bits before reducing it;
  // the remainder is < double_size and therefore fits back into an unsigned int.
  // NOTE(review): relies on 64-bit integer support (ulong) on the target device.
  source.append(" unsigned int rm = (unsigned int)(((ulong)i * i) % double_size); \n");
  source.append(" "); source.append(numeric_string); source.append(" angle = ("); source.append(numeric_string); source.append(")rm / size * NUM_PI; \n");

  // sincos(-angle): sn_a = -sin(angle), cs_a = cos(angle)
  source.append(" sn_a = sincos(-angle, &cs_a); \n");

  source.append(" "); source.append(numeric_string); source.append("2 a_i = ("); source.append(numeric_string); source.append("2)(cs_a, sn_a); \n");
  source.append(" "); source.append(numeric_string); source.append("2 b_i = ("); source.append(numeric_string); source.append("2)(cs_a, -sn_a); \n");

  // complex product input[i] * a_i
  source.append(" A[i] = ("); source.append(numeric_string); source.append("2)(input[i].x * a_i.x - input[i].y * a_i.y, input[i].x * a_i.y + input[i].y * a_i.x); \n");
  source.append(" B[i] = b_i; \n");

  // very bad instruction, to be fixed
  source.append(" if(i) \n");
  source.append(" B[ext_size - i] = b_i; \n");
  source.append(" } \n");
  source.append("} \n");
}
86 
/** @brief Extract real part of a complex number array.
 *
 * Appends the source of the kernel `complex_to_real`, which copies the .x component of
 * in[i] to out[i] for every i < size.
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the real scalar type; the complex type is this name followed by "2".
 */
template <typename StringType>
void generate_fft_complex_to_real(StringType & source, std::string const & numeric_string)
{
  std::string const complex_type = numeric_string + "2";

  // Assemble the whole kernel locally, then hand it over in one piece.
  std::string kernel_code;
  kernel_code += "__kernel void complex_to_real(__global " + complex_type + " *in, \n";
  kernel_code += " __global " + numeric_string + " *out, \n";
  kernel_code += " unsigned int size) { \n";
  kernel_code += " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n";
  kernel_code += " out[i] = in[i].x; \n";
  kernel_code += "} \n";

  source.append(kernel_code);
}
98 
/** @brief OpenCL kernel generation code for dividing a complex number by a real number.
 *
 * Appends the source of the kernel `fft_div_vec_scalar`, which divides every complex entry
 * input1[i], i < size, in place by the real scalar `factor`.
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the real scalar type; the complex type is this name followed by "2".
 */
template <typename StringType>
void generate_fft_div_vec_scalar(StringType & source, std::string const & numeric_string)
{
  std::string const complex_type = numeric_string + "2";

  std::string kernel_code;
  kernel_code += "__kernel void fft_div_vec_scalar(__global " + complex_type + " *input1, \n";
  kernel_code += " unsigned int size, \n";
  kernel_code += " " + numeric_string + " factor) { \n";
  kernel_code += " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) \n";
  kernel_code += " input1[i] /= factor; \n";
  kernel_code += "} \n";

  source.append(kernel_code);
}
110 
/** @brief Elementwise product of two complex vectors.
 *
 * Appends the source of the kernel `fft_mult_vec`, which writes the complex product
 * input1[i] * input2[i] to output[i] for every i < size.
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the real scalar type; the complex type is this name followed by "2".
 */
template <typename StringType>
void generate_fft_mult_vec(StringType & source, std::string const & numeric_string)
{
  std::string const complex_type = numeric_string + "2";

  std::string kernel_code;
  // signature
  kernel_code += "__kernel void fft_mult_vec(__global const " + complex_type + " *input1, \n";
  kernel_code += " __global const " + complex_type + " *input2, \n";
  kernel_code += " __global " + complex_type + " *output, \n";
  kernel_code += " unsigned int size) { \n";
  // body: (a + ib)(c + id) = (ac - bd) + i(ad + bc)
  kernel_code += " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n";
  kernel_code += " " + complex_type + " in1 = input1[i]; \n";
  kernel_code += " " + complex_type + " in2 = input2[i]; \n";
  kernel_code += " output[i] = (" + complex_type + ")(in1.x * in2.x - in1.y * in2.y, in1.x * in2.y + in1.y * in2.x); \n";
  kernel_code += " } \n";
  kernel_code += "} \n";

  source.append(kernel_code);
}
127 
/** @brief Embeds a real-valued vector into a complex one.
 *
 * Appends the source of the kernel `real_to_complex`, which for every i < size writes a
 * complex value with real part in[i] and zero imaginary part to out[i].
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the real scalar type; the complex type is this name followed by "2".
 */
template <typename StringType>
void generate_fft_real_to_complex(StringType & source, std::string const & numeric_string)
{
  std::string const complex_type = numeric_string + "2";

  std::string kernel_code;
  kernel_code += "__kernel void real_to_complex(__global " + numeric_string + " *in, \n";
  kernel_code += " __global " + complex_type + " *out, \n";
  kernel_code += " unsigned int size) { \n";
  kernel_code += " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n";
  kernel_code += " " + complex_type + " val = 0; \n";
  kernel_code += " val.x = in[i]; \n";
  kernel_code += " out[i] = val; \n";
  kernel_code += " } \n";
  kernel_code += "} \n";

  source.append(kernel_code);
}
142 
/** @brief Reverses the entries in a vector.
 *
 * Appends the source of the kernel `reverse_inplace`, which swaps vec[i] with
 * vec[size - i - 1] for every i < size/2.
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the element type of the vector in the generated kernel.
 */
template <typename StringType>
void generate_fft_reverse_inplace(StringType & source, std::string const & numeric_string)
{
  std::string kernel_code;
  kernel_code += "__kernel void reverse_inplace(__global " + numeric_string + " *vec, uint size) { \n";
  kernel_code += " for(uint i = get_global_id(0); i < (size >> 1); i+=get_global_size(0)) { \n";
  kernel_code += " " + numeric_string + " val1 = vec[i]; \n";
  kernel_code += " " + numeric_string + " val2 = vec[size - i - 1]; \n";
  kernel_code += " vec[i] = val2; \n";
  kernel_code += " vec[size - i - 1] = val1; \n";
  kernel_code += " } \n";
  kernel_code += "} \n";

  source.append(kernel_code);
}
157 
/** @brief Simplistic matrix transpose function.
 *
 * Appends the source of the kernel `transpose`. For every linear index i < row_num * col_num
 * the kernel computes row = i / col_num and col = i - row * col_num and copies input[i] to
 * output[col * row_num + row].
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the real scalar type; the complex type is this name followed by "2".
 */
template <typename StringType>
void generate_fft_transpose(StringType & source, std::string const & numeric_string)
{
  std::string const complex_type = numeric_string + "2";

  std::string kernel_code;
  // signature
  kernel_code += "__kernel void transpose(__global " + complex_type + " *input, \n";
  kernel_code += " __global " + complex_type + " *output, \n";
  kernel_code += " unsigned int row_num, \n";
  kernel_code += " unsigned int col_num) { \n";
  // body
  kernel_code += " unsigned int size = row_num * col_num; \n";
  kernel_code += " for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n";
  kernel_code += " unsigned int row = i / col_num; \n";
  kernel_code += " unsigned int col = i - row*col_num; \n";
  kernel_code += " unsigned int new_pos = col * row_num + row; \n";
  kernel_code += " output[new_pos] = input[i]; \n";
  kernel_code += " } \n";
  kernel_code += "} \n";

  source.append(kernel_code);
}
177 
/** @brief Simplistic inplace matrix transpose function.
 *
 * Appends the source of the kernel `transpose_inplace`. For every linear index
 * i < row_num * col_num the kernel computes new_pos = col * row_num + row (with
 * row = i / col_num, col = i - row * col_num) and, only when i < new_pos, swaps
 * input[i] with input[new_pos].
 *
 * NOTE(review): a pairwise swap is an exact transpose only if the index map i -> new_pos is
 * its own inverse (e.g. row_num == col_num); confirm that callers never pass other shapes.
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the real scalar type; the complex type is this name followed by "2".
 */
template <typename StringType>
void generate_fft_transpose_inplace(StringType & source, std::string const & numeric_string)
{
  std::string const complex_type = numeric_string + "2";

  std::string kernel_code;
  // signature
  kernel_code += "__kernel void transpose_inplace(__global " + complex_type + "* input, \n";
  kernel_code += " unsigned int row_num, \n";
  kernel_code += " unsigned int col_num) { \n";
  // body
  kernel_code += " unsigned int size = row_num * col_num; \n";
  kernel_code += " for(unsigned int i = get_global_id(0); i < size; i+= get_global_size(0)) { \n";
  kernel_code += " unsigned int row = i / col_num; \n";
  kernel_code += " unsigned int col = i - row*col_num; \n";
  kernel_code += " unsigned int new_pos = col * row_num + row; \n";
  kernel_code += " if(i < new_pos) { \n";
  kernel_code += " " + complex_type + " val = input[i]; \n";
  kernel_code += " input[i] = input[new_pos]; \n";
  kernel_code += " input[new_pos] = val; \n";
  kernel_code += " } \n";
  kernel_code += " } \n";
  kernel_code += "} \n";

  source.append(kernel_code);
}
200 
/** @brief Computes the matrix vector product with a Vandermonde matrix.
 *
 * Appends the source of the kernel `vandermonde_prod`. For every i < size the kernel
 * accumulates the sum over j < size of vander[i]^j * vector[j], building the powers by
 * repeated multiplication, and stores the sum in result[i].
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the scalar type used for all buffers and temporaries.
 */
template <typename StringType>
void generate_fft_vandermonde_prod(StringType & source, std::string const & numeric_string)
{
  std::string kernel_code;
  // signature
  kernel_code += "__kernel void vandermonde_prod(__global " + numeric_string + " *vander, \n";
  kernel_code += " __global " + numeric_string + " *vector, \n";
  kernel_code += " __global " + numeric_string + " *result, \n";
  kernel_code += " uint size) { \n";
  // body
  kernel_code += " for(uint i = get_global_id(0); i < size; i+= get_global_size(0)) { \n";
  kernel_code += " " + numeric_string + " mul = vander[i]; \n";
  kernel_code += " " + numeric_string + " pwr = 1; \n";
  kernel_code += " " + numeric_string + " val = 0; \n";
  kernel_code += " for(uint j = 0; j < size; j++) { \n";
  kernel_code += " val = val + pwr * vector[j]; \n";
  kernel_code += " pwr *= mul; \n";
  kernel_code += " } \n";
  kernel_code += " result[i] = val; \n";
  kernel_code += " } \n";
  kernel_code += "} \n";

  source.append(kernel_code);
}
223 
/** @brief Zero two complex vectors (to avoid kernel launch overhead).
 *
 * Appends the source of the kernel `zero2`, which assigns 0 to input1[i] and input2[i]
 * for every i < size.
 *
 * @param source          String-like object providing append(); receives the OpenCL source text.
 * @param numeric_string  Name of the real scalar type; the complex type is this name followed by "2".
 */
template <typename StringType>
void generate_fft_zero2(StringType & source, std::string const & numeric_string)
{
  std::string const complex_type = numeric_string + "2";

  std::string kernel_code;
  kernel_code += "__kernel void zero2(__global " + complex_type + " *input1, \n";
  kernel_code += " __global " + complex_type + " *input2, \n";
  kernel_code += " unsigned int size) { \n";
  kernel_code += " for (unsigned int i = get_global_id(0); i < size; i += get_global_size(0)) { \n";
  kernel_code += " input1[i] = 0; \n";
  kernel_code += " input2[i] = 0; \n";
  kernel_code += " } \n";
  kernel_code += "} \n";

  source.append(kernel_code);
}
237 
239 
240  // main kernel class
242  template <class NumericT>
243  struct fft
244  {
245  static std::string program_name()
246  {
248  }
249 
250  static void init(viennacl::ocl::context & ctx)
251  {
253  std::string numeric_string = viennacl::ocl::type_to_string<NumericT>::apply();
254 
255  static std::map<cl_context, bool> init_done;
256  if (!init_done[ctx.handle().get()])
257  {
258  std::string source;
259  source.reserve(8192);
260 
261  viennacl::ocl::append_double_precision_pragma<NumericT>(ctx, source);
262 
263  // unary operations
264  if (numeric_string == "float" || numeric_string == "double")
265  {
266  generate_fft_bluestein_post(source, numeric_string);
267  generate_fft_bluestein_pre(source, numeric_string);
268  generate_fft_complex_to_real(source, numeric_string);
269  generate_fft_div_vec_scalar(source, numeric_string);
270  generate_fft_mult_vec(source, numeric_string);
271  generate_fft_real_to_complex(source, numeric_string);
272  generate_fft_reverse_inplace(source, numeric_string);
273  generate_fft_transpose(source, numeric_string);
274  generate_fft_transpose_inplace(source, numeric_string);
275  generate_fft_vandermonde_prod(source, numeric_string);
276  generate_fft_zero2(source, numeric_string);
277  }
278 
279  std::string prog_name = program_name();
280  #ifdef VIENNACL_BUILD_INFO
281  std::cout << "Creating program " << prog_name << std::endl;
282  #endif
283  ctx.add_program(source, prog_name);
284  init_done[ctx.handle().get()] = true;
285  } //if
286  } //init
287  };
288 
289  } // namespace kernels
290  } // namespace opencl
291  } // namespace linalg
292 } // namespace viennacl
293 #endif
294 
void generate_fft_bluestein_pre(StringType &source, std::string const &numeric_string)
Definition: fft.hpp:52
static std::string program_name()
Definition: fft.hpp:245
Implements an OpenCL platform within ViennaCL.
void generate_fft_mult_vec(StringType &source, std::string const &numeric_string)
Elementwise product of two complex vectors.
Definition: fft.hpp:113
void generate_fft_zero2(StringType &source, std::string const &numeric_string)
Zero two complex vectors (to avoid kernel launch overhead)
Definition: fft.hpp:226
void generate_fft_transpose(StringType &source, std::string const &numeric_string)
Simplistic matrix transpose function.
Definition: fft.hpp:160
Various little tools used here and there in ViennaCL.
Main kernel class for generating OpenCL kernels for the fast Fourier transform.
Definition: fft.hpp:243
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
Provides OpenCL-related utilities.
static void init(viennacl::ocl::context &ctx)
Definition: fft.hpp:250
void generate_fft_transpose_inplace(StringType &source, std::string const &numeric_string)
Simplistic inplace matrix transpose function.
Definition: fft.hpp:180
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:476
void generate_fft_complex_to_real(StringType &source, std::string const &numeric_string)
Extract real part of a complex number array.
Definition: fft.hpp:89
void generate_fft_real_to_complex(StringType &source, std::string const &numeric_string)
Embeds a real-valued vector into a complex one.
Definition: fft.hpp:130
const OCL_TYPE & get() const
Definition: handle.hpp:189
void generate_fft_div_vec_scalar(StringType &source, std::string const &numeric_string)
OpenCL kernel generation code for dividing a complex number by a real number.
Definition: fft.hpp:101
void generate_fft_vandermonde_prod(StringType &source, std::string const &numeric_string)
Computes the matrix vector product with a Vandermonde matrix.
Definition: fft.hpp:203
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
void generate_fft_reverse_inplace(StringType &source, std::string const &numeric_string)
Reverses the entries in a vector.
Definition: fft.hpp:145
void generate_fft_bluestein_post(StringType &source, std::string const &numeric_string)
Definition: fft.hpp:25
Representation of an OpenCL kernel in ViennaCL.
Helper class for converting a type to its string representation.
Definition: utils.hpp:57