TensorFunctors.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H

namespace Eigen {
namespace internal {

// Sigmoid functor: computes 1 / (1 + exp(-x)), with a vectorized path.
// numext::exp is used instead of std::exp so the scalar path is also
// callable from device code, as EIGEN_DEVICE_FUNC promises.
template <typename T>
struct scalar_sigmoid_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
    const T one = T(1);
    return one / (one + numext::exp(-x));
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const {
    const Packet one = pset1<Packet>(T(1));
    return pdiv(one, padd(one, pexp(pnegate(x))));
  }
};

template <typename T>
struct functor_traits<scalar_sigmoid_op<T> > {
  enum {
    Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
    PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
                   packet_traits<T>::HasNegate && packet_traits<T>::HasExp
  };
};

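// Illustrative usage (a minimal sketch; the functor is normally invoked by
// Eigen's expression evaluators rather than called directly):
//   scalar_sigmoid_op<float> sigmoid;
//   float y = sigmoid(0.0f);   // 0.5
//   float z = sigmoid(10.0f);  // ~1.0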

// Standard reduction functors.
// Each reducer exposes a common interface driven by the tensor reduction
// evaluators: initialize() returns the identity element, reduce() folds one
// scalar into the accumulator, and finalize() post-processes the result.
// The packet variants (initializePacket/reducePacket/finalizePacket) apply
// the same contract to SIMD packets, and finalizeBoth() merges a scalar
// accumulator with a packet accumulator into the final scalar result.
template <typename T> struct SumReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    (*accum) += t;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = padd<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return saccum + predux(vaccum);
  }
};
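// A minimal sketch of how an evaluator drives a reducer (hand-rolled here
// for illustration; real reductions go through the tensor reduction ops):
//   SumReducer<float> reducer;
//   float acc = reducer.initialize();                      // 0
//   for (float v : {1.f, 2.f, 3.f}) reducer.reduce(v, &acc);
//   float sum = reducer.finalize(acc);                     // 6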

template <typename T> struct MeanReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = true;

  MeanReducer() : scalarCount_(0), packetCount_(0) { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    (*accum) += t;
    scalarCount_++;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) {
    (*accum) = padd<Packet>(*accum, p);
    packetCount_++;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum / scalarCount_;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, pset1<Packet>(packetCount_));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    // Each packet reduction folded unpacket_traits<Packet>::size elements,
    // so the total element count combines both counters.
    return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
  }

 protected:
  int scalarCount_;
  int packetCount_;
};
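// Sketch: averaging three values through the stateful interface:
//   MeanReducer<float> mean;
//   float acc = mean.initialize();
//   mean.reduce(1.f, &acc); mean.reduce(2.f, &acc); mean.reduce(6.f, &acc);
//   float m = mean.finalize(acc);  // 9 / 3 == 3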

template <typename T> struct MaxReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    if (t > *accum) { *accum = t; }
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = pmax<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    // NumTraits::lowest() is the proper identity for max: unlike
    // -(std::numeric_limits<T>::max)(), it is correct for unsigned types
    // and usable from device code.
    return Eigen::NumTraits<T>::lowest();
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return numext::maxi(saccum, predux_max(vaccum));
  }
};

template <typename T> struct MinReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    if (t < *accum) { *accum = t; }
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = pmin<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return Eigen::NumTraits<T>::highest();
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return numext::mini(saccum, predux_min(vaccum));
  }
};

template <typename T> struct ProdReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    (*accum) *= t;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = pmul<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(1);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(initialize());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return saccum * predux_mul(vaccum);
  }
};

// Argmin/argmax reducers. These operate on (index, value) tuples: T is
// expected to provide std::pair-like 'first' (the index) and 'second'
// (the value) members, plus a 'second_type' typedef. Tuples are passed by
// const reference since they are larger than a scalar.
template <typename T> struct ArgMaxTupleReducer
{
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
    if (t.second > accum->second) { *accum = t; }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return T(0, NumTraits<typename T::second_type>::lowest());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
    return accum;
  }
};

template <typename T> struct ArgMinTupleReducer
{
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
    if (t.second < accum->second) { *accum = t; }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return T(0, NumTraits<typename T::second_type>::highest());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
    return accum;
  }
};
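// Illustrative sketch (hand-driven; std::pair satisfies the tuple
// requirements, though the tensor evaluators use an internal tuple type):
//   typedef std::pair<int, float> IdxVal;
//   ArgMaxTupleReducer<IdxVal> argmax;
//   IdxVal best = argmax.initialize();
//   argmax.reduce(IdxVal(0, 1.5f), &best);
//   argmax.reduce(IdxVal(1, 4.0f), &best);  // best == (1, 4.0f)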

// Random number generation.
// get_random_seed() derives a seed from a wall-clock source: the GPU path
// uses clock(), and each host platform uses the highest-resolution timer
// readily available to it.
namespace {
#ifdef __CUDA_ARCH__
__device__ int get_random_seed() {
  return clock();
}
#else
int get_random_seed() {
#ifdef _WIN32
  SYSTEMTIME st;
  GetSystemTime(&st);
  return st.wSecond + 1000 * st.wMilliseconds;
#elif defined __APPLE__
  return static_cast<int>(mach_absolute_time());
#else
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return static_cast<int>(ts.tv_nsec);
#endif
}
#endif
}  // namespace

#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
// We're not compiling a cuda kernel
template <typename T> class UniformRandomGenerator {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      srand(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
  }

  template<typename Index>
  T operator()(Index, Index = 0) const {
    return random<T>();
  }
  template<typename Index>
  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
    const int packetSize = internal::packet_traits<T>::size;
    EIGEN_ALIGN_MAX T values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = random<T>();
    }
    return internal::pload<typename internal::packet_traits<T>::type>(values);
  }

 private:
  bool m_deterministic;
};
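// Illustrative use (a sketch; in practice these generators are consumed by
// the tensor expression framework rather than called directly):
//   UniformRandomGenerator<float> gen(/*deterministic=*/false);
//   float r = gen(0);  // one random sample; the index arguments are ignored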

#if __cplusplus > 199711  // C++11: use std::mt19937-based generators
template <> class UniformRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      m_generator.seed(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator<float>& other) {
    m_generator.seed(other(0, 0) * UINT_MAX);
    m_deterministic = other.m_deterministic;
  }

  template<typename Index>
  float operator()(Index, Index = 0) const {
    return m_distribution(m_generator);
  }
  template<typename Index>
  typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const {
    const int packetSize = internal::packet_traits<float>::size;
    EIGEN_ALIGN_MAX float values[packetSize];
    for (int k = 0; k < packetSize; ++k) {
      values[k] = this->operator()(i, j);
    }
    return internal::pload<typename internal::packet_traits<float>::type>(values);
  }

 private:
  UniformRandomGenerator& operator = (const UniformRandomGenerator&);
  // Keep m_deterministic first so the layout matches the other
  // UniformRandomGenerator variants.
  bool m_deterministic;
  mutable std::mt19937 m_generator;
  mutable std::uniform_real_distribution<float> m_distribution;
};

template <> class UniformRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      m_generator.seed(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator<double>& other) {
    m_generator.seed(other(0, 0) * UINT_MAX);
    m_deterministic = other.m_deterministic;
  }

  template<typename Index>
  double operator()(Index, Index = 0) const {
    return m_distribution(m_generator);
  }
  template<typename Index>
  typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const {
    const int packetSize = internal::packet_traits<double>::size;
    EIGEN_ALIGN_MAX double values[packetSize];
    for (int k = 0; k < packetSize; ++k) {
      values[k] = this->operator()(i, j);
    }
    return internal::pload<typename internal::packet_traits<double>::type>(values);
  }

 private:
  UniformRandomGenerator& operator = (const UniformRandomGenerator&);
  // Keep m_deterministic first so the layout matches the other
  // UniformRandomGenerator variants.
  bool m_deterministic;
  mutable std::mt19937 m_generator;
  mutable std::uniform_real_distribution<double> m_distribution;
};
#endif  // __cplusplus > 199711

#else

// We're compiling a cuda kernel
template <typename T> class UniformRandomGenerator;

template <> class UniformRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ float operator()(Index, Index = 0) const {
    return curand_uniform(&m_state);
  }
  template<typename Index>
  __device__ float4 packetOp(Index, Index = 0) const {
    return curand_uniform4(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class UniformRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ double operator()(Index, Index = 0) const {
    return curand_uniform_double(&m_state);
  }
  template<typename Index>
  __device__ double2 packetOp(Index, Index = 0) const {
    return curand_uniform2_double(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class UniformRandomGenerator<std::complex<float> > {
 public:
  static const bool PacketAccess = false;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<float> operator()(Index, Index = 0) const {
    float4 vals = curand_uniform4(&m_state);
    return std::complex<float>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class UniformRandomGenerator<std::complex<double> > {
 public:
  static const bool PacketAccess = false;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<double> operator()(Index, Index = 0) const {
    double2 vals = curand_uniform2_double(&m_state);
    return std::complex<double>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

#endif


#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && __cplusplus > 199711
// We're not compiling a cuda kernel
template <typename T> class NormalRandomGenerator {
 public:
  static const bool PacketAccess = true;

  NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1) {
    if (!deterministic) {
      m_generator.seed(get_random_seed());
    }
  }
  NormalRandomGenerator(const NormalRandomGenerator& other)
      : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution) {
    m_generator.seed(other(0, 0) * UINT_MAX);
  }

  template<typename Index>
  T operator()(Index, Index = 0) const {
    return m_distribution(m_generator);
  }
  template<typename Index>
  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
    const int packetSize = internal::packet_traits<T>::size;
    EIGEN_ALIGN_MAX T values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = m_distribution(m_generator);
    }
    return internal::pload<typename internal::packet_traits<T>::type>(values);
  }

 private:
  bool m_deterministic;
  mutable std::normal_distribution<T> m_distribution;
  mutable std::mt19937 m_generator;
};

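// Sketch: drawing standard-normal host-side samples (deterministic mode keeps
// the default mt19937 seed, so repeated runs produce the same sequence):
//   NormalRandomGenerator<double> gen;
//   double sample = gen(0);  // N(0, 1) draw; the index arguments are unused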
#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)

// We're compiling a cuda kernel
template <typename T> class NormalRandomGenerator;

template <> class NormalRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator<float>& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ float operator()(Index, Index = 0) const {
    return curand_normal(&m_state);
  }
  template<typename Index>
  __device__ float4 packetOp(Index, Index = 0) const {
    return curand_normal4(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class NormalRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator<double>& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ double operator()(Index, Index = 0) const {
    return curand_normal_double(&m_state);
  }
  template<typename Index>
  __device__ double2 packetOp(Index, Index = 0) const {
    return curand_normal2_double(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class NormalRandomGenerator<std::complex<float> > {
 public:
  static const bool PacketAccess = false;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<float> operator()(Index, Index = 0) const {
    float4 vals = curand_normal4(&m_state);
    return std::complex<float>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

template <> class NormalRandomGenerator<std::complex<double> > {
 public:
  static const bool PacketAccess = false;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  template<typename Index>
  __device__ std::complex<double> operator()(Index, Index = 0) const {
    double2 vals = curand_normal2_double(&m_state);
    return std::complex<double>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

#else

// Pre-C++11 host build: no std::normal_distribution available, so keep a
// stub that records the flag and lets the class still compile where it is
// referenced.
template <typename T> class NormalRandomGenerator {
 public:
  NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {}

 private:
  bool m_deterministic;
};

#endif


// Generator producing a Gaussian "bump": for coordinates x with per-dimension
// means mu_i and standard deviations sigma_i it evaluates
//   exp(-sum_i (x_i - mu_i)^2 / (2 * sigma_i^2)),
// i.e. a Gaussian kernel without the normalization factor.
template <typename T, typename Index, size_t NumDims>
class GaussianGenerator {
 public:
  static const bool PacketAccess = false;

  EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means,
                                      const array<T, NumDims>& std_devs)
      : m_means(means)
  {
    for (size_t i = 0; i < NumDims; ++i) {
      m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
    }
  }

  EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const {
    T tmp = T(0);
    for (size_t i = 0; i < NumDims; ++i) {
      T offset = coordinates[i] - m_means[i];
      tmp += offset * offset / m_two_sigmas[i];
    }
    return numext::exp(-tmp);
  }

 private:
  array<T, NumDims> m_means;
  array<T, NumDims> m_two_sigmas;
};
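// Illustrative use with the tensor generate() expression (a sketch; 'input'
// is a hypothetical 2-D float tensor supplying the output shape):
//   Eigen::array<float, 2> means = {{5.f, 5.f}};
//   Eigen::array<float, 2> std_devs = {{2.f, 2.f}};
//   GaussianGenerator<float, Eigen::DenseIndex, 2> gauss(means, std_devs);
//   Eigen::Tensor<float, 2> bump = input.generate(gauss);  // peaks near (5, 5)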

} // end namespace internal
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H