10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H 11 #define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H 20 template <
typename Scalar>
21 struct scalar_mod_op {
22 EIGEN_DEVICE_FUNC scalar_mod_op(
const Scalar& divisor) : m_divisor(divisor) {}
23 EIGEN_DEVICE_FUNC
inline Scalar operator() (
const Scalar& a)
const {
return a % m_divisor; }
24 const Scalar m_divisor;
26 template <
typename Scalar>
27 struct functor_traits<scalar_mod_op<Scalar> >
28 {
enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess =
false }; };
34 template <
typename Scalar>
35 struct scalar_mod2_op {
36 EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op);
37 EIGEN_DEVICE_FUNC
inline Scalar operator() (
const Scalar& a,
const Scalar& b)
const {
return a % b; }
39 template <
typename Scalar>
40 struct functor_traits<scalar_mod2_op<Scalar> >
41 {
enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess =
false }; };
43 template <
typename Scalar>
44 struct scalar_fmod_op {
45 EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op);
46 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar
47 operator()(
const Scalar& a,
const Scalar& b)
const {
48 return numext::fmod(a, b);
51 template <
typename Scalar>
52 struct functor_traits<scalar_fmod_op<Scalar> > {
54 PacketAccess =
false };
63 struct scalar_sigmoid_op {
64 EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
65 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x)
const {
67 return one / (one + numext::exp(-x));
70 template <
typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
71 Packet packetOp(
const Packet& x)
const {
72 const Packet one = pset1<Packet>(T(1));
73 return pdiv(one, padd(one, pexp(pnegate(x))));
78 struct functor_traits<scalar_sigmoid_op<T> > {
80 Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
81 PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
82 packet_traits<T>::HasNegate && packet_traits<T>::HasExp
// Standard reduction functors.
// Primary template: conservative defaults, specialized per reducer below.
template <typename Reducer, typename Device>
struct reducer_traits {
  enum {
    Cost = 1,
    PacketAccess = false
  };
};
96 template <
typename T>
struct SumReducer
98 static const bool PacketAccess = packet_traits<T>::HasAdd;
99 static const bool IsStateful =
false;
101 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reduce(
const T t, T* accum)
const {
104 template <
typename Packet>
105 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reducePacket(
const Packet& p, Packet* accum)
const {
106 (*accum) = padd<Packet>(*accum, p);
109 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize()
const {
110 internal::scalar_cast_op<int, T> conv;
113 template <
typename Packet>
114 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket()
const {
115 return pset1<Packet>(initialize());
117 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(
const T accum)
const {
120 template <
typename Packet>
121 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(
const Packet& vaccum)
const {
124 template <
typename Packet>
125 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(
const T saccum,
const Packet& vaccum)
const {
126 return saccum + predux(vaccum);
130 template <
typename T,
typename Device>
131 struct reducer_traits<SumReducer<T>, Device> {
133 Cost = NumTraits<T>::AddCost,
134 PacketAccess = PacketType<T, Device>::HasAdd
139 template <
typename T>
struct MeanReducer
141 static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
142 static const bool IsStateful =
true;
144 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
145 MeanReducer() : scalarCount_(0), packetCount_(0) { }
147 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reduce(
const T t, T* accum) {
151 template <
typename Packet>
152 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reducePacket(
const Packet& p, Packet* accum) {
153 (*accum) = padd<Packet>(*accum, p);
157 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize()
const {
158 internal::scalar_cast_op<int, T> conv;
161 template <
typename Packet>
162 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket()
const {
163 return pset1<Packet>(initialize());
165 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(
const T accum)
const {
166 return accum / scalarCount_;
168 template <
typename Packet>
169 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(
const Packet& vaccum)
const {
170 return pdiv(vaccum, pset1<Packet>(packetCount_));
172 template <
typename Packet>
173 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(
const T saccum,
const Packet& vaccum)
const {
174 return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
178 DenseIndex scalarCount_;
179 DenseIndex packetCount_;
182 template <
typename T,
typename Device>
183 struct reducer_traits<MeanReducer<T>, Device> {
185 Cost = NumTraits<T>::AddCost,
186 PacketAccess = PacketType<T, Device>::HasAdd
191 template <
typename T>
struct MaxReducer
193 static const bool PacketAccess = packet_traits<T>::HasMax;
194 static const bool IsStateful =
false;
196 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reduce(
const T t, T* accum)
const {
197 if (t > *accum) { *accum = t; }
199 template <
typename Packet>
200 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reducePacket(
const Packet& p, Packet* accum)
const {
201 (*accum) = pmax<Packet>(*accum, p);
204 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize()
const {
205 return Eigen::NumTraits<T>::lowest();
207 template <
typename Packet>
208 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket()
const {
209 return pset1<Packet>(initialize());
211 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(
const T accum)
const {
214 template <
typename Packet>
215 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(
const Packet& vaccum)
const {
218 template <
typename Packet>
219 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(
const T saccum,
const Packet& vaccum)
const {
220 return numext::maxi(saccum, predux_max(vaccum));
224 template <
typename T,
typename Device>
225 struct reducer_traits<MaxReducer<T>, Device> {
227 Cost = NumTraits<T>::AddCost,
228 PacketAccess = PacketType<T, Device>::HasMax
233 template <
typename T>
struct MinReducer
235 static const bool PacketAccess = packet_traits<T>::HasMin;
236 static const bool IsStateful =
false;
238 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reduce(
const T t, T* accum)
const {
239 if (t < *accum) { *accum = t; }
241 template <
typename Packet>
242 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reducePacket(
const Packet& p, Packet* accum)
const {
243 (*accum) = pmin<Packet>(*accum, p);
246 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize()
const {
247 return Eigen::NumTraits<T>::highest();
249 template <
typename Packet>
250 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket()
const {
251 return pset1<Packet>(initialize());
253 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(
const T accum)
const {
256 template <
typename Packet>
257 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(
const Packet& vaccum)
const {
260 template <
typename Packet>
261 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(
const T saccum,
const Packet& vaccum)
const {
262 return numext::mini(saccum, predux_min(vaccum));
266 template <
typename T,
typename Device>
267 struct reducer_traits<MinReducer<T>, Device> {
269 Cost = NumTraits<T>::AddCost,
270 PacketAccess = PacketType<T, Device>::HasMin
275 template <
typename T>
struct ProdReducer
277 static const bool PacketAccess = packet_traits<T>::HasMul;
278 static const bool IsStateful =
false;
280 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reduce(
const T t, T* accum)
const {
283 template <
typename Packet>
284 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reducePacket(
const Packet& p, Packet* accum)
const {
285 (*accum) = pmul<Packet>(*accum, p);
288 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize()
const {
289 internal::scalar_cast_op<int, T> conv;
292 template <
typename Packet>
293 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket()
const {
294 return pset1<Packet>(initialize());
296 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(
const T accum)
const {
299 template <
typename Packet>
300 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(
const Packet& vaccum)
const {
303 template <
typename Packet>
304 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(
const T saccum,
const Packet& vaccum)
const {
305 return saccum * predux_mul(vaccum);
309 template <
typename T,
typename Device>
310 struct reducer_traits<ProdReducer<T>, Device> {
312 Cost = NumTraits<T>::MulCost,
313 PacketAccess = PacketType<T, Device>::HasMul
320 static const bool PacketAccess =
false;
321 static const bool IsStateful =
false;
323 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reduce(
bool t,
bool* accum)
const {
324 *accum = *accum && t;
326 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool initialize()
const {
329 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool finalize(
bool accum)
const {
334 template <
typename Device>
335 struct reducer_traits<AndReducer, Device> {
344 static const bool PacketAccess =
false;
345 static const bool IsStateful =
false;
347 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reduce(
bool t,
bool* accum)
const {
348 *accum = *accum || t;
350 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool initialize()
const {
353 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool finalize(
bool accum)
const {
358 template <
typename Device>
359 struct reducer_traits<OrReducer, Device> {
368 template <
typename T>
struct ArgMaxTupleReducer
370 static const bool PacketAccess =
false;
371 static const bool IsStateful =
false;
373 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reduce(
const T t, T* accum)
const {
374 if (t.second > accum->second) { *accum = t; }
376 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize()
const {
377 return T(0, NumTraits<typename T::second_type>::lowest());
379 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(
const T& accum)
const {
384 template <
typename T,
typename Device>
385 struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
387 Cost = NumTraits<T>::AddCost,
393 template <
typename T>
struct ArgMinTupleReducer
395 static const bool PacketAccess =
false;
396 static const bool IsStateful =
false;
398 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void reduce(
const T& t, T* accum)
const {
399 if (t.second < accum->second) { *accum = t; }
401 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize()
const {
402 return T(0, NumTraits<typename T::second_type>::highest());
404 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(
const T& accum)
const {
409 template <
typename T,
typename Device>
410 struct reducer_traits<ArgMinTupleReducer<T>, Device> {
412 Cost = NumTraits<T>::AddCost,
421 __device__
int get_random_seed() {
425 static inline int get_random_seed() {
429 return st.wSecond + 1000 * st.wMilliseconds;
430 #elif defined __APPLE__ 431 return static_cast<int>(mach_absolute_time());
434 clock_gettime(CLOCK_REALTIME, &ts);
435 return static_cast<int>(ts.tv_nsec);
441 #if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__) 443 template <
typename T>
class UniformRandomGenerator {
446 static const bool PacketAccess =
true;
448 UniformRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {
449 if (!deterministic) {
450 srand(get_random_seed());
453 UniformRandomGenerator(
const UniformRandomGenerator& other) {
454 m_deterministic = other.m_deterministic;
457 template<
typename Index>
458 T operator()(Index)
const {
461 template<
typename Index,
typename PacketType>
462 PacketType packetOp(Index)
const {
463 const int packetSize = internal::unpacket_traits<PacketType>::size;
464 EIGEN_ALIGN_MAX T values[packetSize];
465 for (
int i = 0; i < packetSize; ++i) {
466 values[i] = random<T>();
468 return internal::pload<PacketType>(values);
472 bool m_deterministic;
475 #if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 476 template <>
class UniformRandomGenerator<float> {
478 static const bool PacketAccess =
true;
480 UniformRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic), m_generator(new std::mt19937()) {
481 if (!deterministic) {
482 m_generator->seed(get_random_seed());
485 UniformRandomGenerator(
const UniformRandomGenerator<float>& other) {
486 m_generator =
new std::mt19937();
487 m_generator->seed(other(0) * UINT_MAX);
488 m_deterministic = other.m_deterministic;
490 ~UniformRandomGenerator() {
494 template<
typename Index>
495 float operator()(Index)
const {
496 return m_distribution(*m_generator);
498 template<
typename Index,
typename PacketType>
499 PacketType packetOp(Index i)
const {
500 const int packetSize = internal::unpacket_traits<PacketType>::size;
501 EIGEN_ALIGN_MAX
float values[packetSize];
502 for (
int k = 0; k < packetSize; ++k) {
503 values[k] = this->operator()(i);
505 return internal::pload<PacketType>(values);
509 UniformRandomGenerator& operator = (
const UniformRandomGenerator&);
512 bool m_deterministic;
513 std::mt19937* m_generator;
514 mutable std::uniform_real_distribution<float> m_distribution;
517 template <>
class UniformRandomGenerator<double> {
519 static const bool PacketAccess =
true;
521 UniformRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic), m_generator(new std::mt19937()) {
522 if (!deterministic) {
523 m_generator->seed(get_random_seed());
526 UniformRandomGenerator(
const UniformRandomGenerator<double>& other) {
527 m_generator =
new std::mt19937();
528 m_generator->seed(other(0) * UINT_MAX);
529 m_deterministic = other.m_deterministic;
531 ~UniformRandomGenerator() {
535 template<
typename Index>
536 double operator()(Index)
const {
537 return m_distribution(*m_generator);
539 template<
typename Index,
typename PacketType>
540 PacketType packetOp(Index i)
const {
541 const int packetSize = internal::unpacket_traits<PacketType>::size;
542 EIGEN_ALIGN_MAX
double values[packetSize];
543 for (
int k = 0; k < packetSize; ++k) {
544 values[k] = this->operator()(i);
546 return internal::pload<PacketType>(values);
550 UniformRandomGenerator& operator = (
const UniformRandomGenerator&);
553 bool m_deterministic;
554 std::mt19937* m_generator;
555 mutable std::uniform_real_distribution<double> m_distribution;
562 template <
typename T>
class UniformRandomGenerator;
564 template <>
class UniformRandomGenerator<float> {
566 static const bool PacketAccess =
true;
568 __device__ UniformRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {
569 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
570 const int seed = deterministic ? 0 : get_random_seed();
571 curand_init(seed, tid, 0, &m_state);
574 __device__ UniformRandomGenerator(
const UniformRandomGenerator& other) {
575 m_deterministic = other.m_deterministic;
576 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
577 const int seed = m_deterministic ? 0 : get_random_seed();
578 curand_init(seed, tid, 0, &m_state);
581 template<
typename Index>
582 __device__
float operator()(Index)
const {
583 return curand_uniform(&m_state);
585 template<
typename Index,
typename PacketType>
586 __device__ float4 packetOp(Index)
const {
587 EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
588 return curand_uniform4(&m_state);
592 bool m_deterministic;
593 mutable curandStatePhilox4_32_10_t m_state;
596 template <>
class UniformRandomGenerator<double> {
598 static const bool PacketAccess =
true;
600 __device__ UniformRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {
601 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
602 const int seed = deterministic ? 0 : get_random_seed();
603 curand_init(seed, tid, 0, &m_state);
605 __device__ UniformRandomGenerator(
const UniformRandomGenerator& other) {
606 m_deterministic = other.m_deterministic;
607 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
608 const int seed = m_deterministic ? 0 : get_random_seed();
609 curand_init(seed, tid, 0, &m_state);
611 template<
typename Index>
612 __device__
double operator()(Index)
const {
613 return curand_uniform_double(&m_state);
615 template<
typename Index,
typename PacketType>
616 __device__ double2 packetOp(Index)
const {
617 EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
618 return curand_uniform2_double(&m_state);
622 bool m_deterministic;
623 mutable curandStatePhilox4_32_10_t m_state;
626 template <>
class UniformRandomGenerator<std::complex<float> > {
628 static const bool PacketAccess =
false;
630 __device__ UniformRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {
631 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
632 const int seed = deterministic ? 0 : get_random_seed();
633 curand_init(seed, tid, 0, &m_state);
635 __device__ UniformRandomGenerator(
const UniformRandomGenerator& other) {
636 m_deterministic = other.m_deterministic;
637 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
638 const int seed = m_deterministic ? 0 : get_random_seed();
639 curand_init(seed, tid, 0, &m_state);
641 template<
typename Index>
642 __device__ std::complex<float> operator()(Index)
const {
643 float4 vals = curand_uniform4(&m_state);
644 return std::complex<float>(vals.x, vals.y);
648 bool m_deterministic;
649 mutable curandStatePhilox4_32_10_t m_state;
652 template <>
class UniformRandomGenerator<std::complex<double> > {
654 static const bool PacketAccess =
false;
656 __device__ UniformRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {
657 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
658 const int seed = deterministic ? 0 : get_random_seed();
659 curand_init(seed, tid, 0, &m_state);
661 __device__ UniformRandomGenerator(
const UniformRandomGenerator& other) {
662 m_deterministic = other.m_deterministic;
663 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
664 const int seed = m_deterministic ? 0 : get_random_seed();
665 curand_init(seed, tid, 0, &m_state);
667 template<
typename Index>
668 __device__ std::complex<double> operator()(Index)
const {
669 double2 vals = curand_uniform2_double(&m_state);
670 return std::complex<double>(vals.x, vals.y);
674 bool m_deterministic;
675 mutable curandStatePhilox4_32_10_t m_state;
680 template <
typename Scalar>
681 struct functor_traits<UniformRandomGenerator<Scalar> > {
684 Cost = 100 * NumTraits<Scalar>::MulCost,
685 PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess
691 #if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && (__cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900) 693 template <
typename T>
class NormalRandomGenerator {
695 static const bool PacketAccess =
true;
697 NormalRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic), m_distribution(0, 1), m_generator(new std::mt19937()) {
698 if (!deterministic) {
699 m_generator->seed(get_random_seed());
702 NormalRandomGenerator(
const NormalRandomGenerator& other)
703 : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) {
704 m_generator->seed(other(0) * UINT_MAX);
706 ~NormalRandomGenerator() {
709 template<
typename Index>
710 T operator()(Index)
const {
711 return m_distribution(*m_generator);
713 template<
typename Index,
typename PacketType>
714 PacketType packetOp(Index)
const {
715 const int packetSize = internal::unpacket_traits<PacketType>::size;
716 EIGEN_ALIGN_MAX T values[packetSize];
717 for (
int i = 0; i < packetSize; ++i) {
718 values[i] = m_distribution(*m_generator);
720 return internal::pload<PacketType>(values);
725 NormalRandomGenerator& operator = (
const NormalRandomGenerator&);
727 bool m_deterministic;
728 mutable std::normal_distribution<T> m_distribution;
729 std::mt19937* m_generator;
732 #elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__) 735 template <
typename T>
class NormalRandomGenerator;
737 template <>
class NormalRandomGenerator<float> {
739 static const bool PacketAccess =
true;
741 __device__ NormalRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {
742 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
743 const int seed = deterministic ? 0 : get_random_seed();
744 curand_init(seed, tid, 0, &m_state);
746 __device__ NormalRandomGenerator(
const NormalRandomGenerator<float>& other) {
747 m_deterministic = other.m_deterministic;
748 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
749 const int seed = m_deterministic ? 0 : get_random_seed();
750 curand_init(seed, tid, 0, &m_state);
752 template<
typename Index>
753 __device__
float operator()(Index)
const {
754 return curand_normal(&m_state);
756 template<
typename Index,
typename PacketType>
757 __device__ float4 packetOp(Index)
const {
758 EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
759 return curand_normal4(&m_state);
763 bool m_deterministic;
764 mutable curandStatePhilox4_32_10_t m_state;
767 template <>
class NormalRandomGenerator<double> {
769 static const bool PacketAccess =
true;
771 __device__ NormalRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {
772 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
773 const int seed = deterministic ? 0 : get_random_seed();
774 curand_init(seed, tid, 0, &m_state);
776 __device__ NormalRandomGenerator(
const NormalRandomGenerator<double>& other) {
777 m_deterministic = other.m_deterministic;
778 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
779 const int seed = m_deterministic ? 0 : get_random_seed();
780 curand_init(seed, tid, 0, &m_state);
782 template<
typename Index>
783 __device__
double operator()(Index)
const {
784 return curand_normal_double(&m_state);
786 template<
typename Index,
typename PacketType>
787 __device__ double2 packetOp(Index)
const {
788 EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
789 return curand_normal2_double(&m_state);
793 bool m_deterministic;
794 mutable curandStatePhilox4_32_10_t m_state;
797 template <>
class NormalRandomGenerator<std::complex<float> > {
799 static const bool PacketAccess =
false;
801 __device__ NormalRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {
802 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
803 const int seed = deterministic ? 0 : get_random_seed();
804 curand_init(seed, tid, 0, &m_state);
806 __device__ NormalRandomGenerator(
const NormalRandomGenerator& other) {
807 m_deterministic = other.m_deterministic;
808 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
809 const int seed = m_deterministic ? 0 : get_random_seed();
810 curand_init(seed, tid, 0, &m_state);
812 template<
typename Index>
813 __device__ std::complex<float> operator()(Index)
const {
814 float4 vals = curand_normal4(&m_state);
815 return std::complex<float>(vals.x, vals.y);
819 bool m_deterministic;
820 mutable curandStatePhilox4_32_10_t m_state;
823 template <>
class NormalRandomGenerator<std::complex<double> > {
825 static const bool PacketAccess =
false;
827 __device__ NormalRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {
828 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
829 const int seed = deterministic ? 0 : get_random_seed();
830 curand_init(seed, tid, 0, &m_state);
832 __device__ NormalRandomGenerator(
const NormalRandomGenerator& other) {
833 m_deterministic = other.m_deterministic;
834 const int tid = blockIdx.x * blockDim.x + threadIdx.x;
835 const int seed = m_deterministic ? 0 : get_random_seed();
836 curand_init(seed, tid, 0, &m_state);
838 template<
typename Index>
839 __device__ std::complex<double> operator()(Index)
const {
840 double2 vals = curand_normal2_double(&m_state);
841 return std::complex<double>(vals.x, vals.y);
845 bool m_deterministic;
846 mutable curandStatePhilox4_32_10_t m_state;
851 template <
typename T>
class NormalRandomGenerator {
853 static const bool PacketAccess =
false;
854 NormalRandomGenerator(
bool deterministic =
true) : m_deterministic(deterministic) {}
857 bool m_deterministic;
862 template <
typename Scalar>
863 struct functor_traits<NormalRandomGenerator<Scalar> > {
866 Cost = 100 * NumTraits<Scalar>::MulCost,
867 PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess
872 template <
typename T,
typename Index,
size_t NumDims>
873 class GaussianGenerator {
875 static const bool PacketAccess =
false;
877 EIGEN_DEVICE_FUNC GaussianGenerator(
const array<T, NumDims>& means,
878 const array<T, NumDims>& std_devs)
881 for (
size_t i = 0; i < NumDims; ++i) {
882 m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
886 T operator()(
const array<Index, NumDims>& coordinates)
const {
888 for (
size_t i = 0; i < NumDims; ++i) {
889 T offset = coordinates[i] - m_means[i];
890 tmp += offset * offset / m_two_sigmas[i];
892 return numext::exp(-tmp);
896 array<T, NumDims> m_means;
897 array<T, NumDims> m_two_sigmas;
900 template <
typename T,
typename Index,
size_t NumDims>
901 struct functor_traits<GaussianGenerator<T, Index, NumDims> > {
903 Cost = NumDims * (2 * NumTraits<T>::AddCost + NumTraits<T>::MulCost +
904 functor_traits<scalar_quotient_op<T, T> >::Cost) +
905 functor_traits<scalar_exp_op<T> >::Cost,
906 PacketAccess = GaussianGenerator<T, Index, NumDims>::PacketAccess
913 #endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H Namespace containing all symbols from the Eigen library.
Definition: AdolcForward:45