#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H

namespace Eigen {
namespace internal {
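// Functor returning the logistic sigmoid of its argument, 1 / (1 + exp(-x)).
// A vectorized packetOp path is provided so the expression can be evaluated
// packet by packet whenever the required packet primitives are available.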
template <typename T>
struct scalar_sigmoid_op {
  EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op)
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const {
    const T one = T(1);
    return one / (one + std::exp(-x));
  }

  template <typename Packet>
  inline Packet packetOp(const Packet& x) const {
    const Packet one = pset1<Packet>(1);
    return pdiv(one, padd(one, pexp(pnegate(x))));
  }
};

template <typename T>
struct functor_traits<scalar_sigmoid_op<T> > {
  enum {
    Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6,
    PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv &&
                   packet_traits<T>::HasNegate && packet_traits<T>::HasExp
  };
};
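// Standard reduction functors.
//
// Every reducer exposes the same protocol, which the tensor reduction
// evaluators drive: initialize()/initializePacket() return the identity
// element of the reduction, reduce()/reducePacket() fold one more scalar or
// packet into the accumulator, and finalize()/finalizePacket()/finalizeBoth()
// turn the accumulator(s) into the final result. A rough sketch of how a
// caller uses a reducer (illustration only, not the actual evaluator code):
//
//   SumReducer<float> reducer;
//   float accum = reducer.initialize();
//   for (int i = 0; i < n; ++i) {
//     reducer.reduce(data[i], &accum);
//   }
//   const float sum = reducer.finalize(accum);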
template <typename T> struct SumReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    (*accum) += t;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = padd<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return saccum + predux(vaccum);
  }
};
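// Stateful reducer that computes the arithmetic mean: it accumulates a sum
// while counting how many scalars and packets were folded in, and divides by
// the total count in the finalize step.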
template <typename T> struct MeanReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = true;

  MeanReducer() : scalarCount_(0), packetCount_(0) { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    (*accum) += t;
    scalarCount_++;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) {
    (*accum) = padd<Packet>(*accum, p);
    packetCount_++;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum / scalarCount_;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, pset1<Packet>(packetCount_));
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits<Packet>::size);
  }

 protected:
  DenseIndex scalarCount_;
  DenseIndex packetCount_;
};
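// Reducer that keeps the largest value seen so far. The accumulator starts
// at -(std::numeric_limits<T>::max)().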
template <typename T> struct MaxReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    if (t > *accum) { *accum = t; }
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = pmax<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return -(std::numeric_limits<T>::max)();
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(-(std::numeric_limits<T>::max)());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return numext::maxi(saccum, predux_max(vaccum));
  }
};
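// Reducer that keeps the smallest value seen so far. The accumulator starts
// at (std::numeric_limits<T>::max)().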
template <typename T> struct MinReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    if (t < *accum) { *accum = t; }
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = pmin<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return (std::numeric_limits<T>::max)();
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>((std::numeric_limits<T>::max)());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return numext::mini(saccum, predux_min(vaccum));
  }
};
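// Reducer that multiplies all values together, starting from the identity 1.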
template <typename T> struct ProdReducer
{
  static const bool PacketAccess = true;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    (*accum) *= t;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
    (*accum) = pmul<Packet>(*accum, p);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(1);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(1);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    return accum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
    return vaccum;
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
    return saccum * predux_mul(vaccum);
  }
};
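// Tuple reducers used to implement argmax/argmin: they operate on
// (index, value) pairs, compare on the value (the pair's second member), and
// keep the whole pair so the winning index survives the reduction.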
template <typename T> struct ArgMaxTupleReducer
{
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const {
    if (t.second > accum->second) { *accum = t; }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return T(0, NumTraits<typename T::second_type>::lowest());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
    return accum;
  }
};
template <typename T> struct ArgMinTupleReducer
{
  static const bool PacketAccess = false;
  static const bool IsStateful = false;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const {
    if (t.second < accum->second) { *accum = t; }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return T(0, NumTraits<typename T::second_type>::highest());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const {
    return accum;
  }
};
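// Helpers to obtain a time-based random seed, with separate implementations
// for device (CUDA) code, Windows, macOS, and other POSIX platforms.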
#if defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)
static __device__ int get_random_seed() {
  return clock();
}
#else
static inline int get_random_seed() {
#ifdef _WIN32
  SYSTEMTIME st;
  GetSystemTime(&st);
  return st.wSecond + 1000 * st.wMilliseconds;
#elif defined __APPLE__
  return static_cast<int>(mach_absolute_time());
#else
  timespec ts;
  clock_gettime(CLOCK_REALTIME, &ts);
  return static_cast<int>(ts.tv_nsec);
#endif
}
#endif
#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)
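// Host-side uniform random generators. The generic version falls back to
// Eigen's random<T>() (seeded via srand unless the generator is
// deterministic); packetOp() fills an aligned buffer element by element and
// loads it as a packet.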
template <typename T> class UniformRandomGenerator {

 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      srand(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
  }

  template<typename Index>
  T operator()(Index, Index = 0) const {
    return random<T>();
  }
  template<typename Index>
  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
    const int packetSize = internal::packet_traits<T>::size;
    EIGEN_ALIGN_MAX T values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = random<T>();
    }
    return internal::pload<typename internal::packet_traits<T>::type>(values);
  }

 private:
  bool m_deterministic;
};
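// When compiling as C++11 or later, float and double get specializations
// that draw from a std::mt19937 engine through std::uniform_real_distribution
// instead of relying on rand().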
#if __cplusplus > 199711
template <> class UniformRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      m_generator.seed(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator<float>& other) {
    m_generator.seed(other(0, 0) * UINT_MAX);
    m_deterministic = other.m_deterministic;
  }

  template<typename Index>
  float operator()(Index, Index = 0) const {
    return m_distribution(m_generator);
  }
  template<typename Index>
  typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const {
    const int packetSize = internal::packet_traits<float>::size;
    EIGEN_ALIGN_MAX float values[packetSize];
    for (int k = 0; k < packetSize; ++k) {
      values[k] = this->operator()(i, j);
    }
    return internal::pload<typename internal::packet_traits<float>::type>(values);
  }

 private:
  UniformRandomGenerator& operator = (const UniformRandomGenerator&);

  bool m_deterministic;
  mutable std::mt19937 m_generator;
  mutable std::uniform_real_distribution<float> m_distribution;
};
template <> class UniformRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    if (!deterministic) {
      m_generator.seed(get_random_seed());
    }
  }
  UniformRandomGenerator(const UniformRandomGenerator<double>& other) {
    m_generator.seed(other(0, 0) * UINT_MAX);
    m_deterministic = other.m_deterministic;
  }

  template<typename Index>
  double operator()(Index, Index = 0) const {
    return m_distribution(m_generator);
  }
  template<typename Index>
  typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const {
    const int packetSize = internal::packet_traits<double>::size;
    EIGEN_ALIGN_MAX double values[packetSize];
    for (int k = 0; k < packetSize; ++k) {
      values[k] = this->operator()(i, j);
    }
    return internal::pload<typename internal::packet_traits<double>::type>(values);
  }

 private:
  UniformRandomGenerator& operator = (const UniformRandomGenerator&);

  bool m_deterministic;
  mutable std::mt19937 m_generator;
  mutable std::uniform_real_distribution<double> m_distribution;
};
#endif

#else
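// Device-side (CUDA) uniform random generators. Each thread keeps its own
// curand Philox state, seeded from the thread id, and draws values with the
// curand_uniform* family of functions.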
template <typename T> class UniformRandomGenerator;

template <> class UniformRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ float operator()(Index, Index = 0) const {
    return curand_uniform(&m_state);
  }
  template<typename Index>
  __device__ float4 packetOp(Index, Index = 0) const {
    return curand_uniform4(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};
template <> class UniformRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ double operator()(Index, Index = 0) const {
    return curand_uniform_double(&m_state);
  }
  template<typename Index>
  __device__ double2 packetOp(Index, Index = 0) const {
    return curand_uniform2_double(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};
template <> class UniformRandomGenerator<std::complex<float> > {
 public:
  static const bool PacketAccess = false;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ std::complex<float> operator()(Index, Index = 0) const {
    float4 vals = curand_uniform4(&m_state);
    return std::complex<float>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};
template <> class UniformRandomGenerator<std::complex<double> > {
 public:
  static const bool PacketAccess = false;

  __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ UniformRandomGenerator(const UniformRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ std::complex<double> operator()(Index, Index = 0) const {
    double2 vals = curand_uniform2_double(&m_state);
    return std::complex<double>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};

#endif
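// Normally-distributed (Gaussian) random generators. On the host (when
// compiling as C++11 or later) values are drawn from std::normal_distribution
// over a std::mt19937 engine.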
#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && __cplusplus > 199711
template <typename T> class NormalRandomGenerator {
 public:
  static const bool PacketAccess = true;

  NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1) {
    if (!deterministic) {
      m_generator.seed(get_random_seed());
    }
  }
  NormalRandomGenerator(const NormalRandomGenerator& other)
      : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution) {
    m_generator.seed(other(0, 0) * UINT_MAX);
  }

  template<typename Index>
  T operator()(Index, Index = 0) const {
    return m_distribution(m_generator);
  }
  template<typename Index>
  typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
    const int packetSize = internal::packet_traits<T>::size;
    EIGEN_ALIGN_MAX T values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = m_distribution(m_generator);
    }
    return internal::pload<typename internal::packet_traits<T>::type>(values);
  }

 private:
  bool m_deterministic;
  mutable std::normal_distribution<T> m_distribution;
  mutable std::mt19937 m_generator;
};
#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)
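// Device-side normal generators: like the uniform ones above, each thread
// keeps its own curand Philox state and draws values with the curand_normal*
// family of functions.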
template <typename T> class NormalRandomGenerator;

template <> class NormalRandomGenerator<float> {
 public:
  static const bool PacketAccess = true;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator<float>& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ float operator()(Index, Index = 0) const {
    return curand_normal(&m_state);
  }
  template<typename Index>
  __device__ float4 packetOp(Index, Index = 0) const {
    return curand_normal4(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};
template <> class NormalRandomGenerator<double> {
 public:
  static const bool PacketAccess = true;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator<double>& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ double operator()(Index, Index = 0) const {
    return curand_normal_double(&m_state);
  }
  template<typename Index>
  __device__ double2 packetOp(Index, Index = 0) const {
    return curand_normal2_double(&m_state);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};
template <> class NormalRandomGenerator<std::complex<float> > {
 public:
  static const bool PacketAccess = false;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ std::complex<float> operator()(Index, Index = 0) const {
    float4 vals = curand_normal4(&m_state);
    return std::complex<float>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};
template <> class NormalRandomGenerator<std::complex<double> > {
 public:
  static const bool PacketAccess = false;

  __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }
  __device__ NormalRandomGenerator(const NormalRandomGenerator& other) {
    m_deterministic = other.m_deterministic;
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    const int seed = m_deterministic ? 0 : get_random_seed();
    curand_init(seed, tid, 0, &m_state);
  }

  template<typename Index>
  __device__ std::complex<double> operator()(Index, Index = 0) const {
    double2 vals = curand_normal2_double(&m_state);
    return std::complex<double>(vals.x, vals.y);
  }

 private:
  bool m_deterministic;
  mutable curandStatePhilox4_32_10_t m_state;
};
#else

template <typename T> class NormalRandomGenerator {
 public:
  NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {}

 private:
  bool m_deterministic;
};

#endif
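// Generator functor that evaluates an (unnormalized) Gaussian bell centered
// at the given per-dimension means: exp(-sum_i (x_i - mean_i)^2 / (2 * sigma_i^2)).
// It is intended for expressions that invoke it with the coordinates of each
// output element (e.g. a tensor generator expression).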
template <typename T, typename Index, size_t NumDims>
class GaussianGenerator {
 public:
  static const bool PacketAccess = false;

  EIGEN_DEVICE_FUNC GaussianGenerator(const array<T, NumDims>& means,
                                      const array<T, NumDims>& std_devs)
      : m_means(means)
  {
    for (size_t i = 0; i < NumDims; ++i) {
      m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2;
    }
  }

  T operator()(const array<Index, NumDims>& coordinates) const {
    T tmp = T(0);
    for (size_t i = 0; i < NumDims; ++i) {
      T offset = coordinates[i] - m_means[i];
      tmp += offset * offset / m_two_sigmas[i];
    }
    return std::exp(-tmp);
  }

 private:
  array<T, NumDims> m_means;
  array<T, NumDims> m_two_sigmas;
};
} // end namespace internal
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H