TensorIntDiv.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
12 
13 
14 namespace Eigen {
15 
29 namespace internal {
30 
31 namespace {
32  // Note: result is undefined if val == 0
33  template <typename T>
34  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val)
35  {
36 #ifdef __CUDA_ARCH__
37  if (sizeof(T) == 8) {
38  return __clzll(val);
39  }
40  return __clz(val);
41 #elif EIGEN_COMP_MSVC
42  DWORD leading_zeros = 0;
43  if (sizeof(T) == 8) {
44  _BitScanReverse64(&leading_zero, val);
45  }
46  else {
47  _BitScanReverse(&leading_zero, val);
48  }
49 #else
50  if (sizeof(T) == 8) {
51  return __builtin_clzl(static_cast<uint64_t>(val));
52  }
53  return __builtin_clz(static_cast<uint32_t>(val));
54 #endif
55  }
56 
57  template <typename T>
58  struct UnsignedTraits {
59  typedef typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type type;
60  };
61 
62  template <typename T>
63  struct DividerTraits {
64 #if defined(__SIZEOF_INT128__) && !defined(__CUDACC__)
65  typedef typename UnsignedTraits<T>::type type;
66  static const int N = sizeof(T) * 8;
67 #else
68  typedef uint32_t type;
69  static const int N = 32;
70 #endif
71  };
72 
73  template <typename T>
74  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) {
75 #if defined(__CUDA_ARCH__)
76  return __umulhi(a, b);
77 #else
78  return (static_cast<uint64_t>(a) * b) >> 32;
79 #endif
80  }
81 
82 #if defined(__CUDA_ARCH__)
83  template <typename T>
84  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
85  return __umul64hi(a, b);
86  }
87 #else
88  template <typename T>
89  EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) {
90 #if defined(__SIZEOF_INT128__) && !defined(__CUDACC__)
91  __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);
92  return static_cast<uint64_t>(v >> 64);
93 #else
94  EIGEN_STATIC_ASSERT(sizeof(T) == 4, YOU_MADE_A_PROGRAMMING_MISTAKE);
95  return (a * b) >> 32;
96 #endif
97  }
98 #endif
99 
100  template <int N, typename T>
101  struct DividerHelper {
102  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier (const int log_div, const T divider) {
103  EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE);
104  return static_cast<uint32_t>((static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1);
105  }
106  };
107 
108 #if defined(__SIZEOF_INT128__) && !defined(__CUDACC__)
109  template <typename T>
110  struct DividerHelper<64, T> {
111  static EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) {
112  return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
113  }
114  };
115 #endif
116 }
117 
118 
119 template <typename T>
120 struct TensorIntDivisor {
121  public:
122  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
123  multiplier = 0;
124  shift1 = 0;
125  shift2 = 0;
126  }
127 
128  // Must have 0 < divider < 2^31. This is relaxed to
129  // 0 < divider < 2^63 when using 64-bit indices on platforms that support
130  // the __uint128_t type.
131  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) {
132  const int N = DividerTraits<T>::N;
133  eigen_assert(static_cast<typename UnsignedTraits<T>::type>(divider) < NumTraits<UnsignedType>::highest()/2);
134  eigen_assert(divider > 0);
135 
136  // fast ln2
137  const int leading_zeros = count_leading_zeros(static_cast<UnsignedType>(divider));
138  int log_div = N - leading_zeros;
139  // if divider is a power of two then log_div is 1 more than it should be.
140  if ((static_cast<typename UnsignedTraits<T>::type>(1) << (log_div-1)) == static_cast<typename UnsignedTraits<T>::type>(divider))
141  log_div--;
142 
143  multiplier = DividerHelper<N, T>::computeMultiplier(log_div, divider);
144  shift1 = log_div > 1 ? 1 : log_div;
145  shift2 = log_div > 1 ? log_div-1 : 0;
146  }
147 
148  // Must have 0 <= numerator. On platforms that dont support the __uint128_t
149  // type numerator should also be less than 2^32-1.
150  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const {
151  eigen_assert(static_cast<typename UnsignedTraits<T>::type>(numerator) < NumTraits<UnsignedType>::highest()/2);
152  //eigen_assert(numerator >= 0); // this is implicitly asserted by the line above
153 
154  UnsignedType t1 = muluh(multiplier, numerator);
155  UnsignedType t = (static_cast<UnsignedType>(numerator) - t1) >> shift1;
156  return (t1 + t) >> shift2;
157  }
158 
159  private:
160  typedef typename DividerTraits<T>::type UnsignedType;
161  UnsignedType multiplier;
162  int32_t shift1;
163  int32_t shift2;
164 };
165 
166 
167 // Optimized version for signed 32 bit integers.
168 // Derived from Hacker's Delight.
169 template <>
170 class TensorIntDivisor<int32_t> {
171  public:
172  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() {
173  magic = 0;
174  shift = 0;
175  }
176  // Must have 2 <= divider
177  EIGEN_DEVICE_FUNC TensorIntDivisor(int32_t divider) {
178  eigen_assert(divider >= 2);
179  calcMagic(divider);
180  }
181 
182  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const {
183 #ifdef __CUDA_ARCH__
184  return (__umulhi(magic, n) >> shift);
185 #else
186  uint64_t v = static_cast<uint64_t>(magic) * static_cast<uint64_t>(n);
187  return (static_cast<uint32_t>(v >> 32) >> shift);
188 #endif
189  }
190 
191 private:
192  // Compute the magic numbers. See Hacker's Delight section 10 for an in
193  // depth explanation.
194  EIGEN_DEVICE_FUNC void calcMagic(int32_t d) {
195  const unsigned two31 = 0x80000000; // 2**31.
196  unsigned ad = d;
197  unsigned t = two31 + (ad >> 31);
198  unsigned anc = t - 1 - t%ad; // Absolute value of nc.
199  int p = 31; // Init. p.
200  unsigned q1 = two31/anc; // Init. q1 = 2**p/|nc|.
201  unsigned r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|).
202  unsigned q2 = two31/ad; // Init. q2 = 2**p/|d|.
203  unsigned r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|).
204  unsigned delta = 0;
205  do {
206  p = p + 1;
207  q1 = 2*q1; // Update q1 = 2**p/|nc|.
208  r1 = 2*r1; // Update r1 = rem(2**p, |nc|).
209  if (r1 >= anc) { // (Must be an unsigned
210  q1 = q1 + 1; // comparison here).
211  r1 = r1 - anc;}
212  q2 = 2*q2; // Update q2 = 2**p/|d|.
213  r2 = 2*r2; // Update r2 = rem(2**p, |d|).
214  if (r2 >= ad) { // (Must be an unsigned
215  q2 = q2 + 1; // comparison here).
216  r2 = r2 - ad;}
217  delta = ad - r2;
218  } while (q1 < delta || (q1 == delta && r1 == 0));
219 
220  magic = (unsigned)(q2 + 1);
221  shift = p - 32;
222  }
223 
224  uint32_t magic;
225  int32_t shift;
226 };
227 
228 
229 template <typename T>
230 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) {
231  return divisor.divide(numerator);
232 }
233 
234 
235 } // end namespace internal
236 } // end namespace Eigen
237 
238 #endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H
Namespace containing all symbols from the Eigen library.
Definition: CXX11Meta.h:13