Redux.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_REDUX_H
#define EIGEN_REDUX_H

namespace Eigen {

namespace internal {

// TODO
// * implement other kinds of vectorization
// * factorize code

/***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling
***************************************************************************/

template<typename Func, typename Derived>
struct redux_traits
{
public:
  enum {
    PacketSize = packet_traits<typename Derived::Scalar>::size,
    InnerMaxSize = int(Derived::IsRowMajor)
                 ? Derived::MaxColsAtCompileTime
                 : Derived::MaxRowsAtCompileTime
  };

  enum {
    MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
                  && (functor_traits<Func>::PacketAccess),
    MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
    MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize
  };

public:
  enum {
    Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
                                        : int(DefaultTraversal)
  };

public:
  enum {
    Cost = (   Derived::SizeAtCompileTime == Dynamic
            || Derived::CoeffReadCost == Dynamic
            || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
           ) ? Dynamic
             : Derived::SizeAtCompileTime * Derived::CoeffReadCost
               + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
  };

public:
  enum {
    Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
              ? CompleteUnrolling
              : NoUnrolling
  };

#ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(Derived::Flags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
#endif
};
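
// --- Illustrative sketch (not part of Eigen) -------------------------------
// A minimal standalone analogue of the strategy selection above, with the
// flag tests reduced to plain booleans. All names here, including the
// EIGEN_REDUX_EXAMPLES guard, are hypothetical; this only shows how the three
// traversal kinds fall out of the packet-access / linear-access tests.
#ifdef EIGEN_REDUX_EXAMPLES
enum ExampleTraversal { ExDefault, ExLinearVectorized, ExSliceVectorized };

template<bool PacketAccess, bool LinearAccess, int InnerMaxSize, int PacketSize>
struct example_choose_traversal
{
  enum {
    MightVectorize     = PacketAccess,                                  // packets usable at all?
    MayLinearVectorize = MightVectorize && LinearAccess,                // flat 1D addressing available?
    MaySliceVectorize  = MightVectorize && (InnerMaxSize >= 3*PacketSize), // inner dimension long enough?
    value = MayLinearVectorize ? ExLinearVectorized
          : MaySliceVectorize  ? ExSliceVectorized
                               : ExDefault
  };
};
// e.g. a packet-accessible, linearly addressable expression:
//   example_choose_traversal<true,true,16,4>::value == ExLinearVectorized
#endif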

/***************************************************************************
* Part 2 : unrollers
***************************************************************************/

/*** no vectorization ***/

template<typename Func, typename Derived, int Start, int Length>
struct redux_novec_unroller
{
  enum {
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
                redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
  }
};

template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 1>
{
  enum {
    outer = Start / Derived::InnerSizeAtCompileTime,
    inner = Start % Derived::InnerSizeAtCompileTime
  };

  typedef typename Derived::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
  {
    return mat.coeffByOuterInner(outer, inner);
  }
};

// This is dead code and will never be called: for a length of 0, run() is
// never invoked at all. The specialization is required only to prevent
// false warnings about failed inlining.
template<typename Func, typename Derived, int Start>
struct redux_novec_unroller<Func, Derived, Start, 0>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
};
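
// --- Illustrative sketch (not part of Eigen) -------------------------------
// The unroller above reduces the fixed-size range [Start, Start+Length) by
// recursive halving, producing a balanced reduction tree at compile time
// instead of a left-to-right dependency chain. A minimal standalone version
// of the same pattern over a raw array (names hypothetical, same guard):
#ifdef EIGEN_REDUX_EXAMPLES
template<typename T, int Start, int Length>
struct example_sum_unroller
{
  static T run(const T* data)
  {
    // split [Start, Start+Length) into two halves and reduce each recursively
    return example_sum_unroller<T, Start, Length/2>::run(data)
         + example_sum_unroller<T, Start+Length/2, Length-Length/2>::run(data);
  }
};

template<typename T, int Start>
struct example_sum_unroller<T, Start, 1>
{
  static T run(const T* data) { return data[Start]; } // leaf: a single coefficient
};
// float v[4] = {1,2,3,4};
// example_sum_unroller<float,0,4>::run(v) computes (v[0]+v[1]) + (v[2]+v[3]).
#endif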

/*** vectorization ***/

template<typename Func, typename Derived, int Start, int Length>
struct redux_vec_unroller
{
  enum {
    PacketSize = packet_traits<typename Derived::Scalar>::size,
    HalfLength = Length/2
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
  {
    return func.packetOp(
            redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
            redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
  }
};

template<typename Func, typename Derived, int Start>
struct redux_vec_unroller<Func, Derived, Start, 1>
{
  enum {
    index = Start * packet_traits<typename Derived::Scalar>::size,
    outer = index / int(Derived::InnerSizeAtCompileTime),
    inner = index % int(Derived::InnerSizeAtCompileTime),
    alignment = Derived::Alignment
  };

  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;

  static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
  {
    return mat.template packetByOuterInner<alignment,PacketScalar>(outer, inner);
  }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

template<typename Func, typename Derived,
         int Traversal = redux_traits<Func, Derived>::Traversal,
         int Unrolling = redux_traits<Func, Derived>::Unrolling
>
struct redux_impl;

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    Scalar res;
    res = mat.coeffByOuterInner(0, 0);
    for(Index i = 1; i < mat.innerSize(); ++i)
      res = func(res, mat.coeffByOuterInner(0, i));
    for(Index i = 1; i < mat.outerSize(); ++i)
      for(Index j = 0; j < mat.innerSize(); ++j)
        res = func(res, mat.coeffByOuterInner(i, j));
    return res;
  }
};

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, DefaultTraversal, CompleteUnrolling>
  : public redux_novec_unroller<Func, Derived, 0, Derived::SizeAtCompileTime>
{};

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;

  static Scalar run(const Derived &mat, const Func& func)
  {
    const Index size = mat.size();

    const Index packetSize = packet_traits<Scalar>::size;
    const int packetAlignment = unpacket_traits<PacketScalar>::alignment;
    enum {
      alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits<Scalar>::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned),
      alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment)
    };
    const Index alignedStart = internal::first_default_aligned(mat.nestedExpression());
    const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
    const Index alignedSize  = ((size-alignedStart)/(packetSize))*(packetSize);
    const Index alignedEnd2  = alignedStart + alignedSize2;
    const Index alignedEnd   = alignedStart + alignedSize;
    Scalar res;
    if(alignedSize)
    {
      PacketScalar packet_res0 = mat.template packet<alignment,PacketScalar>(alignedStart);
      if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
      {
        PacketScalar packet_res1 = mat.template packet<alignment,PacketScalar>(alignedStart+packetSize);
        for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
        {
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(index));
          packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment,PacketScalar>(index+packetSize));
        }

        packet_res0 = func.packetOp(packet_res0,packet_res1);
        if(alignedEnd>alignedEnd2)
          packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment,PacketScalar>(alignedEnd2));
      }
      res = func.predux(packet_res0);

      for(Index index = 0; index < alignedStart; ++index)
        res = func(res,mat.coeff(index));

      for(Index index = alignedEnd; index < size; ++index)
        res = func(res,mat.coeff(index));
    }
    else // too small to vectorize anything;
         // dynamic-size is inefficient anyway for such small sizes, so don't bother optimizing
    {
      res = mat.coeff(0);
      for(Index index = 1; index < size; ++index)
        res = func(res,mat.coeff(index));
    }

    return res;
  }
};
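
// --- Illustrative sketch (not part of Eigen) -------------------------------
// The control flow above, restated as a plain scalar sum over a float array,
// with the packet width emulated by an inner loop of 4: a scalar prologue up
// to the aligned start, a main loop unrolled by two with two independent
// accumulators (which breaks the serial dependency chain), a leftover whole
// "packet", and a scalar epilogue for the tail. Names are hypothetical.
#ifdef EIGEN_REDUX_EXAMPLES
inline float example_linear_sum(const float* data, int size, int alignedStart)
{
  const int packetSize   = 4;
  const int alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
  const int alignedSize  = ((size-alignedStart)/packetSize)*packetSize;
  const int alignedEnd2  = alignedStart + alignedSize2;
  const int alignedEnd   = alignedStart + alignedSize;
  float res = 0;
  if(alignedSize)
  {
    float acc0 = 0, acc1 = 0;                       // like packet_res0 / packet_res1
    for(int i = alignedStart; i < alignedEnd2; i += 2*packetSize)
      for(int k = 0; k < packetSize; ++k)
      {
        acc0 += data[i+k];                          // first "packet" of the pair
        acc1 += data[i+packetSize+k];               // second "packet" of the pair
      }
    for(int i = alignedEnd2; i < alignedEnd; ++i)   // at most one leftover "packet"
      acc0 += data[i];
    res = acc0 + acc1;                              // like func.predux(...)
    for(int i = 0; i < alignedStart; ++i)           // unaligned prologue
      res += data[i];
    for(int i = alignedEnd; i < size; ++i)          // scalar tail
      res += data[i];
  }
  else // too small to vectorize: plain loop
  {
    for(int i = 0; i < size; ++i) res += data[i];
  }
  return res;
}
#endif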

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketType;

  EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    const Index innerSize = mat.innerSize();
    const Index outerSize = mat.outerSize();
    enum {
      packetSize = packet_traits<Scalar>::size
    };
    const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
    Scalar res;
    if(packetedInnerSize)
    {
      PacketType packet_res = mat.template packet<Unaligned,PacketType>(0,0);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
          packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned,PacketType>(j,i));

      res = func.predux(packet_res);
      for(Index j=0; j<outerSize; ++j)
        for(Index i=packetedInnerSize; i<innerSize; ++i)
          res = func(res, mat.coeffByOuterInner(j,i));
    }
    else // too small to vectorize anything;
         // dynamic-size is inefficient anyway for such small sizes, so don't bother optimizing
    {
      res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
    }

    return res;
  }
};

template<typename Func, typename Derived>
struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Derived::Scalar Scalar;
  typedef typename packet_traits<Scalar>::type PacketScalar;
  enum {
    PacketSize = packet_traits<Scalar>::size,
    Size = Derived::SizeAtCompileTime,
    VectorizedSize = (Size / PacketSize) * PacketSize
  };
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
  {
    eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
    if (VectorizedSize > 0) {
      Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
      if (VectorizedSize != Size)
        res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
      return res;
    }
    else {
      return redux_novec_unroller<Func, Derived, 0, Size>::run(mat,func);
    }
  }
};

// evaluator adaptor
template<typename _XprType>
class redux_evaluator
{
public:
  typedef _XprType XprType;
  EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketScalar PacketScalar;
  typedef typename XprType::PacketReturnType PacketReturnType;

  enum {
    MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = XprType::MaxColsAtCompileTime,
    // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator
    Flags = evaluator<XprType>::Flags & ~DirectAccessBit,
    IsRowMajor = XprType::IsRowMajor,
    SizeAtCompileTime = XprType::SizeAtCompileTime,
    InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime,
    CoeffReadCost = evaluator<XprType>::CoeffReadCost,
    Alignment = evaluator<XprType>::Alignment
  };

  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
  EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index row, Index col) const
  { return m_evaluator.coeff(row, col); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index index) const
  { return m_evaluator.coeff(index); }

  template<int LoadMode, typename PacketType>
  PacketReturnType packet(Index row, Index col) const
  { return m_evaluator.template packet<LoadMode,PacketType>(row, col); }

  template<int LoadMode, typename PacketType>
  PacketReturnType packet(Index index) const
  { return m_evaluator.template packet<LoadMode,PacketType>(index); }

  EIGEN_DEVICE_FUNC
  CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
  { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  template<int LoadMode, typename PacketType>
  PacketReturnType packetByOuterInner(Index outer, Index inner) const
  { return m_evaluator.template packet<LoadMode,PacketType>(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); }

  const XprType & nestedExpression() const { return m_xpr; }

protected:
  internal::evaluator<XprType> m_evaluator;
  const XprType &m_xpr;
};
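
// --- Illustrative sketch (not part of Eigen) -------------------------------
// coeffByOuterInner/packetByOuterInner above map storage-order-agnostic
// (outer, inner) indices to (row, col): for a row-major expression the outer
// index is the row, for a column-major one it is the column. A standalone
// version of the mapping (names hypothetical, same guard):
#ifdef EIGEN_REDUX_EXAMPLES
struct ExampleIndexPair { int row, col; };
inline ExampleIndexPair example_by_outer_inner(bool isRowMajor, int outer, int inner)
{
  ExampleIndexPair p;
  p.row = isRowMajor ? outer : inner;
  p.col = isRowMajor ? inner : outer;
  return p;
}
// example_by_outer_inner(true, 2, 5)  -> {row=2, col=5} (row-major)
// example_by_outer_inner(false, 2, 5) -> {row=5, col=2} (column-major)
#endif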

} // end namespace internal

/***************************************************************************
* Part 4 : public API
***************************************************************************/

/** \returns the result of a full redux operation on the whole matrix or vector using \a func
  *
  * The template parameter \a Func is the type of the functor \a func, which must be
  * an associative binary operator.
  *
  * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff()
  */
template<typename Derived>
template<typename Func>
typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
  eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

  // FIXME: eval_nest should be handled by redux_evaluator, however:
  //  - it is currently difficult to provide the right Flags since they are still handled by the expressions
  //  - handling it here might reduce the number of template instantiations
//   typedef typename internal::nested_eval<Derived,1>::type ThisNested;
//   typedef typename internal::remove_all<ThisNested>::type ThisNestedCleaned;
//   typedef typename internal::redux_evaluator<ThisNestedCleaned> ThisEvaluator;
//
//   ThisNested thisNested(derived());
//   ThisEvaluator thisEval(thisNested);

  typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
  ThisEvaluator thisEval(derived());

  return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
}
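
// --- Illustrative sketch (not part of Eigen) -------------------------------
// Usage of redux() with a custom associative functor. Assuming the generic
// functor_traits fallback applies (no PacketAccess), such a reduction takes
// the scalar path. The functor and guard names below are hypothetical.
#ifdef EIGEN_REDUX_EXAMPLES
#include <Eigen/Dense>
#include <algorithm>
#include <cmath>

struct example_absmax_op {
  // associative binary operator: largest absolute value of the two arguments
  float operator()(float a, float b) const { return std::max(std::abs(a), std::abs(b)); }
};

inline float example_absmax(const Eigen::MatrixXf& m)
{
  return m.redux(example_absmax_op()); // folds all coefficients pairwise
}
#endif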

/** \returns the minimum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
  return derived().redux(Eigen::internal::scalar_min_op<Scalar>());
}

/** \returns the maximum of all coefficients of \c *this.
  * \warning the result is undefined if \c *this contains NaN.
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
  return derived().redux(Eigen::internal::scalar_max_op<Scalar>());
}

/** \returns the sum of all coefficients of \c *this
  *
  * \sa trace(), prod(), mean()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::sum() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(0);
  return derived().redux(Eigen::internal::scalar_sum_op<Scalar>());
}

/** \returns the mean of all coefficients of \c *this
  *
  * \sa trace(), prod(), sum()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::mean() const
{
  return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
}

/** \returns the product of all coefficients of \c *this
  *
  * \sa sum(), mean(), trace()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::prod() const
{
  if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
    return Scalar(1);
  return derived().redux(Eigen::internal::scalar_product_op<Scalar>());
}

/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
  *
  * \c *this can be any matrix, not necessarily square.
  *
  * \sa diagonal(), sum()
  */
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
MatrixBase<Derived>::trace() const
{
  return derived().diagonal().sum();
}
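
// --- Illustrative sketch (not part of Eigen) -------------------------------
// The public reductions above in use; the expected values follow directly
// from the definitions (guard name hypothetical, as before).
#ifdef EIGEN_REDUX_EXAMPLES
#include <Eigen/Dense>

inline void example_reductions()
{
  Eigen::Matrix2f m;
  m << 1, 2,
       3, 4;
  float s  = m.sum();      // 1+2+3+4 = 10
  float p  = m.prod();     // 1*2*3*4 = 24
  float a  = m.mean();     // 10/4 = 2.5
  float t  = m.trace();    // 1+4 = 5 (sum of the main diagonal)
  float mn = m.minCoeff(); // 1
  float mx = m.maxCoeff(); // 4
  (void)s; (void)p; (void)a; (void)t; (void)mn; (void)mx;
}
#endif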

} // end namespace Eigen

#endif // EIGEN_REDUX_H