// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2011-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011-2012 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ASSIGN_EVALUATOR_H
#define EIGEN_ASSIGN_EVALUATOR_H

namespace Eigen {

// This implementation is based on Assign.h

namespace internal {

/***************************************************************************
* Part 1 : the logic deciding a strategy for traversal and unrolling
***************************************************************************/

// copy_using_evaluator_traits is based on assign_traits

template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
  typedef typename DstEvaluator::XprType Dst;
  typedef typename Dst::Scalar DstScalar;
  // TODO distinguish between linear traversal and inner-traversals
  typedef typename find_best_packet<DstScalar,Dst::SizeAtCompileTime>::type PacketType;

  enum {
    DstFlags = DstEvaluator::Flags,
    SrcFlags = SrcEvaluator::Flags,
    RequiredAlignment = unpacket_traits<PacketType>::alignment
  };

public:
  enum {
    DstAlignment = DstEvaluator::Alignment,
    SrcAlignment = SrcEvaluator::Alignment,
    DstHasDirectAccess = DstFlags & DirectAccessBit,
    JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
  };

private:
  enum {
    InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
              : int(DstFlags)&RowMajorBit       ? int(Dst::ColsAtCompileTime)
                                                : int(Dst::RowsAtCompileTime),
    InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
                 : int(DstFlags)&RowMajorBit       ? int(Dst::MaxColsAtCompileTime)
                                                   : int(Dst::MaxRowsAtCompileTime),
    MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
    PacketSize = unpacket_traits<PacketType>::size
  };

  enum {
    DstIsRowMajor = DstFlags&RowMajorBit,
    SrcIsRowMajor = SrcFlags&RowMajorBit,
    StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
    MightVectorize = StorageOrdersAgree
                  && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
                  && (functor_traits<AssignFunc>::PacketAccess),
    MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
                     && int(JointAlignment)>=int(RequiredAlignment),
    MayLinearize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
                      && ((int(DstAlignment)>=int(RequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
         so it's only good for large enough sizes. */
    MaySliceVectorize = MightVectorize && DstHasDirectAccess
                     && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
         in a fixed-size matrix */
  };

public:
  enum {
    Traversal = int(MayInnerVectorize)  ? int(InnerVectorizedTraversal)
              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
              : int(MayLinearize)       ? int(LinearTraversal)
                                        : int(DefaultTraversal),
    Vectorized = int(Traversal) == InnerVectorizedTraversal
              || int(Traversal) == LinearVectorizedTraversal
              || int(Traversal) == SliceVectorizedTraversal
  };

private:
  enum {
    UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
    MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
                       && int(SrcEvaluator::CoeffReadCost) != Dynamic
                       && int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
    MayUnrollInner = int(InnerSize) != Dynamic
                  && int(SrcEvaluator::CoeffReadCost) != Dynamic
                  && int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
  };

public:
  enum {
    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
              ? (
                  int(MayUnrollCompletely) ? int(CompleteUnrolling)
                : int(MayUnrollInner)      ? int(InnerUnrolling)
                                           : int(NoUnrolling)
                )
              : int(Traversal) == int(LinearVectorizedTraversal)
              ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(RequiredAlignment)) ? int(CompleteUnrolling)
                                                                                           : int(NoUnrolling) )
              : int(Traversal) == int(LinearTraversal)
              ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
                                            : int(NoUnrolling) )
              : int(NoUnrolling)
  };

#ifdef EIGEN_DEBUG_ASSIGN
  static void debug()
  {
    std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
    std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
    std::cerr.setf(std::ios::hex, std::ios::basefield);
    EIGEN_DEBUG_VAR(DstFlags)
    EIGEN_DEBUG_VAR(SrcFlags)
    std::cerr.unsetf(std::ios::hex);
    EIGEN_DEBUG_VAR(DstAlignment)
    EIGEN_DEBUG_VAR(SrcAlignment)
    EIGEN_DEBUG_VAR(RequiredAlignment)
    EIGEN_DEBUG_VAR(JointAlignment)
    EIGEN_DEBUG_VAR(InnerSize)
    EIGEN_DEBUG_VAR(InnerMaxSize)
    EIGEN_DEBUG_VAR(PacketSize)
    EIGEN_DEBUG_VAR(StorageOrdersAgree)
    EIGEN_DEBUG_VAR(MightVectorize)
    EIGEN_DEBUG_VAR(MayLinearize)
    EIGEN_DEBUG_VAR(MayInnerVectorize)
    EIGEN_DEBUG_VAR(MayLinearVectorize)
    EIGEN_DEBUG_VAR(MaySliceVectorize)
    EIGEN_DEBUG_VAR(Traversal)
    EIGEN_DEBUG_VAR(UnrollingLimit)
    EIGEN_DEBUG_VAR(MayUnrollCompletely)
    EIGEN_DEBUG_VAR(MayUnrollInner)
    EIGEN_DEBUG_VAR(Unrolling)
    std::cerr << std::endl;
  }
#endif
};
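
// Example: to inspect the strategy chosen for a given assignment, compile with
// EIGEN_DEBUG_ASSIGN defined; the kernel constructor (Part 4 below) then calls
// AssignmentTraits::debug() for every dense assignment. A minimal sketch (the
// printed Traversal/Unrolling values depend on the scalar type, the SIMD width
// of the target, and EIGEN_UNROLLING_LIMIT):
//
//   #define EIGEN_DEBUG_ASSIGN
//   #include <Eigen/Dense>
//   int main()
//   {
//     Eigen::Matrix4f a, b;
//     a.setRandom();
//     b = 2.f * a;  // prints DstFlags, Traversal, Unrolling, ...
//   }
//
// On a typical SSE build one would expect InnerVectorizedTraversal with
// CompleteUnrolling for this fixed-size, well-aligned assignment.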

/***************************************************************************
* Part 2 : meta-unrollers
***************************************************************************/

/************************
*** Default traversal ***
************************/

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.assignCoeffByOuterInner(outer, inner);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};
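
// To see what this recursion expands to, consider a 2x2 column-major
// destination: InnerSizeAtCompileTime==2, and dense_assignment_loop (Part 3)
// instantiates run<0,4>. A sketch of the resulting call chain:
//
//   run<0,4>: kernel.assignCoeffByOuterInner(0,0);  // outer=0/2, inner=0%2
//   run<1,4>: kernel.assignCoeffByOuterInner(0,1);
//   run<2,4>: kernel.assignCoeffByOuterInner(1,0);
//   run<3,4>: kernel.assignCoeffByOuterInner(1,1);
//   run<4,4>: matches the empty specialization just above and stops.
//
// i.e. the compiler emits four straight-line coefficient assignments and no loop.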

template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.assignCoeffByOuterInner(outer, Index_);
    copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
};

/***********************
*** Linear traversal ***
***********************/

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
  {
    kernel.assignCoeff(Index);
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

/**************************
*** Inner vectorization ***
**************************/

template<typename Kernel, int Index, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename Kernel::PacketType PacketType;

  enum {
    outer = Index / DstXprType::InnerSizeAtCompileTime,
    inner = Index % DstXprType::InnerSizeAtCompileTime,
    JointAlignment = Kernel::AssignmentTraits::JointAlignment
  };

  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    kernel.template assignPacketByOuterInner<Aligned, JointAlignment, PacketType>(outer, inner);
    enum { NextIndex = Index + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
};

template<typename Kernel, int Index_, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
  typedef typename Kernel::PacketType PacketType;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
  {
    kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, Index_);
    enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
    copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
  }
};

template<typename Kernel, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
};

/***************************************************************************
* Part 3 : implementation of all cases
***************************************************************************/

// dense_assignment_loop is based on assign_impl

template<typename Kernel,
         int Traversal = Kernel::AssignmentTraits::Traversal,
         int Unrolling = Kernel::AssignmentTraits::Unrolling>
struct dense_assignment_loop;

/************************
*** Default traversal ***
************************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static void run(Kernel &kernel)
  {
    for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
      for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
        kernel.assignCoeffByOuterInner(outer, inner);
      }
    }
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
{
  typedef typename Kernel::StorageIndex StorageIndex;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;

    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
  }
};

/***************************
*** Linear vectorization ***
***************************/

// The goal of unaligned_dense_assignment_loop is simply to factor out the handling
// of the non-vectorizable beginning and ending parts

template <bool IsAligned = false>
struct unaligned_dense_assignment_loop
{
  // if IsAligned = true, then do nothing
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
};

template <>
struct unaligned_dense_assignment_loop<false>
{
  // MSVC must not inline this function. If it does, it fails to optimize the
  // packet access path.
  // FIXME check which version exhibits this issue
#if EIGEN_COMP_MSVC
  template <typename Kernel>
  static EIGEN_DONT_INLINE void run(Kernel &kernel,
                                    Index start,
                                    Index end)
#else
  template <typename Kernel>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
                                                        Index start,
                                                        Index end)
#endif
  {
    for (Index index = start; index < end; ++index)
      kernel.assignCoeff(index);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      requestedAlignment = Kernel::AssignmentTraits::RequiredAlignment,
      packetSize = unpacket_traits<PacketType>::size,
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
                                                            : int(Kernel::AssignmentTraits::DstAlignment),
      srcAlignment = Kernel::AssignmentTraits::JointAlignment
    };
    const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(&kernel.dstEvaluator().coeffRef(0), size);
    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

    unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);

    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
      kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);

    unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
  }
};
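
// Worked example for the loop above (a sketch, assuming 4-wide float packets,
// i.e. packetSize==4): for size==11 and alignedStart==3, the scalar prologue
// handles indices 0..2, alignedEnd = 3 + ((11-3)/4)*4 = 11, so the packet loop
// runs at indices 3 and 7 and the scalar epilogue is empty. With
// alignedStart==0, alignedEnd==8 and the epilogue writes indices 8..10 one
// coefficient at a time.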

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
{
  typedef typename Kernel::StorageIndex StorageIndex;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;

    enum { size = DstXprType::SizeAtCompileTime,
           packetSize = packet_traits<typename Kernel::Scalar>::size,
           alignedSize = (size/packetSize)*packetSize };

    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
    copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
  }
};

/**************************
*** Inner vectorization ***
**************************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
{
  typedef typename Kernel::PacketType PacketType;
  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
  {
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index packetSize = unpacket_traits<PacketType>::size;
    for(Index outer = 0; outer < outerSize; ++outer)
      for(Index inner = 0; inner < innerSize; inner+=packetSize)
        kernel.template assignPacketByOuterInner<Aligned, Aligned, PacketType>(outer, inner);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
{
  typedef typename Kernel::StorageIndex StorageIndex;
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    const Index outerSize = kernel.outerSize();
    for(Index outer = 0; outer < outerSize; ++outer)
      copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
  }
};

/***********************
*** Linear traversal ***
***********************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
  {
    const Index size = kernel.size();
    for(Index i = 0; i < size; ++i)
      kernel.assignCoeff(i);
  }
};

template<typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
{
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
  {
    typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
    copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
  }
};

/**************************
*** Slice vectorization ***
**************************/

template<typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
{
  EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
  {
    typedef typename Kernel::Scalar Scalar;
    typedef typename Kernel::PacketType PacketType;
    enum {
      packetSize = unpacket_traits<PacketType>::size,
      requestedAlignment = int(Kernel::AssignmentTraits::RequiredAlignment),
      alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
      dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
      dstAlignment = alignable ? int(requestedAlignment)
                               : int(Kernel::AssignmentTraits::DstAlignment)
    };
    const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0);
    if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
    {
      // the pointer is not aligned on a scalar boundary, so alignment is not possible
      return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
    }
    const Index packetAlignedMask = packetSize - 1;
    const Index innerSize = kernel.innerSize();
    const Index outerSize = kernel.outerSize();
    const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);

    for(Index outer = 0; outer < outerSize; ++outer)
    {
      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
      // do the non-vectorizable part of the assignment
      for(Index inner = 0; inner<alignedStart ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      // do the vectorizable part of the assignment
      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
        kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);

      // do the non-vectorizable part of the assignment
      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
        kernel.assignCoeffByOuterInner(outer, inner);

      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
    }
  }
};
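
// Worked example for the alignment bookkeeping above (a sketch, assuming
// 4-wide packets and an outer stride of 7 scalars): alignedStep =
// (4 - 7%4) & 3 = 1, so if the first column becomes packet-aligned at inner
// index 2, the next column is aligned from (2+1)%4 = 3, the one after from 0,
// and so on; each column gets its own scalar head, packet body, and scalar tail.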

/***************************************************************************
* Part 4 : Generic dense assignment kernel
***************************************************************************/

// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator
// to another dense writable evaluator.
// It is parametrized by the two evaluators and by the actual assignment functor.
// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible.
// One can customize the assignment using this generic dense_assignment_kernel with different
// functors, or by completely overloading it, bypassing the functor.
template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
class generic_dense_assignment_kernel
{
protected:
  typedef typename DstEvaluatorTypeT::XprType DstXprType;
  typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
public:

  typedef DstEvaluatorTypeT DstEvaluatorType;
  typedef SrcEvaluatorTypeT SrcEvaluatorType;
  typedef typename DstEvaluatorType::Scalar Scalar;
  typedef typename DstEvaluatorType::StorageIndex StorageIndex;
  typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
  typedef typename AssignmentTraits::PacketType PacketType;


  EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
    : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
  {
    #ifdef EIGEN_DEBUG_ASSIGN
    AssignmentTraits::debug();
    #endif
  }

  EIGEN_DEVICE_FUNC Index size() const        { return m_dstExpr.size(); }
  EIGEN_DEVICE_FUNC Index innerSize() const   { return m_dstExpr.innerSize(); }
  EIGEN_DEVICE_FUNC Index outerSize() const   { return m_dstExpr.outerSize(); }
  EIGEN_DEVICE_FUNC Index rows() const        { return m_dstExpr.rows(); }
  EIGEN_DEVICE_FUNC Index cols() const        { return m_dstExpr.cols(); }
  EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); }

  // TODO get rid of this one:
  EIGEN_DEVICE_FUNC DstXprType& dstExpression() const { return m_dstExpr; }

  EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; }
  EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; }

  // Assign src(row,col) to dst(row,col) through the assignment functor.
  EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
  {
    m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
  }

  // Linear-index overload, used by the linear and linear-vectorized traversals.
  EIGEN_DEVICE_FUNC void assignCoeff(Index index)
  {
    m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
  }

  // Outer/inner indices are mapped to row/column according to the storage order.
  EIGEN_DEVICE_FUNC void assignCoeffByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignCoeff(row, col);
  }


  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC void assignPacket(Index row, Index col)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
  }

  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC void assignPacket(Index index)
  {
    m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
  }

  template<int StoreMode, int LoadMode, typename PacketType>
  EIGEN_DEVICE_FUNC void assignPacketByOuterInner(Index outer, Index inner)
  {
    Index row = rowIndexByOuterInner(outer, inner);
    Index col = colIndexByOuterInner(outer, inner);
    assignPacket<StoreMode,LoadMode,PacketType>(row, col);
  }

  EIGEN_DEVICE_FUNC static Index rowIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::RowsAtCompileTime) == 1 ? 0
         : int(Traits::ColsAtCompileTime) == 1 ? inner
         : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
         : inner;
  }

  EIGEN_DEVICE_FUNC static Index colIndexByOuterInner(Index outer, Index inner)
  {
    typedef typename DstEvaluatorType::ExpressionTraits Traits;
    return int(Traits::ColsAtCompileTime) == 1 ? 0
         : int(Traits::RowsAtCompileTime) == 1 ? inner
         : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
         : outer;
  }

protected:
  DstEvaluatorType& m_dst;
  const SrcEvaluatorType& m_src;
  const Functor &m_functor;
  // TODO find a way to avoid needing the original expression
  DstXprType& m_dstExpr;
};
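
// Index-mapping example for the two helpers above: for a 3x4 column-major
// destination, (outer,inner) means (col,row), so assignCoeffByOuterInner(2,1)
// writes the coefficient at (row,col)==(1,2); for a row-major destination the
// same call writes (2,1). For vectors, inner is always the running index.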

/***************************************************************************
* Part 5 : Entry point for dense rectangular assignment
***************************************************************************/

template<typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
{
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());

  typedef evaluator<DstXprType> DstEvaluatorType;
  typedef evaluator<SrcXprType> SrcEvaluatorType;

  DstEvaluatorType dstEvaluator(dst);
  SrcEvaluatorType srcEvaluator(src);

  typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());

  dense_assignment_loop<Kernel>::run(kernel);
}

template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
}
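
// Usage sketch: compound assignments reuse this entry point with a different
// functor. Assuming the usual functors from functors.h (add_assign_op is the
// functor used for operator+=), "a += b" essentially boils down to:
//
//   Eigen::Matrix3f a, b;
//   // ... initialize a and b ...
//   internal::call_dense_assignment_loop(a, b, internal::add_assign_op<float>());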

/***************************************************************************
* Part 6 : Generic assignment
***************************************************************************/

// Based on the respective shapes of the destination and source,
// the class AssignmentKind determines the kind of assignment mechanism.
// AssignmentKind must define a Kind typedef.
template<typename DstShape, typename SrcShape> struct AssignmentKind;

// Assignment kinds defined in this file:
struct Dense2Dense {};
struct EigenBase2EigenBase {};

template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };

// This is the main assignment class
template< typename DstXprType, typename SrcXprType, typename Functor,
          typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
          typename Scalar = typename DstXprType::Scalar>
struct Assignment;


// The only purpose of this call_assignment() function is to deal with noalias() / AssumeAliasing and automatic transposition.
// Indeed, I (Gael) think that this concept of AssumeAliasing was a mistake, and it makes things quite complicated.
// So this intermediate function removes everything related to AssumeAliasing such that Assignment
// does not have to bother about these annoying details.

template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
{
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
}

// Deal with AssumeAliasing
template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==1, void*>::type = 0)
{
  typename plain_matrix_type<Src>::type tmp(src);
  call_assignment_no_alias(dst, tmp, func);
}

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<evaluator_traits<Src>::AssumeAliasing==0, void*>::type = 0)
{
  call_assignment_no_alias(dst, src, func);
}

// by-pass AssumeAliasing
// FIXME the const version should probably not be needed
// When there is no aliasing, we require that 'dst' has been properly resized
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
  call_assignment_no_alias(dst.expression(), src, func);
}
template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
{
  call_assignment_no_alias(dst.expression(), src, func);
}
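
// These overloads are what "dst.noalias() = expr" resolves to: the NoAlias
// wrapper skips the AssumeAliasing temporary above and writes straight into
// the destination. E.g.:
//
//   Eigen::MatrixXf a(3,3), b(3,3), c(3,3);
//   // ... initialize a and b ...
//   c.noalias() = a * b;  // no temporary: routed to call_assignment_no_alias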


template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
{
  enum {
    NeedToTranspose = (    (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
                        |  // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
                           // revert to || as soon as not needed anymore.
                           (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1))
                      && int(Dst::SizeAtCompileTime) != 1
  };

  Index dstRows = NeedToTranspose ? src.cols() : src.rows();
  Index dstCols = NeedToTranspose ? src.rows() : src.cols();
  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
    dst.resize(dstRows, dstCols);

  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
  typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
  ActualDstType actualDst(dst);

  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)

  // TODO this line is commented out to allow matrix = permutation
  // Actually, the "Scalar" type for a permutation matrix does not really make sense,
  // perhaps it could be void, and EIGEN_CHECK_BINARY_COMPATIBILIY could allow mixing void with anything...?
//  EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);

  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
}
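
// Example of the automatic transposition handled above: assigning a row
// vector to a column vector sets NeedToTranspose, so the assignment actually
// runs on Transpose<Dst>:
//
//   Eigen::VectorXf    v;         // column vector, resized to 3x1 below
//   Eigen::RowVector3f r(1,2,3);  // 1x3
//   v = r;                        // ends up here; equivalent to v = r.transpose()
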
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
{
  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
}

template<typename Dst, typename Src, typename Func>
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
{
  Index dstRows = src.rows();
  Index dstCols = src.cols();
  if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
    dst.resize(dstRows, dstCols);

  // TODO check whether this is the right place to perform these checks:
  EIGEN_STATIC_ASSERT_LVALUE(Dst)
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)

  Assignment<Dst,Src,Func>::run(dst, src, func);
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
}

// forward declaration
template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);

// Generic Dense to Dense assignment
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
{
  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());

#ifndef EIGEN_NO_DEBUG
    internal::check_for_aliasing(dst, src);
#endif

    call_dense_assignment_loop(dst, src, func);
  }
};

// Generic assignment through evalTo.
// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism.
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
{
  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
  {
    eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
    src.evalTo(dst);
  }
};

} // namespace internal

} // end namespace Eigen

#endif // EIGEN_ASSIGN_EVALUATOR_H