TensorMorphing.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

namespace Eigen {

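/** \class TensorReshaping
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor reshaping class.
  */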
namespace internal {
template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename packet_traits<Scalar>::type Packet;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<NewDimensions>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType>& type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

} // end namespace internal


template<typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Packet Packet;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
  typedef typename internal::remove_const<typename XprType::PacketReturnType>::type PacketReturnType;
  typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
      : m_xpr(expr), m_dims(dims) {}

  EIGEN_DEVICE_FUNC
  const NewDimensions& dimensions() const { return m_dims; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
  {
    typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

  template<typename OtherDerived>
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
  {
    typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

  protected:
  typename XprType::Nested m_xpr;
  const NewDimensions m_dims;
};
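// Example usage (a minimal sketch, not part of this header): reshaping keeps
// the total number of coefficients and only changes how they are indexed.
// Assuming a 2x3x4 float tensor named `input`:
//
//   Eigen::Tensor<float, 3> input(2, 3, 4);
//   input.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> new_dims{{4, 6}};
//   Eigen::Tensor<float, 2> output = input.reshape(new_dims);  // read through the view
//   input.reshape(new_dims) = output;                          // write through the view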

// Eval as rvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketReturnType PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_impl.data(); }

  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};


// Eval as lvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketReturnType PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }
};

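/** \class TensorSlicing
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor slicing class.
  */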
namespace internal {
template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename packet_traits<Scalar>::type Packet;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

} // end namespace internal


template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Packet Packet;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketReturnType PacketReturnType;
  typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
      : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

  EIGEN_DEVICE_FUNC
  const StartIndices& startIndices() const { return m_indices; }
  EIGEN_DEVICE_FUNC
  const Sizes& sizes() const { return m_sizes; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const { return m_xpr; }

  template<typename OtherDerived>
  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
  {
    typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

  EIGEN_DEVICE_FUNC
  EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
  {
    typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
    Assign assign(*this, other);
    internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
    return *this;
  }

  protected:
  typename XprType::Nested m_xpr;
  const StartIndices m_indices;
  const Sizes m_sizes;
};
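// Example usage (a minimal sketch, not part of this header): extracting a 2x2
// block that starts at row 1, column 2 of a 4x6 tensor, then writing back
// through the same view:
//
//   Eigen::Tensor<float, 2> input(4, 6);
//   input.setRandom();
//   Eigen::array<Eigen::DenseIndex, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::DenseIndex, 2> extents{{2, 2}};
//   Eigen::Tensor<float, 2> block = input.slice(offsets, extents);  // read through the view
//   input.slice(offsets, extents) = block;                          // write through the view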

// Fixme: figure out the exact threshold
namespace {
template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }

 private:
  Index threshold_;
};

// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
};
#endif
}
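// Note: the GPU specialization above only fires for contiguous runs larger
// than 4*1024*1024 coefficients (more than 16 MB of float data), so small
// slices fall back to regular coefficient-by-coefficient evaluation.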

// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    }
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketReturnType PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
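      // Worked example (illustrative): for a column-major 5x7 input sliced to
      // extents {5, 3}, the first dimension matches the input, so each copied
      // run spans 5*3 = 15 contiguous coefficients; with extents {3, 3} only
      // runs of 3 coefficients are contiguous in the source.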
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
      if (trigger(contiguous_values)) {
        Scalar* src = (Scalar*)m_impl.data();
        for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      typename internal::remove_const<CoeffReturnType>::type values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<Index, NumDims>& coords)
  {
    // Translate the output coordinates into input coordinates by adding the
    // slice offsets component-wise.
    array<Index, NumDims> inputCoords;
    for (int i = 0; i < NumDims; ++i) {
      inputCoords[i] = coords[i] + this->m_offsets[i];
    }
    return m_impl.coeff(inputCoords);
  }

  // Return a direct pointer into the input buffer when the slice is laid out
  // contiguously in memory (only size-1 dimensions may follow the first
  // dimension that is actually sliced, in storage order); otherwise return
  // NULL to force regular evaluation.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const {
    CoeffReturnType* result = m_impl.data();
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }

 protected:
  // Map a flattened index within the slice to the corresponding flattened
  // index within the input tensor.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  Dimensions m_dimensions;
  const StartIndices m_offsets;
};


// Eval as lvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename XprType::PacketReturnType PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(const array<Index, NumDims>& coords)
  {
    // Translate the output coordinates into input coordinates by adding the
    // slice offsets component-wise.
    array<Index, NumDims> inputCoords;
    for (int i = 0; i < NumDims; ++i) {
      inputCoords[i] = coords[i] + this->m_offsets[i];
    }
    return this->m_impl.coeffRef(inputCoords);
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H