Eigen  3.2.91
AltiVec/Complex.h
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_COMPLEX32_ALTIVEC_H
11 #define EIGEN_COMPLEX32_ALTIVEC_H
12 
13 namespace Eigen {
14 
15 namespace internal {
16 
17 static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
18 #ifdef _BIG_ENDIAN
19 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
20 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
21 #else
22 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 };
23 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
24 #endif
25 
26 //---------- float ----------
27 struct Packet2cf
28 {
29  EIGEN_STRONG_INLINE Packet2cf() {}
30  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
31  Packet4f v;
32 };
33 
34 template<> struct packet_traits<std::complex<float> > : default_packet_traits
35 {
36  typedef Packet2cf type;
37  typedef Packet2cf half;
38  enum {
39  Vectorizable = 1,
40  AlignedOnScalar = 1,
41  size = 2,
42 
43  HasAdd = 1,
44  HasSub = 1,
45  HasMul = 1,
46  HasDiv = 1,
47  HasNegate = 1,
48  HasAbs = 0,
49  HasAbs2 = 0,
50  HasMin = 0,
51  HasMax = 0,
52  HasSetLinear = 0
53  };
54 };
55 
56 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
57 
58 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
59 {
60  Packet2cf res;
61  /* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */
62  if((ptrdiff_t(&from) % 16) == 0)
63  res.v = pload<Packet4f>((const float *)&from);
64  else
65  res.v = ploadu<Packet4f>((const float *)&from);
66  res.v = vec_perm(res.v, res.v, p16uc_PSET64_HI);
67  return res;
68 }
69 
70 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
71 {
72  std::complex<float> EIGEN_ALIGN16 af[2];
73  af[0] = from[0*stride];
74  af[1] = from[1*stride];
75  return Packet2cf(vec_ld(0, (const float*)af));
76 }
77 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
78 {
79  std::complex<float> EIGEN_ALIGN16 af[2];
80  vec_st(from.v, 0, (float*)af);
81  to[0*stride] = af[0];
82  to[1*stride] = af[1];
83 }
84 
85 
86 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); }
87 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); }
88 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); }
89 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); }
90 
91 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
92 {
93  Packet4f v1, v2;
94 
95  // Permute and multiply the real parts of a and b
96  v1 = vec_perm(a.v, a.v, p16uc_PSET32_WODD);
97  // Get the imaginary parts of a
98  v2 = vec_perm(a.v, a.v, p16uc_PSET32_WEVEN);
99  // multiply a_re * b
100  v1 = vec_madd(v1, b.v, p4f_ZERO);
101  // multiply a_im * b and get the conjugate result
102  v2 = vec_madd(v2, b.v, p4f_ZERO);
103  v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR);
104  // permute back to a proper order
105  v2 = vec_perm(v2, v2, p16uc_COMPLEX32_REV);
106 
107  return Packet2cf(vec_add(v1, v2));
108 }
109 
110 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); }
111 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); }
112 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); }
113 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); }
114 
115 template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
116 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
117 
118 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
119 {
120  return pset1<Packet2cf>(*from);
121 }
122 
123 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
124 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
125 
126 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); }
127 
128 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
129 {
130  std::complex<float> EIGEN_ALIGN16 res[2];
131  pstore((float *)&res, a.v);
132 
133  return res[0];
134 }
135 
136 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
137 {
138  Packet4f rev_a;
139  rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX32_REV2);
140  return Packet2cf(rev_a);
141 }
142 
143 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
144 {
145  Packet4f b;
146  b = (Packet4f) vec_sld(a.v, a.v, 8);
147  b = padd(a.v, b);
148  return pfirst(Packet2cf(b));
149 }
150 
151 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
152 {
153  Packet4f b1, b2;
154 #ifdef _BIG_ENDIAN
155  b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
156  b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
157 #else
158  b1 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8);
159  b2 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8);
160 #endif
161  b2 = (Packet4f) vec_sld(b2, b2, 8);
162  b2 = padd(b1, b2);
163 
164  return Packet2cf(b2);
165 }
166 
167 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
168 {
169  Packet4f b;
170  Packet2cf prod;
171  b = (Packet4f) vec_sld(a.v, a.v, 8);
172  prod = pmul(a, Packet2cf(b));
173 
174  return pfirst(prod);
175 }
176 
177 template<int Offset>
178 struct palign_impl<Offset,Packet2cf>
179 {
180  static EIGEN_STRONG_INLINE void run(Packet2cf& first, const Packet2cf& second)
181  {
182  if (Offset==1)
183  {
184 #ifdef _BIG_ENDIAN
185  first.v = vec_sld(first.v, second.v, 8);
186 #else
187  first.v = vec_sld(second.v, first.v, 8);
188 #endif
189  }
190  }
191 };
192 
193 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
194 {
195  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
196  { return padd(pmul(x,y),c); }
197 
198  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
199  {
200  return internal::pmul(a, pconj(b));
201  }
202 };
203 
204 template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
205 {
206  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
207  { return padd(pmul(x,y),c); }
208 
209  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
210  {
211  return internal::pmul(pconj(a), b);
212  }
213 };
214 
215 template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
216 {
217  EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
218  { return padd(pmul(x,y),c); }
219 
220  EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
221  {
222  return pconj(internal::pmul(a, b));
223  }
224 };
225 
226 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
227 {
228  // TODO optimize it for AltiVec
229  Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
230  Packet4f s = vec_madd(b.v, b.v, p4f_ZERO);
231  return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX32_REV))));
232 }
233 
234 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x)
235 {
236  return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX32_REV));
237 }
238 
239 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
240 {
241  Packet4f tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
242  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
243  kernel.packet[0].v = tmp;
244 }
245 
246 //---------- double ----------
247 #ifdef __VSX__
248 struct Packet1cd
249 {
250  EIGEN_STRONG_INLINE Packet1cd() {}
251  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
252  Packet2d v;
253 };
254 
255 template<> struct packet_traits<std::complex<double> > : default_packet_traits
256 {
257  typedef Packet1cd type;
258  typedef Packet1cd half;
259  enum {
260  Vectorizable = 1,
261  AlignedOnScalar = 0,
262  size = 1,
263  HasHalfPacket = 0,
264 
265  HasAdd = 1,
266  HasSub = 1,
267  HasMul = 1,
268  HasDiv = 1,
269  HasNegate = 1,
270  HasAbs = 0,
271  HasAbs2 = 0,
272  HasMin = 0,
273  HasMax = 0,
274  HasSetLinear = 0
275  };
276 };
277 
278 template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
279 
280 template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
281 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
282 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
283 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
284 
285 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
286 { /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
287 
288 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
289 {
290  std::complex<double> EIGEN_ALIGN16 af[2];
291  af[0] = from[0*stride];
292  af[1] = from[1*stride];
293  return pload<Packet1cd>(af);
294 }
295 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to, const Packet1cd& from, Index stride)
296 {
297  std::complex<double> EIGEN_ALIGN16 af[2];
298  pstore<std::complex<double> >(af, from);
299  to[0*stride] = af[0];
300  to[1*stride] = af[1];
301 }
302 
303 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); }
304 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); }
305 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
306 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); }
307 
308 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
309 {
310  Packet2d a_re, a_im, v1, v2;
311 
312  // Permute and multiply the real parts of a and b
313  a_re = vec_perm(a.v, a.v, p16uc_PSET64_HI);
314  // Get the imaginary parts of a
315  a_im = vec_perm(a.v, a.v, p16uc_PSET64_LO);
316  // multiply a_re * b
317  v1 = vec_madd(a_re, b.v, p2d_ZERO);
318  // multiply a_im * b and get the conjugate result
319  v2 = vec_madd(a_im, b.v, p2d_ZERO);
320  v2 = (Packet2d) vec_sld((Packet4ui)v2, (Packet4ui)v2, 8);
321  v2 = (Packet2d) vec_xor((Packet2d)v2, (Packet2d) p2ul_CONJ_XOR1);
322 
323  return Packet1cd(vec_add(v1, v2));
324 }
325 
326 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); }
327 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); }
328 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); }
329 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); }
330 
331 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
332 {
333  return pset1<Packet1cd>(*from);
334 }
335 
336 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { vec_dstt((long *)addr, DST_CTRL(2,2,32), DST_CHAN); }
337 
338 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
339 {
340  std::complex<double> EIGEN_ALIGN16 res[2];
341  pstore<std::complex<double> >(res, a);
342 
343  return res[0];
344 }
345 
346 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
347 
348 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
349 {
350  return pfirst(a);
351 }
352 
353 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(const Packet1cd* vecs)
354 {
355  return vecs[0];
356 }
357 
358 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
359 {
360  return pfirst(a);
361 }
362 
363 template<int Offset>
364 struct palign_impl<Offset,Packet1cd>
365 {
366  static EIGEN_STRONG_INLINE void run(Packet1cd& /*first*/, const Packet1cd& /*second*/)
367  {
368  // FIXME is it sure we never have to align a Packet1cd?
369  // Even though a std::complex<double> has 16 bytes, it is not necessarily aligned on a 16 bytes boundary...
370  }
371 };
372 
373 template<> struct conj_helper<Packet1cd, Packet1cd, false,true>
374 {
375  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
376  { return padd(pmul(x,y),c); }
377 
378  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
379  {
380  return internal::pmul(a, pconj(b));
381  }
382 };
383 
384 template<> struct conj_helper<Packet1cd, Packet1cd, true,false>
385 {
386  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
387  { return padd(pmul(x,y),c); }
388 
389  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
390  {
391  return internal::pmul(pconj(a), b);
392  }
393 };
394 
395 template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
396 {
397  EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const
398  { return padd(pmul(x,y),c); }
399 
400  EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const
401  {
402  return pconj(internal::pmul(a, b));
403  }
404 };
405 
406 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
407 {
408  // TODO optimize it for AltiVec
409  Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
410  Packet2d s = vec_madd(b.v, b.v, p2d_ZERO_);
411  return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_REVERSE64))));
412 }
413 
414 EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
415 {
416  return Packet1cd(preverse(Packet2d(x.v)));
417 }
418 
419 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
420 {
421  Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_HI);
422  kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_TRANSPOSE64_LO);
423  kernel.packet[0].v = tmp;
424 }
425 #endif // __VSX__
426 } // end namespace internal
427 
428 } // end namespace Eigen
429 
430 #endif // EIGEN_COMPLEX32_ALTIVEC_H
Definition: LDLT.h:16
Definition: StdDeque.h:58
Definition: Constants.h:222
Definition: Eigen_Colamd.h:54