10 #ifndef EIGEN_COMPLEX_NEON_H
11 #define EIGEN_COMPLEX_NEON_H
// Sign-bit masks for conjugation: XOR-ing a float register (viewed as u32 lanes) with one of
// these flips the sign of every lane whose mask word is 0x80000000 and leaves the others alone.
// Four-lane mask, used with veorq_u32 by pconj and pmul for Packet2cf.
// NOTE(review): lane order is whatever EIGEN_INIT_NEON_PACKET4 produces — presumably lanes 1 and 3.
17 static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
// Two-lane variant, used with veor_u32 on float32x2_t data by predux_mul.
18 static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
// Constructors of Packet2cf.
// NOTE(review): the enclosing struct header and the declaration of member `v` are not visible in this listing.
// Default constructor; the empty body performs no explicit initialisation.
23 EIGEN_STRONG_INLINE Packet2cf() {}
// Explicit wrapping constructor: copies the given Packet4f into member `v`.
24 EIGEN_STRONG_INLINE
explicit Packet2cf(
const Packet4f& a) : v(a) {}
// packet_traits specialisation for std::complex<float>, derived from default_packet_traits.
// Both the full packet type and the half packet type are Packet2cf.
// NOTE(review): only the two typedefs are visible in this listing; the remainder of the body
// (and its braces) must be checked against the full file.
28 template<>
struct packet_traits<
std::complex<float> > : default_packet_traits
30 typedef Packet2cf type;
31 typedef Packet2cf half;
51 template<>
struct unpacket_traits<Packet2cf> {
typedef std::complex<float> type;
enum {size=2, alignment=
Aligned16};
typedef Packet2cf half; };
// Broadcasts one std::complex<float> into both complex slots of a Packet2cf.
// NOTE(review): the braces and the declaration of r64 are not visible in this listing.
53 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(
const std::complex<float>& from)
// Read the two floats stored at &from into a 64-bit register.
56 r64 = vld1_f32((
float *)&from);
// Use that pair as both the low and the high half of the 128-bit result.
58 return Packet2cf(vcombine_f32(r64, r64));
61 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(padd<Packet4f>(a.v,b.v)); }
62 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(psub<Packet4f>(a.v,b.v)); }
63 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(
const Packet2cf& a) {
return Packet2cf(pnegate<Packet4f>(a.v)); }
64 template<> EIGEN_STRONG_INLINE Packet2cf pconj(
const Packet2cf& a)
66 Packet4ui b = vreinterpretq_u32_f32(a.v);
67 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR)));
// Lane-wise product of two packets of std::complex<float>.
// NOTE(review): the braces and the declarations of v1/v2 are not visible in this listing, and the
// embedded line numbers jump from 83 to 87 — a step between the sign flip and the final add
// (presumably a swap of the lanes inside each pair) may be missing here; verify against the full file.
70 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
// v1 = lane 0 of each 64-bit half of a, duplicated over that half: (a0, a0, a2, a2).
75 v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
// v2 = lane 1 of each 64-bit half of a, duplicated over that half: (a1, a1, a3, a3).
77 v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
// Multiply the broadcast lane-0 values with b.
79 v1 = vmulq_f32(v1, b.v);
// Multiply the broadcast lane-1 values with b.
81 v2 = vmulq_f32(v2, b.v);
// Flip the sign of the lanes of v2 selected by p4ui_CONJ_XOR.
83 v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR));
// Sum the two partial products and wrap the result.
87 return Packet2cf(vaddq_f32(v1, v2));
90 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
92 return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
94 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
96 return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
98 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
100 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
102 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
104 return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
107 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet2cf(pload<Packet4f>((
const float*)from)); }
108 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet2cf(ploadu<Packet4f>((
const float*)from)); }
110 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(
const std::complex<float>* from) {
return pset1<Packet2cf>(*from); }
112 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
float*)to, from.v); }
113 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
float*)to, from.v); }
115 template<> EIGEN_DEVICE_FUNC
inline Packet2cf pgather<std::complex<float>, Packet2cf>(
const std::complex<float>* from, Index stride)
117 Packet4f res = pset1<Packet4f>(0.f);
118 res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
119 res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
120 res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
121 res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
122 return Packet2cf(res);
125 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to,
const Packet2cf& from, Index stride)
127 to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
128 to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
131 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<float> >(
const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((
float *)addr); }
// Extracts a std::complex<float> from the packet by spilling the register to a local array.
// NOTE(review): the braces and the return statement are not visible in this listing
// (presumably it returns x[0] — confirm against the full file).
133 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(
const Packet2cf& a)
// Scratch array for both complex values, declared with the EIGEN_ALIGN16 qualifier.
135 std::complex<float> EIGEN_ALIGN16 x[2];
// Store all four float lanes of a.v into the scratch array.
136 vst1q_f32((
float *)x, a.v);
// Reverses the order of the two complex values held in the packet.
// NOTE(review): the braces and the declaration of a_r128 are not visible in this listing.
140 template<> EIGEN_STRONG_INLINE Packet2cf preverse(
const Packet2cf& a)
142 float32x2_t a_lo, a_hi;
// Split the register into its low and high 64-bit halves (one complex value each).
145 a_lo = vget_low_f32(a.v);
146 a_hi = vget_high_f32(a.v);
// Recombine with the halves exchanged: the former high half becomes the low half.
147 a_r128 = vcombine_f32(a_hi, a_lo);
149 return Packet2cf(a_r128);
152 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(
const Packet2cf& a)
154 return Packet2cf(vrev64q_f32(a.v));
// Horizontal sum: adds the two complex values of the packet into one std::complex<float>.
// NOTE(review): the braces, the declarations of a1/a2 and the return statement are not
// visible in this listing (presumably it returns s — confirm against the full file).
157 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(
const Packet2cf& a)
160 std::complex<float> s;
// Split into the low and high 64-bit halves (one complex value each).
162 a1 = vget_low_f32(a.v);
163 a2 = vget_high_f32(a.v);
// Lane-wise add of the two halves.
164 a2 = vadd_f32(a1, a2);
// Write the two resulting floats into s.
165 vst1_f32((
float *)&s, a2);
// Reduces two packets at once: the low half of the result is low(vecs[0]) + high(vecs[0]),
// the high half is low(vecs[1]) + high(vecs[1]).
// NOTE(review): the braces are not visible in this listing.
170 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(
const Packet2cf* vecs)
172 Packet4f sum1, sum2, sum;
// sum1 gathers the low halves of vecs[0] and vecs[1].
175 sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
// sum2 gathers the high halves of vecs[0] and vecs[1].
176 sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
// Lane-wise add pairs each low half with its matching high half.
177 sum = vaddq_f32(sum1, sum2);
179 return Packet2cf(sum);
// Horizontal product: multiplies the two complex values held in the packet.
// NOTE(review): the braces and the return statement are not visible in this listing, and the
// embedded line numbers jump from 198 to 202 — a step between the sign flip and the add
// (presumably a swap of the two lanes of v2) may be missing here; verify against the full file.
182 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(
const Packet2cf& a)
184 float32x2_t a1, a2, v1, v2, prod;
185 std::complex<float> s;
// a1 = low 64-bit half (first complex), a2 = high 64-bit half (second complex).
187 a1 = vget_low_f32(a.v);
188 a2 = vget_high_f32(a.v);
// v1 = lane 0 of a1 in both lanes.
190 v1 = vdup_lane_f32(a1, 0);
// v2 = lane 1 of a1 in both lanes.
192 v2 = vdup_lane_f32(a1, 1);
// Multiply each broadcast value with the second complex.
194 v1 = vmul_f32(v1, a2);
196 v2 = vmul_f32(v2, a2);
// Flip the sign of the lanes of v2 selected by p2ui_CONJ_XOR.
198 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR));
// Sum the partial products.
202 prod = vadd_f32(v1, v2);
// Write the two resulting floats into s.
204 vst1_f32((
float *)&s, prod);
// palign_impl specialisation for Packet2cf.
// NOTE(review): the template header declaring `Offset`, the braces, and any condition on
// Offset guarding the statement below are not visible in this listing.
210 struct palign_impl<Offset,Packet2cf>
212 EIGEN_STRONG_INLINE
static void run(Packet2cf& first,
const Packet2cf& second)
// Overwrite `first` with lanes 2-3 of `first` followed by lanes 0-1 of `second`.
216 first.v = vextq_f32(first.v, second.v, 2);
221 template<>
struct conj_helper<Packet2cf, Packet2cf, false,true>
223 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const
224 {
return padd(pmul(x,y),c); }
226 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const
228 return internal::pmul(a, pconj(b));
232 template<>
struct conj_helper<Packet2cf, Packet2cf, true,false>
234 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const
235 {
return padd(pmul(x,y),c); }
237 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const
239 return internal::pmul(pconj(a), b);
243 template<>
struct conj_helper<Packet2cf, Packet2cf, true,true>
245 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const
246 {
return padd(pmul(x,y),c); }
248 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const
250 return pconj(internal::pmul(a, b));
// Complex division, computed as (a * conj(b)) divided lane-wise by (s + rev_s).
// NOTE(review): the braces and the declarations of s / rev_s are not visible in this listing.
254 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
// Numerator: the <false,true> conj_helper computes a * conj(b).
257 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
// s = every lane of b squared.
261 s = vmulq_f32(b.v, b.v);
// rev_s = s with the two lanes of each 64-bit pair exchanged, so s + rev_s carries
// the sum of both squares of a pair in each of its lanes.
262 rev_s = vrev64q_f32(s);
// Divide the numerator lane-wise by that sum.
264 return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
267 EIGEN_DEVICE_FUNC
inline void
268 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
269 Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
270 kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
271 kernel.packet[1].v = tmp;
275 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
// 64-bit sign-bit mask for conjugation: XOR-ing a double register (viewed as u64 lanes) with it
// flips the sign of the lane holding 0x8000000000000000. Used with veorq_u64 by pconj and pmul
// for Packet1cd.
// NOTE(review): lane order is whatever EIGEN_INIT_NEON_PACKET2 produces — presumably lane 1.
277 static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000);
// Constructors of Packet1cd.
// NOTE(review): the enclosing struct header and the declaration of member `v` are not visible in this listing.
// Default constructor; the empty body performs no explicit initialisation.
281 EIGEN_STRONG_INLINE Packet1cd() {}
// Explicit wrapping constructor: copies the given Packet2d into member `v`.
282 EIGEN_STRONG_INLINE
explicit Packet1cd(
const Packet2d& a) : v(a) {}
// packet_traits specialisation for std::complex<double>, derived from default_packet_traits.
// Both the full packet type and the half packet type are Packet1cd.
// NOTE(review): only the two typedefs are visible in this listing; the remainder of the body
// (and its braces) must be checked against the full file.
286 template<>
struct packet_traits<
std::complex<double> > : default_packet_traits
288 typedef Packet1cd type;
289 typedef Packet1cd half;
309 template<>
struct unpacket_traits<Packet1cd> {
typedef std::complex<double> type;
enum {size=1, alignment=
Aligned16};
typedef Packet1cd half; };
311 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(
const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet1cd(pload<Packet2d>((
const double*)from)); }
312 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(
const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet1cd(ploadu<Packet2d>((
const double*)from)); }
314 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(
const std::complex<double>& from)
315 {
return ploadu<Packet1cd>(&from); }
317 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(padd<Packet2d>(a.v,b.v)); }
318 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(psub<Packet2d>(a.v,b.v)); }
319 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(
const Packet1cd& a) {
return Packet1cd(pnegate<Packet2d>(a.v)); }
320 template<> EIGEN_STRONG_INLINE Packet1cd pconj(
const Packet1cd& a) {
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
322 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
327 v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
329 v2 = vdupq_lane_f64(vget_high_f64(a.v), 1);
331 v1 = vmulq_f64(v1, b.v);
333 v2 = vmulq_f64(v2, b.v);
335 v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
337 v2 = preverse<Packet2d>(v2);
339 return Packet1cd(vaddq_f64(v1, v2));
342 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
344 return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
346 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
348 return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
350 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
352 return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
354 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
356 return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
359 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(
const std::complex<double>* from) {
return pset1<Packet1cd>(*from); }
361 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
double*)to, from.v); }
362 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
double*)to, from.v); }
364 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<double> >(
const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((
double *)addr); }
366 template<> EIGEN_DEVICE_FUNC
inline Packet1cd pgather<std::complex<double>, Packet1cd>(
const std::complex<double>* from, Index stride)
368 Packet2d res = pset1<Packet2d>(0.0);
369 res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
370 res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
371 return Packet1cd(res);
374 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to,
const Packet1cd& from, Index stride)
376 to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
// Extracts the std::complex<double> held by the packet by storing it to a local variable.
// NOTE(review): the braces and the return statement are not visible in this listing
// (presumably it returns res — confirm against the full file).
380 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(
const Packet1cd& a)
// Scratch value declared with the EIGEN_ALIGN16 qualifier, as the aligned store below requires.
382 std::complex<double> EIGEN_ALIGN16 res;
// Aligned store of the packet into the scratch value.
383 pstore<std::complex<double> >(&res, a);
388 template<> EIGEN_STRONG_INLINE Packet1cd preverse(
const Packet1cd& a) {
return a; }
390 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(
const Packet1cd& a) {
return pfirst(a); }
392 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(
const Packet1cd* vecs) {
return vecs[0]; }
394 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(
const Packet1cd& a) {
return pfirst(a); }
// palign_impl specialisation for Packet1cd; run() takes two unnamed packet arguments,
// so its body cannot refer to them.
// NOTE(review): the template header declaring `Offset`, the braces and the body of run()
// are not visible in this listing.
397 struct palign_impl<Offset,Packet1cd>
399 static EIGEN_STRONG_INLINE
void run(Packet1cd& ,
const Packet1cd& )
406 template<>
struct conj_helper<Packet1cd, Packet1cd, false,true>
408 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const
409 {
return padd(pmul(x,y),c); }
411 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const
413 return internal::pmul(a, pconj(b));
417 template<>
struct conj_helper<Packet1cd, Packet1cd, true,false>
419 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const
420 {
return padd(pmul(x,y),c); }
422 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const
424 return internal::pmul(pconj(a), b);
428 template<>
struct conj_helper<Packet1cd, Packet1cd, true,true>
430 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const
431 {
return padd(pmul(x,y),c); }
433 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const
435 return pconj(internal::pmul(a, b));
// Complex division, computed as (a * conj(b)) divided lane-wise by (s + rev_s).
// NOTE(review): the braces are not visible in this listing and the embedded line numbers skip
// two lines before the first statement — verify against the full file.
439 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
// Numerator: the <false,true> conj_helper computes a * conj(b).
442 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
// s = both lanes of b squared.
443 Packet2d s = pmul<Packet2d>(b.v, b.v);
// rev_s = s passed through preverse<Packet2d> (presumably the two lanes exchanged), so that
// s + rev_s carries the sum of both squares in each lane.
444 Packet2d rev_s = preverse<Packet2d>(s);
// Divide the numerator lane-wise by that sum.
446 return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
449 EIGEN_STRONG_INLINE Packet1cd pcplxflip(
const Packet1cd& x)
451 return Packet1cd(preverse(Packet2d(x.v)));
454 EIGEN_STRONG_INLINE
void ptranspose(PacketBlock<Packet1cd,2>& kernel)
456 Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
457 kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
458 kernel.packet[1].v = tmp;
460 #endif // EIGEN_ARCH_ARM64
466 #endif // EIGEN_COMPLEX_NEON_H
Definition: StdDeque.h:58
Definition: Constants.h:222
Definition: Eigen_Colamd.h:54