29 vector
float d0, d1, s,
zero = (vector float)vec_splat_u32(0);
30 for(i=0; i<len-7; i+=8) {
31 d0 = vec_ld(0, src0+i);
32 s = vec_ld(0, src1+i);
33 d1 = vec_ld(16, src0+i);
34 d0 = vec_madd(d0, s, zero);
35 d1 = vec_madd(d1, vec_ld(16,src1+i), zero);
37 vec_st(d1, 16, dst+i);
42 const float *src1,
int len)
45 vector
float d,
s0,
s1, h0, l0,
46 s2,
s3,
zero = (vector float)vec_splat_u32(0);
48 for(i=0; i<len-7; i+=8) {
49 s1 = vec_ld(0, src1-i);
50 s0 = vec_ld(0, src0+i);
51 l0 = vec_mergel(s1, s1);
52 s3 = vec_ld(-16, src1-i);
53 h0 = vec_mergeh(s1, s1);
54 s2 = vec_ld(16, src0+i);
55 s1 = vec_mergeh(vec_mergel(l0,h0),
58 l0 = vec_mergel(s3, s3);
59 d = vec_madd(s0, s1, zero);
60 h0 = vec_mergeh(s3, s3);
62 s3 = vec_mergeh(vec_mergel(l0,h0),
64 d = vec_madd(s2, s3, zero);
70 const float *src1,
const float *src2,
75 vector
unsigned char align = vec_lvsr(0,dst),
76 mask = vec_lvsl(0, dst);
78 for (i=0; i<len-3; i+=4) {
79 t0 = vec_ld(0, dst+i);
80 t1 = vec_ld(15, dst+i);
81 s0 = vec_ld(0, src0+i);
82 s1 = vec_ld(0, src1+i);
83 s2 = vec_ld(0, src2+i);
84 edges = vec_perm(t1 ,t0,
mask);
85 d = vec_madd(s0,s1,s2);
86 t1 = vec_perm(d, edges, align);
87 t0 = vec_perm(edges, d, align);
88 vec_st(t1, 15, dst+i);
96 const vector
unsigned char reverse =
vcprm(3,2,1,0);
103 zero = (vector float)vec_splat_u32(0);
105 for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
106 s0 = vec_ld(i, src0);
107 s1 = vec_ld(j, src1);
111 s1 = vec_perm(s1, s1, reverse);
112 wj = vec_perm(wj, wj, reverse);
114 t0 = vec_madd(s0, wj, zero);
115 t0 = vec_nmsub(s1, wi, t0);
116 t1 = vec_madd(s0, wi, zero);
117 t1 = vec_madd(s1, wj, t1);
118 t1 = vec_perm(t1, t1, reverse);