100 #undef PROFILE_THE_BEAST
/* Byte-selection patterns for vec_perm.  Presumably these rows belong to the
 * perm_rgb_0..perm_rgb_3 tables that vec_merge3 passes to vec_perm --
 * confirm against the full declaration. */
144 const vector
unsigned char
146 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a},
148 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f},
150 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05},
152 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f};
/*
 * vec_merge3(x2,x1,x0,y0,y1,y2): weave three input vectors into the three
 * output vectors y0, y1, y2.
 *
 * x0 and x1 are interleaved with vec_mergeh / vec_mergel, and x2 is then
 * threaded in with vec_perm using perm_rgb_0 .. perm_rgb_3.
 * Note the parameter order: the inputs are listed as x2, x1, x0.
 * The arguments are substituted textually and each input is used more than
 * once, so pass plain variables rather than expressions with side effects.
 */
154 #define vec_merge3(x2,x1,x0,y0,y1,y2) \
156 __typeof__(x0) o0,o2,o3; \
157 o0 = vec_mergeh (x0,x1); \
158 y0 = vec_perm (o0, x2, perm_rgb_0); \
159 o2 = vec_perm (o0, x2, perm_rgb_1); \
160 o3 = vec_mergel (x0,x1); \
161 y1 = vec_perm (o3,o2,perm_rgb_2); \
162 y2 = vec_perm (o3,o2,perm_rgb_3); \
/*
 * vec_mstbgr24(x0,x1,x2,ptr): combine the three channel vectors with
 * vec_merge3(x0,x1,x2,...) and store the three resulting vectors through
 * ptr with vec_st.  ptr is post-incremented after each store, so it must be
 * a modifiable vector pointer and ends up three vectors further on.
 */
165 #define vec_mstbgr24(x0,x1,x2,ptr) \
167 __typeof__(x0) _0,_1,_2; \
168 vec_merge3 (x0,x1,x2,_0,_1,_2); \
169 vec_st (_0, 0, ptr++); \
170 vec_st (_1, 0, ptr++); \
171 vec_st (_2, 0, ptr++); \
/*
 * vec_mstrgb24(x0,x1,x2,ptr): same as vec_mstbgr24 except that the channel
 * vectors are handed to vec_merge3 in reversed order (x2,x1,x0).  Stores
 * three vectors through ptr with vec_st and post-increments ptr after each
 * store, so ptr must be a modifiable vector pointer.
 */
174 #define vec_mstrgb24(x0,x1,x2,ptr) \
176 __typeof__(x0) _0,_1,_2; \
177 vec_merge3 (x2,x1,x0,_0,_1,_2); \
178 vec_st (_0, 0, ptr++); \
179 vec_st (_1, 0, ptr++); \
180 vec_st (_2, 0, ptr++); \
/*
 * vec_mstrgb32(T,x0,x1,x2,x3,ptr): interleave four channel vectors into
 * 4-byte groups ordered x0,x1,x2,x3 and store them at ptr.
 *
 * T is the vector type the merged results are cast to and the element type
 * ptr is cast to for the stores.  The byte-wise merges pair x0 with x1 and
 * x2 with x3; merging those pairs as unsigned shorts yields the 4-byte
 * groups.  The high halves are stored at byte offsets 0 and 16, the low
 * halves at byte offsets 32 and 48.
 */
187 #define vec_mstrgb32(T,x0,x1,x2,x3,ptr) \
190 _0 = vec_mergeh (x0,x1); \
191 _1 = vec_mergeh (x2,x3); \
192 _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
193 _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
194 vec_st (_2, 0*16, (T *)ptr); \
195 vec_st (_3, 1*16, (T *)ptr); \
196 _0 = vec_mergel (x0,x1); \
197 _1 = vec_mergel (x2,x3); \
198 _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
199 _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
200 vec_st (_2, 2*16, (T *)ptr); \
201 vec_st (_3, 3*16, (T *)ptr); \
223 (vector signed short) /* result is viewed as eight signed 16-bit lanes */ \
224 vec_perm(x,(__typeof__(x)){0}, /* second operand is an all-zero vector */ \
225 ((vector unsigned char){0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
226 0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07})) /* zero byte paired with bytes 0..7 of x */
228 (vector signed short) /* same widening, applied to the other half of x */ \
229 vec_perm(x,(__typeof__(x)){0}, /* second operand is an all-zero vector */ \
230 ((vector unsigned char){0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
231 0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F})) /* zero byte paired with bytes 8..15 of x */
/* Clamp every signed 16-bit lane of x to the inclusive range [16, 235]. */
#define vec_clip_s16(x)                                                        \
    vec_min(vec_max(x,                                                         \
                    ((vector signed short){ 16, 16, 16, 16, 16, 16, 16, 16})), \
            ((vector signed short){235,235,235,235,235,235,235,235}))
/*
 * Narrow two vectors of signed 16-bit lanes (x, then y) into one vector of
 * unsigned bytes.  Negative lanes are raised to 0 first; the lanes are then
 * reinterpreted as unsigned and narrowed with vec_packs.
 */
#define vec_packclp(x,y)                                                \
    (vector unsigned char)                                              \
        vec_packs((vector unsigned short) vec_max(x, vec_splat_s16(0)), \
                  (vector unsigned short) vec_max(y, vec_splat_s16(0)))
/*
 * Converts one vector each of Y, U and V samples (signed 16-bit lanes) into
 * R, G and B vectors, written through the R, G and B pointers.  All
 * coefficients come from the context c: CY, OY, CSHIFT, CBU, CRV, CGU, CGV.
 */
246 vector
signed short Y, vector
signed short U, vector
signed short V,
247 vector
signed short *
R, vector
signed short *
G, vector
signed short *
B)
249 vector
signed short vx,ux,uvx;
/* Luma: combine Y with the CY coefficient and the OY offset. */
251 Y = vec_mradds (Y, c->CY, c->OY);
/* Chroma: subtract 128 from every lane of U and V. */
252 U = vec_sub (U,(vector
signed short)
253 vec_splat((vector
signed short){128},0));
254 V = vec_sub (V,(vector
signed short)
255 vec_splat((vector
signed short){128},0));
/* Blue: U shifted left by CSHIFT, combined with CBU and added to Y. */
258 ux = vec_sl (U, c->CSHIFT);
259 *B = vec_mradds (ux, c->CBU, Y);
/* Red: V shifted left by CSHIFT, combined with CRV and added to Y. */
262 vx = vec_sl (V, c->CSHIFT);
263 *R = vec_mradds (vx, c->CRV, Y);
/* Green: the U term (CGU) is accumulated onto Y, then the V term (CGV). */
266 uvx = vec_mradds (U, c->CGU, Y);
267 *G = vec_mradds (V, c->CGV, uvx);
/*
 * DEFCSP420_CVT(name, out_pixels) generates
 *
 *   static int altivec_<name>(SwsContext *c, const unsigned char **in,
 *                             int *instrides, int srcSliceY, int srcSliceH,
 *                             unsigned char **oplanes, int *outstrides)
 *
 * The generated function reads luma from in[0] and chroma from in[1] / in[2]
 * and writes packed pixels to oplanes[0], starting at row srcSliceY.
 *
 * Each outer iteration handles two luma rows (y1i, y2i) that share one row of
 * chroma; output goes to two row pointers, oute and outo, one outstrides[0]
 * apart.  Input vectors are fetched with the vec_lvsl + vec_perm idiom over
 * two adjacent vectors, so the source pointers need not be 16-byte aligned.
 * The chroma contributions (ux, vx, uvx) are computed once, duplicated with
 * vec_mergeh / vec_mergel, and added to the luma of both rows.  The sums are
 * packed to bytes with vec_packclp and handed to
 * out_pixels(R, G, B, <row pointer>), which performs the store.
 *
 * out_pixels must accept (R, G, B, ptr).
 */
278 #define DEFCSP420_CVT(name,out_pixels) \
279 static int altivec_##name (SwsContext *c, \
280 const unsigned char **in, int *instrides, \
281 int srcSliceY, int srcSliceH, \
282 unsigned char **oplanes, int *outstrides) \
287 int instrides_scl[3]; \
288 vector unsigned char y0,y1; \
290 vector signed char u,v; \
292 vector signed short Y0,Y1,Y2,Y3; \
293 vector signed short U,V; \
294 vector signed short vx,ux,uvx; \
295 vector signed short vx0,ux0,uvx0; \
296 vector signed short vx1,ux1,uvx1; \
297 vector signed short R0,G0,B0; \
298 vector signed short R1,G1,B1; \
299 vector unsigned char R,G,B; \
301 vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \
302 vector unsigned char align_perm; \
304 vector signed short \
312 vector unsigned short lCSHIFT = c->CSHIFT; \
314 const ubyte *y1i = in[0]; \
315 const ubyte *y2i = in[0]+instrides[0]; \
316 const ubyte *ui = in[1]; \
317 const ubyte *vi = in[2]; \
319 vector unsigned char *oute \
320 = (vector unsigned char *) \
321 (oplanes[0]+srcSliceY*outstrides[0]); \
322 vector unsigned char *outo \
323 = (vector unsigned char *) \
324 (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]); \
327 instrides_scl[0] = instrides[0]*2-w; \
328 instrides_scl[1] = instrides[1]-w/2; \
329 instrides_scl[2] = instrides[2]-w/2; \
332 for (i=0;i<h/2;i++) { /* two luma rows per pass */ \
333 vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0); \
334 vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1); \
336 for (j=0;j<w/16;j++) { \
338 y1ivP = (vector unsigned char *)y1i; \
339 y2ivP = (vector unsigned char *)y2i; \
340 uivP = (vector unsigned char *)ui; \
341 vivP = (vector unsigned char *)vi; \
343 align_perm = vec_lvsl (0, y1i); /* realign a possibly unaligned load */ \
344 y0 = (vector unsigned char) \
345 vec_perm (y1ivP[0], y1ivP[1], align_perm); \
347 align_perm = vec_lvsl (0, y2i); \
348 y1 = (vector unsigned char) \
349 vec_perm (y2ivP[0], y2ivP[1], align_perm); \
351 align_perm = vec_lvsl (0, ui); \
352 u = (vector signed char) \
353 vec_perm (uivP[0], uivP[1], align_perm); \
355 align_perm = vec_lvsl (0, vi); \
356 v = (vector signed char) \
357 vec_perm (vivP[0], vivP[1], align_perm); \
359 u = (vector signed char) /* subtract the 128 bias from U */ \
360 vec_sub (u,(vector signed char) \
361 vec_splat((vector signed char){128},0)); \
362 v = (vector signed char) /* subtract the 128 bias from V */ \
363 vec_sub (v,(vector signed char) \
364 vec_splat((vector signed char){128},0)); \
366 U = vec_unpackh (u); \
367 V = vec_unpackh (v); \
375 Y0 = vec_mradds (Y0, lCY, lOY); \
376 Y1 = vec_mradds (Y1, lCY, lOY); \
377 Y2 = vec_mradds (Y2, lCY, lOY); \
378 Y3 = vec_mradds (Y3, lCY, lOY); \
381 ux = vec_sl (U, lCSHIFT); \
382 ux = vec_mradds (ux, lCBU, (vector signed short){0}); \
383 ux0 = vec_mergeh (ux,ux); /* each chroma term duplicated for two adjacent lanes */ \
384 ux1 = vec_mergel (ux,ux); \
387 vx = vec_sl (V, lCSHIFT); \
388 vx = vec_mradds (vx, lCRV, (vector signed short){0}); \
389 vx0 = vec_mergeh (vx,vx); \
390 vx1 = vec_mergel (vx,vx); \
393 uvx = vec_mradds (U, lCGU, (vector signed short){0}); \
394 uvx = vec_mradds (V, lCGV, uvx); \
395 uvx0 = vec_mergeh (uvx,uvx); \
396 uvx1 = vec_mergel (uvx,uvx); \
398 R0 = vec_add (Y0,vx0); \
399 G0 = vec_add (Y0,uvx0); \
400 B0 = vec_add (Y0,ux0); \
401 R1 = vec_add (Y1,vx1); \
402 G1 = vec_add (Y1,uvx1); \
403 B1 = vec_add (Y1,ux1); \
405 R = vec_packclp (R0,R1); \
406 G = vec_packclp (G0,G1); \
407 B = vec_packclp (B0,B1); \
409 out_pixels(R,G,B,oute); /* row addressed by oute */ \
411 R0 = vec_add (Y2,vx0); \
412 G0 = vec_add (Y2,uvx0); \
413 B0 = vec_add (Y2,ux0); \
414 R1 = vec_add (Y3,vx1); \
415 G1 = vec_add (Y3,uvx1); \
416 B1 = vec_add (Y3,ux1); \
417 R = vec_packclp (R0,R1); \
418 G = vec_packclp (G0,G1); \
419 B = vec_packclp (B0,B1); \
422 out_pixels(R,G,B,outo); /* row addressed by outo, reusing the same chroma terms */ \
431 outo += (outstrides[0])>>4; \
432 oute += (outstrides[0])>>4; \
434 ui += instrides_scl[1]; \
435 vi += instrides_scl[2]; \
436 y1i += instrides_scl[0]; \
437 y2i += instrides_scl[0]; \
/*
 * Pixel-store adapters taking (a,b,c,ptr): a, b and c are the three
 * colour-channel vectors and ptr is the destination.  They have the
 * four-argument shape that DEFCSP420_CVT requires of its out_pixels argument.
 *
 * The four 32-bit variants add a constant 255 channel, placed first or last
 * in the argument list of vec_mstrgb32; out_rgb24 / out_bgr24 forward to the
 * 24-bit store macros unchanged.
 *
 * A brace initialiser such as (T){255} sets only element 0 and zero-fills
 * the remaining elements, so the constant is broadcast with vec_splat(..., 0)
 * to make every lane 255 rather than just the first one.
 */
#define out_abgr(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a))vec_splat((__typeof__ (a)){255},0)),c,b,a,ptr)
#define out_bgra(a,b,c,ptr) vec_mstrgb32(__typeof__(a),c,b,a,((__typeof__ (a))vec_splat((__typeof__ (a)){255},0)),ptr)
#define out_rgba(a,b,c,ptr) vec_mstrgb32(__typeof__(a),a,b,c,((__typeof__ (a))vec_splat((__typeof__ (a)){255},0)),ptr)
#define out_argb(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a))vec_splat((__typeof__ (a)){255},0)),a,b,c,ptr)
#define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
#define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
/* Byte-selection patterns for vec_perm.  Presumably these rows belong to the
 * demux_u / demux_v / demux_y tables used to split packed input -- confirm
 * against the full declaration. */
461 const vector
unsigned char
465 0x10,0x0c,0x10,0x0c},
469 0x10,0x0E,0x10,0x0E},
473 0x10,0x0D,0x10,0x0F};
/*
 * Converter with the same in / instrides / srcSliceY / srcSliceH / oplanes /
 * outstrides parameters as the generated altivec_* functions.  The visible
 * part loads packed input 16 bytes at a time and separates it into U, V and
 * Y vectors with the demux_u / demux_v / demux_y permutes; output starts at
 * row srcSliceY of oplanes[0].
 */
479 const unsigned char **in,
int *instrides,
480 int srcSliceY,
int srcSliceH,
481 unsigned char **oplanes,
int *outstrides)
486 vector
unsigned char uyvy;
487 vector
signed short Y,
U,
V;
488 vector
signed short R0,G0,
B0,
R1,G1,
B1;
489 vector
unsigned char R,
G,
B;
490 vector
unsigned char *out;
494 out = (vector
unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
497 for (j=0;j<w/16;j++) {
/* First 16 input bytes: pull U, V and Y out of the packed data; the second
 * vec_perm operand is an all-zero vector. */
498 uyvy = vec_ld (0, img);
499 U = (vector
signed short)
500 vec_perm (uyvy, (vector
unsigned char){0},
demux_u);
502 V = (vector
signed short)
503 vec_perm (uyvy, (vector
unsigned char){0},
demux_v);
505 Y = (vector
signed short)
506 vec_perm (uyvy, (vector
unsigned char){0},
demux_y);
/* Next 16 input bytes (offset 16 from img): same separation again. */
510 uyvy = vec_ld (16, img);
511 U = (vector
signed short)
512 vec_perm (uyvy, (vector
unsigned char){0},
demux_u);
514 V = (vector
signed short)
515 vec_perm (uyvy, (vector
unsigned char){0},
demux_v);
517 Y = (vector
signed short)
518 vec_perm (uyvy, (vector
unsigned char){0},
demux_y);
/* A source width that is not a multiple of 16 gets no converter (NULL). */
555 if ((c->
srcW & 0xf) != 0)
return NULL;
/* Tests whether the source height is odd. */
564 if ((c->
srcH & 0x1) != 0)
/* Each return below hands back one altivec_yuv2_* converter; presumably
 * these are the functions generated by DEFCSP420_CVT -- confirm. */
570 return altivec_yuv2_rgb24;
573 return altivec_yuv2_bgr24;
576 return altivec_yuv2_argb;
579 return altivec_yuv2_abgr;
582 return altivec_yuv2_rgba;
585 return altivec_yuv2_bgra;
/* Anything not matched above yields NULL. */
586 default:
return NULL;
595 default:
return NULL;
/*
 * Builds the per-context coefficient vectors.  Six 16-bit values are written
 * to buf.tmp[0..5] and each is then broadcast from buf.vec into one context
 * field.  This presumes buf overlays tmp[] and vec in the same storage --
 * confirm against the declaration of buf.
 */
607 vector
signed short vec;
/* tmp[0]: luma scale from contrast; tmp[1]: luma offset from brightness. */
610 buf.tmp[0] = ((0xffffLL) * contrast>>8)>>9;
611 buf.tmp[1] = -256*brightness;
/* tmp[2..5]: chroma coefficients from inv_table[0..3], scaled by contrast
 * and saturation; the two taken from inv_table[2] and [3] are negated. */
612 buf.tmp[2] = (inv_table[0]>>3) *(contrast>>16)*(saturation>>16);
613 buf.tmp[3] = (inv_table[1]>>3) *(contrast>>16)*(saturation>>16);
614 buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16));
615 buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16));
/* CSHIFT is a constant shift count of 2 in every lane. */
618 c->CSHIFT = (vector
unsigned short)vec_splat_u16(2);
/* Broadcast element k of buf.vec: 0->CY, 1->OY, 2->CRV, 3->CBU, 4->CGU, 5->CGV. */
619 c->CY = vec_splat ((vector
signed short)buf.vec, 0);
620 c->OY = vec_splat ((vector
signed short)buf.vec, 1);
621 c->CRV = vec_splat ((vector
signed short)buf.vec, 2);
622 c->CBU = vec_splat ((vector
signed short)buf.vec, 3);
623 c->CGU = vec_splat ((vector
signed short)buf.vec, 4);
624 c->CGV = vec_splat ((vector
signed short)buf.vec, 5);
/*
 * Vertical-filter-and-pack routine: for each group of 16 output pixels it
 * accumulates lumFilterSize luma rows and chrFilterSize chroma rows against
 * the per-line coefficient vectors in c->vYCoeffsBank / c->vCCoeffsBank
 * (indexed by dstY), shifts the sums right by 4, and duplicates chroma for
 * pairs of pixels.  The output is written through dest.
 */
631 const int16_t **lumSrc,
int lumFilterSize,
632 const int16_t *chrFilter,
const int16_t **chrUSrc,
633 const int16_t **chrVSrc,
int chrFilterSize,
634 const int16_t **alpSrc, uint8_t *dest,
638 vector
signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,
U,
V;
639 vector
signed short R0,G0,
B0,
R1,G1,
B1;
641 vector
unsigned char R,
G,
B;
642 vector
unsigned char *out,*nout;
644 vector
signed short RND = vec_splat_s16(1<<3);
645 vector
unsigned short SCL = vec_splat_u16(4);
648 vector
signed short *YCoeffs, *CCoeffs;
/* Coefficient rows for this output line. */
650 YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
651 CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
653 out = (vector
unsigned char *)dest;
/* Main loop: 16 output pixels per iteration. */
655 for (i=0; i<dstW; i+=16) {
/* Accumulate the luma taps for 16 pixels (two vectors of 8 lanes). */
659 for (j=0; j<lumFilterSize; j++) {
660 X0 = vec_ld (0, &lumSrc[j][i]);
661 X1 = vec_ld (16, &lumSrc[j][i]);
662 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
663 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
/* Accumulate the chroma taps; chroma is read at index i/2. */
669 for (j=0; j<chrFilterSize; j++) {
670 X = vec_ld (0, &chrUSrc[j][i/2]);
671 U = vec_mradds (X, CCoeffs[j], U);
672 X = vec_ld (0, &chrVSrc[j][i/2]);
673 V = vec_mradds (X, CCoeffs[j], V);
/* Scale the accumulated sums down by 4 bits (arithmetic shift). */
677 Y0 = vec_sra (Y0, SCL);
678 Y1 = vec_sra (Y1, SCL);
679 U = vec_sra (U, SCL);
680 V = vec_sra (V, SCL);
/* Duplicate every chroma lane so each value covers two pixels. */
696 U0 = vec_mergeh (U,U);
697 V0 = vec_mergeh (V,V);
699 U1 = vec_mergel (U,U);
700 V1 = vec_mergel (V,V);
/* Function-local flag so the guarded code runs only on the first hit. */
720 static int printed_error_message;
721 if (!printed_error_message) {
724 printed_error_message=1;
/* Second copy of the filter/scale/duplicate sequence above. */
737 for (j=0; j<lumFilterSize; j++) {
738 X0 = vec_ld (0, &lumSrc[j][i]);
739 X1 = vec_ld (16, &lumSrc[j][i]);
740 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
741 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
747 for (j=0; j<chrFilterSize; j++) {
748 X = vec_ld (0, &chrUSrc[j][i/2]);
749 U = vec_mradds (X, CCoeffs[j], U);
750 X = vec_ld (0, &chrVSrc[j][i/2]);
751 V = vec_mradds (X, CCoeffs[j], V);
755 Y0 = vec_sra (Y0, SCL);
756 Y1 = vec_sra (Y1, SCL);
757 U = vec_sra (U, SCL);
758 V = vec_sra (V, SCL);
774 U0 = vec_mergeh (U,U);
775 V0 = vec_mergeh (V,V);
777 U1 = vec_mergel (U,U);
778 V1 = vec_mergel (V,V);
/* This pass is directed at a scratch buffer instead of dest. */
787 nout = (vector
unsigned char *)scratch;
/* NOTE(review): dest is indexed here as uint32_t (4 bytes per pixel), yet the
 * length passed to memcpy is (dstW-i)/4 bytes.  Copying dstW-i pixels of
 * 4 bytes each would need (dstW-i)*4 bytes -- verify the intended length. */
802 memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
807 #define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
808 void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
809 const int16_t **lumSrc, int lumFilterSize, \
810 const int16_t *chrFilter, const int16_t **chrUSrc, \
811 const int16_t **chrVSrc, int chrFilterSize, \
812 const int16_t **alpSrc, uint8_t *dest, \
813 int dstW, int dstY) \
815 ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
816 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
817 alpSrc, dest, dstW, dstY, pixfmt); \