/* SWAR byte-replication constants: ~0UL/255 is 0x0101...01, so these put
 * 0x7f / 0x80 into every byte of an unsigned long.
 * NOTE(review): the leading "58"/"59" tokens (and similar numbers on the
 * lines below) appear to be line numbers from a pasted listing, not code —
 * confirm and strip them in the real source file. */
58 #define pb_7f (~0UL/255 * 0x7f)
59 #define pb_80 (~0UL/255 * 0x80)
/* 8x8 scan-order table body: each value 0..63 appears exactly once.
 * The ordering matches the classic zig-zag DCT coefficient scan
 * (presumably ff_zigzag_direct — the array declaration is not visible
 * in this fragment; confirm against the full file). */
62 0, 1, 8, 16, 9, 2, 3, 10,
63 17, 24, 32, 25, 18, 11, 4, 5,
64 12, 19, 26, 33, 40, 48, 41, 34,
65 27, 20, 13, 6, 7, 14, 21, 28,
66 35, 42, 49, 56, 57, 50, 43, 36,
67 29, 22, 15, 23, 30, 37, 44, 51,
68 58, 59, 52, 45, 38, 31, 39, 46,
69 53, 60, 61, 54, 47, 55, 62, 63
/* 8x8 scan-order table body (values 0..63, each once).  The early
 * emphasis on vertical neighbours (0,8 then 1,9 ...) suggests an
 * interlaced-friendly scan — presumably ff_zigzag248_direct; the array
 * declaration is not visible here, so verify the name in the full file. */
75 0, 8, 1, 9, 16, 24, 2, 10,
76 17, 25, 32, 40, 48, 56, 33, 41,
77 18, 26, 3, 11, 4, 12, 19, 27,
78 34, 42, 49, 57, 50, 58, 35, 43,
79 20, 28, 5, 13, 6, 14, 21, 29,
80 36, 44, 51, 59, 52, 60, 37, 45,
81 22, 30, 7, 15, 23, 31, 38, 46,
82 53, 61, 54, 62, 39, 47, 55, 63,
/* 8x8 scan-order table body walking rows first (0,1,2,3 ...) —
 * presumably the "alternate horizontal" scan used by some MPEG
 * profiles; declaration not visible in this fragment, confirm name. */
89 0, 1, 2, 3, 8, 9, 16, 17,
90 10, 11, 4, 5, 6, 7, 15, 14,
91 13, 12, 19, 18, 24, 25, 32, 33,
92 26, 27, 20, 21, 22, 23, 28, 29,
93 30, 31, 34, 35, 40, 41, 48, 49,
94 42, 43, 36, 37, 38, 39, 44, 45,
95 46, 47, 50, 51, 56, 57, 58, 59,
96 52, 53, 54, 55, 60, 61, 62, 63,
/* 8x8 scan-order table body walking columns first (0,8,16,24 ...) —
 * presumably the "alternate vertical" scan; declaration not visible
 * in this fragment, confirm name against the full file. */
100 0, 8, 16, 24, 1, 9, 2, 10,
101 17, 25, 32, 40, 48, 56, 57, 49,
102 41, 33, 26, 18, 3, 11, 4, 12,
103 19, 27, 34, 42, 50, 58, 35, 43,
104 51, 59, 20, 28, 5, 13, 6, 14,
105 21, 29, 36, 44, 52, 60, 37, 45,
106 53, 61, 22, 30, 7, 15, 23, 31,
107 38, 46, 54, 62, 39, 47, 55, 63,
/* 64-entry permutation table in hex (every value < 0x40, i.e. a
 * permutation of 0..63).  Presumably an IDCT coefficient permutation
 * (e.g. for a SIMD "simple" IDCT) — the declaration is not visible in
 * this fragment, so the exact purpose must be confirmed. */
112 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
113 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
114 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
115 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
116 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
117 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
118 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
119 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
/* Fragment of a scan-table initialisation loop: fetch the i-th entry of
 * the source scan order (the enclosing loop and the use of j are not
 * visible in this chunk). */
132 j = src_scantable[i];
/* Fragment of an IDCT-permutation initialiser: fills idct_permutation[]
 * with a bit-shuffle of the 6-bit coefficient index i, selected by
 * idct_permutation_type.  Only the interesting case bodies survive in
 * this chunk; case labels/braces are missing from the extraction. */
                               int idct_permutation_type)
153 switch(idct_permutation_type){
/* identity permutation (no reordering) */
156 idct_permutation[i]= i;
/* keep the row (bits 3..5), rotate the 3 column bits: 2->1->0->2 */
160 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
/* full transpose: swap row and column 3-bit fields */
168 idct_permutation[i]= ((i&7)<<3) | (i>>3);
/* partial transpose: swap only the low 2 bits of row and column */
172 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
/* Skeleton of a 16x16 pixel loop (inner body missing from this chunk).
 * The inner loop steps j by 8, i.e. two 8-byte groups per row, and the
 * pointer advance "line_size - 16" moves to the start of the next row
 * after walking 16 bytes.  Presumably a pix_sum-style accumulator —
 * confirm against the full file. */
188 for (i = 0; i < 16; i++) {
189 for (j = 0; j < 16; j += 8) {
200 pix += line_size - 16;
/* Fragment of a 16x16 sum-of-squares kernel: each byte of the row is
 * looked up in a squares table sq[] and accumulated into s.  Two paths
 * survive here: a 64-bit one reading one uint64_t per 8 pixels, and a
 * 32-bit one reading two uint32_t.  The #if/#else, the s += sq[x&0xff]
 * first-byte lines and the loop closers are missing from this chunk.
 * NOTE(review): *(uint64_t*)pix type-punning assumes alignment and
 * tolerant aliasing rules — typical of this codebase, but UB by strict
 * ISO C; confirm the build flags used. */
211 for (i = 0; i < 16; i++) {
212 for (j = 0; j < 16; j += 8) {
224 register uint64_t x=*(uint64_t*)pix;
226 s += sq[(x>>8)&0xff];
227 s += sq[(x>>16)&0xff];
228 s += sq[(x>>24)&0xff];
229 s += sq[(x>>32)&0xff];
230 s += sq[(x>>40)&0xff];
231 s += sq[(x>>48)&0xff];
232 s += sq[(x>>56)&0xff];
/* 32-bit fallback: same squares-table walk, two words per 8 pixels */
234 register uint32_t x=*(uint32_t*)pix;
236 s += sq[(x>>8)&0xff];
237 s += sq[(x>>16)&0xff];
238 s += sq[(x>>24)&0xff];
239 x=*(uint32_t*)(pix+4);
241 s += sq[(x>>8)&0xff];
242 s += sq[(x>>16)&0xff];
243 s += sq[(x>>24)&0xff];
/* advance to the next row (16 bytes already consumed) */
248 pix += line_size - 16;
/* Byte-swaps w 32-bit words from src into dst.  The loop is unrolled by
 * 8 (i+8<=w); the unrolled body and the tail loop handling the final
 * w%8 words are not visible in this chunk. */
253 static void bswap_buf(uint32_t *dst,
                    const uint32_t *src,
                    int w){
256 for(i=0; i+8<=w; i+=8){
/* Sum of squared errors over a 4-pixel-wide block, h rows.
 * sq[] is a squares lookup table (presumably offset so that negative
 * differences index correctly — its definition is not visible here).
 * The per-row pointer advances (pix1/pix2 += line_size) and the final
 * return are missing from this chunk. */
277 static int sse4_c(
    void *
    v, uint8_t * pix1, uint8_t * pix2,
    int line_size,
    int h)
283 for (i = 0; i < h; i++) {
284 s += sq[pix1[0] - pix2[0]];
285 s += sq[pix1[1] - pix2[1]];
286 s += sq[pix1[2] - pix2[2]];
287 s += sq[pix1[3] - pix2[3]];
/* Sum of squared errors over an 8-pixel-wide block, h rows — the
 * 8-wide sibling of sse4_c above (same unrolled squares-table scheme).
 * Pointer advances and return statement are missing from this chunk. */
294 static int sse8_c(
    void *
    v, uint8_t * pix1, uint8_t * pix2,
    int line_size,
    int h)
300 for (i = 0; i < h; i++) {
301 s += sq[pix1[0] - pix2[0]];
302 s += sq[pix1[1] - pix2[1]];
303 s += sq[pix1[2] - pix2[2]];
304 s += sq[pix1[3] - pix2[3]];
305 s += sq[pix1[4] - pix2[4]];
306 s += sq[pix1[5] - pix2[5]];
307 s += sq[pix1[6] - pix2[6]];
308 s += sq[pix1[7] - pix2[7]];
/* Sum of squared errors over a 16-pixel-wide block, h rows — fully
 * unrolled like sse4_c/sse8_c.  Pointer advances and the return are
 * missing from this chunk. */
315 static int sse16_c(
    void *
    v, uint8_t *pix1, uint8_t *pix2,
    int line_size,
    int h)
321 for (i = 0; i < h; i++) {
322 s += sq[pix1[ 0] - pix2[ 0]];
323 s += sq[pix1[ 1] - pix2[ 1]];
324 s += sq[pix1[ 2] - pix2[ 2]];
325 s += sq[pix1[ 3] - pix2[ 3]];
326 s += sq[pix1[ 4] - pix2[ 4]];
327 s += sq[pix1[ 5] - pix2[ 5]];
328 s += sq[pix1[ 6] - pix2[ 6]];
329 s += sq[pix1[ 7] - pix2[ 7]];
330 s += sq[pix1[ 8] - pix2[ 8]];
331 s += sq[pix1[ 9] - pix2[ 9]];
332 s += sq[pix1[10] - pix2[10]];
333 s += sq[pix1[11] - pix2[11]];
334 s += sq[pix1[12] - pix2[12]];
335 s += sq[pix1[13] - pix2[13]];
336 s += sq[pix1[14] - pix2[14]];
337 s += sq[pix1[15] - pix2[15]];
/* Fragment: one unrolled row of a pixel-difference kernel — writes the
 * byte-wise difference of two source rows s1/s2 into block[0..7]
 * (enclosing loop, stride advances and declarations not visible). */
351 block[0] = s1[0] - s2[0];
352 block[1] = s1[1] - s2[1];
353 block[2] = s1[2] - s2[2];
354 block[3] = s1[3] - s2[3];
355 block[4] = s1[4] - s2[4];
356 block[5] = s1[5] - s2[5];
357 block[6] = s1[6] - s2[6];
358 block[7] = s1[7] - s2[7];
/* Fragment: one unrolled 8-wide row of a clamped put — each block[]
 * coefficient is clipped to 0..255 and stored (enclosing loop not
 * visible in this chunk). */
373 pixels[0] = av_clip_uint8(block[0]);
374 pixels[1] = av_clip_uint8(block[1]);
375 pixels[2] = av_clip_uint8(block[2]);
376 pixels[3] = av_clip_uint8(block[3]);
377 pixels[4] = av_clip_uint8(block[4]);
378 pixels[5] = av_clip_uint8(block[5]);
379 pixels[6] = av_clip_uint8(block[6]);
380 pixels[7] = av_clip_uint8(block[7]);
/* Fragment: 4-wide variant of the clamped put row above. */
394 pixels[0] = av_clip_uint8(block[0]);
395 pixels[1] = av_clip_uint8(block[1]);
396 pixels[2] = av_clip_uint8(block[2]);
397 pixels[3] = av_clip_uint8(block[3]);
/* Fragment: 2-wide variant of the clamped put row above. */
411 pixels[0] = av_clip_uint8(block[0]);
412 pixels[1] = av_clip_uint8(block[1]);
/* Fragment of a signed clamped put over an 8x8 block: each coefficient
 * gets +128 (signed -> unsigned bias) with explicit saturation.  The
 * "if (*block < -128)" branch, the saturating stores and the pointer
 * increments inside the loop are missing from this chunk. */
425 for (i = 0; i < 8; i++) {
426 for (j = 0; j < 8; j++) {
427 else if (*block > 127)
/* in-range case: bias by 128 and store */
432 *pixels = (uint8_t)(*block + 128);
/* advance to next output row (8 pixels already written) */
436 pixels += (line_size - 8);
/* Fragment: one unrolled 8-wide row of a clamped add — residual block[]
 * values are added to the existing pixels and clipped to 0..255
 * (enclosing loop not visible in this chunk). */
447 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
448 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
449 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
450 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
451 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
452 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
453 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
454 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
/* Fragment: 4-wide variant of the clamped add row above. */
467 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
468 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
469 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
470 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
/* Fragment: 2-wide variant of the clamped add row above. */
483 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
484 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
/* Fragment: accumulate the absolute value of one coefficient — the
 * enclosing loop over block[] is not visible in this chunk. */
494 sum+=
 FFABS(block[i]);
/* Fragment: fill h rows of 16 bytes with a constant value (the
 * per-row pointer advance is not visible in this chunk). */
502 for (i = 0; i < h; i++) {
503 memset(block, value, 16);
/* Fragment: 8-wide variant of the constant row fill above. */
512 for (i = 0; i < h; i++) {
513 memset(block, value, 8);
/* Rounded averages: avg2 = (a+b+1)/2, avg4 = (a+b+c+d+2)/4.
 * NOTE(review): the macro arguments are not individually parenthesised
 * ("a+b+1", not "(a)+(b)+1"), so passing an expression containing an
 * operator of lower precedence than '+' would mis-evaluate — confirm
 * every call site passes simple values before relying on these. */
518 #define avg2(a,b) ((a+b+1)>>1)
519 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
/* Fragment of a 1/16-pel bilinear interpolator (GMC-style): the four
 * corner weights A..D are built from the fractional offsets x16/y16 and
 * sum to 16*16 = 256, so each output is (weighted sum + rounder) >> 8.
 * One unrolled 8-pixel row survives; the row loop, the h parameter and
 * the pointer advances are missing from this chunk. */
523 const int A=(16-x16)*(16-y16);
524 const int B=( x16)*(16-y16);
525 const int C=(16-x16)*( y16);
526 const int D=( x16)*( y16);
531 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] +
    rounder)>>8;
532 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] +
    rounder)>>8;
533 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] +
    rounder)>>8;
534 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] +
    rounder)>>8;
535 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] +
    rounder)>>8;
536 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] +
    rounder)>>8;
537 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] +
    rounder)>>8;
538 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] +
    rounder)>>8;
/* Fragment of an affine global-motion-compensation kernel: per output
 * pixel (x,y) a source position is derived (src_x/src_y with fractional
 * parts frac_x/frac_y on a 1<<shift grid), then:
 *   - both coords in bounds  -> full 2-D bilinear blend;
 *   - only x in bounds       -> 1-D horizontal blend on a row clamped
 *                               vertically with av_clip;
 *   - only y in bounds       -> 1-D vertical blend on a column clamped
 *                               horizontally;
 *   - neither in bounds      -> nearest clamped source pixel.
 * The coordinate-stepping code (dxx/dxy/dyx/dyy accumulation), the
 * rounding term r and the closing braces are missing from this chunk. */
545 int dxx,
    int dxy,
    int dyx,
    int dyy,
    int shift,
    int r,
    int width,
    int height)
548 const int s= 1<<shift;
559 int src_x, src_y, frac_x, frac_y,
    index;
568 if((
    unsigned)src_x <
    width){
569 if((
    unsigned)src_y <
    height){
570 index= src_x + src_y*
    stride;
/* fully inside: 2-D bilinear with weights (s-frac)*(s-frac) etc. */
571 dst[y*stride + x]= ( ( src[
    index ]*(s-frac_x)
572 + src[index +1]* frac_x )*(s-frac_y)
573 + ( src[index+stride ]*(s-frac_x)
574 + src[index+stride+1]* frac_x )* frac_y
/* y out of range: clamp the row, horizontal-only blend */
577 index= src_x + av_clip(src_y, 0, height)*
    stride;
578 dst[y*stride + x]= ( ( src[
    index ]*(s-frac_x)
579 + src[index +1]* frac_x )*s
583 if((
    unsigned)src_y <
    height){
/* x out of range: clamp the column, vertical-only blend */
584 index= av_clip(src_x, 0, width) + src_y*
    stride;
585 dst[y*stride + x]= ( ( src[
    index ]*(s-frac_y)
586 + src[index+stride ]* frac_y )*s
/* both out of range: nearest clamped pixel, no blending */
589 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*
    stride;
590 dst[y*stride + x]= src[
    index ];
/* Fragment: width dispatch of a third-pel (0,0) put — the integer
 * position needs no filtering, so it forwards to the plain 8-bit pixel
 * copy of the matching width.  The switch header is missing here. */
604 case 2: put_pixels2_8_c (dst, src, stride, height);
    break;
605 case 4: put_pixels4_8_c (dst, src, stride, height);
    break;
606 case 8: put_pixels8_8_c (dst, src, stride, height);
    break;
607 case 16:put_pixels16_8_c(dst, src, stride, height);
    break;
/* Fragments of the third-pel "put" interpolator family.  Each surviving
 * piece is the loop skeleton plus the filter line; per-row src/dst
 * advances and function headers are missing from this chunk.
 * Constants: 683 ~= 2^11/3 (weights sum to 3, >>11) and
 * 2731 ~= 2^15/12 (weights sum to 12, >>15), i.e. fixed-point division
 * by the weight total with rounding via the +1 / +6 terms. */
/* horizontal 1/3: weights (2,1) on src[j], src[j+1] */
613 for (i=0; i <
    height; i++) {
614 for (j=0; j <
    width; j++) {
615 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
/* horizontal 2/3: weights (1,2) */
624 for (i=0; i <
    height; i++) {
625 for (j=0; j <
    width; j++) {
626 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
/* vertical 1/3: weights (2,1) on the two rows */
635 for (i=0; i <
    height; i++) {
636 for (j=0; j <
    width; j++) {
637 dst[j] = (683*(2*src[j] + src[j+
    stride] + 1)) >> 11;
/* diagonal (1/3,1/3): 2-D weights (4,3)/(3,2), total 12 */
646 for (i=0; i <
    height; i++) {
647 for (j=0; j <
    width; j++) {
648 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+
    stride] + 2*src[j+stride+1] + 6)) >> 15;
/* (1/3,2/3): weights (3,2)/(4,3) */
657 for (i=0; i <
    height; i++) {
658 for (j=0; j <
    width; j++) {
659 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+
    stride] + 3*src[j+stride+1] + 6)) >> 15;
/* vertical 2/3: weights (1,2) on the two rows */
668 for (i=0; i <
    height; i++) {
669 for (j=0; j <
    width; j++) {
670 dst[j] = (683*(src[j] + 2*src[j+
    stride] + 1)) >> 11;
/* (2/3,1/3): weights (3,4)/(2,3) */
679 for (i=0; i <
    height; i++) {
680 for (j=0; j <
    width; j++) {
681 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+
    stride] + 3*src[j+stride+1] + 6)) >> 15;
/* (2/3,2/3): weights (2,3)/(3,4) */
690 for (i=0; i <
    height; i++) {
691 for (j=0; j <
    width; j++) {
692 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+
    stride] + 4*src[j+stride+1] + 6)) >> 15;
/* Fragment: width dispatch of the third-pel (0,0) averaging variant —
 * forwards to the plain 8-bit pixel average of the matching width. */
701 case 2: avg_pixels2_8_c (dst, src, stride, height);
    break;
702 case 4: avg_pixels4_8_c (dst, src, stride, height);
    break;
703 case 8: avg_pixels8_8_c (dst, src, stride, height);
    break;
704 case 16:avg_pixels16_8_c(dst, src, stride, height);
    break;
/* Fragments of the third-pel "avg" interpolator family: identical
 * filters to the put variants above, then a rounded average with the
 * existing dst value ((dst + filtered + 1) >> 1).  Loop closers,
 * pointer advances and headers are missing from this chunk. */
/* horizontal 1/3, averaged */
710 for (i=0; i <
    height; i++) {
711 for (j=0; j <
    width; j++) {
712 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
/* horizontal 2/3, averaged */
721 for (i=0; i <
    height; i++) {
722 for (j=0; j <
    width; j++) {
723 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
/* vertical 1/3, averaged */
732 for (i=0; i <
    height; i++) {
733 for (j=0; j <
    width; j++) {
734 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+
    stride] + 1)) >> 11) + 1) >> 1;
/* (1/3,1/3), averaged */
743 for (i=0; i <
    height; i++) {
744 for (j=0; j <
    width; j++) {
745 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+
    stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* (1/3,2/3), averaged */
754 for (i=0; i <
    height; i++) {
755 for (j=0; j <
    width; j++) {
756 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+
    stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* vertical 2/3, averaged */
765 for (i=0; i <
    height; i++) {
766 for (j=0; j <
    width; j++) {
767 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+
    stride] + 1)) >> 11) + 1) >> 1;
/* (2/3,1/3), averaged */
776 for (i=0; i <
    height; i++) {
777 for (j=0; j <
    width; j++) {
778 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+
    stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* (2/3,2/3), averaged */
787 for (i=0; i <
    height; i++) {
788 for (j=0; j <
    width; j++) {
789 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+
    stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
/* QPEL_MC: macro template generating put/avg quarter-pel MC functions.
 * This part: 8-wide horizontal lowpass with taps (20,-6,3,-1) around the
 * half-pel position; cm is a clipping LUT centered at MAX_NEG_CROP.
 * NOTE: the trailing rows reuse src[8]/src[7]/src[6] — this mirrors the
 * samples at the right edge (only 9 inputs are available) and is the
 * intentional MPEG-4 edge handling, not a copy/paste typo.  The loop
 * over h and the src/dst stride advances are missing from this chunk. */
796 #define QPEL_MC(r, OPNAME, RND, OP) \
797 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
798 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
802 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
803 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
804 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
805 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
806 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
807 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
808 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
809 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
/* QPEL_MC part: 8-tall vertical lowpass — same (20,-6,3,-1) filter as \
 * the horizontal version, applied down a column (srcN = row N).  The \
 * mirrored src8/src7/src6 taps at the bottom rows are the intentional \
 * edge handling.  Column loop and pointer advances missing here. */\
815 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
817 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
821 const int src0= src[0*srcStride];\
822 const int src1= src[1*srcStride];\
823 const int src2= src[2*srcStride];\
824 const int src3= src[3*srcStride];\
825 const int src4= src[4*srcStride];\
826 const int src5= src[5*srcStride];\
827 const int src6= src[6*srcStride];\
828 const int src7= src[7*srcStride];\
829 const int src8= src[8*srcStride];\
830 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
831 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
832 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
833 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
834 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
835 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
836 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
837 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
/* QPEL_MC part: 16-wide horizontal lowpass, same (20,-6,3,-1) taps. \
 * Interior outputs use the full 4-sample neighbourhood on each side; \
 * dst[13..15] mirror at the right edge (src[16],src[15],src[14]) since \
 * only 17 inputs exist.  Row loop/advances missing from this chunk. */\
843 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
844 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
849 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
850 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
851 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
852 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
853 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
854 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
855 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
856 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
857 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
858 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
859 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
860 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
861 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
862 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
863 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
864 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
/* QPEL_MC part: 16-tall vertical lowpass — column version of the \
 * 16-wide filter above; srcN = row N, bottom rows mirror at src16/15. \
 * Column loop and per-column pointer advances missing here. */\
870 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
871 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
876 const int src0= src[0*srcStride];\
877 const int src1= src[1*srcStride];\
878 const int src2= src[2*srcStride];\
879 const int src3= src[3*srcStride];\
880 const int src4= src[4*srcStride];\
881 const int src5= src[5*srcStride];\
882 const int src6= src[6*srcStride];\
883 const int src7= src[7*srcStride];\
884 const int src8= src[8*srcStride];\
885 const int src9= src[9*srcStride];\
886 const int src10= src[10*srcStride];\
887 const int src11= src[11*srcStride];\
888 const int src12= src[12*srcStride];\
889 const int src13= src[13*srcStride];\
890 const int src14= src[14*srcStride];\
891 const int src15= src[15*srcStride];\
892 const int src16= src[16*srcStride];\
893 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
894 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
895 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
896 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
897 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
898 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
899 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
900 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
901 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
902 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
903 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
904 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
905 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
906 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
907 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
908 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
/* QPEL_MC part: the 8x8 quarter-pel position functions mcXY (X = \
 * horizontal quarter, Y = vertical quarter).  Pattern throughout: \
 *  - copy_block9 stages a 9x9 source patch into full[] (stride 16); \
 *  - h/v lowpass produce half-pel planes (halfH / halfV / halfHV); \
 *  - pixels8_l2/l4 blend 2 or 4 planes into dst. \
 * The *_old_c ff_-prefixed variants use the 4-plane blend; the current \
 * ones approximate with 2-plane blends.  Local buffer declarations and \
 * closing braces for several functions are missing from this chunk. */\
914 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
916 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
917 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
920 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
921 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
924 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
926 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
927 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
930 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
933 copy_block9(full, src, 16, stride, 9);\
934 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
935 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
938 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
940 copy_block9(full, src, 16, stride, 9);\
941 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
944 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
947 copy_block9(full, src, 16, stride, 9);\
948 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
949 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
951 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
956 copy_block9(full, src, 16, stride, 9);\
957 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
958 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
959 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
960 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
962 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
966 copy_block9(full, src, 16, stride, 9);\
967 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
968 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
969 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
970 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
972 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
977 copy_block9(full, src, 16, stride, 9);\
978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
979 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
980 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
981 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
983 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
987 copy_block9(full, src, 16, stride, 9);\
988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
989 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
991 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
993 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
998 copy_block9(full, src, 16, stride, 9);\
999 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1000 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1001 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1002 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1004 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1005 uint8_t full[16*9];\
1007 uint8_t halfHV[64];\
1008 copy_block9(full, src, 16, stride, 9);\
1009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1010 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1012 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1014 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1015 uint8_t full[16*9];\
1018 uint8_t halfHV[64];\
1019 copy_block9(full, src, 16, stride, 9);\
1020 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1021 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1022 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1023 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1025 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1026 uint8_t full[16*9];\
1028 uint8_t halfHV[64];\
1029 copy_block9(full, src, 16, stride, 9);\
1030 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1031 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1032 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1033 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1035 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1037 uint8_t halfHV[64];\
1038 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1039 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1040 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1042 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1044 uint8_t halfHV[64];\
1045 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1046 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1047 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1049 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1050 uint8_t full[16*9];\
1053 uint8_t halfHV[64];\
1054 copy_block9(full, src, 16, stride, 9);\
1055 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1056 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1057 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1058 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1060 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1061 uint8_t full[16*9];\
1063 copy_block9(full, src, 16, stride, 9);\
1064 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1065 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1066 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1068 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1069 uint8_t full[16*9];\
1072 uint8_t halfHV[64];\
1073 copy_block9(full, src, 16, stride, 9);\
1074 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1075 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1076 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1077 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1079 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1080 uint8_t full[16*9];\
1082 copy_block9(full, src, 16, stride, 9);\
1083 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1084 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1085 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1087 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1089 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1090 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
/* QPEL_MC part: the 16x16 quarter-pel position functions — same scheme \
 * as the 8x8 set above scaled up: copy_block17 stages a 17x17 patch \
 * (stride 24), lowpass passes build halfH[272]/halfV[256]/halfHV[256], \
 * and pixels16_l2/l4 blend into dst.  Some buffer declarations and \
 * closing braces are missing from this chunk. */\
1093 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1095 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1096 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1099 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1100 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1103 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1105 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1106 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1109 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1110 uint8_t full[24*17];\
1112 copy_block17(full, src, 24, stride, 17);\
1113 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1114 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1117 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1118 uint8_t full[24*17];\
1119 copy_block17(full, src, 24, stride, 17);\
1120 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1123 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1124 uint8_t full[24*17];\
1126 copy_block17(full, src, 24, stride, 17);\
1127 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1128 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1130 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1131 uint8_t full[24*17];\
1132 uint8_t halfH[272];\
1133 uint8_t halfV[256];\
1134 uint8_t halfHV[256];\
1135 copy_block17(full, src, 24, stride, 17);\
1136 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1137 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1138 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1139 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1141 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1142 uint8_t full[24*17];\
1143 uint8_t halfH[272];\
1144 uint8_t halfHV[256];\
1145 copy_block17(full, src, 24, stride, 17);\
1146 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1147 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1148 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1149 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1151 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1152 uint8_t full[24*17];\
1153 uint8_t halfH[272];\
1154 uint8_t halfV[256];\
1155 uint8_t halfHV[256];\
1156 copy_block17(full, src, 24, stride, 17);\
1157 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1158 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1159 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1160 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1162 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1163 uint8_t full[24*17];\
1164 uint8_t halfH[272];\
1165 uint8_t halfHV[256];\
1166 copy_block17(full, src, 24, stride, 17);\
1167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1168 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1170 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1172 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1173 uint8_t full[24*17];\
1174 uint8_t halfH[272];\
1175 uint8_t halfV[256];\
1176 uint8_t halfHV[256];\
1177 copy_block17(full, src, 24, stride, 17);\
1178 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1179 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1180 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1181 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1183 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1184 uint8_t full[24*17];\
1185 uint8_t halfH[272];\
1186 uint8_t halfHV[256];\
1187 copy_block17(full, src, 24, stride, 17);\
1188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1189 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1191 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1193 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1194 uint8_t full[24*17];\
1195 uint8_t halfH[272];\
1196 uint8_t halfV[256];\
1197 uint8_t halfHV[256];\
1198 copy_block17(full, src, 24, stride, 17);\
1199 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1200 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1201 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1202 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1204 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1205 uint8_t full[24*17];\
1206 uint8_t halfH[272];\
1207 uint8_t halfHV[256];\
1208 copy_block17(full, src, 24, stride, 17);\
1209 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1210 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1211 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1212 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1214 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1215 uint8_t halfH[272];\
1216 uint8_t halfHV[256];\
1217 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1218 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1219 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1221 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1222 uint8_t halfH[272];\
1223 uint8_t halfHV[256];\
1224 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1225 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1226 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1228 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1229 uint8_t full[24*17];\
1230 uint8_t halfH[272];\
1231 uint8_t halfV[256];\
1232 uint8_t halfHV[256];\
1233 copy_block17(full, src, 24, stride, 17);\
1234 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1235 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1236 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1237 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1239 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1240 uint8_t full[24*17];\
1241 uint8_t halfH[272];\
1242 copy_block17(full, src, 24, stride, 17);\
1243 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1244 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1245 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1247 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1248 uint8_t full[24*17];\
1249 uint8_t halfH[272];\
1250 uint8_t halfV[256];\
1251 uint8_t halfHV[256];\
1252 copy_block17(full, src, 24, stride, 17);\
1253 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1254 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1255 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1256 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1258 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1259 uint8_t full[24*17];\
1260 uint8_t halfH[272];\
1261 copy_block17(full, src, 24, stride, 17);\
1262 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1263 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1264 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1266 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1267 uint8_t halfH[272];\
1268 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1269 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/*
 * Write-back primitives plugged into the QPEL_MC() template as OPNAME:
 *  - op_put*:  store the filtered value into the destination pixel 'a'
 *  - op_avg*:  average the filtered value with the existing pixel 'a'
 * Each maps the raw filter sum 'b' through the clipping lookup table 'cm'
 * (in scope at the expansion site — presumably the 0..255 crop table;
 * confirm against the enclosing function) after a rounding >>5.
 * The *_no_rnd variants add 15 instead of 16 so the shift rounds down.
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate the averaging flavour of the quarter-pel MC functions
 * (avg_qpel*_mc*_c) from the QPEL_MC template, then undefine the helper
 * op macros so they cannot leak into later code.
 * NOTE(review): companion instantiations/#undefs for the put flavours
 * are expected adjacent to this span in the full file — confirm. */
QPEL_MC(0, avg_ , _ , op_avg)
#undef op_avg_no_rnd
#undef op_put_no_rnd
/* Full-pel (mc00) quarter-pel cases need no filtering: alias them
 * straight to the plain block copy/average helpers.
 * NOTE(review): put_no_rnd_qpel16_mc00_c maps to ff_put_pixels16x16_8_c
 * while put_qpel16_mc00_c maps to ff_put_pixels16x16_c — confirm this
 * asymmetry (depth-suffixed vs. plain wrapper) is intentional. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
1298 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1299 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1300 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1301 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1302 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1303 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1304 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1305 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1311 #if CONFIG_RV40_DECODER
1313 put_pixels16_xy2_8_c(dst, src, stride, 16);
1316 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1319 put_pixels8_xy2_8_c(dst, src, stride, 8);
1322 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1331 const int src_1= src[ -srcStride];
1332 const int src0 = src[0 ];
1333 const int src1 = src[ srcStride];
1334 const int src2 = src[2*srcStride];
1335 const int src3 = src[3*srcStride];
1336 const int src4 = src[4*srcStride];
1337 const int src5 = src[5*srcStride];
1338 const int src6 = src[6*srcStride];
1339 const int src7 = src[7*srcStride];
1340 const int src8 = src[8*srcStride];
1341 const int src9 = src[9*srcStride];
1342 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1343 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1344 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1345 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1346 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1347 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1348 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1349 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1358 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1368 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1382 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1391 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1410 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1412 if (d<-2*strength) d1= 0;
1413 else if(d<- strength) d1=-2*strength - d;
1414 else if(d< strength) d1= d;
1415 else if(d< 2*strength) d1= 2*strength - d;
1420 if(p1&256) p1= ~(p1>>31);
1421 if(p2&256) p2= ~(p2>>31);
1428 d2= av_clip((p0-p3)/4, -ad1, ad1);
1430 src[x-2*
stride] = p0 - d2;
1431 src[x+
stride] = p3 + d2;
1443 int p0= src[y*stride-2];
1444 int p1= src[y*stride-1];
1445 int p2= src[y*stride+0];
1446 int p3= src[y*stride+1];
1447 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1449 if (d<-2*strength) d1= 0;
1450 else if(d<- strength) d1=-2*strength - d;
1451 else if(d< strength) d1= d;
1452 else if(d< 2*strength) d1= 2*strength - d;
1457 if(p1&256) p1= ~(p1>>31);
1458 if(p2&256) p2= ~(p2>>31);
1460 src[y*stride-1] = p1;
1461 src[y*stride+0] = p2;
1465 d2= av_clip((p0-p3)/4, -ad1, ad1);
1467 src[y*stride-2] = p0 - d2;
1468 src[y*stride+1] = p3 + d2;
1478 temp[x ] = 4*src[x ];
1479 temp[x + 7*8] = 4*src[x + 7*
stride];
1483 xy = y * stride + x;
1485 temp[yz] = src[xy -
stride] + 2*src[xy] + src[xy +
stride];
1490 src[ y*
stride] = (temp[ y*8] + 2)>>2;
1491 src[7+y*
stride] = (temp[7+y*8] + 2)>>2;
1493 xy = y * stride + x;
1495 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
1500 static inline int pix_abs16_c(
void *
v, uint8_t *pix1, uint8_t *pix2,
int line_size,
int h)
1506 s += abs(pix1[0] - pix2[0]);
1507 s += abs(pix1[1] - pix2[1]);
1508 s += abs(pix1[2] - pix2[2]);
1509 s += abs(pix1[3] - pix2[3]);
1510 s += abs(pix1[4] - pix2[4]);
1511 s += abs(pix1[5] - pix2[5]);
1512 s += abs(pix1[6] - pix2[6]);
1513 s += abs(pix1[7] - pix2[7]);
1514 s += abs(pix1[8] - pix2[8]);
1515 s += abs(pix1[9] - pix2[9]);
1516 s += abs(pix1[10] - pix2[10]);
1517 s += abs(pix1[11] - pix2[11]);
1518 s += abs(pix1[12] - pix2[12]);
1519 s += abs(pix1[13] - pix2[13]);
1520 s += abs(pix1[14] - pix2[14]);
1521 s += abs(pix1[15] - pix2[15]);
1534 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1535 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1536 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1537 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1538 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1539 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1540 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1541 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
1542 s += abs(pix1[8] -
avg2(pix2[8], pix2[9]));
1543 s += abs(pix1[9] -
avg2(pix2[9], pix2[10]));
1544 s += abs(pix1[10] -
avg2(pix2[10], pix2[11]));
1545 s += abs(pix1[11] -
avg2(pix2[11], pix2[12]));
1546 s += abs(pix1[12] -
avg2(pix2[12], pix2[13]));
1547 s += abs(pix1[13] -
avg2(pix2[13], pix2[14]));
1548 s += abs(pix1[14] -
avg2(pix2[14], pix2[15]));
1549 s += abs(pix1[15] -
avg2(pix2[15], pix2[16]));
1559 uint8_t *pix3 = pix2 + line_size;
1563 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1564 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1565 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1566 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1567 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1568 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1569 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1570 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
1571 s += abs(pix1[8] -
avg2(pix2[8], pix3[8]));
1572 s += abs(pix1[9] -
avg2(pix2[9], pix3[9]));
1573 s += abs(pix1[10] -
avg2(pix2[10], pix3[10]));
1574 s += abs(pix1[11] -
avg2(pix2[11], pix3[11]));
1575 s += abs(pix1[12] -
avg2(pix2[12], pix3[12]));
1576 s += abs(pix1[13] -
avg2(pix2[13], pix3[13]));
1577 s += abs(pix1[14] -
avg2(pix2[14], pix3[14]));
1578 s += abs(pix1[15] -
avg2(pix2[15], pix3[15]));
1589 uint8_t *pix3 = pix2 + line_size;
1593 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1594 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1595 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1596 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1597 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1598 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1599 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1600 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1601 s += abs(pix1[8] -
avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1602 s += abs(pix1[9] -
avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1603 s += abs(pix1[10] -
avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1604 s += abs(pix1[11] -
avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1605 s += abs(pix1[12] -
avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1606 s += abs(pix1[13] -
avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1607 s += abs(pix1[14] -
avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1608 s += abs(pix1[15] -
avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1616 static inline int pix_abs8_c(
void *
v, uint8_t *pix1, uint8_t *pix2,
int line_size,
int h)
1622 s += abs(pix1[0] - pix2[0]);
1623 s += abs(pix1[1] - pix2[1]);
1624 s += abs(pix1[2] - pix2[2]);
1625 s += abs(pix1[3] - pix2[3]);
1626 s += abs(pix1[4] - pix2[4]);
1627 s += abs(pix1[5] - pix2[5]);
1628 s += abs(pix1[6] - pix2[6]);
1629 s += abs(pix1[7] - pix2[7]);
1636 static int pix_abs8_x2_c(
void *
v, uint8_t *pix1, uint8_t *pix2,
int line_size,
int h)
1642 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1643 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1644 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1645 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1646 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1647 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1648 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1649 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
1656 static int pix_abs8_y2_c(
void *
v, uint8_t *pix1, uint8_t *pix2,
int line_size,
int h)
1659 uint8_t *pix3 = pix2 + line_size;
1663 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1664 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1665 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1666 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1667 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1668 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1669 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1670 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
1681 uint8_t *pix3 = pix2 + line_size;
1685 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1686 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1687 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1688 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1689 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1690 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1691 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1692 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1707 for(x=0; x<16; x++){
1708 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1711 for(x=0; x<15; x++){
1712 score2+=
FFABS( s1[x ] - s1[x +stride]
1713 - s1[x+1] + s1[x+1+stride])
1714 -
FFABS( s2[x ] - s2[x +stride]
1715 - s2[x+1] + s2[x+1+stride]);
1723 else return score1 +
FFABS(score2)*8;
1734 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1738 score2+=
FFABS( s1[x ] - s1[x +stride]
1739 - s1[x+1] + s1[x+1+stride])
1740 -
FFABS( s2[x ] - s2[x +stride]
1741 - s2[x+1] + s2[x+1+stride]);
1749 else return score1 +
FFABS(score2)*8;
1756 for(i=0; i<8*8; i++){
1760 assert(-512<b && b<512);
1762 sum += (w*
b)*(w*b)>>4;
1770 for(i=0; i<8*8; i++){
1791 for(i=0; i<=last; i++){
1792 const int j= scantable[i];
1797 for(i=0; i<=last; i++){
1798 const int j= scantable[i];
1799 const int perm_j= permutation[j];
1800 block[perm_j]= temp[j];
1811 memset(cmp, 0,
sizeof(
void*)*6);
1870 for (i = 0; i <= w - (int)
sizeof(
long); i +=
sizeof(long)) {
1871 long a = *(
long*)(src+i);
1872 long b = *(
long*)(dst+i);
1876 dst[i+0] += src[i+0];
1881 for(i=0; i<=w-
sizeof(long); i+=
sizeof(long)){
1882 long a = *(
long*)(src1+i);
1883 long b = *(
long*)(src2+i);
1887 dst[i] = src1[i]+src2[i];
1890 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int w){
1892 #if !HAVE_FAST_UNALIGNED
1893 if((
long)src2 & (
sizeof(
long)-1)){
1894 for(i=0; i+7<w; i+=8){
1895 dst[i+0] = src1[i+0]-src2[i+0];
1896 dst[i+1] = src1[i+1]-src2[i+1];
1897 dst[i+2] = src1[i+2]-src2[i+2];
1898 dst[i+3] = src1[i+3]-src2[i+3];
1899 dst[i+4] = src1[i+4]-src2[i+4];
1900 dst[i+5] = src1[i+5]-src2[i+5];
1901 dst[i+6] = src1[i+6]-src2[i+6];
1902 dst[i+7] = src1[i+7]-src2[i+7];
1906 for (i = 0; i <= w - (int)
sizeof(
long); i +=
sizeof(long)) {
1907 long a = *(
long*)(src1+i);
1908 long b = *(
long*)(src2+i);
1912 dst[i+0] = src1[i+0]-src2[i+0];
1923 l=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1940 const int pred=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1953 for(i=0; i<w-1; i++){
2010 #define BUTTERFLY2(o1,o2,i1,i2) \
2014 #define BUTTERFLY1(x,y) \
2023 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
2034 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2035 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2036 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2037 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2079 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2080 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2081 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2082 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2113 sum -=
FFABS(temp[8*0] + temp[8*4]);
2131 const int s07 = SRC(0) + SRC(7);\
2132 const int s16 = SRC(1) + SRC(6);\
2133 const int s25 = SRC(2) + SRC(5);\
2134 const int s34 = SRC(3) + SRC(4);\
2135 const int a0 = s07 + s34;\
2136 const int a1 = s16 + s25;\
2137 const int a2 = s07 - s34;\
2138 const int a3 = s16 - s25;\
2139 const int d07 = SRC(0) - SRC(7);\
2140 const int d16 = SRC(1) - SRC(6);\
2141 const int d25 = SRC(2) - SRC(5);\
2142 const int d34 = SRC(3) - SRC(4);\
2143 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2144 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2145 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2146 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2148 DST(1, a4 + (a7>>2)) ;\
2149 DST(2, a2 + (a3>>1)) ;\
2150 DST(3, a5 + (a6>>2)) ;\
2152 DST(5, a6 - (a5>>2)) ;\
2153 DST(6, (a2>>1) - a3 ) ;\
2154 DST(7, (a4>>2) - a7 ) ;\
2157 static int dct264_sad8x8_c(
void *c, uint8_t *src1, uint8_t *src2,
int stride,
int h){
2165 #define SRC(x) dct[i][x]
2166 #define DST(x,v) dct[i][x]= v
2167 for( i = 0; i < 8; i++ )
2172 #define
SRC(x) dct[x][i]
2173 #define DST(x,v) sum += FFABS(v)
2174 for( i = 0; i < 8; i++ )
2182 static int dct_max8x8_c(
void *c, uint8_t *src1, uint8_t *src2,
int stride,
int h){
2201 DCTELEM *
const bak = temp+64;
2209 memcpy(bak, temp, 64*
sizeof(
DCTELEM));
2216 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2221 static int rd8x8_c(
void *c, uint8_t *src1, uint8_t *src2,
int stride,
int h){
2230 uint8_t * last_length;
2256 for(i=start_i; i<last; i++){
2257 int j= scantable[i];
2262 if((level&(~127)) == 0){
2272 level= temp[i] + 64;
2276 if((level&(~127)) == 0){
2292 distortion= s->
dsp.
sse[1](
NULL, lsrc2, lsrc1, 8, 8);
2294 return distortion + ((bits*s->
qscale*s->
qscale*109 + 64)>>7);
2297 static int bit8x8_c(
void *c, uint8_t *src1, uint8_t *src2,
int stride,
int h){
2304 uint8_t * last_length;
2327 for(i=start_i; i<last; i++){
2328 int j= scantable[i];
2333 if((level&(~127)) == 0){
2343 level= temp[i] + 64;
2347 if((level&(~127)) == 0){
2356 #define VSAD_INTRA(size) \
2357 static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2361 for(y=1; y<h; y++){ \
2362 for(x=0; x<size; x+=4){ \
2363 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2364 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2374 static
int vsad16_c(
void *c, uint8_t *
s1, uint8_t *
s2,
int stride,
int h){
2379 for(x=0; x<16; x++){
2380 score+=
FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2389 #define SQ(a) ((a)*(a))
2390 #define VSSE_INTRA(size) \
2391 static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2395 for(y=1; y<h; y++){ \
2396 for(x=0; x<size; x+=4){ \
2397 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2398 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2408 static
int vsse16_c(
void *c, uint8_t *
s1, uint8_t *
s2,
int stride,
int h){
2413 for(x=0; x<16; x++){
2414 score+=
SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2427 for(i=0; i<
size; i++)
2428 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2445 for(i=0; i<
len; i++)
2446 dst[i] = src0[i] * src1[i];
2452 for(i=0; i<
len; i++)
2453 dst[i] = src0[i] * src1[-i];
2458 for(i=0; i<
len; i++)
2459 dst[i] = src0[i] * src1[i] + src2[i];
2463 const float *src1,
const float *win,
int len)
2469 for(i=-len, j=len-1; i<0; i++, j--) {
2474 dst[i] = s0*wj - s1*wi;
2475 dst[j] = s0*wi + s1*wj;
2483 for (i = 0; i <
len; i++)
2484 dst[i] = src[i] * mul;
2491 for (i = 0; i <
len; i++)
2492 dst[i] += src[i] * mul;
2499 for (i = 0; i <
len; i++) {
2500 float t = v1[i] - v2[i];
2507 const float *src1,
int len)
2510 for (i = 0; i <
len; i++) {
2513 dst[2*i ] = f1 + f2;
2514 dst[2*i + 1] = f1 - f2;
2523 for (i = 0; i <
len; i++)
2530 uint32_t maxi, uint32_t maxisign)
2533 if(a > mini)
return mini;
2534 else if((a^(1
U<<31)) > maxisign)
return maxi;
2540 uint32_t mini = *(uint32_t*)min;
2541 uint32_t maxi = *(uint32_t*)max;
2542 uint32_t maxisign = maxi ^ (1
U<<31);
2543 uint32_t *dsti = (uint32_t*)dst;
2544 const uint32_t *srci = (
const uint32_t*)src;
2545 for(i=0; i<
len; i+=8) {
2546 dsti[i + 0] =
clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2547 dsti[i + 1] =
clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2548 dsti[i + 2] =
clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2549 dsti[i + 3] =
clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2550 dsti[i + 4] =
clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2551 dsti[i + 5] =
clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2552 dsti[i + 6] =
clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2553 dsti[i + 7] =
clipf_c_one(srci[i + 7], mini, maxi, maxisign);
2558 if(min < 0 && max > 0) {
2561 for(i=0; i <
len; i+=8) {
2562 dst[i ] = av_clipf(src[i ], min, max);
2563 dst[i + 1] = av_clipf(src[i + 1], min, max);
2564 dst[i + 2] = av_clipf(src[i + 2], min, max);
2565 dst[i + 3] = av_clipf(src[i + 3], min, max);
2566 dst[i + 4] = av_clipf(src[i + 4], min, max);
2567 dst[i + 5] = av_clipf(src[i + 5], min, max);
2568 dst[i + 6] = av_clipf(src[i + 6], min, max);
2569 dst[i + 7] = av_clipf(src[i + 7], min, max);
2579 res += (*v1++ * *v2++) >> shift;
2589 *v1++ += mul * *v3++;
2595 const int16_t *window,
unsigned int len)
2598 int len2 = len >> 1;
2600 for (i = 0; i < len2; i++) {
2601 int16_t w = window[i];
2602 output[i] = (
MUL16(input[i], w) + (1 << 14)) >> 15;
2603 output[len-i-1] = (
MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
2608 int32_t max,
unsigned int len)
2611 *dst++ = av_clip(*src++, min, max);
2612 *dst++ = av_clip(*src++, min, max);
2613 *dst++ = av_clip(*src++, min, max);
2614 *dst++ = av_clip(*src++, min, max);
2615 *dst++ = av_clip(*src++, min, max);
2616 *dst++ = av_clip(*src++, min, max);
2617 *dst++ = av_clip(*src++, min, max);
2618 *dst++ = av_clip(*src++, min, max);
2637 a1 =
W1*b[1]+
W7*b[7];
2638 a7 =
W7*b[1]-
W1*b[7];
2639 a5 =
W5*b[5]+
W3*b[3];
2640 a3 =
W3*b[5]-
W5*b[3];
2641 a2 =
W2*b[2]+
W6*b[6];
2642 a6 =
W6*b[2]-
W2*b[6];
2643 a0 =
W0*b[0]+
W0*b[4];
2644 a4 =
W0*b[0]-
W0*b[4];
2646 s1 = (181*(a1-a5+a7-
a3)+128)>>8;
2647 s2 = (181*(a1-a5-a7+
a3)+128)>>8;
2649 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2650 b[1] = (a4+a6 +s1 + (1<<7))>>8;
2651 b[2] = (a4-a6 +s2 + (1<<7))>>8;
2652 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2653 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2654 b[5] = (a4-a6 -s2 + (1<<7))>>8;
2655 b[6] = (a4+a6 -s1 + (1<<7))>>8;
2656 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
2663 a1 = (
W1*b[8*1]+
W7*b[8*7] + 4)>>3;
2664 a7 = (
W7*b[8*1]-
W1*b[8*7] + 4)>>3;
2665 a5 = (
W5*b[8*5]+
W3*b[8*3] + 4)>>3;
2666 a3 = (
W3*b[8*5]-
W5*b[8*3] + 4)>>3;
2667 a2 = (
W2*b[8*2]+
W6*b[8*6] + 4)>>3;
2668 a6 = (
W6*b[8*2]-
W2*b[8*6] + 4)>>3;
2669 a0 = (
W0*b[8*0]+
W0*b[8*4] )>>3;
2670 a4 = (
W0*b[8*0]-
W0*b[8*4] )>>3;
2672 s1 = (181*(a1-a5+a7-
a3)+128)>>8;
2673 s2 = (181*(a1-a5-a7+
a3)+128)>>8;
2675 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2676 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2677 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2678 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2680 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2681 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2682 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2683 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
2742 dest[0] = av_clip_uint8((block[0] + 4)>>3);
2746 dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
2762 for(i=0;i<512;i++) {
2770 static int did_fail=0;
2773 if((intptr_t)aligned & 15){
2775 #if HAVE_MMX || HAVE_ALTIVEC
2777 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2778 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2779 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2780 "Do not report crashes to Libav developers.\n");
2813 #endif //CONFIG_ENCODERS
2820 }
else if(avctx->
lowres==2){
2825 }
else if(avctx->
lowres==3){
/* Fill one row of a motion-compensation function table on the DSP
 * context: entry [IDX][i] is set to PFX ## NUM ## _mcXY_c where
 * X = i & 3 is the horizontal and Y = i >> 2 the vertical quarter-pel
 * phase (mc00 .. mc33). */
#define dspfunc(PFX, IDX, NUM) \
c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2932 dspfunc(put_no_rnd_qpel, 0, 16);
2938 dspfunc(put_no_rnd_qpel, 1, 8);
2945 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
2948 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
/* Install a comparison-function pair on the DSP context 'c':
 * slot [0] = the 16-wide variant, slot [1] = the 8x8 variant. */
#define SET_CMP_FUNC(name) \
c->name[0]= name ## 16_c;\
c->name[1]= name ## 8x8_c;
2982 c->
vsad[4]= vsad_intra16_c;
2983 c->
vsad[5]= vsad_intra8_c;
2985 c->
vsse[4]= vsse_intra16_c;
2986 c->
vsse[5]= vsse_intra8_c;
3004 #if CONFIG_PNG_DECODER
3024 #if CONFIG_VORBIS_DECODER
3027 #if CONFIG_AC3_DECODER
/* Token-pasting helpers for bit-depth dispatch:
 *   FUNC(f, d)  expands to f_d    (depth-suffixed name)
 *   FUNCC(f, d) expands to f_d_c  (depth-suffixed C implementation) */
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c
/* Fill one row of a half-pel pixel-op table for the given bit depth:
 * [IDX][0] full-pel copy, [IDX][1] horizontal half (_x2),
 * [IDX][2] vertical half (_y2), [IDX][3] diagonal half (_xy2). */
#define dspfunc1(PFX, IDX, NUM, depth)\
c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
/* Fill one 16-entry quarter-pel MC table row for the given bit depth:
 * entry [IDX][i] is the mcXY variant with X = i & 3 horizontal and
 * Y = i >> 2 vertical phase, resolved through FUNCC() to the
 * depth-suffixed C implementation. */
#define dspfunc2(PFX, IDX, NUM, depth)\
c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3085 #define BIT_DEPTH_FUNCS(depth, dct)\
3086 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
3087 c->draw_edges = FUNCC(draw_edges , depth);\
3088 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
3089 c->clear_block = FUNCC(clear_block ## dct , depth);\
3090 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
3091 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
3092 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
3093 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
3094 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3096 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
3097 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
3098 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
3099 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
3100 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
3101 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
3103 dspfunc1(put , 0, 16, depth);\
3104 dspfunc1(put , 1, 8, depth);\
3105 dspfunc1(put , 2, 4, depth);\
3106 dspfunc1(put , 3, 2, depth);\
3107 dspfunc1(put_no_rnd, 0, 16, depth);\
3108 dspfunc1(put_no_rnd, 1, 8, depth);\
3109 dspfunc1(avg , 0, 16, depth);\
3110 dspfunc1(avg , 1, 8, depth);\
3111 dspfunc1(avg , 2, 4, depth);\
3112 dspfunc1(avg , 3, 2, depth);\
3113 dspfunc1(avg_no_rnd, 0, 16, depth);\
3114 dspfunc1(avg_no_rnd, 1, 8, depth);\
3116 dspfunc2(put_h264_qpel, 0, 16, depth);\
3117 dspfunc2(put_h264_qpel, 1, 8, depth);\
3118 dspfunc2(put_h264_qpel, 2, 4, depth);\
3119 dspfunc2(put_h264_qpel, 3, 2, depth);\
3120 dspfunc2(avg_h264_qpel, 0, 16, depth);\
3121 dspfunc2(avg_h264_qpel, 1, 8, depth);\
3122 dspfunc2(avg_h264_qpel, 2, 4, depth);
3157 for (i = 0; i < 4; i++) {
3158 for (j = 0; j < 16; j++) {