56 #define pb_7f (~0UL/255 * 0x7f)
57 #define pb_80 (~0UL/255 * 0x80)
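/* pb_7f / pb_80 replicate 0x7f / 0x80 into every byte of a native word; they are
   used by the word-at-a-time byte add/diff loops later in the file.
   The table that follows is the classic 8x8 zigzag coefficient scan order. */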
60 0, 1, 8, 16, 9, 2, 3, 10,
61 17, 24, 32, 25, 18, 11, 4, 5,
62 12, 19, 26, 33, 40, 48, 41, 34,
63 27, 20, 13, 6, 7, 14, 21, 28,
64 35, 42, 49, 56, 57, 50, 43, 36,
65 29, 22, 15, 23, 30, 37, 44, 51,
66 58, 59, 52, 45, 38, 31, 39, 46,
67 53, 60, 61, 54, 47, 55, 62, 63
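/* Zigzag variant for interlaced (field) DCT blocks, scanning vertical
   frequencies in 2-4-8 groups. */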
73 0, 8, 1, 9, 16, 24, 2, 10,
74 17, 25, 32, 40, 48, 56, 33, 41,
75 18, 26, 3, 11, 4, 12, 19, 27,
76 34, 42, 49, 57, 50, 58, 35, 43,
77 20, 28, 5, 13, 6, 14, 21, 29,
78 36, 44, 51, 59, 52, 60, 37, 45,
79 22, 30, 7, 15, 23, 31, 38, 46,
80 53, 61, 54, 62, 39, 47, 55, 63,
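/* "Alternate horizontal" scan order (MPEG-2/MPEG-4). */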
87 0, 1, 2, 3, 8, 9, 16, 17,
88 10, 11, 4, 5, 6, 7, 15, 14,
89 13, 12, 19, 18, 24, 25, 32, 33,
90 26, 27, 20, 21, 22, 23, 28, 29,
91 30, 31, 34, 35, 40, 41, 48, 49,
92 42, 43, 36, 37, 38, 39, 44, 45,
93 46, 47, 50, 51, 56, 57, 58, 59,
94 52, 53, 54, 55, 60, 61, 62, 63,
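/* "Alternate vertical" scan order (MPEG-2/MPEG-4). */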
98 0, 8, 16, 24, 1, 9, 2, 10,
99 17, 25, 32, 40, 48, 56, 57, 49,
100 41, 33, 26, 18, 3, 11, 4, 12,
101 19, 27, 34, 42, 50, 58, 35, 43,
102 51, 59, 20, 28, 5, 13, 6, 14,
103 21, 29, 36, 44, 52, 60, 37, 45,
104 53, 61, 22, 30, 7, 15, 23, 31,
105 38, 46, 54, 62, 39, 47, 55, 63,
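/* Coefficient permutation table; in the full file this is the layout used by the
   SIMD "simple" IDCT. */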
110 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
111 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
112 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
113 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
114 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
115 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
116 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
117 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
130 j = src_scantable[i];
144 int idct_permutation_type)
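/* Build idct_permutation[] for the selected permutation type: identity, the
   libmpeg2 layout, a full transpose, or a partial transpose (the case labels
   themselves are elided from this excerpt). */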
148 switch(idct_permutation_type){
151 idct_permutation[i]= i;
155 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
163 idct_permutation[i]= ((i&7)<<3) | (i>>3);
167 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
183 for (i = 0; i < 16; i++) {
184 for (j = 0; j < 16; j += 8) {
195 pix += line_size - 16;
206 for (i = 0; i < 16; i++) {
207 for (j = 0; j < 16; j += 8) {
219 register uint64_t x=*(uint64_t*)pix;
221 s += sq[(x>>8)&0xff];
222 s += sq[(x>>16)&0xff];
223 s += sq[(x>>24)&0xff];
224 s += sq[(x>>32)&0xff];
225 s += sq[(x>>40)&0xff];
226 s += sq[(x>>48)&0xff];
227 s += sq[(x>>56)&0xff];
229 register uint32_t x=*(uint32_t*)pix;
231 s += sq[(x>>8)&0xff];
232 s += sq[(x>>16)&0xff];
233 s += sq[(x>>24)&0xff];
234 x=*(uint32_t*)(pix+4);
236 s += sq[(x>>8)&0xff];
237 s += sq[(x>>16)&0xff];
238 s += sq[(x>>24)&0xff];
243 pix += line_size - 16;
248 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
251 for(i=0; i+8<=w; i+=8){
278 for (i = 0; i < h; i++) {
279 s += sq[pix1[0] - pix2[0]];
280 s += sq[pix1[1] - pix2[1]];
281 s += sq[pix1[2] - pix2[2]];
282 s += sq[pix1[3] - pix2[3]];
295 for (i = 0; i < h; i++) {
296 s += sq[pix1[0] - pix2[0]];
297 s += sq[pix1[1] - pix2[1]];
298 s += sq[pix1[2] - pix2[2]];
299 s += sq[pix1[3] - pix2[3]];
300 s += sq[pix1[4] - pix2[4]];
301 s += sq[pix1[5] - pix2[5]];
302 s += sq[pix1[6] - pix2[6]];
303 s += sq[pix1[7] - pix2[7]];
316 for (i = 0; i < h; i++) {
317 s += sq[pix1[ 0] - pix2[ 0]];
318 s += sq[pix1[ 1] - pix2[ 1]];
319 s += sq[pix1[ 2] - pix2[ 2]];
320 s += sq[pix1[ 3] - pix2[ 3]];
321 s += sq[pix1[ 4] - pix2[ 4]];
322 s += sq[pix1[ 5] - pix2[ 5]];
323 s += sq[pix1[ 6] - pix2[ 6]];
324 s += sq[pix1[ 7] - pix2[ 7]];
325 s += sq[pix1[ 8] - pix2[ 8]];
326 s += sq[pix1[ 9] - pix2[ 9]];
327 s += sq[pix1[10] - pix2[10]];
328 s += sq[pix1[11] - pix2[11]];
329 s += sq[pix1[12] - pix2[12]];
330 s += sq[pix1[13] - pix2[13]];
331 s += sq[pix1[14] - pix2[14]];
332 s += sq[pix1[15] - pix2[15]];
346 block[0] = s1[0] - s2[0];
347 block[1] = s1[1] - s2[1];
348 block[2] = s1[2] - s2[2];
349 block[3] = s1[3] - s2[3];
350 block[4] = s1[4] - s2[4];
351 block[5] = s1[5] - s2[5];
352 block[6] = s1[6] - s2[6];
353 block[7] = s1[7] - s2[7];
368 pixels[0] = av_clip_uint8(block[0]);
369 pixels[1] = av_clip_uint8(block[1]);
370 pixels[2] = av_clip_uint8(block[2]);
371 pixels[3] = av_clip_uint8(block[3]);
372 pixels[4] = av_clip_uint8(block[4]);
373 pixels[5] = av_clip_uint8(block[5]);
374 pixels[6] = av_clip_uint8(block[6]);
375 pixels[7] = av_clip_uint8(block[7]);
388 for (i = 0; i < 8; i++) {
389 for (j = 0; j < 8; j++) {
392 else if (*block > 127)
395 *pixels = (uint8_t)(*block + 128);
399 pixels += (line_size - 8);
410 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
411 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
412 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
413 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
414 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
415 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
416 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
417 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
427 sum+= FFABS(block[i]);
435 for (i = 0; i < h; i++) {
436 memset(block, value, 16);
445 for (i = 0; i < h; i++) {
446 memset(block, value, 8);
451 #define avg2(a,b) ((a+b+1)>>1)
452 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
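/* Rounded 2- and 4-sample averages, e.g. avg2(1,2) == 2; used by the half-pel
   motion-compensation and SAD routines below. */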
456 const int A=(16-x16)*(16-y16);
457 const int B=( x16)*(16-y16);
458 const int C=(16-x16)*( y16);
459 const int D=( x16)*( y16);
464 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
465 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
466 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
467 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
468 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
469 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
470 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
471 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
478 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
481 const int s= 1<<shift;
492 int src_x, src_y, frac_x, frac_y, index;
501 if((unsigned)src_x < width){
502 if((unsigned)src_y < height){
503 index= src_x + src_y*stride;
504 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
505 + src[index +1]* frac_x )*(s-frac_y)
506 + ( src[index+stride ]*(s-frac_x)
507 + src[index+stride+1]* frac_x )* frac_y
510 index= src_x + av_clip(src_y, 0, height)*stride;
511 dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
512 + src[index +1]* frac_x )*s
516 if((unsigned)src_y < height){
517 index= av_clip(src_x, 0, width) + src_y*stride;
518 dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
519 + src[index+stride ]* frac_y )*s
522 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
523 dst[y*stride + x]= src[index ];
537 case 2: put_pixels2_8_c (dst, src, stride, height); break;
538 case 4: put_pixels4_8_c (dst, src, stride, height); break;
539 case 8: put_pixels8_8_c (dst, src, stride, height); break;
540 case 16:put_pixels16_8_c(dst, src, stride, height); break;
546 for (i=0; i < height; i++) {
547 for (j=0; j < width; j++) {
548 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
557 for (i=0; i < height; i++) {
558 for (j=0; j < width; j++) {
559 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
568 for (i=0; i < height; i++) {
569 for (j=0; j < width; j++) {
570 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
579 for (i=0; i < height; i++) {
580 for (j=0; j < width; j++) {
581 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
590 for (i=0; i < height; i++) {
591 for (j=0; j < width; j++) {
592 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
601 for (i=0; i < height; i++) {
602 for (j=0; j < width; j++) {
603 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
612 for (i=0; i < height; i++) {
613 for (j=0; j < width; j++) {
614 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
623 for (i=0; i < height; i++) {
624 for (j=0; j < width; j++) {
625 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
634 case 2: avg_pixels2_8_c (dst, src, stride, height); break;
635 case 4: avg_pixels4_8_c (dst, src, stride, height); break;
636 case 8: avg_pixels8_8_c (dst, src, stride, height); break;
637 case 16:avg_pixels16_8_c(dst, src, stride, height); break;
643 for (i=0; i < height; i++) {
644 for (j=0; j < width; j++) {
645 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
654 for (i=0; i < height; i++) {
655 for (j=0; j < width; j++) {
656 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
665 for (i=0; i < height; i++) {
666 for (j=0; j < width; j++) {
667 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
676 for (i=0; i < height; i++) {
677 for (j=0; j < width; j++) {
678 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
687 for (i=0; i < height; i++) {
688 for (j=0; j < width; j++) {
689 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
698 for (i=0; i < height; i++) {
699 for (j=0; j < width; j++) {
700 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
709 for (i=0; i < height; i++) {
710 for (j=0; j < width; j++) {
711 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
720 for (i=0; i < height; i++) {
721 for (j=0; j < width; j++) {
722 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
729 #define QPEL_MC(r, OPNAME, RND, OP) \
730 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
731 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
735 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
736 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
737 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
738 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
739 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
740 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
741 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
742 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
748 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
750 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
754 const int src0= src[0*srcStride];\
755 const int src1= src[1*srcStride];\
756 const int src2= src[2*srcStride];\
757 const int src3= src[3*srcStride];\
758 const int src4= src[4*srcStride];\
759 const int src5= src[5*srcStride];\
760 const int src6= src[6*srcStride];\
761 const int src7= src[7*srcStride];\
762 const int src8= src[8*srcStride];\
763 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
764 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
765 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
766 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
767 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
768 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
769 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
770 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
776 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
777 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
782 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
783 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
784 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
785 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
786 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
787 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
788 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
789 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
790 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
791 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
792 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
793 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
794 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
795 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
796 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
797 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
803 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
804 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
809 const int src0= src[0*srcStride];\
810 const int src1= src[1*srcStride];\
811 const int src2= src[2*srcStride];\
812 const int src3= src[3*srcStride];\
813 const int src4= src[4*srcStride];\
814 const int src5= src[5*srcStride];\
815 const int src6= src[6*srcStride];\
816 const int src7= src[7*srcStride];\
817 const int src8= src[8*srcStride];\
818 const int src9= src[9*srcStride];\
819 const int src10= src[10*srcStride];\
820 const int src11= src[11*srcStride];\
821 const int src12= src[12*srcStride];\
822 const int src13= src[13*srcStride];\
823 const int src14= src[14*srcStride];\
824 const int src15= src[15*srcStride];\
825 const int src16= src[16*srcStride];\
826 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
827 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
828 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
829 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
830 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
831 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
832 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
833 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
834 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
835 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
836 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
837 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
838 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
839 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
840 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
841 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
847 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
849 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
850 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
853 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
854 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
857 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
859 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
860 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
863 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
866 copy_block9(full, src, 16, stride, 9);\
867 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
868 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
871 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
873 copy_block9(full, src, 16, stride, 9);\
874 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
877 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
880 copy_block9(full, src, 16, stride, 9);\
881 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
882 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
884 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
889 copy_block9(full, src, 16, stride, 9);\
890 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
891 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
892 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
893 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
895 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
899 copy_block9(full, src, 16, stride, 9);\
900 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
901 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
902 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
903 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
905 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
910 copy_block9(full, src, 16, stride, 9);\
911 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
912 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
913 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
914 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
916 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
920 copy_block9(full, src, 16, stride, 9);\
921 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
922 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
923 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
924 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
926 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
931 copy_block9(full, src, 16, stride, 9);\
932 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
933 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
934 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
935 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
937 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
941 copy_block9(full, src, 16, stride, 9);\
942 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
943 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
944 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
945 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
947 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
952 copy_block9(full, src, 16, stride, 9);\
953 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
954 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
955 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
956 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
958 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
962 copy_block9(full, src, 16, stride, 9);\
963 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
964 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
965 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
966 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
968 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
971 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
972 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
973 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
975 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
979 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
980 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
982 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
987 copy_block9(full, src, 16, stride, 9);\
988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
989 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
991 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
993 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
996 copy_block9(full, src, 16, stride, 9);\
997 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
998 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
999 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1001 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1002 uint8_t full[16*9];\
1005 uint8_t halfHV[64];\
1006 copy_block9(full, src, 16, stride, 9);\
1007 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1008 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1009 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1010 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1012 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1013 uint8_t full[16*9];\
1015 copy_block9(full, src, 16, stride, 9);\
1016 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1017 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1018 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1020 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1022 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1023 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1026 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1028 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1029 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1032 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1033 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1036 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1038 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1039 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1042 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1043 uint8_t full[24*17];\
1045 copy_block17(full, src, 24, stride, 17);\
1046 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1047 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1050 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1051 uint8_t full[24*17];\
1052 copy_block17(full, src, 24, stride, 17);\
1053 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1056 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1057 uint8_t full[24*17];\
1059 copy_block17(full, src, 24, stride, 17);\
1060 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1061 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1063 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1064 uint8_t full[24*17];\
1065 uint8_t halfH[272];\
1066 uint8_t halfV[256];\
1067 uint8_t halfHV[256];\
1068 copy_block17(full, src, 24, stride, 17);\
1069 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1070 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1071 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1072 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1074 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1075 uint8_t full[24*17];\
1076 uint8_t halfH[272];\
1077 uint8_t halfHV[256];\
1078 copy_block17(full, src, 24, stride, 17);\
1079 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1080 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1081 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1082 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1084 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1085 uint8_t full[24*17];\
1086 uint8_t halfH[272];\
1087 uint8_t halfV[256];\
1088 uint8_t halfHV[256];\
1089 copy_block17(full, src, 24, stride, 17);\
1090 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1091 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1092 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1093 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1095 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1096 uint8_t full[24*17];\
1097 uint8_t halfH[272];\
1098 uint8_t halfHV[256];\
1099 copy_block17(full, src, 24, stride, 17);\
1100 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1101 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1102 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1103 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1105 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1106 uint8_t full[24*17];\
1107 uint8_t halfH[272];\
1108 uint8_t halfV[256];\
1109 uint8_t halfHV[256];\
1110 copy_block17(full, src, 24, stride, 17);\
1111 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1112 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1113 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1114 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1116 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1117 uint8_t full[24*17];\
1118 uint8_t halfH[272];\
1119 uint8_t halfHV[256];\
1120 copy_block17(full, src, 24, stride, 17);\
1121 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1122 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1123 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1124 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1126 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1127 uint8_t full[24*17];\
1128 uint8_t halfH[272];\
1129 uint8_t halfV[256];\
1130 uint8_t halfHV[256];\
1131 copy_block17(full, src, 24, stride, 17);\
1132 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1133 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1134 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1135 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1137 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1138 uint8_t full[24*17];\
1139 uint8_t halfH[272];\
1140 uint8_t halfHV[256];\
1141 copy_block17(full, src, 24, stride, 17);\
1142 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1143 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1144 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1145 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1147 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1148 uint8_t halfH[272];\
1149 uint8_t halfHV[256];\
1150 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1151 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1152 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1154 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1155 uint8_t halfH[272];\
1156 uint8_t halfHV[256];\
1157 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1158 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1159 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1161 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1162 uint8_t full[24*17];\
1163 uint8_t halfH[272];\
1164 uint8_t halfV[256];\
1165 uint8_t halfHV[256];\
1166 copy_block17(full, src, 24, stride, 17);\
1167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1168 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1170 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1172 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1173 uint8_t full[24*17];\
1174 uint8_t halfH[272];\
1175 copy_block17(full, src, 24, stride, 17);\
1176 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1177 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1178 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1180 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1181 uint8_t full[24*17];\
1182 uint8_t halfH[272];\
1183 uint8_t halfV[256];\
1184 uint8_t halfHV[256];\
1185 copy_block17(full, src, 24, stride, 17);\
1186 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1187 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1188 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1189 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1191 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1192 uint8_t full[24*17];\
1193 uint8_t halfH[272];\
1194 copy_block17(full, src, 24, stride, 17);\
1195 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1196 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1197 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1199 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1200 uint8_t halfH[272];\
1201 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1202 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1205 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1206 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1207 #define op_put(a, b) a = cm[((b) + 16)>>5]
1208 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
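/* Output ops for QPEL_MC: the lowpass has a DC gain of 32, so results are rounded
   (+16, or +15 for the no_rnd variants), shifted down by 5 and clipped through cm[];
   op_avg additionally averages with the existing destination pixel. */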
1212 QPEL_MC(0, avg_ , _ , op_avg)
1215 #undef op_avg_no_rnd
1217 #undef op_put_no_rnd
1219 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1220 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1221 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1222 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1223 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1224 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
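/* 4-tap (-1,9,9,-1)/16 half-pel lowpass; in the full file this is the WMV2/mspel
   horizontal filter, with the matching vertical version a bit further down. */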
1231 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1232 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1233 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1234 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1235 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1236 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1237 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1238 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1244 #if CONFIG_RV40_DECODER
1246 put_pixels16_xy2_8_c(dst, src, stride, 16);
1249 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1252 put_pixels8_xy2_8_c(dst, src, stride, 8);
1255 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1264 const int src_1= src[ -srcStride];
1265 const int src0 = src[0 ];
1266 const int src1 = src[ srcStride];
1267 const int src2 = src[2*srcStride];
1268 const int src3 = src[3*srcStride];
1269 const int src4 = src[4*srcStride];
1270 const int src5 = src[5*srcStride];
1271 const int src6 = src[6*srcStride];
1272 const int src7 = src[7*srcStride];
1273 const int src8 = src[8*srcStride];
1274 const int src9 = src[9*srcStride];
1275 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1276 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1277 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1278 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1279 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1280 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1281 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1282 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1291 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1301 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1315 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1324 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1343 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1345 if (d<-2*strength) d1= 0;
1346 else if(d<- strength) d1=-2*strength - d;
1347 else if(d< strength) d1= d;
1348 else if(d< 2*strength) d1= 2*strength - d;
1353 if(p1&256) p1= ~(p1>>31);
1354 if(p2&256) p2= ~(p2>>31);
1361 d2= av_clip((p0-p3)/4, -ad1, ad1);
1363 src[x-2*stride] = p0 - d2;
1364 src[x+stride] = p3 + d2;
1376 int p0= src[y*stride-2];
1377 int p1= src[y*stride-1];
1378 int p2= src[y*stride+0];
1379 int p3= src[y*stride+1];
1380 int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1382 if (d<-2*strength) d1= 0;
1383 else if(d<- strength) d1=-2*strength - d;
1384 else if(d< strength) d1= d;
1385 else if(d< 2*strength) d1= 2*strength - d;
1390 if(p1&256) p1= ~(p1>>31);
1391 if(p2&256) p2= ~(p2>>31);
1393 src[y*stride-1] = p1;
1394 src[y*stride+0] = p2;
1398 d2= av_clip((p0-p3)/4, -ad1, ad1);
1400 src[y*stride-2] = p0 - d2;
1401 src[y*stride+1] = p3 + d2;
1411 temp[x ] = 4*src[x ];
1412 temp[x + 7*8] = 4*src[x + 7*stride];
1416 xy = y * stride + x;
1418 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
1423 src[ y*stride] = (temp[ y*8] + 2)>>2;
1424 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
1426 xy = y * stride + x;
1428 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
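/* Sum-of-absolute-differences (pix_abs) routines follow: full-pel first, then the
   _x2/_y2/_xy2 variants that compare against half-pel references built with
   avg2()/avg4(). */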
1439 s += abs(pix1[0] - pix2[0]);
1440 s += abs(pix1[1] - pix2[1]);
1441 s += abs(pix1[2] - pix2[2]);
1442 s += abs(pix1[3] - pix2[3]);
1443 s += abs(pix1[4] - pix2[4]);
1444 s += abs(pix1[5] - pix2[5]);
1445 s += abs(pix1[6] - pix2[6]);
1446 s += abs(pix1[7] - pix2[7]);
1447 s += abs(pix1[8] - pix2[8]);
1448 s += abs(pix1[9] - pix2[9]);
1449 s += abs(pix1[10] - pix2[10]);
1450 s += abs(pix1[11] - pix2[11]);
1451 s += abs(pix1[12] - pix2[12]);
1452 s += abs(pix1[13] - pix2[13]);
1453 s += abs(pix1[14] - pix2[14]);
1454 s += abs(pix1[15] - pix2[15]);
1467 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1468 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1469 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1470 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1471 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1472 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1473 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1474 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1475 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1476 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1477 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1478 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1479 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1480 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1481 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1482 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
1492 uint8_t *pix3 = pix2 + line_size;
1496 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1497 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1498 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1499 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1500 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1501 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1502 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1503 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1504 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1505 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1506 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1507 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1508 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1509 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1510 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1511 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
1522 uint8_t *pix3 = pix2 + line_size;
1526 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1527 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1528 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1529 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1530 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1531 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1532 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1533 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1534 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1535 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1536 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1537 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1538 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1539 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1540 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1541 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1555 s += abs(pix1[0] - pix2[0]);
1556 s += abs(pix1[1] - pix2[1]);
1557 s += abs(pix1[2] - pix2[2]);
1558 s += abs(pix1[3] - pix2[3]);
1559 s += abs(pix1[4] - pix2[4]);
1560 s += abs(pix1[5] - pix2[5]);
1561 s += abs(pix1[6] - pix2[6]);
1562 s += abs(pix1[7] - pix2[7]);
1575 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1576 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1577 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1578 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1579 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1580 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1581 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1582 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1592 uint8_t *pix3 = pix2 + line_size;
1596 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1597 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1598 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1599 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1600 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1601 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1602 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1603 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1614 uint8_t *pix3 = pix2 + line_size;
1618 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1619 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1620 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1621 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1622 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1623 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1624 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1625 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1640 for(x=0; x<16; x++){
1641 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1644 for(x=0; x<15; x++){
1645 score2+= FFABS( s1[x ] - s1[x +stride]
1646 - s1[x+1] + s1[x+1+stride])
1647 - FFABS( s2[x ] - s2[x +stride]
1648 - s2[x+1] + s2[x+1+stride]);
1656 else return score1 + FFABS(score2)*8;
1667 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1671 score2+= FFABS( s1[x ] - s1[x +stride]
1672 - s1[x+1] + s1[x+1+stride])
1673 - FFABS( s2[x ] - s2[x +stride]
1674 - s2[x+1] + s2[x+1+stride]);
1682 else return score1 + FFABS(score2)*8;
1689 for(i=0; i<8*8; i++){
1693 assert(-512<b && b<512);
1695 sum += (w*b)*(w*b)>>4;
1703 for(i=0; i<8*8; i++){
1724 for(i=0; i<=last; i++){
1725 const int j= scantable[i];
1730 for(i=0; i<=last; i++){
1731 const int j= scantable[i];
1732 const int perm_j= permutation[j];
1733 block[perm_j]= temp[j];
1744 memset(cmp, 0, sizeof(void*)*6);
1803 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1804 long a = *(long*)(src+i);
1805 long b = *(long*)(dst+i);
1809 dst[i+0] += src[i+0];
1814 #if !HAVE_FAST_UNALIGNED
1815 if((long)src2 & (sizeof(long)-1)){
1816 for(i=0; i+7<w; i+=8){
1817 dst[i+0] = src1[i+0]-src2[i+0];
1818 dst[i+1] = src1[i+1]-src2[i+1];
1819 dst[i+2] = src1[i+2]-src2[i+2];
1820 dst[i+3] = src1[i+3]-src2[i+3];
1821 dst[i+4] = src1[i+4]-src2[i+4];
1822 dst[i+5] = src1[i+5]-src2[i+5];
1823 dst[i+6] = src1[i+6]-src2[i+6];
1824 dst[i+7] = src1[i+7]-src2[i+7];
1828 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1829 long a = *(long*)(src1+i);
1830 long b = *(long*)(src2+i);
1834 dst[i+0] = src1[i+0]-src2[i+0];
1845 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1862 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1875 for(i=0; i<w-1; i++){
1932 #define BUTTERFLY2(o1,o2,i1,i2) \
1936 #define BUTTERFLY1(x,y) \
1945 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
1956 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
1957 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
1958 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
1959 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2001 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2002 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2003 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2004 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2035 sum -= FFABS(temp[8*0] + temp[8*4]);
2053 const int s07 = SRC(0) + SRC(7);\
2054 const int s16 = SRC(1) + SRC(6);\
2055 const int s25 = SRC(2) + SRC(5);\
2056 const int s34 = SRC(3) + SRC(4);\
2057 const int a0 = s07 + s34;\
2058 const int a1 = s16 + s25;\
2059 const int a2 = s07 - s34;\
2060 const int a3 = s16 - s25;\
2061 const int d07 = SRC(0) - SRC(7);\
2062 const int d16 = SRC(1) - SRC(6);\
2063 const int d25 = SRC(2) - SRC(5);\
2064 const int d34 = SRC(3) - SRC(4);\
2065 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2066 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2067 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2068 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2070 DST(1, a4 + (a7>>2)) ;\
2071 DST(2, a2 + (a3>>1)) ;\
2072 DST(3, a5 + (a6>>2)) ;\
2074 DST(5, a6 - (a5>>2)) ;\
2075 DST(6, (a2>>1) - a3 ) ;\
2076 DST(7, (a4>>2) - a7 ) ;\
2087 #define SRC(x) dct[i][x]
2088 #define DST(x,v) dct[i][x]= v
2089 for( i = 0; i < 8; i++ )
2094 #define SRC(x) dct[x][i]
2095 #define DST(x,v) sum += FFABS(v)
2096 for( i = 0; i < 8; i++ )
2123 DCTELEM * const bak = temp+64;
2131 memcpy(bak, temp, 64*sizeof(DCTELEM));
2138 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2178 for(i=start_i; i<last; i++){
2179 int j= scantable[i];
2184 if((level&(~127)) == 0){
2194 level= temp[i] + 64;
2198 if((level&(~127)) == 0){
2214 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2216 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2249 for(i=start_i; i<last; i++){
2250 int j= scantable[i];
2255 if((level&(~127)) == 0){
2265 level= temp[i] + 64;
2269 if((level&(~127)) == 0){
2278 #define VSAD_INTRA(size) \
2279 static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2283 for(y=1; y<h; y++){ \
2284 for(x=0; x<size; x+=4){ \
2285 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2286 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2301 for(x=0; x<16; x++){
2302 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2311 #define SQ(a) ((a)*(a))
2312 #define VSSE_INTRA(size) \
2313 static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2317 for(y=1; y<h; y++){ \
2318 for(x=0; x<size; x+=4){ \
2319 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2320 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2335 for(x=0; x<16; x++){
2336 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2349 for(i=0; i<size; i++)
2350 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2368 for(i=0; i<len; i++)
2369 dst[i] = src0[i] * src1[-i];
2374 for(i=0; i<len; i++)
2375 dst[i] = src0[i] * src1[i] + src2[i];
2379 const float *src1, const float *win, int len)
2385 for(i=-len, j=len-1; i<0; i++, j--) {
2390 dst[i] = s0*wj - s1*wi;
2391 dst[j] = s0*wi + s1*wj;
2399 for (i = 0; i < len; i++) {
2400 float t = v1[i] - v2[i];
2407 const float *src1, int len)
2410 for (i = 0; i < len; i++) {
2413 dst[2*i ] = f1 + f2;
2414 dst[2*i + 1] = f1 - f2;
2423 for (i = 0; i < len; i++)
2430 uint32_t maxi, uint32_t maxisign)
2433 if(a > mini) return mini;
2434 else if((a^(1U<<31)) > maxisign) return maxi;
2440 uint32_t mini = *(uint32_t*)min;
2441 uint32_t maxi = *(uint32_t*)max;
2442 uint32_t maxisign = maxi ^ (1U<<31);
2443 uint32_t *dsti = (uint32_t*)dst;
2444 const uint32_t *srci = (const uint32_t*)src;
2445 for(i=0; i<len; i+=8) {
2446 dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2447 dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2448 dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2449 dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2450 dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2451 dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2452 dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2453 dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
2458 if(min < 0 && max > 0) {
2461 for(i=0; i < len; i+=8) {
2462 dst[i ] = av_clipf(src[i ], min, max);
2463 dst[i + 1] = av_clipf(src[i + 1], min, max);
2464 dst[i + 2] = av_clipf(src[i + 2], min, max);
2465 dst[i + 3] = av_clipf(src[i + 3], min, max);
2466 dst[i + 4] = av_clipf(src[i + 4], min, max);
2467 dst[i + 5] = av_clipf(src[i + 5], min, max);
2468 dst[i + 6] = av_clipf(src[i + 6], min, max);
2469 dst[i + 7] = av_clipf(src[i + 7], min, max);
2479 res += *v1++ * *v2++;
2489 *v1++ += mul * *v3++;
2495 const int16_t *window, unsigned int len)
2498 int len2 = len >> 1;
2500 for (i = 0; i < len2; i++) {
2501 int16_t w = window[i];
2502 output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
2503 output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
2511 *dst++ = av_clip(*src++, min, max);
2512 *dst++ = av_clip(*src++, min, max);
2513 *dst++ = av_clip(*src++, min, max);
2514 *dst++ = av_clip(*src++, min, max);
2515 *dst++ = av_clip(*src++, min, max);
2516 *dst++ = av_clip(*src++, min, max);
2517 *dst++ = av_clip(*src++, min, max);
2518 *dst++ = av_clip(*src++, min, max);
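/* Fixed-point 8-point inverse DCT passes: W0..W7 are scaled cosine constants and
   181/256 approximates 1/sqrt(2). The row pass below rounds with +(1<<7) and >>8,
   the column pass with +(1<<13) and >>14. */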
2537 a1 = W1*b[1]+W7*b[7];
2538 a7 = W7*b[1]-W1*b[7];
2539 a5 = W5*b[5]+W3*b[3];
2540 a3 = W3*b[5]-W5*b[3];
2541 a2 = W2*b[2]+W6*b[6];
2542 a6 = W6*b[2]-W2*b[6];
2543 a0 = W0*b[0]+W0*b[4];
2544 a4 = W0*b[0]-W0*b[4];
2546 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2547 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2549 b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2550 b[1] = (a4+a6 +s1 + (1<<7))>>8;
2551 b[2] = (a4-a6 +s2 + (1<<7))>>8;
2552 b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2553 b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2554 b[5] = (a4-a6 -s2 + (1<<7))>>8;
2555 b[6] = (a4+a6 -s1 + (1<<7))>>8;
2556 b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
2563 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2564 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2565 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2566 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2567 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2568 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2569 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2570 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
2572 s1 = (181*(a1-a5+a7-a3)+128)>>8;
2573 s2 = (181*(a1-a5-a7+a3)+128)>>8;
2575 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2576 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2577 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2578 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2580 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2581 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2582 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2583 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
2629 for(i=0;i<512;i++) {
2637 static int did_fail=0;
2640 if((intptr_t)aligned & 15){
2642 #if HAVE_MMX || HAVE_ALTIVEC
2644 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2645 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2646 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2647 "Do not report crashes to Libav developers.\n");
2680 #endif //CONFIG_ENCODERS
2754 #define dspfunc(PFX, IDX, NUM) \
2755 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2756 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2757 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2758 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2759 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2760 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2761 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2762 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2763 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2764 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2765 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2766 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2767 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2768 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2769 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2770 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
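/* dspfunc() fills one 16-entry quarter-pel table; entry index is 4*y + x, matching
   the mcXY naming of the position functions. */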
2773 dspfunc(put_no_rnd_qpel, 0, 16);
2779 dspfunc(put_no_rnd_qpel, 1, 8);
2795 #define SET_CMP_FUNC(name) \
2796 c->name[0]= name ## 16_c;\
2797 c->name[1]= name ## 8x8_c;
2816 c->vsad[4]= vsad_intra16_c;
2817 c->vsad[5]= vsad_intra8_c;
2819 c->vsse[4]= vsse_intra16_c;
2820 c->vsse[5]= vsse_intra8_c;
2848 #if CONFIG_VORBIS_DECODER
2873 #define FUNC(f, depth) f ## _ ## depth
2874 #define FUNCC(f, depth) f ## _ ## depth ## _c
2876 #define dspfunc1(PFX, IDX, NUM, depth)\
2877 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
2878 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
2879 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
2880 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
2882 #define dspfunc2(PFX, IDX, NUM, depth)\
2883 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
2884 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
2885 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
2886 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
2887 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
2888 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
2889 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
2890 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
2891 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
2892 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
2893 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
2894 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
2895 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
2896 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
2897 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
2898 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
2901 #define BIT_DEPTH_FUNCS(depth, dct)\
2902 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
2903 c->draw_edges = FUNCC(draw_edges , depth);\
2904 c->clear_block = FUNCC(clear_block ## dct , depth);\
2905 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
2906 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
2907 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
2908 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
2909 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
2911 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
2912 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
2913 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
2914 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
2915 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
2916 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
2918 dspfunc1(put , 0, 16, depth);\
2919 dspfunc1(put , 1, 8, depth);\
2920 dspfunc1(put , 2, 4, depth);\
2921 dspfunc1(put , 3, 2, depth);\
2922 dspfunc1(put_no_rnd, 0, 16, depth);\
2923 dspfunc1(put_no_rnd, 1, 8, depth);\
2924 dspfunc1(avg , 0, 16, depth);\
2925 dspfunc1(avg , 1, 8, depth);\
2926 dspfunc1(avg , 2, 4, depth);\
2927 dspfunc1(avg , 3, 2, depth);\
2928 dspfunc1(avg_no_rnd, 0, 16, depth);\
2929 dspfunc1(avg_no_rnd, 1, 8, depth);\
2931 dspfunc2(put_h264_qpel, 0, 16, depth);\
2932 dspfunc2(put_h264_qpel, 1, 8, depth);\
2933 dspfunc2(put_h264_qpel, 2, 4, depth);\
2934 dspfunc2(put_h264_qpel, 3, 2, depth);\
2935 dspfunc2(avg_h264_qpel, 0, 16, depth);\
2936 dspfunc2(avg_h264_qpel, 1, 8, depth);\
2937 dspfunc2(avg_h264_qpel, 2, 4, depth);
2968 for (i = 0; i < 4; i++) {
2969 for (j = 0; j < 16; j++) {