dsputil.c
1 /*
2  * DSP utils
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * DSP utils
28  */
29 
30 #include "libavutil/imgutils.h"
31 #include "avcodec.h"
32 #include "dsputil.h"
33 #include "simple_idct.h"
34 #include "faandct.h"
35 #include "faanidct.h"
36 #include "mathops.h"
37 #include "mpegvideo.h"
38 #include "config.h"
39 #include "vorbis.h"
40 
41 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
42 uint32_t ff_squareTbl[512] = {0, };
43 
44 #define BIT_DEPTH 9
45 #include "dsputil_template.c"
46 #undef BIT_DEPTH
47 
48 #define BIT_DEPTH 10
49 #include "dsputil_template.c"
50 #undef BIT_DEPTH
51 
52 #define BIT_DEPTH 8
53 #include "dsputil_template.c"
54 
55 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
56 #define pb_7f (~0UL/255 * 0x7f)
57 #define pb_80 (~0UL/255 * 0x80)
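/* e.g. with a 32-bit unsigned long, ~0UL/255 == 0x01010101, so pb_7f == 0x7f7f7f7f
   and pb_80 == 0x80808080; with a 64-bit long the byte pattern repeats in all 8 bytes. */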
58 
59 const uint8_t ff_zigzag_direct[64] = {
60  0, 1, 8, 16, 9, 2, 3, 10,
61  17, 24, 32, 25, 18, 11, 4, 5,
62  12, 19, 26, 33, 40, 48, 41, 34,
63  27, 20, 13, 6, 7, 14, 21, 28,
64  35, 42, 49, 56, 57, 50, 43, 36,
65  29, 22, 15, 23, 30, 37, 44, 51,
66  58, 59, 52, 45, 38, 31, 39, 46,
67  53, 60, 61, 54, 47, 55, 62, 63
68 };
69 
70 /* Specific zigzag scan for 248 idct. NOTE that unlike the
71  specification, we interleave the fields */
72 const uint8_t ff_zigzag248_direct[64] = {
73  0, 8, 1, 9, 16, 24, 2, 10,
74  17, 25, 32, 40, 48, 56, 33, 41,
75  18, 26, 3, 11, 4, 12, 19, 27,
76  34, 42, 49, 57, 50, 58, 35, 43,
77  20, 28, 5, 13, 6, 14, 21, 29,
78  36, 44, 51, 59, 52, 60, 37, 45,
79  22, 30, 7, 15, 23, 31, 38, 46,
80  53, 61, 54, 62, 39, 47, 55, 63,
81 };
82 
83 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
84 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
85 
86 const uint8_t ff_alternate_horizontal_scan[64] = {
87  0, 1, 2, 3, 8, 9, 16, 17,
88  10, 11, 4, 5, 6, 7, 15, 14,
89  13, 12, 19, 18, 24, 25, 32, 33,
90  26, 27, 20, 21, 22, 23, 28, 29,
91  30, 31, 34, 35, 40, 41, 48, 49,
92  42, 43, 36, 37, 38, 39, 44, 45,
93  46, 47, 50, 51, 56, 57, 58, 59,
94  52, 53, 54, 55, 60, 61, 62, 63,
95 };
96 
97 const uint8_t ff_alternate_vertical_scan[64] = {
98  0, 8, 16, 24, 1, 9, 2, 10,
99  17, 25, 32, 40, 48, 56, 57, 49,
100  41, 33, 26, 18, 3, 11, 4, 12,
101  19, 27, 34, 42, 50, 58, 35, 43,
102  51, 59, 20, 28, 5, 13, 6, 14,
103  21, 29, 36, 44, 52, 60, 37, 45,
104  53, 61, 22, 30, 7, 15, 23, 31,
105  38, 46, 54, 62, 39, 47, 55, 63,
106 };
107 
108 /* Input permutation for the simple_idct_mmx */
109 static const uint8_t simple_mmx_permutation[64]={
110  0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
111  0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
112  0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
113  0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
114  0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
115  0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
116  0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
117  0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
118 };
119 
120 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
121 
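/* Builds the permuted scan table: permutated[i] = permutation[scantable[i]].
   raster_end[i] is the highest permuted position seen up to scan index i,
   i.e. the end of the coefficient block in raster order. */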
122 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
123  int i;
124  int end;
125 
126  st->scantable= src_scantable;
127 
128  for(i=0; i<64; i++){
129  int j;
130  j = src_scantable[i];
131  st->permutated[i] = permutation[j];
132  }
133 
134  end=-1;
135  for(i=0; i<64; i++){
136  int j;
137  j = st->permutated[i];
138  if(j>end) end=j;
139  st->raster_end[i]= end;
140  }
141 }
142 
143 void ff_init_scantable_permutation(uint8_t *idct_permutation,
144  int idct_permutation_type)
145 {
146  int i;
147 
148  switch(idct_permutation_type){
149  case FF_NO_IDCT_PERM:
150  for(i=0; i<64; i++)
151  idct_permutation[i]= i;
152  break;
153  case FF_LIBMPEG2_IDCT_PERM:
154  for(i=0; i<64; i++)
155  idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
156  break;
157  case FF_SIMPLE_IDCT_PERM:
158  for(i=0; i<64; i++)
159  idct_permutation[i]= simple_mmx_permutation[i];
160  break;
161  case FF_TRANSPOSE_IDCT_PERM:
162  for(i=0; i<64; i++)
163  idct_permutation[i]= ((i&7)<<3) | (i>>3);
164  break;
165  case FF_PARTTRANS_IDCT_PERM:
166  for(i=0; i<64; i++)
167  idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
168  break;
169  case FF_SSE2_IDCT_PERM:
170  for(i=0; i<64; i++)
171  idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
172  break;
173  default:
174  av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
175  }
176 }
177 
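/* pix_sum_c()/pix_norm1_c(): sum and sum of squares of a 16x16 pixel block.
   pix_norm1_c() loads 4 or 8 bytes at a time and looks the squares up in
   ff_squareTbl (sq is biased by 256 so the same table can also square signed
   differences in the sse*_c functions below). */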
178 static int pix_sum_c(uint8_t * pix, int line_size)
179 {
180  int s, i, j;
181 
182  s = 0;
183  for (i = 0; i < 16; i++) {
184  for (j = 0; j < 16; j += 8) {
185  s += pix[0];
186  s += pix[1];
187  s += pix[2];
188  s += pix[3];
189  s += pix[4];
190  s += pix[5];
191  s += pix[6];
192  s += pix[7];
193  pix += 8;
194  }
195  pix += line_size - 16;
196  }
197  return s;
198 }
199 
200 static int pix_norm1_c(uint8_t * pix, int line_size)
201 {
202  int s, i, j;
203  uint32_t *sq = ff_squareTbl + 256;
204 
205  s = 0;
206  for (i = 0; i < 16; i++) {
207  for (j = 0; j < 16; j += 8) {
208 #if 0
209  s += sq[pix[0]];
210  s += sq[pix[1]];
211  s += sq[pix[2]];
212  s += sq[pix[3]];
213  s += sq[pix[4]];
214  s += sq[pix[5]];
215  s += sq[pix[6]];
216  s += sq[pix[7]];
217 #else
218 #if HAVE_FAST_64BIT
219  register uint64_t x=*(uint64_t*)pix;
220  s += sq[x&0xff];
221  s += sq[(x>>8)&0xff];
222  s += sq[(x>>16)&0xff];
223  s += sq[(x>>24)&0xff];
224  s += sq[(x>>32)&0xff];
225  s += sq[(x>>40)&0xff];
226  s += sq[(x>>48)&0xff];
227  s += sq[(x>>56)&0xff];
228 #else
229  register uint32_t x=*(uint32_t*)pix;
230  s += sq[x&0xff];
231  s += sq[(x>>8)&0xff];
232  s += sq[(x>>16)&0xff];
233  s += sq[(x>>24)&0xff];
234  x=*(uint32_t*)(pix+4);
235  s += sq[x&0xff];
236  s += sq[(x>>8)&0xff];
237  s += sq[(x>>16)&0xff];
238  s += sq[(x>>24)&0xff];
239 #endif
240 #endif
241  pix += 8;
242  }
243  pix += line_size - 16;
244  }
245  return s;
246 }
247 
248 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
249  int i;
250 
251  for(i=0; i+8<=w; i+=8){
252  dst[i+0]= av_bswap32(src[i+0]);
253  dst[i+1]= av_bswap32(src[i+1]);
254  dst[i+2]= av_bswap32(src[i+2]);
255  dst[i+3]= av_bswap32(src[i+3]);
256  dst[i+4]= av_bswap32(src[i+4]);
257  dst[i+5]= av_bswap32(src[i+5]);
258  dst[i+6]= av_bswap32(src[i+6]);
259  dst[i+7]= av_bswap32(src[i+7]);
260  }
261  for(;i<w; i++){
262  dst[i+0]= av_bswap32(src[i+0]);
263  }
264 }
265 
266 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
267 {
268  while (len--)
269  *dst++ = av_bswap16(*src++);
270 }
271 
272 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
273 {
274  int s, i;
275  uint32_t *sq = ff_squareTbl + 256;
276 
277  s = 0;
278  for (i = 0; i < h; i++) {
279  s += sq[pix1[0] - pix2[0]];
280  s += sq[pix1[1] - pix2[1]];
281  s += sq[pix1[2] - pix2[2]];
282  s += sq[pix1[3] - pix2[3]];
283  pix1 += line_size;
284  pix2 += line_size;
285  }
286  return s;
287 }
288 
289 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
290 {
291  int s, i;
292  uint32_t *sq = ff_squareTbl + 256;
293 
294  s = 0;
295  for (i = 0; i < h; i++) {
296  s += sq[pix1[0] - pix2[0]];
297  s += sq[pix1[1] - pix2[1]];
298  s += sq[pix1[2] - pix2[2]];
299  s += sq[pix1[3] - pix2[3]];
300  s += sq[pix1[4] - pix2[4]];
301  s += sq[pix1[5] - pix2[5]];
302  s += sq[pix1[6] - pix2[6]];
303  s += sq[pix1[7] - pix2[7]];
304  pix1 += line_size;
305  pix2 += line_size;
306  }
307  return s;
308 }
309 
310 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
311 {
312  int s, i;
313  uint32_t *sq = ff_squareTbl + 256;
314 
315  s = 0;
316  for (i = 0; i < h; i++) {
317  s += sq[pix1[ 0] - pix2[ 0]];
318  s += sq[pix1[ 1] - pix2[ 1]];
319  s += sq[pix1[ 2] - pix2[ 2]];
320  s += sq[pix1[ 3] - pix2[ 3]];
321  s += sq[pix1[ 4] - pix2[ 4]];
322  s += sq[pix1[ 5] - pix2[ 5]];
323  s += sq[pix1[ 6] - pix2[ 6]];
324  s += sq[pix1[ 7] - pix2[ 7]];
325  s += sq[pix1[ 8] - pix2[ 8]];
326  s += sq[pix1[ 9] - pix2[ 9]];
327  s += sq[pix1[10] - pix2[10]];
328  s += sq[pix1[11] - pix2[11]];
329  s += sq[pix1[12] - pix2[12]];
330  s += sq[pix1[13] - pix2[13]];
331  s += sq[pix1[14] - pix2[14]];
332  s += sq[pix1[15] - pix2[15]];
333 
334  pix1 += line_size;
335  pix2 += line_size;
336  }
337  return s;
338 }
339 
340 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
341  const uint8_t *s2, int stride){
342  int i;
343 
344  /* read the pixels */
345  for(i=0;i<8;i++) {
346  block[0] = s1[0] - s2[0];
347  block[1] = s1[1] - s2[1];
348  block[2] = s1[2] - s2[2];
349  block[3] = s1[3] - s2[3];
350  block[4] = s1[4] - s2[4];
351  block[5] = s1[5] - s2[5];
352  block[6] = s1[6] - s2[6];
353  block[7] = s1[7] - s2[7];
354  s1 += stride;
355  s2 += stride;
356  block += 8;
357  }
358 }
359 
360 
361 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
362  int line_size)
363 {
364  int i;
365 
366  /* read the pixels */
367  for(i=0;i<8;i++) {
368  pixels[0] = av_clip_uint8(block[0]);
369  pixels[1] = av_clip_uint8(block[1]);
370  pixels[2] = av_clip_uint8(block[2]);
371  pixels[3] = av_clip_uint8(block[3]);
372  pixels[4] = av_clip_uint8(block[4]);
373  pixels[5] = av_clip_uint8(block[5]);
374  pixels[6] = av_clip_uint8(block[6]);
375  pixels[7] = av_clip_uint8(block[7]);
376 
377  pixels += line_size;
378  block += 8;
379  }
380 }
381 
382 static void put_signed_pixels_clamped_c(const DCTELEM *block,
383  uint8_t *restrict pixels,
384  int line_size)
385 {
386  int i, j;
387 
388  for (i = 0; i < 8; i++) {
389  for (j = 0; j < 8; j++) {
390  if (*block < -128)
391  *pixels = 0;
392  else if (*block > 127)
393  *pixels = 255;
394  else
395  *pixels = (uint8_t)(*block + 128);
396  block++;
397  pixels++;
398  }
399  pixels += (line_size - 8);
400  }
401 }
402 
403 static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
404  int line_size)
405 {
406  int i;
407 
408  /* read the pixels */
409  for(i=0;i<8;i++) {
410  pixels[0] = av_clip_uint8(pixels[0] + block[0]);
411  pixels[1] = av_clip_uint8(pixels[1] + block[1]);
412  pixels[2] = av_clip_uint8(pixels[2] + block[2]);
413  pixels[3] = av_clip_uint8(pixels[3] + block[3]);
414  pixels[4] = av_clip_uint8(pixels[4] + block[4]);
415  pixels[5] = av_clip_uint8(pixels[5] + block[5]);
416  pixels[6] = av_clip_uint8(pixels[6] + block[6]);
417  pixels[7] = av_clip_uint8(pixels[7] + block[7]);
418  pixels += line_size;
419  block += 8;
420  }
421 }
422 
423 static int sum_abs_dctelem_c(DCTELEM *block)
424 {
425  int sum=0, i;
426  for(i=0; i<64; i++)
427  sum+= FFABS(block[i]);
428  return sum;
429 }
430 
431 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
432 {
433  int i;
434 
435  for (i = 0; i < h; i++) {
436  memset(block, value, 16);
437  block += line_size;
438  }
439 }
440 
441 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
442 {
443  int i;
444 
445  for (i = 0; i < h; i++) {
446  memset(block, value, 8);
447  block += line_size;
448  }
449 }
450 
451 #define avg2(a,b) ((a+b+1)>>1)
452 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
453 
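/* gmc1_c(): 2x2 bilinear interpolation at 1/16-pel precision; the weights
   A, B, C and D sum to 16*16 = 256, so adding 'rounder' and shifting right
   by 8 renormalizes the result. */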
454 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
455 {
456  const int A=(16-x16)*(16-y16);
457  const int B=( x16)*(16-y16);
458  const int C=(16-x16)*( y16);
459  const int D=( x16)*( y16);
460  int i;
461 
462  for(i=0; i<h; i++)
463  {
464  dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
465  dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
466  dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
467  dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
468  dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
469  dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
470  dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
471  dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
472  dst+= stride;
473  src+= stride;
474  }
475 }
476 
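/* ff_gmc_c(): general (affine) global motion compensation. (vx,vy) is a
   fixed-point source coordinate advanced by (dxx,dyx) per output column and
   (dxy,dyy) per output row; each sample is bilinearly interpolated, with
   clamping against the picture edges when the source position falls outside. */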
477 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
478  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
479 {
480  int y, vx, vy;
481  const int s= 1<<shift;
482 
483  width--;
484  height--;
485 
486  for(y=0; y<h; y++){
487  int x;
488 
489  vx= ox;
490  vy= oy;
491  for(x=0; x<8; x++){ //XXX FIXME optimize
492  int src_x, src_y, frac_x, frac_y, index;
493 
494  src_x= vx>>16;
495  src_y= vy>>16;
496  frac_x= src_x&(s-1);
497  frac_y= src_y&(s-1);
498  src_x>>=shift;
499  src_y>>=shift;
500 
501  if((unsigned)src_x < width){
502  if((unsigned)src_y < height){
503  index= src_x + src_y*stride;
504  dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
505  + src[index +1]* frac_x )*(s-frac_y)
506  + ( src[index+stride ]*(s-frac_x)
507  + src[index+stride+1]* frac_x )* frac_y
508  + r)>>(shift*2);
509  }else{
510  index= src_x + av_clip(src_y, 0, height)*stride;
511  dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
512  + src[index +1]* frac_x )*s
513  + r)>>(shift*2);
514  }
515  }else{
516  if((unsigned)src_y < height){
517  index= av_clip(src_x, 0, width) + src_y*stride;
518  dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
519  + src[index+stride ]* frac_y )*s
520  + r)>>(shift*2);
521  }else{
522  index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
523  dst[y*stride + x]= src[index ];
524  }
525  }
526 
527  vx+= dxx;
528  vy+= dyx;
529  }
530  ox += dxy;
531  oy += dyy;
532  }
533 }
534 
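/* The *_tpel_pixels_* functions implement third-pel interpolation (used by the
   SVQ3 decoder). 683 ~= 2048/3 and 2731 ~= 32768/12, so for example
   (683*(2*a + b + 1)) >> 11 approximates (2*a + b)/3 with rounding. */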
535 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
536  switch(width){
537  case 2: put_pixels2_8_c (dst, src, stride, height); break;
538  case 4: put_pixels4_8_c (dst, src, stride, height); break;
539  case 8: put_pixels8_8_c (dst, src, stride, height); break;
540  case 16:put_pixels16_8_c(dst, src, stride, height); break;
541  }
542 }
543 
544 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
545  int i,j;
546  for (i=0; i < height; i++) {
547  for (j=0; j < width; j++) {
548  dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
549  }
550  src += stride;
551  dst += stride;
552  }
553 }
554 
555 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
556  int i,j;
557  for (i=0; i < height; i++) {
558  for (j=0; j < width; j++) {
559  dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
560  }
561  src += stride;
562  dst += stride;
563  }
564 }
565 
566 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
567  int i,j;
568  for (i=0; i < height; i++) {
569  for (j=0; j < width; j++) {
570  dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
571  }
572  src += stride;
573  dst += stride;
574  }
575 }
576 
577 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
578  int i,j;
579  for (i=0; i < height; i++) {
580  for (j=0; j < width; j++) {
581  dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
582  }
583  src += stride;
584  dst += stride;
585  }
586 }
587 
588 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
589  int i,j;
590  for (i=0; i < height; i++) {
591  for (j=0; j < width; j++) {
592  dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
593  }
594  src += stride;
595  dst += stride;
596  }
597 }
598 
599 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
600  int i,j;
601  for (i=0; i < height; i++) {
602  for (j=0; j < width; j++) {
603  dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
604  }
605  src += stride;
606  dst += stride;
607  }
608 }
609 
610 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
611  int i,j;
612  for (i=0; i < height; i++) {
613  for (j=0; j < width; j++) {
614  dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
615  }
616  src += stride;
617  dst += stride;
618  }
619 }
620 
621 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
622  int i,j;
623  for (i=0; i < height; i++) {
624  for (j=0; j < width; j++) {
625  dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
626  }
627  src += stride;
628  dst += stride;
629  }
630 }
631 
632 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
633  switch(width){
634  case 2: avg_pixels2_8_c (dst, src, stride, height); break;
635  case 4: avg_pixels4_8_c (dst, src, stride, height); break;
636  case 8: avg_pixels8_8_c (dst, src, stride, height); break;
637  case 16:avg_pixels16_8_c(dst, src, stride, height); break;
638  }
639 }
640 
641 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
642  int i,j;
643  for (i=0; i < height; i++) {
644  for (j=0; j < width; j++) {
645  dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
646  }
647  src += stride;
648  dst += stride;
649  }
650 }
651 
652 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
653  int i,j;
654  for (i=0; i < height; i++) {
655  for (j=0; j < width; j++) {
656  dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
657  }
658  src += stride;
659  dst += stride;
660  }
661 }
662 
663 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
664  int i,j;
665  for (i=0; i < height; i++) {
666  for (j=0; j < width; j++) {
667  dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
668  }
669  src += stride;
670  dst += stride;
671  }
672 }
673 
674 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
675  int i,j;
676  for (i=0; i < height; i++) {
677  for (j=0; j < width; j++) {
678  dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
679  }
680  src += stride;
681  dst += stride;
682  }
683 }
684 
685 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
686  int i,j;
687  for (i=0; i < height; i++) {
688  for (j=0; j < width; j++) {
689  dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
690  }
691  src += stride;
692  dst += stride;
693  }
694 }
695 
696 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
697  int i,j;
698  for (i=0; i < height; i++) {
699  for (j=0; j < width; j++) {
700  dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
701  }
702  src += stride;
703  dst += stride;
704  }
705 }
706 
707 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
708  int i,j;
709  for (i=0; i < height; i++) {
710  for (j=0; j < width; j++) {
711  dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
712  }
713  src += stride;
714  dst += stride;
715  }
716 }
717 
718 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
719  int i,j;
720  for (i=0; i < height; i++) {
721  for (j=0; j < width; j++) {
722  dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
723  }
724  src += stride;
725  dst += stride;
726  }
727 }
728 
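/* QPEL_MC() expands into the MPEG-4 quarter-pel motion compensation functions.
   The half-pel lowpass uses the 8-tap filter (-1, 3, -6, 20, 20, -6, 3, -1)/32
   (with mirrored edge samples); the OP macros below add the rounding constant
   before the >>5 and clamp through ff_cropTbl. Quarter-pel positions are then
   formed by the pixels*_l2/_l4 helpers, which average the lowpass result with
   the source or with other half-pel planes. */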
729 #define QPEL_MC(r, OPNAME, RND, OP) \
730 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
731  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
732  int i;\
733  for(i=0; i<h; i++)\
734  {\
735  OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
736  OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
737  OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
738  OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
739  OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
740  OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
741  OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
742  OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
743  dst+=dstStride;\
744  src+=srcStride;\
745  }\
746 }\
747 \
748 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
749  const int w=8;\
750  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
751  int i;\
752  for(i=0; i<w; i++)\
753  {\
754  const int src0= src[0*srcStride];\
755  const int src1= src[1*srcStride];\
756  const int src2= src[2*srcStride];\
757  const int src3= src[3*srcStride];\
758  const int src4= src[4*srcStride];\
759  const int src5= src[5*srcStride];\
760  const int src6= src[6*srcStride];\
761  const int src7= src[7*srcStride];\
762  const int src8= src[8*srcStride];\
763  OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
764  OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
765  OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
766  OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
767  OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
768  OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
769  OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
770  OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
771  dst++;\
772  src++;\
773  }\
774 }\
775 \
776 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
777  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
778  int i;\
779  \
780  for(i=0; i<h; i++)\
781  {\
782  OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
783  OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
784  OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
785  OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
786  OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
787  OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
788  OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
789  OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
790  OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
791  OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
792  OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
793  OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
794  OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
795  OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
796  OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
797  OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
798  dst+=dstStride;\
799  src+=srcStride;\
800  }\
801 }\
802 \
803 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
804  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
805  int i;\
806  const int w=16;\
807  for(i=0; i<w; i++)\
808  {\
809  const int src0= src[0*srcStride];\
810  const int src1= src[1*srcStride];\
811  const int src2= src[2*srcStride];\
812  const int src3= src[3*srcStride];\
813  const int src4= src[4*srcStride];\
814  const int src5= src[5*srcStride];\
815  const int src6= src[6*srcStride];\
816  const int src7= src[7*srcStride];\
817  const int src8= src[8*srcStride];\
818  const int src9= src[9*srcStride];\
819  const int src10= src[10*srcStride];\
820  const int src11= src[11*srcStride];\
821  const int src12= src[12*srcStride];\
822  const int src13= src[13*srcStride];\
823  const int src14= src[14*srcStride];\
824  const int src15= src[15*srcStride];\
825  const int src16= src[16*srcStride];\
826  OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
827  OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
828  OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
829  OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
830  OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
831  OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
832  OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
833  OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
834  OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
835  OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
836  OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
837  OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
838  OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
839  OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
840  OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
841  OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
842  dst++;\
843  src++;\
844  }\
845 }\
846 \
847 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
848  uint8_t half[64];\
849  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
850  OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
851 }\
852 \
853 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
854  OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
855 }\
856 \
857 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
858  uint8_t half[64];\
859  put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
860  OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
861 }\
862 \
863 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
864  uint8_t full[16*9];\
865  uint8_t half[64];\
866  copy_block9(full, src, 16, stride, 9);\
867  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
868  OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
869 }\
870 \
871 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
872  uint8_t full[16*9];\
873  copy_block9(full, src, 16, stride, 9);\
874  OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
875 }\
876 \
877 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
878  uint8_t full[16*9];\
879  uint8_t half[64];\
880  copy_block9(full, src, 16, stride, 9);\
881  put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
882  OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
883 }\
884 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
885  uint8_t full[16*9];\
886  uint8_t halfH[72];\
887  uint8_t halfV[64];\
888  uint8_t halfHV[64];\
889  copy_block9(full, src, 16, stride, 9);\
890  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
891  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
892  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
893  OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
894 }\
895 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
896  uint8_t full[16*9];\
897  uint8_t halfH[72];\
898  uint8_t halfHV[64];\
899  copy_block9(full, src, 16, stride, 9);\
900  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
901  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
902  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
903  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
904 }\
905 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
906  uint8_t full[16*9];\
907  uint8_t halfH[72];\
908  uint8_t halfV[64];\
909  uint8_t halfHV[64];\
910  copy_block9(full, src, 16, stride, 9);\
911  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
912  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
913  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
914  OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
915 }\
916 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
917  uint8_t full[16*9];\
918  uint8_t halfH[72];\
919  uint8_t halfHV[64];\
920  copy_block9(full, src, 16, stride, 9);\
921  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
922  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
923  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
924  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
925 }\
926 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
927  uint8_t full[16*9];\
928  uint8_t halfH[72];\
929  uint8_t halfV[64];\
930  uint8_t halfHV[64];\
931  copy_block9(full, src, 16, stride, 9);\
932  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
933  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
934  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
935  OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
936 }\
937 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
938  uint8_t full[16*9];\
939  uint8_t halfH[72];\
940  uint8_t halfHV[64];\
941  copy_block9(full, src, 16, stride, 9);\
942  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
943  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
944  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
945  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
946 }\
947 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
948  uint8_t full[16*9];\
949  uint8_t halfH[72];\
950  uint8_t halfV[64];\
951  uint8_t halfHV[64];\
952  copy_block9(full, src, 16, stride, 9);\
953  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
954  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
955  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
956  OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
957 }\
958 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
959  uint8_t full[16*9];\
960  uint8_t halfH[72];\
961  uint8_t halfHV[64];\
962  copy_block9(full, src, 16, stride, 9);\
963  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
964  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
965  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
966  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
967 }\
968 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
969  uint8_t halfH[72];\
970  uint8_t halfHV[64];\
971  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
972  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
973  OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
974 }\
975 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
976  uint8_t halfH[72];\
977  uint8_t halfHV[64];\
978  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
979  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
980  OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
981 }\
982 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
983  uint8_t full[16*9];\
984  uint8_t halfH[72];\
985  uint8_t halfV[64];\
986  uint8_t halfHV[64];\
987  copy_block9(full, src, 16, stride, 9);\
988  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
989  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
990  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
991  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
992 }\
993 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
994  uint8_t full[16*9];\
995  uint8_t halfH[72];\
996  copy_block9(full, src, 16, stride, 9);\
997  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
998  put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
999  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1000 }\
1001 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1002  uint8_t full[16*9];\
1003  uint8_t halfH[72];\
1004  uint8_t halfV[64];\
1005  uint8_t halfHV[64];\
1006  copy_block9(full, src, 16, stride, 9);\
1007  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1008  put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1009  put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1010  OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1011 }\
1012 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1013  uint8_t full[16*9];\
1014  uint8_t halfH[72];\
1015  copy_block9(full, src, 16, stride, 9);\
1016  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1017  put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1018  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1019 }\
1020 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1021  uint8_t halfH[72];\
1022  put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1023  OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1024 }\
1025 \
1026 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1027  uint8_t half[256];\
1028  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1029  OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1030 }\
1031 \
1032 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1033  OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1034 }\
1035 \
1036 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1037  uint8_t half[256];\
1038  put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1039  OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1040 }\
1041 \
1042 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1043  uint8_t full[24*17];\
1044  uint8_t half[256];\
1045  copy_block17(full, src, 24, stride, 17);\
1046  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1047  OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1048 }\
1049 \
1050 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1051  uint8_t full[24*17];\
1052  copy_block17(full, src, 24, stride, 17);\
1053  OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1054 }\
1055 \
1056 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1057  uint8_t full[24*17];\
1058  uint8_t half[256];\
1059  copy_block17(full, src, 24, stride, 17);\
1060  put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1061  OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1062 }\
1063 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1064  uint8_t full[24*17];\
1065  uint8_t halfH[272];\
1066  uint8_t halfV[256];\
1067  uint8_t halfHV[256];\
1068  copy_block17(full, src, 24, stride, 17);\
1069  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1070  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1071  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1072  OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1073 }\
1074 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1075  uint8_t full[24*17];\
1076  uint8_t halfH[272];\
1077  uint8_t halfHV[256];\
1078  copy_block17(full, src, 24, stride, 17);\
1079  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1080  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1081  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1082  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1083 }\
1084 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1085  uint8_t full[24*17];\
1086  uint8_t halfH[272];\
1087  uint8_t halfV[256];\
1088  uint8_t halfHV[256];\
1089  copy_block17(full, src, 24, stride, 17);\
1090  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1091  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1092  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1093  OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1094 }\
1095 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1096  uint8_t full[24*17];\
1097  uint8_t halfH[272];\
1098  uint8_t halfHV[256];\
1099  copy_block17(full, src, 24, stride, 17);\
1100  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1101  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1102  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1103  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1104 }\
1105 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1106  uint8_t full[24*17];\
1107  uint8_t halfH[272];\
1108  uint8_t halfV[256];\
1109  uint8_t halfHV[256];\
1110  copy_block17(full, src, 24, stride, 17);\
1111  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1112  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1113  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1114  OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1115 }\
1116 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1117  uint8_t full[24*17];\
1118  uint8_t halfH[272];\
1119  uint8_t halfHV[256];\
1120  copy_block17(full, src, 24, stride, 17);\
1121  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1122  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1123  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1124  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1125 }\
1126 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1127  uint8_t full[24*17];\
1128  uint8_t halfH[272];\
1129  uint8_t halfV[256];\
1130  uint8_t halfHV[256];\
1131  copy_block17(full, src, 24, stride, 17);\
1132  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1133  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1134  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1135  OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1136 }\
1137 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1138  uint8_t full[24*17];\
1139  uint8_t halfH[272];\
1140  uint8_t halfHV[256];\
1141  copy_block17(full, src, 24, stride, 17);\
1142  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1143  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1144  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1145  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1146 }\
1147 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1148  uint8_t halfH[272];\
1149  uint8_t halfHV[256];\
1150  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1151  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1152  OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1153 }\
1154 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1155  uint8_t halfH[272];\
1156  uint8_t halfHV[256];\
1157  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1158  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1159  OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1160 }\
1161 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1162  uint8_t full[24*17];\
1163  uint8_t halfH[272];\
1164  uint8_t halfV[256];\
1165  uint8_t halfHV[256];\
1166  copy_block17(full, src, 24, stride, 17);\
1167  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1168  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1169  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1170  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1171 }\
1172 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1173  uint8_t full[24*17];\
1174  uint8_t halfH[272];\
1175  copy_block17(full, src, 24, stride, 17);\
1176  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1177  put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1178  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1179 }\
1180 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1181  uint8_t full[24*17];\
1182  uint8_t halfH[272];\
1183  uint8_t halfV[256];\
1184  uint8_t halfHV[256];\
1185  copy_block17(full, src, 24, stride, 17);\
1186  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1187  put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1188  put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1189  OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1190 }\
1191 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1192  uint8_t full[24*17];\
1193  uint8_t halfH[272];\
1194  copy_block17(full, src, 24, stride, 17);\
1195  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1196  put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1197  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1198 }\
1199 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1200  uint8_t halfH[272];\
1201  put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1202  OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1203 }
1204 
1205 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1206 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1207 #define op_put(a, b) a = cm[((b) + 16)>>5]
1208 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1209 
1210 QPEL_MC(0, put_ , _ , op_put)
1211 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1212 QPEL_MC(0, avg_ , _ , op_avg)
1213 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
1214 #undef op_avg
1215 #undef op_avg_no_rnd
1216 #undef op_put
1217 #undef op_put_no_rnd
1218 
1219 #define put_qpel8_mc00_c ff_put_pixels8x8_c
1220 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
1221 #define put_qpel16_mc00_c ff_put_pixels16x16_c
1222 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1223 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
1224 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
1225 
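/* WMV2 half-pel filter: 4-tap (-1, 9, 9, -1)/16, i.e.
   (9*(b + c) - (a + d) + 8) >> 4, clamped through ff_cropTbl. */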
1226 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1227  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1228  int i;
1229 
1230  for(i=0; i<h; i++){
1231  dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1232  dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1233  dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1234  dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1235  dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1236  dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1237  dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1238  dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1239  dst+=dstStride;
1240  src+=srcStride;
1241  }
1242 }
1243 
1244 #if CONFIG_RV40_DECODER
1245 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1246  put_pixels16_xy2_8_c(dst, src, stride, 16);
1247 }
1248 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1249  avg_pixels16_xy2_8_c(dst, src, stride, 16);
1250 }
1251 void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1252  put_pixels8_xy2_8_c(dst, src, stride, 8);
1253 }
1254 void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1255  avg_pixels8_xy2_8_c(dst, src, stride, 8);
1256 }
1257 #endif /* CONFIG_RV40_DECODER */
1258 
1259 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1260  uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1261  int i;
1262 
1263  for(i=0; i<w; i++){
1264  const int src_1= src[ -srcStride];
1265  const int src0 = src[0 ];
1266  const int src1 = src[ srcStride];
1267  const int src2 = src[2*srcStride];
1268  const int src3 = src[3*srcStride];
1269  const int src4 = src[4*srcStride];
1270  const int src5 = src[5*srcStride];
1271  const int src6 = src[6*srcStride];
1272  const int src7 = src[7*srcStride];
1273  const int src8 = src[8*srcStride];
1274  const int src9 = src[9*srcStride];
1275  dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1276  dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1277  dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1278  dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1279  dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1280  dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1281  dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1282  dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1283  src++;
1284  dst++;
1285  }
1286 }
1287 
1288 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1289  uint8_t half[64];
1290  wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1291  put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1292 }
1293 
1294 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1295  wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
1296 }
1297 
1298 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1299  uint8_t half[64];
1300  wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1301  put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1302 }
1303 
1304 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
1305  wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
1306 }
1307 
1308 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
1309  uint8_t halfH[88];
1310  uint8_t halfV[64];
1311  uint8_t halfHV[64];
1312  wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1313  wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1314  wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1315  put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1316 }
1317 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1318  uint8_t halfH[88];
1319  uint8_t halfV[64];
1320  uint8_t halfHV[64];
1321  wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1322  wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1323  wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1324  put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1325 }
1326 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1327  uint8_t halfH[88];
1328  wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1329  wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
1330 }
1331 
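/* h263_v_loop_filter_c()/h263_h_loop_filter_c(): H.263 Annex J in-loop
   deblocking. d1 is a ramp function of the gradient across the block edge,
   limited by the qscale-dependent 'strength'; p1/p2 are corrected by d1 and
   p0/p3 by the smaller d2. */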
1332 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1333  if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER){
1334  int x;
1335  const int strength= ff_h263_loop_filter_strength[qscale];
1336 
1337  for(x=0; x<8; x++){
1338  int d1, d2, ad1;
1339  int p0= src[x-2*stride];
1340  int p1= src[x-1*stride];
1341  int p2= src[x+0*stride];
1342  int p3= src[x+1*stride];
1343  int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1344 
1345  if (d<-2*strength) d1= 0;
1346  else if(d<- strength) d1=-2*strength - d;
1347  else if(d< strength) d1= d;
1348  else if(d< 2*strength) d1= 2*strength - d;
1349  else d1= 0;
1350 
1351  p1 += d1;
1352  p2 -= d1;
1353  if(p1&256) p1= ~(p1>>31);
1354  if(p2&256) p2= ~(p2>>31);
1355 
1356  src[x-1*stride] = p1;
1357  src[x+0*stride] = p2;
1358 
1359  ad1= FFABS(d1)>>1;
1360 
1361  d2= av_clip((p0-p3)/4, -ad1, ad1);
1362 
1363  src[x-2*stride] = p0 - d2;
1364  src[x+ stride] = p3 + d2;
1365  }
1366  }
1367 }
1368 
1369 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1370  if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER){
1371  int y;
1372  const int strength= ff_h263_loop_filter_strength[qscale];
1373 
1374  for(y=0; y<8; y++){
1375  int d1, d2, ad1;
1376  int p0= src[y*stride-2];
1377  int p1= src[y*stride-1];
1378  int p2= src[y*stride+0];
1379  int p3= src[y*stride+1];
1380  int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1381 
1382  if (d<-2*strength) d1= 0;
1383  else if(d<- strength) d1=-2*strength - d;
1384  else if(d< strength) d1= d;
1385  else if(d< 2*strength) d1= 2*strength - d;
1386  else d1= 0;
1387 
1388  p1 += d1;
1389  p2 -= d1;
1390  if(p1&256) p1= ~(p1>>31);
1391  if(p2&256) p2= ~(p2>>31);
1392 
1393  src[y*stride-1] = p1;
1394  src[y*stride+0] = p2;
1395 
1396  ad1= FFABS(d1)>>1;
1397 
1398  d2= av_clip((p0-p3)/4, -ad1, ad1);
1399 
1400  src[y*stride-2] = p0 - d2;
1401  src[y*stride+1] = p3 + d2;
1402  }
1403  }
1404 }
1405 
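/* h261_loop_filter_c(): H.261 in-loop filter, a separable [1 2 1] smoothing of
   the 8x8 block; border rows and columns are filtered in one direction only. */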
1406 static void h261_loop_filter_c(uint8_t *src, int stride){
1407  int x,y,xy,yz;
1408  int temp[64];
1409 
1410  for(x=0; x<8; x++){
1411  temp[x ] = 4*src[x ];
1412  temp[x + 7*8] = 4*src[x + 7*stride];
1413  }
1414  for(y=1; y<7; y++){
1415  for(x=0; x<8; x++){
1416  xy = y * stride + x;
1417  yz = y * 8 + x;
1418  temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
1419  }
1420  }
1421 
1422  for(y=0; y<8; y++){
1423  src[ y*stride] = (temp[ y*8] + 2)>>2;
1424  src[7+y*stride] = (temp[7+y*8] + 2)>>2;
1425  for(x=1; x<7; x++){
1426  xy = y * stride + x;
1427  yz = y * 8 + x;
1428  src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
1429  }
1430  }
1431 }
1432 
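/* pix_abs16_* / pix_abs8_*: sum of absolute differences (SAD) between a block
   and its reference; the _x2/_y2/_xy2 variants compare against the half-pel
   interpolated reference using the rounding averages avg2()/avg4(). */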
1433 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1434 {
1435  int s, i;
1436 
1437  s = 0;
1438  for(i=0;i<h;i++) {
1439  s += abs(pix1[0] - pix2[0]);
1440  s += abs(pix1[1] - pix2[1]);
1441  s += abs(pix1[2] - pix2[2]);
1442  s += abs(pix1[3] - pix2[3]);
1443  s += abs(pix1[4] - pix2[4]);
1444  s += abs(pix1[5] - pix2[5]);
1445  s += abs(pix1[6] - pix2[6]);
1446  s += abs(pix1[7] - pix2[7]);
1447  s += abs(pix1[8] - pix2[8]);
1448  s += abs(pix1[9] - pix2[9]);
1449  s += abs(pix1[10] - pix2[10]);
1450  s += abs(pix1[11] - pix2[11]);
1451  s += abs(pix1[12] - pix2[12]);
1452  s += abs(pix1[13] - pix2[13]);
1453  s += abs(pix1[14] - pix2[14]);
1454  s += abs(pix1[15] - pix2[15]);
1455  pix1 += line_size;
1456  pix2 += line_size;
1457  }
1458  return s;
1459 }
1460 
1461 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1462 {
1463  int s, i;
1464 
1465  s = 0;
1466  for(i=0;i<h;i++) {
1467  s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1468  s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1469  s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1470  s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1471  s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1472  s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1473  s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1474  s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1475  s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1476  s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1477  s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1478  s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1479  s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1480  s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1481  s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1482  s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
1483  pix1 += line_size;
1484  pix2 += line_size;
1485  }
1486  return s;
1487 }
1488 
1489 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1490 {
1491  int s, i;
1492  uint8_t *pix3 = pix2 + line_size;
1493 
1494  s = 0;
1495  for(i=0;i<h;i++) {
1496  s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1497  s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1498  s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1499  s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1500  s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1501  s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1502  s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1503  s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1504  s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1505  s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1506  s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1507  s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1508  s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1509  s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1510  s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1511  s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
1512  pix1 += line_size;
1513  pix2 += line_size;
1514  pix3 += line_size;
1515  }
1516  return s;
1517 }
1518 
1519 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1520 {
1521  int s, i;
1522  uint8_t *pix3 = pix2 + line_size;
1523 
1524  s = 0;
1525  for(i=0;i<h;i++) {
1526  s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1527  s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1528  s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1529  s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1530  s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1531  s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1532  s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1533  s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1534  s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1535  s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1536  s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1537  s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1538  s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1539  s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1540  s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1541  s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1542  pix1 += line_size;
1543  pix2 += line_size;
1544  pix3 += line_size;
1545  }
1546  return s;
1547 }
1548 
1549 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1550 {
1551  int s, i;
1552 
1553  s = 0;
1554  for(i=0;i<h;i++) {
1555  s += abs(pix1[0] - pix2[0]);
1556  s += abs(pix1[1] - pix2[1]);
1557  s += abs(pix1[2] - pix2[2]);
1558  s += abs(pix1[3] - pix2[3]);
1559  s += abs(pix1[4] - pix2[4]);
1560  s += abs(pix1[5] - pix2[5]);
1561  s += abs(pix1[6] - pix2[6]);
1562  s += abs(pix1[7] - pix2[7]);
1563  pix1 += line_size;
1564  pix2 += line_size;
1565  }
1566  return s;
1567 }
1568 
1569 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1570 {
1571  int s, i;
1572 
1573  s = 0;
1574  for(i=0;i<h;i++) {
1575  s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1576  s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1577  s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1578  s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1579  s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1580  s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1581  s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1582  s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1583  pix1 += line_size;
1584  pix2 += line_size;
1585  }
1586  return s;
1587 }
1588 
1589 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1590 {
1591  int s, i;
1592  uint8_t *pix3 = pix2 + line_size;
1593 
1594  s = 0;
1595  for(i=0;i<h;i++) {
1596  s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1597  s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1598  s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1599  s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1600  s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1601  s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1602  s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1603  s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1604  pix1 += line_size;
1605  pix2 += line_size;
1606  pix3 += line_size;
1607  }
1608  return s;
1609 }
1610 
1611 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1612 {
1613  int s, i;
1614  uint8_t *pix3 = pix2 + line_size;
1615 
1616  s = 0;
1617  for(i=0;i<h;i++) {
1618  s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1619  s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1620  s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1621  s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1622  s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1623  s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1624  s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1625  s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1626  pix1 += line_size;
1627  pix2 += line_size;
1628  pix3 += line_size;
1629  }
1630  return s;
1631 }
1632 
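/* Noise-preserving SSE: score1 is the plain sum of squared errors, score2 the
 * difference in 2x2 second-order gradient magnitude between the two blocks,
 * weighted by avctx->nsse_weight (8 when no context is available). */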
1633 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1634  MpegEncContext *c = v;
1635  int score1=0;
1636  int score2=0;
1637  int x,y;
1638 
1639  for(y=0; y<h; y++){
1640  for(x=0; x<16; x++){
1641  score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1642  }
1643  if(y+1<h){
1644  for(x=0; x<15; x++){
1645  score2+= FFABS( s1[x ] - s1[x +stride]
1646  - s1[x+1] + s1[x+1+stride])
1647  -FFABS( s2[x ] - s2[x +stride]
1648  - s2[x+1] + s2[x+1+stride]);
1649  }
1650  }
1651  s1+= stride;
1652  s2+= stride;
1653  }
1654 
1655  if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1656  else return score1 + FFABS(score2)*8;
1657 }
1658 
1659 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1660  MpegEncContext *c = v;
1661  int score1=0;
1662  int score2=0;
1663  int x,y;
1664 
1665  for(y=0; y<h; y++){
1666  for(x=0; x<8; x++){
1667  score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1668  }
1669  if(y+1<h){
1670  for(x=0; x<7; x++){
1671  score2+= FFABS( s1[x ] - s1[x +stride]
1672  - s1[x+1] + s1[x+1+stride])
1673  -FFABS( s2[x ] - s2[x +stride]
1674  - s2[x+1] + s2[x+1+stride]);
1675  }
1676  }
1677  s1+= stride;
1678  s2+= stride;
1679  }
1680 
1681  if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1682  else return score1 + FFABS(score2)*8;
1683 }
1684 
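/* Estimate the weighted squared error that results from adding 'basis', scaled
 * by 'scale', to the residual 'rem'; add_8x8basis_c below applies the same
 * scaled basis update in place. */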
1685 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1686  int i;
1687  unsigned int sum=0;
1688 
1689  for(i=0; i<8*8; i++){
1690  int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1691  int w= weight[i];
1692  b>>= RECON_SHIFT;
1693  assert(-512<b && b<512);
1694 
1695  sum += (w*b)*(w*b)>>4;
1696  }
1697  return sum>>2;
1698 }
1699 
1700 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1701  int i;
1702 
1703  for(i=0; i<8*8; i++){
1704  rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1705  }
1706 }
1707 
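/* Permute the coefficients of an 8x8 block according to 'permutation'.
 * 'scantable' and 'last' (index of the last nonzero coefficient in scan order)
 * are only used to skip coefficients that are known to be zero. */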
1716 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1717 {
1718  int i;
1719  DCTELEM temp[64];
1720 
1721  if(last<=0) return;
1722  //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
1723 
1724  for(i=0; i<=last; i++){
1725  const int j= scantable[i];
1726  temp[j]= block[j];
1727  block[j]=0;
1728  }
1729 
1730  for(i=0; i<=last; i++){
1731  const int j= scantable[i];
1732  const int perm_j= permutation[j];
1733  block[perm_j]= temp[j];
1734  }
1735 }
1736 
1737 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
1738  return 0;
1739 }
1740 
1741 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1742  int i;
1743 
1744  memset(cmp, 0, sizeof(void*)*6);
1745 
1746  for(i=0; i<6; i++){
1747  switch(type&0xFF){
1748  case FF_CMP_SAD:
1749  cmp[i]= c->sad[i];
1750  break;
1751  case FF_CMP_SATD:
1752  cmp[i]= c->hadamard8_diff[i];
1753  break;
1754  case FF_CMP_SSE:
1755  cmp[i]= c->sse[i];
1756  break;
1757  case FF_CMP_DCT:
1758  cmp[i]= c->dct_sad[i];
1759  break;
1760  case FF_CMP_DCT264:
1761  cmp[i]= c->dct264_sad[i];
1762  break;
1763  case FF_CMP_DCTMAX:
1764  cmp[i]= c->dct_max[i];
1765  break;
1766  case FF_CMP_PSNR:
1767  cmp[i]= c->quant_psnr[i];
1768  break;
1769  case FF_CMP_BIT:
1770  cmp[i]= c->bit[i];
1771  break;
1772  case FF_CMP_RD:
1773  cmp[i]= c->rd[i];
1774  break;
1775  case FF_CMP_VSAD:
1776  cmp[i]= c->vsad[i];
1777  break;
1778  case FF_CMP_VSSE:
1779  cmp[i]= c->vsse[i];
1780  break;
1781  case FF_CMP_ZERO:
1782  cmp[i]= zero_cmp;
1783  break;
1784  case FF_CMP_NSSE:
1785  cmp[i]= c->nsse[i];
1786  break;
1787 #if CONFIG_DWT
1788  case FF_CMP_W53:
1789  cmp[i]= c->w53[i];
1790  break;
1791  case FF_CMP_W97:
1792  cmp[i]= c->w97[i];
1793  break;
1794 #endif
1795  default:
1796  av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1797  }
1798  }
1799 }
1800 
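/* Byte-wise add/subtract packed into native machine words (SWAR): the pb_7f and
 * pb_80 masks stop carries from propagating between bytes, and the scalar loops
 * handle any tail that does not fill a whole word. */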
1801 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1802  long i;
1803  for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) {
1804  long a = *(long*)(src+i);
1805  long b = *(long*)(dst+i);
1806  *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1807  }
1808  for(; i<w; i++)
1809  dst[i+0] += src[i+0];
1810 }
1811 
1812 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1813  long i;
1814 #if !HAVE_FAST_UNALIGNED
1815  if((long)src2 & (sizeof(long)-1)){
1816  for(i=0; i+7<w; i+=8){
1817  dst[i+0] = src1[i+0]-src2[i+0];
1818  dst[i+1] = src1[i+1]-src2[i+1];
1819  dst[i+2] = src1[i+2]-src2[i+2];
1820  dst[i+3] = src1[i+3]-src2[i+3];
1821  dst[i+4] = src1[i+4]-src2[i+4];
1822  dst[i+5] = src1[i+5]-src2[i+5];
1823  dst[i+6] = src1[i+6]-src2[i+6];
1824  dst[i+7] = src1[i+7]-src2[i+7];
1825  }
1826  }else
1827 #endif
1828  for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) {
1829  long a = *(long*)(src1+i);
1830  long b = *(long*)(src2+i);
1831  *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1832  }
1833  for(; i<w; i++)
1834  dst[i+0] = src1[i+0]-src2[i+0];
1835 }
1836 
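/* HuffYUV median prediction: the predictor is the median of left, top and
 * left + top - topleft; the add_* variant reconstructs samples from the
 * residual 'diff', the sub_* variant produces the residual. */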
1837 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1838  int i;
1839  uint8_t l, lt;
1840 
1841  l= *left;
1842  lt= *left_top;
1843 
1844  for(i=0; i<w; i++){
1845  l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1846  lt= src1[i];
1847  dst[i]= l;
1848  }
1849 
1850  *left= l;
1851  *left_top= lt;
1852 }
1853 
1854 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1855  int i;
1856  uint8_t l, lt;
1857 
1858  l= *left;
1859  lt= *left_top;
1860 
1861  for(i=0; i<w; i++){
1862  const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1863  lt= src1[i];
1864  l= src2[i];
1865  dst[i]= l - pred;
1866  }
1867 
1868  *left= l;
1869  *left_top= lt;
1870 }
1871 
1872 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1873  int i;
1874 
1875  for(i=0; i<w-1; i++){
1876  acc+= src[i];
1877  dst[i]= acc;
1878  i++;
1879  acc+= src[i];
1880  dst[i]= acc;
1881  }
1882 
1883  for(; i<w; i++){
1884  acc+= src[i];
1885  dst[i]= acc;
1886  }
1887 
1888  return acc;
1889 }
1890 
1891 #if HAVE_BIGENDIAN
1892 #define B 3
1893 #define G 2
1894 #define R 1
1895 #define A 0
1896 #else
1897 #define B 0
1898 #define G 1
1899 #define R 2
1900 #define A 3
1901 #endif
1902 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
1903  int i;
1904  int r,g,b,a;
1905  r= *red;
1906  g= *green;
1907  b= *blue;
1908  a= *alpha;
1909 
1910  for(i=0; i<w; i++){
1911  b+= src[4*i+B];
1912  g+= src[4*i+G];
1913  r+= src[4*i+R];
1914  a+= src[4*i+A];
1915 
1916  dst[4*i+B]= b;
1917  dst[4*i+G]= g;
1918  dst[4*i+R]= r;
1919  dst[4*i+A]= a;
1920  }
1921 
1922  *red= r;
1923  *green= g;
1924  *blue= b;
1925  *alpha= a;
1926 }
1927 #undef B
1928 #undef G
1929 #undef R
1930 #undef A
1931 
1932 #define BUTTERFLY2(o1,o2,i1,i2) \
1933 o1= (i1)+(i2);\
1934 o2= (i1)-(i2);
1935 
1936 #define BUTTERFLY1(x,y) \
1937 {\
1938  int a,b;\
1939  a= x;\
1940  b= y;\
1941  x= a+b;\
1942  y= a-b;\
1943 }
1944 
1945 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
1946 
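/* SATD: an 8x8 Hadamard transform of the src-dst difference, summing the
 * absolute values of the transformed coefficients. The intra variant below
 * transforms src alone and subtracts the DC term. */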
1947 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1948  int i;
1949  int temp[64];
1950  int sum=0;
1951 
1952  assert(h==8);
1953 
1954  for(i=0; i<8; i++){
1955  //FIXME try pointer walks
1956  BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
1957  BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
1958  BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
1959  BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
1960 
1961  BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1962  BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1963  BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1964  BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1965 
1966  BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1967  BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1968  BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1969  BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1970  }
1971 
1972  for(i=0; i<8; i++){
1973  BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1974  BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1975  BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1976  BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1977 
1978  BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1979  BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1980  BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1981  BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1982 
1983  sum +=
1984  BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1985  +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1986  +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1987  +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1988  }
1989  return sum;
1990 }
1991 
1992 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
1993  int i;
1994  int temp[64];
1995  int sum=0;
1996 
1997  assert(h==8);
1998 
1999  for(i=0; i<8; i++){
2000  //FIXME try pointer walks
2001  BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2002  BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2003  BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2004  BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2005 
2006  BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2007  BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2008  BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2009  BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2010 
2011  BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2012  BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2013  BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2014  BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2015  }
2016 
2017  for(i=0; i<8; i++){
2018  BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2019  BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2020  BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2021  BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2022 
2023  BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2024  BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2025  BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2026  BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2027 
2028  sum +=
2029  BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2030  +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2031  +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2032  +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2033  }
2034 
2035  sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
2036 
2037  return sum;
2038 }
2039 
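/* DCT-based comparison: forward-transform the 8x8 difference block and sum the
 * absolute values of its coefficients (dct_max8x8_c below takes the maximum
 * coefficient instead). */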
2040 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2041  MpegEncContext * const s= (MpegEncContext *)c;
2042  LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2043 
2044  assert(h==8);
2045 
2046  s->dsp.diff_pixels(temp, src1, src2, stride);
2047  s->dsp.fdct(temp);
2048  return s->dsp.sum_abs_dctelem(temp);
2049 }
2050 
2051 #if CONFIG_GPL
2052 #define DCT8_1D {\
2053  const int s07 = SRC(0) + SRC(7);\
2054  const int s16 = SRC(1) + SRC(6);\
2055  const int s25 = SRC(2) + SRC(5);\
2056  const int s34 = SRC(3) + SRC(4);\
2057  const int a0 = s07 + s34;\
2058  const int a1 = s16 + s25;\
2059  const int a2 = s07 - s34;\
2060  const int a3 = s16 - s25;\
2061  const int d07 = SRC(0) - SRC(7);\
2062  const int d16 = SRC(1) - SRC(6);\
2063  const int d25 = SRC(2) - SRC(5);\
2064  const int d34 = SRC(3) - SRC(4);\
2065  const int a4 = d16 + d25 + (d07 + (d07>>1));\
2066  const int a5 = d07 - d34 - (d25 + (d25>>1));\
2067  const int a6 = d07 + d34 - (d16 + (d16>>1));\
2068  const int a7 = d16 - d25 + (d34 + (d34>>1));\
2069  DST(0, a0 + a1 ) ;\
2070  DST(1, a4 + (a7>>2)) ;\
2071  DST(2, a2 + (a3>>1)) ;\
2072  DST(3, a5 + (a6>>2)) ;\
2073  DST(4, a0 - a1 ) ;\
2074  DST(5, a6 - (a5>>2)) ;\
2075  DST(6, (a2>>1) - a3 ) ;\
2076  DST(7, (a4>>2) - a7 ) ;\
2077 }
2078 
2079 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2080  MpegEncContext * const s= (MpegEncContext *)c;
2081  DCTELEM dct[8][8];
2082  int i;
2083  int sum=0;
2084 
2085  s->dsp.diff_pixels(dct[0], src1, src2, stride);
2086 
2087 #define SRC(x) dct[i][x]
2088 #define DST(x,v) dct[i][x]= v
2089  for( i = 0; i < 8; i++ )
2090  DCT8_1D
2091 #undef SRC
2092 #undef DST
2093 
2094 #define SRC(x) dct[x][i]
2095 #define DST(x,v) sum += FFABS(v)
2096  for( i = 0; i < 8; i++ )
2097  DCT8_1D
2098 #undef SRC
2099 #undef DST
2100  return sum;
2101 }
2102 #endif
2103 
2104 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2105  MpegEncContext * const s= (MpegEncContext *)c;
2106  LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2107  int sum=0, i;
2108 
2109  assert(h==8);
2110 
2111  s->dsp.diff_pixels(temp, src1, src2, stride);
2112  s->dsp.fdct(temp);
2113 
2114  for(i=0; i<64; i++)
2115  sum= FFMAX(sum, FFABS(temp[i]));
2116 
2117  return sum;
2118 }
2119 
2120 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2121  MpegEncContext * const s= (MpegEncContext *)c;
2122  LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2123  DCTELEM * const bak = temp+64;
2124  int sum=0, i;
2125 
2126  assert(h==8);
2127  s->mb_intra=0;
2128 
2129  s->dsp.diff_pixels(temp, src1, src2, stride);
2130 
2131  memcpy(bak, temp, 64*sizeof(DCTELEM));
2132 
2133  s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2134  s->dct_unquantize_inter(s, temp, 0, s->qscale);
2135  ff_simple_idct_8(temp); //FIXME
2136 
2137  for(i=0; i<64; i++)
2138  sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2139 
2140  return sum;
2141 }
2142 
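/* Rate-distortion metric: quantize and dequantize the 8x8 difference block,
 * count the bits its run/level VLC coding would take, reconstruct it, and
 * combine the SSE distortion with a qscale-weighted bit cost. */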
2143 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2144  MpegEncContext * const s= (MpegEncContext *)c;
2145  const uint8_t *scantable= s->intra_scantable.permutated;
2146  LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2147  LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2148  LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2149  int i, last, run, bits, level, distortion, start_i;
2150  const int esc_length= s->ac_esc_length;
2151  uint8_t * length;
2152  uint8_t * last_length;
2153 
2154  assert(h==8);
2155 
2156  copy_block8(lsrc1, src1, 8, stride, 8);
2157  copy_block8(lsrc2, src2, 8, stride, 8);
2158 
2159  s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2160 
2161  s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2162 
2163  bits=0;
2164 
2165  if (s->mb_intra) {
2166  start_i = 1;
2167  length = s->intra_ac_vlc_length;
2168  last_length= s->intra_ac_vlc_last_length;
2169  bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2170  } else {
2171  start_i = 0;
2172  length = s->inter_ac_vlc_length;
2173  last_length= s->inter_ac_vlc_last_length;
2174  }
2175 
2176  if(last>=start_i){
2177  run=0;
2178  for(i=start_i; i<last; i++){
2179  int j= scantable[i];
2180  level= temp[j];
2181 
2182  if(level){
2183  level+=64;
2184  if((level&(~127)) == 0){
2185  bits+= length[UNI_AC_ENC_INDEX(run, level)];
2186  }else
2187  bits+= esc_length;
2188  run=0;
2189  }else
2190  run++;
2191  }
2192  i= scantable[last];
2193 
2194  level= temp[i] + 64;
2195 
2196  assert(level - 64);
2197 
2198  if((level&(~127)) == 0){
2199  bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2200  }else
2201  bits+= esc_length;
2202 
2203  }
2204 
2205  if(last>=0){
2206  if(s->mb_intra)
2207  s->dct_unquantize_intra(s, temp, 0, s->qscale);
2208  else
2209  s->dct_unquantize_inter(s, temp, 0, s->qscale);
2210  }
2211 
2212  s->dsp.idct_add(lsrc2, 8, temp);
2213 
2214  distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2215 
2216  return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2217 }
2218 
2219 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2220  MpegEncContext * const s= (MpegEncContext *)c;
2221  const uint8_t *scantable= s->intra_scantable.permutated;
2222  LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2223  int i, last, run, bits, level, start_i;
2224  const int esc_length= s->ac_esc_length;
2225  uint8_t * length;
2226  uint8_t * last_length;
2227 
2228  assert(h==8);
2229 
2230  s->dsp.diff_pixels(temp, src1, src2, stride);
2231 
2232  s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2233 
2234  bits=0;
2235 
2236  if (s->mb_intra) {
2237  start_i = 1;
2238  length = s->intra_ac_vlc_length;
2239  last_length= s->intra_ac_vlc_last_length;
2240  bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2241  } else {
2242  start_i = 0;
2243  length = s->inter_ac_vlc_length;
2244  last_length= s->inter_ac_vlc_last_length;
2245  }
2246 
2247  if(last>=start_i){
2248  run=0;
2249  for(i=start_i; i<last; i++){
2250  int j= scantable[i];
2251  level= temp[j];
2252 
2253  if(level){
2254  level+=64;
2255  if((level&(~127)) == 0){
2256  bits+= length[UNI_AC_ENC_INDEX(run, level)];
2257  }else
2258  bits+= esc_length;
2259  run=0;
2260  }else
2261  run++;
2262  }
2263  i= scantable[last];
2264 
2265  level= temp[i] + 64;
2266 
2267  assert(level - 64);
2268 
2269  if((level&(~127)) == 0){
2270  bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2271  }else
2272  bits+= esc_length;
2273  }
2274 
2275  return bits;
2276 }
2277 
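/* Vertical SAD/SSE metrics: each row is compared against the row above it,
 * either within a single block (the *_intra variants) or on the difference
 * between two blocks. */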
2278 #define VSAD_INTRA(size) \
2279 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2280  int score=0; \
2281  int x,y; \
2282  \
2283  for(y=1; y<h; y++){ \
2284  for(x=0; x<size; x+=4){ \
2285  score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2286  +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2287  } \
2288  s+= stride; \
2289  } \
2290  \
2291  return score; \
2292 }
2293 VSAD_INTRA(8)
2294 VSAD_INTRA(16)
2295 
2296 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2297  int score=0;
2298  int x,y;
2299 
2300  for(y=1; y<h; y++){
2301  for(x=0; x<16; x++){
2302  score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2303  }
2304  s1+= stride;
2305  s2+= stride;
2306  }
2307 
2308  return score;
2309 }
2310 
2311 #define SQ(a) ((a)*(a))
2312 #define VSSE_INTRA(size) \
2313 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2314  int score=0; \
2315  int x,y; \
2316  \
2317  for(y=1; y<h; y++){ \
2318  for(x=0; x<size; x+=4){ \
2319  score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2320  +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2321  } \
2322  s+= stride; \
2323  } \
2324  \
2325  return score; \
2326 }
2327 VSSE_INTRA(8)
2328 VSSE_INTRA(16)
2329 
2330 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2331  int score=0;
2332  int x,y;
2333 
2334  for(y=1; y<h; y++){
2335  for(x=0; x<16; x++){
2336  score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2337  }
2338  s1+= stride;
2339  s2+= stride;
2340  }
2341 
2342  return score;
2343 }
2344 
2345 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
2346  int size){
2347  int score=0;
2348  int i;
2349  for(i=0; i<size; i++)
2350  score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2351  return score;
2352 }
2353 
2354 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2355 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2356 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2357 #if CONFIG_GPL
2358 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2359 #endif
2360 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2361 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2362 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2363 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
2364 
2365 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
2366  int i;
2367  src1 += len-1;
2368  for(i=0; i<len; i++)
2369  dst[i] = src0[i] * src1[-i];
2370 }
2371 
2372 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
2373  int i;
2374  for(i=0; i<len; i++)
2375  dst[i] = src0[i] * src1[i] + src2[i];
2376 }
2377 
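/* Windowed overlap-add as used by MDCT-based audio decoders: len samples from
 * src0 and len reversed samples from src1 are combined under the symmetric
 * window 'win', producing 2*len output samples. */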
2378 static void vector_fmul_window_c(float *dst, const float *src0,
2379  const float *src1, const float *win, int len)
2380 {
2381  int i,j;
2382  dst += len;
2383  win += len;
2384  src0+= len;
2385  for(i=-len, j=len-1; i<0; i++, j--) {
2386  float s0 = src0[i];
2387  float s1 = src1[j];
2388  float wi = win[i];
2389  float wj = win[j];
2390  dst[i] = s0*wj - s1*wi;
2391  dst[j] = s0*wi + s1*wj;
2392  }
2393 }
2394 
2395 static void butterflies_float_c(float *restrict v1, float *restrict v2,
2396  int len)
2397 {
2398  int i;
2399  for (i = 0; i < len; i++) {
2400  float t = v1[i] - v2[i];
2401  v1[i] += v2[i];
2402  v2[i] = t;
2403  }
2404 }
2405 
2406 static void butterflies_float_interleave_c(float *dst, const float *src0,
2407  const float *src1, int len)
2408 {
2409  int i;
2410  for (i = 0; i < len; i++) {
2411  float f1 = src0[i];
2412  float f2 = src1[i];
2413  dst[2*i ] = f1 + f2;
2414  dst[2*i + 1] = f1 - f2;
2415  }
2416 }
2417 
2418 float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
2419 {
2420  float p = 0.0;
2421  int i;
2422 
2423  for (i = 0; i < len; i++)
2424  p += v1[i] * v2[i];
2425 
2426  return p;
2427 }
2428 
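/* Clamp a float by comparing its IEEE-754 bit pattern directly; this shortcut
 * is only valid when min < 0 < max, which is why vector_clipf_c falls back to
 * av_clipf for other ranges. */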
2429 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
2430  uint32_t maxi, uint32_t maxisign)
2431 {
2432 
2433  if(a > mini) return mini;
2434  else if((a^(1U<<31)) > maxisign) return maxi;
2435  else return a;
2436 }
2437 
2438 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
2439  int i;
2440  uint32_t mini = *(uint32_t*)min;
2441  uint32_t maxi = *(uint32_t*)max;
2442  uint32_t maxisign = maxi ^ (1U<<31);
2443  uint32_t *dsti = (uint32_t*)dst;
2444  const uint32_t *srci = (const uint32_t*)src;
2445  for(i=0; i<len; i+=8) {
2446  dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2447  dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2448  dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2449  dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2450  dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2451  dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2452  dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2453  dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
2454  }
2455 }
2456 static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
2457  int i;
2458  if(min < 0 && max > 0) {
2459  vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
2460  } else {
2461  for(i=0; i < len; i+=8) {
2462  dst[i ] = av_clipf(src[i ], min, max);
2463  dst[i + 1] = av_clipf(src[i + 1], min, max);
2464  dst[i + 2] = av_clipf(src[i + 2], min, max);
2465  dst[i + 3] = av_clipf(src[i + 3], min, max);
2466  dst[i + 4] = av_clipf(src[i + 4], min, max);
2467  dst[i + 5] = av_clipf(src[i + 5], min, max);
2468  dst[i + 6] = av_clipf(src[i + 6], min, max);
2469  dst[i + 7] = av_clipf(src[i + 7], min, max);
2470  }
2471  }
2472 }
2473 
2474 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
2475 {
2476  int res = 0;
2477 
2478  while (order--)
2479  res += *v1++ * *v2++;
2480 
2481  return res;
2482 }
2483 
2484 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
2485 {
2486  int res = 0;
2487  while (order--) {
2488  res += *v1 * *v2++;
2489  *v1++ += mul * *v3++;
2490  }
2491  return res;
2492 }
2493 
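/* Apply a symmetric window in 16-bit fixed point: window[i] is used for both
 * input[i] and input[len-1-i], with the Q15 products rounded before shifting. */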
2494 static void apply_window_int16_c(int16_t *output, const int16_t *input,
2495  const int16_t *window, unsigned int len)
2496 {
2497  int i;
2498  int len2 = len >> 1;
2499 
2500  for (i = 0; i < len2; i++) {
2501  int16_t w = window[i];
2502  output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
2503  output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
2504  }
2505 }
2506 
2507 static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
2508  int32_t max, unsigned int len)
2509 {
2510  do {
2511  *dst++ = av_clip(*src++, min, max);
2512  *dst++ = av_clip(*src++, min, max);
2513  *dst++ = av_clip(*src++, min, max);
2514  *dst++ = av_clip(*src++, min, max);
2515  *dst++ = av_clip(*src++, min, max);
2516  *dst++ = av_clip(*src++, min, max);
2517  *dst++ = av_clip(*src++, min, max);
2518  *dst++ = av_clip(*src++, min, max);
2519  len -= 8;
2520  } while (len > 0);
2521 }
2522 
2523 #define W0 2048
2524 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
2525 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
2526 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
2527 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
2528 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
2529 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
2530 #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
2531 
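/* WMV2 inverse DCT: one pass over the rows followed by one pass over the
 * columns, the column pass carrying extra intermediate precision before the
 * final rounding shift. */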
2532 static void wmv2_idct_row(short * b)
2533 {
2534  int s1,s2;
2535  int a0,a1,a2,a3,a4,a5,a6,a7;
2536  /*step 1*/
2537  a1 = W1*b[1]+W7*b[7];
2538  a7 = W7*b[1]-W1*b[7];
2539  a5 = W5*b[5]+W3*b[3];
2540  a3 = W3*b[5]-W5*b[3];
2541  a2 = W2*b[2]+W6*b[6];
2542  a6 = W6*b[2]-W2*b[6];
2543  a0 = W0*b[0]+W0*b[4];
2544  a4 = W0*b[0]-W0*b[4];
2545  /*step 2*/
2546  s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
2547  s2 = (181*(a1-a5-a7+a3)+128)>>8;
2548  /*step 3*/
2549  b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
2550  b[1] = (a4+a6 +s1 + (1<<7))>>8;
2551  b[2] = (a4-a6 +s2 + (1<<7))>>8;
2552  b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
2553  b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
2554  b[5] = (a4-a6 -s2 + (1<<7))>>8;
2555  b[6] = (a4+a6 -s1 + (1<<7))>>8;
2556  b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
2557 }
2558 static void wmv2_idct_col(short * b)
2559 {
2560  int s1,s2;
2561  int a0,a1,a2,a3,a4,a5,a6,a7;
2562  /*step 1, with extended precision*/
2563  a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
2564  a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
2565  a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
2566  a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
2567  a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
2568  a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
2569  a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
2570  a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
2571  /*step 2*/
2572  s1 = (181*(a1-a5+a7-a3)+128)>>8;
2573  s2 = (181*(a1-a5-a7+a3)+128)>>8;
2574  /*step 3*/
2575  b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
2576  b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
2577  b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
2578  b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
2579 
2580  b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
2581  b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
2582  b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
2583  b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
2584 }
2585 void ff_wmv2_idct_c(short * block){
2586  int i;
2587 
2588  for(i=0;i<64;i+=8){
2589  wmv2_idct_row(block+i);
2590  }
2591  for(i=0;i<8;i++){
2592  wmv2_idct_col(block+i);
2593  }
2594 }
 2595 /* XXX: these functions should be removed as soon as all IDCTs are
 2596  converted */
2597 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
2598 {
2599  ff_wmv2_idct_c(block);
2600  put_pixels_clamped_c(block, dest, line_size);
2601 }
2602 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
2603 {
2604  ff_wmv2_idct_c(block);
2605  add_pixels_clamped_c(block, dest, line_size);
2606 }
2607 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
2608 {
2609  ff_j_rev_dct (block);
2610  put_pixels_clamped_c(block, dest, line_size);
2611 }
2612 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
2613 {
2614  ff_j_rev_dct (block);
2615  add_pixels_clamped_c(block, dest, line_size);
2616 }
2617 
2618 /* init static data */
 2619 av_cold void ff_dsputil_static_init(void)
 2620 {
2621  int i;
2622 
2623  for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2624  for(i=0;i<MAX_NEG_CROP;i++) {
2625  ff_cropTbl[i] = 0;
2626  ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2627  }
2628 
2629  for(i=0;i<512;i++) {
2630  ff_squareTbl[i] = (i - 256) * (i - 256);
2631  }
2632 
2633  for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2634 }
2635 
 2636 int ff_check_alignment(void){
 2637  static int did_fail=0;
2638  LOCAL_ALIGNED_16(int, aligned, [4]);
2639 
2640  if((intptr_t)aligned & 15){
2641  if(!did_fail){
2642 #if HAVE_MMX || HAVE_ALTIVEC
 2643  av_log(NULL, AV_LOG_ERROR,
 2644  "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2645  "and may be very slow or crash. This is not a bug in libavcodec,\n"
2646  "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2647  "Do not report crashes to Libav developers.\n");
2648 #endif
2649  did_fail=1;
2650  }
2651  return -1;
2652  }
2653  return 0;
2654 }
2655 
 2656 av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
 2657 {
2658  int i, j;
2659 
2661 
2662 #if CONFIG_ENCODERS
2663  if (avctx->bits_per_raw_sample == 10) {
 2664  c->fdct = ff_jpeg_fdct_islow_10;
 2665  c->fdct248 = ff_fdct248_islow_10;
 2666  } else {
2667  if(avctx->dct_algo==FF_DCT_FASTINT) {
2668  c->fdct = ff_fdct_ifast;
2670  }
2671  else if(avctx->dct_algo==FF_DCT_FAAN) {
2672  c->fdct = ff_faandct;
2673  c->fdct248 = ff_faandct248;
2674  }
2675  else {
2676  c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2678  }
2679  }
2680 #endif //CONFIG_ENCODERS
2681 
2682  if (avctx->bits_per_raw_sample == 10) {
2685  c->idct = ff_simple_idct_10;
2687  } else {
2688  if(avctx->idct_algo==FF_IDCT_INT){
2691  c->idct = ff_j_rev_dct;
2693  }else if(avctx->idct_algo==FF_IDCT_WMV2){
2696  c->idct = ff_wmv2_idct_c;
2698  }else if(avctx->idct_algo==FF_IDCT_FAAN){
2701  c->idct = ff_faanidct;
2703  }else{ //accurate/default
2706  c->idct = ff_simple_idct_8;
2708  }
2709  }
2710 
2716  c->gmc1 = gmc1_c;
2717  c->gmc = ff_gmc_c;
2718  c->pix_sum = pix_sum_c;
2719  c->pix_norm1 = pix_norm1_c;
2720 
2722  c->fill_block_tab[1] = fill_block8_c;
2723 
2724  /* TODO [0] 16 [1] 8 */
2725  c->pix_abs[0][0] = pix_abs16_c;
2726  c->pix_abs[0][1] = pix_abs16_x2_c;
2727  c->pix_abs[0][2] = pix_abs16_y2_c;
2728  c->pix_abs[0][3] = pix_abs16_xy2_c;
2729  c->pix_abs[1][0] = pix_abs8_c;
2730  c->pix_abs[1][1] = pix_abs8_x2_c;
2731  c->pix_abs[1][2] = pix_abs8_y2_c;
2732  c->pix_abs[1][3] = pix_abs8_xy2_c;
2733 
2743 
2753 
2754 #define dspfunc(PFX, IDX, NUM) \
2755  c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2756  c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2757  c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2758  c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2759  c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2760  c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2761  c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2762  c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2763  c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2764  c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2765  c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2766  c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2767  c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2768  c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2769  c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2770  c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2771 
2772  dspfunc(put_qpel, 0, 16);
2773  dspfunc(put_no_rnd_qpel, 0, 16);
2774 
2775  dspfunc(avg_qpel, 0, 16);
2776  /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2777 
2778  dspfunc(put_qpel, 1, 8);
2779  dspfunc(put_no_rnd_qpel, 1, 8);
2780 
2781  dspfunc(avg_qpel, 1, 8);
2782  /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2783 
2784 #undef dspfunc
2785 
2794 
2795 #define SET_CMP_FUNC(name) \
2796  c->name[0]= name ## 16_c;\
2797  c->name[1]= name ## 8x8_c;
2798 
2799  SET_CMP_FUNC(hadamard8_diff)
2800  c->hadamard8_diff[4]= hadamard8_intra16_c;
 2801  c->hadamard8_diff[5]= hadamard8_intra8x8_c;
 2802  SET_CMP_FUNC(dct_sad)
2803  SET_CMP_FUNC(dct_max)
2804 #if CONFIG_GPL
2805  SET_CMP_FUNC(dct264_sad)
2806 #endif
2807  c->sad[0]= pix_abs16_c;
2808  c->sad[1]= pix_abs8_c;
2809  c->sse[0]= sse16_c;
2810  c->sse[1]= sse8_c;
2811  c->sse[2]= sse4_c;
2812  SET_CMP_FUNC(quant_psnr)
2813  SET_CMP_FUNC(rd)
2814  SET_CMP_FUNC(bit)
2815  c->vsad[0]= vsad16_c;
2816  c->vsad[4]= vsad_intra16_c;
2817  c->vsad[5]= vsad_intra8_c;
2818  c->vsse[0]= vsse16_c;
2819  c->vsse[4]= vsse_intra16_c;
2820  c->vsse[5]= vsse_intra8_c;
2821  c->nsse[0]= nsse16_c;
2822  c->nsse[1]= nsse8_c;
2823 #if CONFIG_DWT
 2824  ff_dsputil_init_dwt(c);
 2825 #endif
2826 
2828 
2829  c->add_bytes= add_bytes_c;
2835  c->bswap_buf= bswap_buf;
2836  c->bswap16_buf = bswap16_buf;
2837 
 2838  if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
 2839  c->h263_h_loop_filter= h263_h_loop_filter_c;
 2840  c->h263_v_loop_filter= h263_v_loop_filter_c;
 2841  }
2842 
2844 
2847 
2848 #if CONFIG_VORBIS_DECODER
 2849  c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
 2850 #endif
2862 
2863  c->shrink[0]= av_image_copy_plane;
2864  c->shrink[1]= ff_shrink22;
2865  c->shrink[2]= ff_shrink44;
2866  c->shrink[3]= ff_shrink88;
2867 
2868  memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
2869  memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
2870 
2871 #undef FUNC
2872 #undef FUNCC
2873 #define FUNC(f, depth) f ## _ ## depth
2874 #define FUNCC(f, depth) f ## _ ## depth ## _c
2875 
2876 #define dspfunc1(PFX, IDX, NUM, depth)\
2877  c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
2878  c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
2879  c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
2880  c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
2881 
2882 #define dspfunc2(PFX, IDX, NUM, depth)\
2883  c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
2884  c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
2885  c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
2886  c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
2887  c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
2888  c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
2889  c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
2890  c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
2891  c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
2892  c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
2893  c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
2894  c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
2895  c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
2896  c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
2897  c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
2898  c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
2899 
2900 
2901 #define BIT_DEPTH_FUNCS(depth, dct)\
2902  c->get_pixels = FUNCC(get_pixels ## dct , depth);\
2903  c->draw_edges = FUNCC(draw_edges , depth);\
2904  c->clear_block = FUNCC(clear_block ## dct , depth);\
2905  c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
2906  c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
2907  c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
2908  c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
2909  c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
2910 \
2911  c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
2912  c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
2913  c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
2914  c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
2915  c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
2916  c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
2917 \
2918  dspfunc1(put , 0, 16, depth);\
2919  dspfunc1(put , 1, 8, depth);\
2920  dspfunc1(put , 2, 4, depth);\
2921  dspfunc1(put , 3, 2, depth);\
2922  dspfunc1(put_no_rnd, 0, 16, depth);\
2923  dspfunc1(put_no_rnd, 1, 8, depth);\
2924  dspfunc1(avg , 0, 16, depth);\
2925  dspfunc1(avg , 1, 8, depth);\
2926  dspfunc1(avg , 2, 4, depth);\
2927  dspfunc1(avg , 3, 2, depth);\
2928  dspfunc1(avg_no_rnd, 0, 16, depth);\
2929  dspfunc1(avg_no_rnd, 1, 8, depth);\
2930 \
2931  dspfunc2(put_h264_qpel, 0, 16, depth);\
2932  dspfunc2(put_h264_qpel, 1, 8, depth);\
2933  dspfunc2(put_h264_qpel, 2, 4, depth);\
2934  dspfunc2(put_h264_qpel, 3, 2, depth);\
2935  dspfunc2(avg_h264_qpel, 0, 16, depth);\
2936  dspfunc2(avg_h264_qpel, 1, 8, depth);\
2937  dspfunc2(avg_h264_qpel, 2, 4, depth);
2938 
2939  switch (avctx->bits_per_raw_sample) {
2940  case 9:
2941  if (c->dct_bits == 32) {
2942  BIT_DEPTH_FUNCS(9, _32);
2943  } else {
2944  BIT_DEPTH_FUNCS(9, _16);
2945  }
2946  break;
2947  case 10:
2948  if (c->dct_bits == 32) {
2949  BIT_DEPTH_FUNCS(10, _32);
2950  } else {
2951  BIT_DEPTH_FUNCS(10, _16);
2952  }
2953  break;
2954  default:
2955  BIT_DEPTH_FUNCS(8, _16);
2956  break;
2957  }
2958 
2959 
2960  if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
2961  if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
2962  if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
2963  if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
2964  if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
2965  if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
2966  if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
2967 
2968  for (i = 0; i < 4; i++) {
2969  for (j = 0; j < 16; j++) {
2970  if(!c->put_2tap_qpel_pixels_tab[i][j])
2971  c->put_2tap_qpel_pixels_tab[i][j] =
2972  c->put_h264_qpel_pixels_tab[i][j];
2973  if(!c->avg_2tap_qpel_pixels_tab[i][j])
2974  c->avg_2tap_qpel_pixels_tab[i][j] =
2975  c->avg_h264_qpel_pixels_tab[i][j];
2976  }
2977  }
2978 
2981 }
static int bit8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2219
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len)
Definition: dsputil.c:2372
void(* vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len)
Definition: dsputil.h:358
#define W5
Definition: dsputil.c:2528
static int vsse16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
Definition: dsputil.c:2330
me_cmp_func vsad[6]
Definition: dsputil.h:231
void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_bfin.c:198
av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil.c:2656
#define ARCH_ARM
Definition: config.h:14
#define ff_put_pixels8x8_c
Definition: dsputil.h:99
const uint8_t ff_zigzag_direct[64]
Definition: dsputil.c:59
int size
static int pix_sum_c(uint8_t *pix, int line_size)
Definition: dsputil.c:178
int dct_algo
DCT algorithm, see FF_DCT_* below.
Definition: avcodec.h:2648
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
Definition: dsputil.c:2602
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
Definition: dsputil.c:2507
void(* add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha)
Definition: dsputil.h:342
#define B
Definition: dsputil.c:1897
misc image utilities
void(* shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height)
Definition: dsputil.h:444
#define SET_CMP_FUNC(name)
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
Definition: dsputil.c:1700
void ff_simple_idct_8(DCTELEM *block)
static void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:610
int acc
Definition: yuv2rgb.c:476
void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_ppc.c:140
const uint8_t ff_zigzag248_direct[64]
Definition: dsputil.c:72
void(* idct_add)(uint8_t *dest, int line_size, DCTELEM *block)
block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
Definition: dsputil.h:411
void(* vorbis_inverse_coupling)(float *mag, float *ang, int blocksize)
Definition: dsputil.h:352
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
Definition: dsputil.c:2612
#define a0
Definition: regdef.h:46
void ff_fdct248_islow_10(DCTELEM *data)
int(* try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
Definition: dsputil.h:434
#define FF_PARTTRANS_IDCT_PERM
Definition: dsputil.h:431
#define ARCH_BFIN
Definition: config.h:18
int(* me_cmp_func)(void *s, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
Definition: dsputil.h:176
Scantable.
Definition: dsputil.h:181
me_cmp_func dct_max[6]
Definition: dsputil.h:236
#define MAX_NEG_CROP
Definition: dsputil.h:83
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
Definition: dsputil.c:1685
static void put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size)
Definition: dsputil.c:382
#define av_bswap16
Definition: bswap.h:31
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top)
Definition: dsputil.c:1837
me_cmp_func sse[6]
Definition: dsputil.h:225
static void butterflies_float_interleave_c(float *dst, const float *src0, const float *src1, int len)
Definition: dsputil.c:2406
const uint8_t ff_h263_loop_filter_strength[32]
Definition: h263data.h:275
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len)
Definition: dsputil.c:2438
#define a1
Definition: regdef.h:47
uint8_t raster_end[64]
Definition: dsputil.h:184
#define op_avg(a, b)
Definition: dsputil.c:1205
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha)
Definition: dsputil.c:1902
mpegvideo header.
#define pb_80
Definition: dsputil.c:57
void(* apply_window_int16)(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
Apply symmetric window in 16-bit fixed-point.
Definition: dsputil.h:470
uint8_t permutated[64]
Definition: dsputil.h:183
uint8_t run
Definition: svq3.c:124
int bits_per_raw_sample
Bits per sample/pixel of internal libavcodec pixel/sample format.
Definition: avcodec.h:2711
uint8_t * intra_ac_vlc_length
Definition: mpegvideo.h:450
void ff_j_rev_dct(DCTELEM *data)
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2, int order)
Definition: dsputil.c:2474
#define UNI_AC_ENC_INDEX(run, level)
Definition: mpegvideo.h:455
uint8_t ff_cropTbl[256+2 *MAX_NEG_CROP]
Definition: dsputil.c:41
int stride
Definition: mace.c:144
int qscale
QP.
Definition: mpegvideo.h:342
static int vsad16_c(void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
Definition: dsputil.c:2296
static void butterflies_float_c(float *restrict v1, float *restrict v2, int len)
Definition: dsputil.c:2395
int(* pix_sum)(uint8_t *pix, int line_size)
Definition: dsputil.h:220
#define HAVE_VIS
Definition: config.h:56
#define _(x)
static void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:652
#define dspfunc(PFX, IDX, NUM)
static void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:621
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1569
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
Definition: dsputil.c:2607
const uint8_t ff_alternate_vertical_scan[64]
Definition: dsputil.c:97
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
Definition: dsputil.c:1298
#define a3
Definition: regdef.h:49
static int hadamard8_diff8x8_c(void *s, uint8_t *dst, uint8_t *src, int stride, int h)
Definition: dsputil.c:1947
static int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1549
uint8_t bits
Definition: crc.c:31
uint8_t
#define RECON_SHIFT
Definition: dsputil.h:437
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h)
Definition: dsputil.c:1633
static void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:599
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
Definition: dsputil.c:431
static void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:696
#define b
Definition: input.c:52
void ff_vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
Definition: vorbisdec.c:1464
#define W0
Definition: dsputil.c:2523
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len)
Definition: dsputil.c:2456
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2, int size)
Definition: dsputil.c:2345
void(* add_bytes)(uint8_t *dst, uint8_t *src, int w)
Definition: dsputil.h:333
me_cmp_func dct_sad[6]
Definition: dsputil.h:227
void ff_simple_idct_add_8(uint8_t *dest, int line_size, DCTELEM *block)
void(* dct_unquantize_intra)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale)
Definition: mpegvideo.h:698
#define R
Definition: dsputil.c:1899
void(* dct_unquantize_inter)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale)
Definition: mpegvideo.h:700
void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type)
Definition: dsputil.c:1741
void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_mmx.c:2515
uint8_t idct_permutation[64]
idct input permutation.
Definition: dsputil.h:425
const uint8_t * scantable
Definition: dsputil.h:182
#define WRAPPER8_16_SQ(name8, name16)
Definition: dsputil.h:605
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc)
Definition: dsputil.c:1872
static void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:535
#define LOCAL_ALIGNED_16(t, v,...)
Definition: dsputil.h:602
static void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:685
static int hadamard8_intra8x8_c(void *s, uint8_t *src, uint8_t *dummy, int stride, int h)
Definition: dsputil.c:1992
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
Definition: dsputil.c:441
#define W7
Definition: dsputil.c:2530
void(* h263_h_loop_filter)(uint8_t *src, int stride, int qscale)
Definition: dsputil.h:347
#define cm
Definition: dvbsubdec.c:34
static void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:663
static float t
me_cmp_func nsse[6]
Definition: dsputil.h:233
#define ARCH_ALPHA
Definition: config.h:13
qpel_mc_func put_mspel_pixels_tab[8]
Definition: dsputil.h:316
#define r
Definition: input.c:51
void(* vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len)
Definition: dsputil.h:356
void(* add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size)
Definition: dsputil.h:205
me_cmp_func w53[6]
Definition: dsputil.h:234
int32_t(* scalarproduct_int16)(const int16_t *v1, const int16_t *v2, int len)
Calculate scalar product of two vectors.
Definition: dsputil.h:450
void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block)
void(* put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size)
Definition: dsputil.h:203
#define W1
Definition: dsputil.c:2524
void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height)
Definition: imgconvert.c:192
#define FF_SSE2_IDCT_PERM
Definition: dsputil.h:432
#define W2
Definition: dsputil.c:2525
void ff_faanidct_put(uint8_t *dest, int line_size, DCTELEM block[64])
Definition: faanidct.c:158
#define MUL16(a, b)
Definition: mathops.h:38
void(* bswap16_buf)(uint16_t *dst, const uint16_t *src, int len)
Definition: dsputil.h:344
uint8_t * inter_ac_vlc_last_length
Definition: mpegvideo.h:453
#define s2
Definition: regdef.h:39
void(* sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top)
subtract huffyuv's variant of median prediction note, this might read from src1[-1], src2[-1]
Definition: dsputil.h:339
tpel_mc_func avg_tpel_pixels_tab[11]
Definition: dsputil.h:310
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
void ff_simple_idct_put_8(uint8_t *dest, int line_size, DCTELEM *block)
void(* bswap_buf)(uint32_t *dst, const uint32_t *src, int w)
Definition: dsputil.h:343
const uint8_t ff_alternate_horizontal_scan[64]
Definition: dsputil.c:86
void(* idct_put)(uint8_t *dest, int line_size, DCTELEM *block)
block -> idct -> clip to unsigned 8 bit -> dest.
Definition: dsputil.h:405
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
Definition: dsputil.c:266
g
Definition: yuv2rgb.c:540
#define s0
Definition: regdef.h:37
static uint32_t clipf_c_one(uint32_t a, uint32_t mini, uint32_t maxi, uint32_t maxisign)
Definition: dsputil.c:2429
static void wmv2_idct_col(short *b)
Definition: dsputil.c:2558
uint16_t ff_inv_zigzag_direct16[64]
Definition: dsputil.c:84
#define VSAD_INTRA(size)
Definition: dsputil.c:2278
static void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:577
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
Definition: dsputil.c:477
static int quant_psnr8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2120
#define op_put(a, b)
Definition: dsputil.c:1207
void av_log(void *avcl, int level, const char *fmt,...)
Definition: log.c:146
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len)
Return the scalar product of two vectors.
Definition: dsputil.c:2418
int(* add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left)
Definition: dsputil.h:341
int ff_check_alignment(void)
Definition: dsputil.c:2636
#define W6
Definition: dsputil.c:2529
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
Definition: dsputil.c:1304
#define FF_LIBMPEG2_IDCT_PERM
Definition: dsputil.h:428
#define BASIS_SHIFT
Definition: dsputil.h:436
void(* idct)(DCTELEM *block)
Definition: dsputil.h:398
void ff_faanidct(DCTELEM block[64])
Definition: faanidct.c:132
int dct_bits
Size of DCT coefficients.
Definition: dsputil.h:198
static void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: dsputil.h:642
#define QPEL_MC(r, OPNAME, RND, OP)
Definition: dsputil.c:729
me_cmp_func vsse[6]
Definition: dsputil.h:232
void ff_faandct248(DCTELEM *data)
Definition: faandct.c:182
void ff_dsputil_init_dwt(DSPContext *c)
Definition: dwt.c:845
static int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1433
static int dct_max8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2104
void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx)
static void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:674
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size)
Definition: dsputil.c:361
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1611
#define BUTTERFLYA(x, y)
Definition: dsputil.c:1945
uint8_t * intra_ac_vlc_last_length
Definition: mpegvideo.h:451
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
void(* add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
Definition: dsputil.h:340
void ff_simple_idct_10(DCTELEM *block)
uint32_t ff_squareTbl[512]
Definition: dsputil.c:42
static DCTELEM block[64]
Definition: dct-test.c:169
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
Definition: dsputil.c:1317
void(* fdct248)(DCTELEM *block)
Definition: dsputil.h:395
void ff_fdct_ifast(DCTELEM *data)
Definition: jfdctfst.c:208
int idct_algo
IDCT algorithm, see FF_IDCT_* below.
Definition: avcodec.h:2661
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
Definition: dsputil.c:454
void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height)
Definition: imgconvert.c:256
static int sse8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:289
#define W3
Definition: dsputil.c:2526
qpel_mc_func avg_2tap_qpel_pixels_tab[4][16]
Definition: dsputil.h:328
int32_t
#define a2
Definition: regdef.h:48
void ff_dsputil_init_vis(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_vis.c:3953
#define FF_NO_IDCT_PERM
Definition: dsputil.h:427
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby, const int size, const int h, int ref_index, int src_index, me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags)
compares a block (either a full macroblock or a partition thereof) against a proposed motion-compensa...
Definition: motion_est.c:252
int block_last_index[12]
last non zero coefficient in block
Definition: mpegvideo.h:261
static void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:718
static void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:588
#define HAVE_MMX
Definition: config.h:46
static void h261_loop_filter_c(uint8_t *src, int stride)
Definition: dsputil.c:1406
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1589
int ac_esc_length
num of bits needed to encode the longest esc
Definition: mpegvideo.h:449
static void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:641
#define BIT_DEPTH_FUNCS(depth, dct)
me_cmp_func bit[6]
Definition: dsputil.h:229
static const float pred[4]
Definition: siprdata.h:259
static int dct_sad8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2040
void ff_jpeg_fdct_islow_10(DCTELEM *data)
static void wmv2_idct_row(short *b)
Definition: dsputil.c:2532
NULL
Definition: eval.c:52
void ff_faandct(DCTELEM *data)
Definition: faandct.c:122
static int width
Definition: utils.c:156
#define av_bswap32
Definition: bswap.h:33
uint8_t * luma_dc_vlc_length
Definition: mpegvideo.h:454
int(* fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow)
Definition: mpegvideo.h:703
#define a5
Definition: regdef.h:51
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
Definition: dsputil.c:1288
external API header
int idct_permutation_type
Definition: dsputil.h:426
static int pix_norm1_c(uint8_t *pix, int line_size)
Definition: dsputil.c:200
void(* vector_clipf)(float *dst, const float *src, float min, float max, int len)
Definition: dsputil.h:360
main external API structure.
Definition: avcodec.h:1339
#define FF_SIMPLE_IDCT_PERM
Definition: dsputil.h:429
#define avg4(a, b, c, d)
Definition: dsputil.c:452
ScanTable intra_scantable
Definition: mpegvideo.h:266
void(* butterflies_float)(float *restrict v1, float *restrict v2, int len)
Calculate the sum and difference of two vectors of floats.
Definition: dsputil.h:374
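In plain C this butterfly amounts to the following sketch (an illustrative scalar version, not necessarily the exact dsputil.c body):

/* Sketch: replace v1[i] with the sum and v2[i] with the difference. */
static void butterflies_float_sketch(float *v1, float *v2, int len)
{
    int i;
    for (i = 0; i < len; i++) {
        float t = v1[i] - v2[i];
        v1[i] += v2[i];
        v2[i]  = t;
    }
}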
me_cmp_func dct264_sad[6]
Definition: dsputil.h:237
void(* diff_pixels)(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)
Definition: dsputil.h:202
#define CONFIG_H263_DECODER
Definition: config.h:416
op_fill_func fill_block_tab[2]
Definition: dsputil.h:489
void ff_faanidct_add(uint8_t *dest, int line_size, DCTELEM block[64])
Definition: faanidct.c:145
void(* gmc1)(uint8_t *dst, uint8_t *src, int srcStride, int h, int x16, int y16, int rounder)
translational global motion compensation.
Definition: dsputil.h:212
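The C reference version blends the four neighbouring samples bilinearly using the 1/16-pel fractional offsets x16 and y16. A simplified sketch for one 8-pixel-wide block (name gmc1_sketch hypothetical; special cases and edge handling omitted):

#include <stdint.h>

/* Simplified 1/16-pel translational GMC: bilinear weights are products of
 * the fractional offsets; the weights sum to 256, hence the >>8. */
static void gmc1_sketch(uint8_t *dst, uint8_t *src, int stride, int h,
                        int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = x16        * (16 - y16);
    const int C = (16 - x16) * y16;
    const int D = x16        * y16;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = (A * src[j]          + B * src[j + 1] +
                      C * src[j + stride] + D * src[j + stride + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}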
qpel_mc_func put_2tap_qpel_pixels_tab[4][16]
Definition: dsputil.h:327
static void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:707
uint8_t * inter_ac_vlc_length
Definition: mpegvideo.h:452
#define SQ(a)
Definition: dsputil.c:2311
static int sum_abs_dctelem_c(DCTELEM *block)
Definition: dsputil.c:423
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
Definition: dsputil.c:2597
static void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:632
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
Definition: dsputil.c:1294
av_cold void ff_dsputil_static_init(void)
Definition: dsputil.c:2619
int index
Definition: gxfenc.c:72
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
Definition: dsputil.c:1226
#define avg2(a, b)
Definition: dsputil.c:451
#define mid_pred
Definition: mathops.h:94
void(* add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
Definition: dsputil.h:435
DSPContext dsp
pointers for accelerated dsp functions
Definition: mpegvideo.h:361
int(* pix_norm1)(uint8_t *pix, int line_size)
Definition: dsputil.h:221
#define s1
Definition: regdef.h:38
static void vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, int len)
Definition: dsputil.c:2378
static void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:566
#define pb_7f
Definition: dsputil.c:56
void ff_init_scantable_permutation(uint8_t *idct_permutation, int idct_permutation_type)
Definition: dsputil.c:143
int(* ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, int size)
Definition: dsputil.h:246
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
Definition: dsputil.c:248
short DCTELEM
Definition: dsputil.h:39
me_cmp_func w97[6]
Definition: dsputil.h:235
void(* h263_v_loop_filter)(uint8_t *src, int stride, int qscale)
Definition: dsputil.h:346
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top)
Definition: dsputil.c:1854
void ff_fdct248_islow_8(DCTELEM *data)
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
void(* vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len)
Definition: dsputil.h:354
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w)
Definition: dsputil.c:1259
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1489
#define ARCH_PPC
Definition: config.h:24
static const uint16_t scale[4]
qpel_mc_func put_h264_qpel_pixels_tab[4][16]
Definition: dsputil.h:324
uint8_t level
Definition: svq3.c:125
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h)
Definition: dsputil.c:1659
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
Definition: dsputil.c:1737
static int rd8x8_c(void *c, uint8_t *src1, uint8_t *src2, int stride, int h)
Definition: dsputil.c:2143
int(* sum_abs_dctelem)(DCTELEM *block)
Definition: dsputil.h:208
void(* h261_loop_filter)(uint8_t *src, int stride)
Definition: dsputil.h:349
me_cmp_func rd[6]
Definition: dsputil.h:230
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
Definition: dsputil.c:2484
int height
Definition: gxfenc.c:72
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
Permute an 8x8 block.
Definition: dsputil.c:1716
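Conceptually, the permutation moves the first last+1 scanned coefficients to the positions the selected IDCT expects. A rough equivalent, given as a sketch rather than the literal dsputil.c body (DCTELEM is short per dsputil.h, so int16_t is used here):

#include <stdint.h>

/* Sketch: relocate each coefficient reached by the scantable to the slot the
 * IDCT's input permutation expects, zeroing the positions it vacates. */
static void block_permute_sketch(int16_t *block, const uint8_t *permutation,
                                 const uint8_t *scantable, int last)
{
    int16_t temp[64];
    int i;

    if (last <= 0)
        return;
    for (i = 0; i <= last; i++) {
        const int j = scantable[i];
        temp[j]  = block[j];
        block[j] = 0;
    }
    for (i = 0; i <= last; i++) {
        const int j = scantable[i];
        block[permutation[j]] = temp[j];
    }
}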
MpegEncContext.
Definition: mpegvideo.h:211
tpel_mc_func put_tpel_pixels_tab[11]
Thirdpel motion compensation with rounding (a+b+1)>>1.
Definition: dsputil.h:309
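The thirdpel helpers avoid a true division by 3 when blending neighbouring pixels: a 2/3–1/3 mix is approximated by multiplying by 683 and shifting right by 11, since 683/2048 is very close to 1/3. A sketch of the horizontal 1/3-position case (illustrative only, name hypothetical):

#include <stdint.h>

/* Sketch of a horizontal 1/3-pel put: (2*a + b + 1) * 683 >> 11 ~= (2*a + b)/3,
 * with the +1 providing upward rounding. */
static void tpel_mc10_sketch(uint8_t *dst, const uint8_t *src,
                             int stride, int width, int height)
{
    int i, j;
    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (683 * (2 * src[j] + src[j + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}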
struct AVCodecContext * avctx
Definition: mpegvideo.h:213
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1461
#define BUTTERFLY1(x, y)
Definition: dsputil.c:1936
#define op_put_no_rnd(a, b)
Definition: dsputil.c:1208
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
Definition: dsputil.c:1812
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size)
Definition: dsputil.c:403
void(* vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len)
Clip each element in an array of int32_t to a given minimum and maximum value.
Definition: dsputil.h:486
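A straightforward scalar implementation of this hook simply clamps element by element; a sketch (name hypothetical):

#include <stdint.h>

/* Scalar sketch: clamp every element of src into [min, max] and store in dst. */
static void vector_clip_int32_sketch(int32_t *dst, const int32_t *src,
                                     int32_t min, int32_t max, unsigned int len)
{
    unsigned int i;
    for (i = 0; i < len; i++) {
        int32_t v = src[i];
        if (v < min)      v = min;
        else if (v > max) v = max;
        dst[i] = v;
    }
}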
void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block)
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale)
Definition: dsputil.c:1369
void(* diff_bytes)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
Definition: dsputil.h:334
static const uint8_t idct_sse2_row_perm[8]
Definition: dsputil.c:120
qpel_mc_func avg_h264_qpel_pixels_tab[4][16]
Definition: dsputil.h:325
void(* put_signed_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size)
Definition: dsputil.h:204
static void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:555
#define VSSE_INTRA(size)
Definition: dsputil.c:2312
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
Definition: dsputil.c:1326
void ff_jpeg_fdct_islow_8(DCTELEM *data)
void(* fdct)(DCTELEM *block)
Definition: dsputil.h:394
int nsse_weight
noise vs. SSE weight for the NSSE comparison function.
Definition: avcodec.h:2808
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:1519
void(* gmc)(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
global motion compensation.
Definition: dsputil.h:216
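Unlike gmc1 above, this hook handles a full affine warp: source coordinates live in 1/(1<<shift) units, advance by (dxx, dyx) per output column and (dxy, dyy) per output row, and each sample is a bilinear blend of four neighbours with rounding constant r. The following is only a conceptual sketch of that coordinate stepping and blend, not the dsputil.c code; edge clipping and the width/height bounds are omitted:

#include <stdint.h>

/* Conceptual affine GMC fetch: the four bilinear weights sum to s*s, hence
 * the normalizing shift by 2*shift after adding the rounding constant r. */
static void gmc_sketch(uint8_t *dst, const uint8_t *src, int stride, int h,
                       int ox, int oy, int dxx, int dxy, int dyx, int dyy,
                       int shift, int r)
{
    const int s = 1 << shift;
    int x, y;

    for (y = 0; y < h; y++) {
        int vx = ox, vy = oy;
        for (x = 0; x < 8; x++) {
            int ix = vx >> shift, fx = vx & (s - 1);
            int iy = vy >> shift, fy = vy & (s - 1);
            const uint8_t *p = src + iy * stride + ix;
            dst[x] = ((s - fx) * (s - fy) * p[0]      + fx * (s - fy) * p[1] +
                      (s - fx) * fy       * p[stride] + fx * fy       * p[stride + 1] +
                      r) >> (2 * shift);
            vx += dxx;
            vy += dyx;
        }
        ox  += dxy;
        oy  += dyy;
        dst += stride;
    }
}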
#define restrict
Definition: config.h:8
void ff_dsputil_init_sh4(DSPContext *c, AVCodecContext *avctx)
Definition: dsputil_sh4.c:92
static int16_t basis[64][64]
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
Definition: dsputil.c:1801
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
Definition: dsputil.c:1332
me_cmp_func sad[6]
Definition: dsputil.h:224
int32_t(* scalarproduct_and_madd_int16)(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul)
Calculate scalar product of v1 and v2, and v1[i] += v3[i] * mul.
Definition: dsputil.h:457
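A scalar reference for this hook is short; the sketch below (name hypothetical) accumulates the dot product while applying the multiply-add in the same pass:

#include <stdint.h>

/* Scalar sketch: return sum(v1[i]*v2[i]) while updating v1[i] += v3[i]*mul. */
static int32_t scalarproduct_and_madd_int16_sketch(int16_t *v1, const int16_t *v2,
                                                   const int16_t *v3, int len, int mul)
{
    int32_t res = 0;
    int i;
    for (i = 0; i < len; i++) {
        res   += v1[i] * v2[i];
        v1[i] += v3[i] * mul;
    }
    return res;
}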
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, const uint8_t *s2, int stride)
Definition: dsputil.c:340
static const uint8_t simple_mmx_permutation[64]
Definition: dsputil.c:109
static int sse4_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:272
DSP utils.
float(* scalarproduct_float)(const float *v1, const float *v2, int len)
Calculate the scalar product of two vectors of floats.
Definition: dsputil.h:367
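In scalar C this is a plain dot product; a minimal sketch (name hypothetical):

/* Scalar sketch: dot product of two float vectors of length len. */
static float scalarproduct_float_sketch(const float *v1, const float *v2, int len)
{
    float p = 0.0f;
    int i;
    for (i = 0; i < len; i++)
        p += v1[i] * v2[i];
    return p;
}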
#define a4
Definition: regdef.h:50
me_cmp_func hadamard8_diff[6]
Definition: dsputil.h:226
void(* butterflies_float_interleave)(float *dst, const float *src0, const float *src1, int len)
Calculate the sum and difference of two vectors of floats and interleave results into a separate output vector of floats.
Definition: dsputil.h:390
simple idct header.
int len
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable)
Definition: dsputil.c:122
#define FF_TRANSPOSE_IDCT_PERM
Definition: dsputil.h:430
me_cmp_func quant_psnr[6]
Definition: dsputil.h:228
#define G
Definition: dsputil.c:1898
void ff_fdct_ifast248(DCTELEM *data)
Definition: jfdctfst.c:274
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len)
Definition: dsputil.c:2365
#define CONFIG_H263_ENCODER
Definition: config.h:850
Floating point AAN DCT
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
Definition: dsputil.c:310
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
Definition: imgutils.c:231
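The plane copy amounts to a row-by-row memcpy that honours the (possibly padded, possibly different) linesizes of source and destination; a sketch of that idea (name hypothetical):

#include <stdint.h>
#include <string.h>

/* Sketch: copy bytewidth bytes per row for height rows, advancing each
 * pointer by its own linesize so padding bytes are skipped, not copied. */
static void copy_plane_sketch(uint8_t *dst, int dst_linesize,
                              const uint8_t *src, int src_linesize,
                              int bytewidth, int height)
{
    for (; height > 0; height--) {
        memcpy(dst, src, bytewidth);
        dst += dst_linesize;
        src += src_linesize;
    }
}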
#define ARCH_SH4
Definition: config.h:27
#define BUTTERFLY2(o1, o2, i1, i2)
Definition: dsputil.c:1932
#define A
Definition: dsputil.c:1900
void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height)
Definition: imgconvert.c:225
void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
static const uint16_t rounder[4]
float min
void ff_wmv2_idct_c(short *block)
Definition: dsputil.c:2585
static void apply_window_int16_c(int16_t *output, const int16_t *input, const int16_t *window, unsigned int len)
Definition: dsputil.c:2494
static void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height)
Definition: dsputil.c:544
me_cmp_func pix_abs[2][4]
Definition: dsputil.h:330
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
DSPContext.
Definition: dsputil.h:194
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
Definition: dsputil.c:1308
#define SRC(x, y)