dct-test.c
Go to the documentation of this file.
1 /*
2  * (c) 2001 Fabrice Bellard
3  * 2007 Marc Hoffman <marc.hoffman@analog.com>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/time.h>
32 #include <unistd.h>
33 #include <math.h>
34 
35 #include "libavutil/cpu.h"
36 #include "libavutil/common.h"
37 #include "libavutil/lfg.h"
38 
39 #include "simple_idct.h"
40 #include "aandcttab.h"
41 #include "faandct.h"
42 #include "faanidct.h"
43 #include "x86/idct_xvid.h"
44 #include "dctref.h"
45 
46 #undef printf
47 
48 void ff_mmx_idct(DCTELEM *data);
50 
51 void odivx_idct_c(short *block);
52 
53 // BFIN
56 
57 // ALTIVEC
59 //void idct_altivec(DCTELEM *block);?? no routine
60 
61 // ARM
67 
69 
70 struct algo {
71  const char *name;
76  int nonspec;
77 };
78 
79 #ifndef FAAN_POSTSCALE
80 #define FAAN_SCALE SCALE_PERM
81 #else
82 #define FAAN_SCALE NO_PERM
83 #endif
84 
85 static int cpu_flags;
86 
87 static const struct algo fdct_tab[] = {
88  { "REF-DBL", ff_ref_fdct, NO_PERM },
89  { "FAAN", ff_faandct, FAAN_SCALE },
90  { "IJG-AAN-INT", fdct_ifast, SCALE_PERM },
91  { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
92 
93 #if HAVE_MMX
94  { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
97 #endif
98 
99 #if HAVE_ALTIVEC
100  { "altivecfdct", fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
101 #endif
102 
103 #if ARCH_BFIN
104  { "BFINfdct", ff_bfin_fdct, NO_PERM },
105 #endif
106 
107  { 0 }
108 };
109 
110 static const struct algo idct_tab[] = {
111  { "FAANI", ff_faanidct, NO_PERM },
112  { "REF-DBL", ff_ref_idct, NO_PERM },
113  { "INT", j_rev_dct, MMX_PERM },
114  { "SIMPLE-C", ff_simple_idct_8, NO_PERM },
115 
116 #if HAVE_MMX
117 #if CONFIG_GPL
118  { "LIBMPEG2-MMX", ff_mmx_idct, MMX_PERM, AV_CPU_FLAG_MMX, 1 },
119  { "LIBMPEG2-MMX2", ff_mmxext_idct, MMX_PERM, AV_CPU_FLAG_MMX2, 1 },
120 #endif
122  { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
123  { "XVID-MMX2", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMX2, 1 },
124  { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
125 #endif
126 
127 #if ARCH_BFIN
128  { "BFINidct", ff_bfin_idct, NO_PERM },
129 #endif
130 
131 #if ARCH_ARM
132  { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },
133  { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
134 #endif
135 #if HAVE_ARMV5TE
136  { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM },
137 #endif
138 #if HAVE_ARMV6
139  { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM },
140 #endif
141 #if HAVE_NEON
142  { "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM },
143 #endif
144 
145 #if ARCH_ALPHA
146  { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
147 #endif
148 
149  { 0 }
150 };
151 
152 #define AANSCALE_BITS 12
153 
154 static int64_t gettime(void)
155 {
156  struct timeval tv;
157  gettimeofday(&tv, NULL);
158  return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
159 }
160 
161 #define NB_ITS 20000
162 #define NB_ITS_SPEED 50000
163 
164 static short idct_mmx_perm[64];
165 
166 static short idct_simple_mmx_perm[64] = {
167  0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
168  0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
169  0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
170  0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
171  0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
172  0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
173  0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
174  0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
175 };
176 
177 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
178 
179 static void idct_mmx_init(void)
180 {
181  int i;
182 
183  /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
184  for (i = 0; i < 64; i++) {
185  idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
186  }
187 }
188 
189 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
191 
192 static inline void mmx_emms(void)
193 {
194 #if HAVE_MMX
196  __asm__ volatile ("emms\n\t");
197 #endif
198 }
199 
200 static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng)
201 {
202  int i, j;
203 
204  memset(block, 0, 64 * sizeof(*block));
205 
206  switch (test) {
207  case 0:
208  for (i = 0; i < 64; i++)
209  block[i] = (av_lfg_get(prng) % 512) - 256;
210  if (is_idct) {
211  ff_ref_fdct(block);
212  for (i = 0; i < 64; i++)
213  block[i] >>= 3;
214  }
215  break;
216  case 1:
217  j = av_lfg_get(prng) % 10 + 1;
218  for (i = 0; i < j; i++)
219  block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
220  break;
221  case 2:
222  block[ 0] = av_lfg_get(prng) % 4096 - 2048;
223  block[63] = (block[0] & 1) ^ 1;
224  break;
225  }
226 }
227 
228 static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
229 {
230  int i;
231 
232  if (perm == MMX_PERM) {
233  for (i = 0; i < 64; i++)
234  dst[idct_mmx_perm[i]] = src[i];
235  } else if (perm == MMX_SIMPLE_PERM) {
236  for (i = 0; i < 64; i++)
237  dst[idct_simple_mmx_perm[i]] = src[i];
238  } else if (perm == SSE2_PERM) {
239  for (i = 0; i < 64; i++)
240  dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
241  } else if (perm == PARTTRANS_PERM) {
242  for (i = 0; i < 64; i++)
243  dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
244  } else {
245  for (i = 0; i < 64; i++)
246  dst[i] = src[i];
247  }
248 }
249 
250 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
251 {
252  void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
253  int it, i, scale;
254  int err_inf, v;
255  int64_t err2, ti, ti1, it1, err_sum = 0;
256  int64_t sysErr[64], sysErrMax = 0;
257  int maxout = 0;
258  int blockSumErrMax = 0, blockSumErr;
259  AVLFG prng;
260  double omse, ome;
261  int spec_err;
262 
263  av_lfg_init(&prng, 1);
264 
265  err_inf = 0;
266  err2 = 0;
267  for (i = 0; i < 64; i++)
268  sysErr[i] = 0;
269  for (it = 0; it < NB_ITS; it++) {
270  init_block(block1, test, is_idct, &prng);
271  permute(block, block1, dct->format);
272 
273  dct->func(block);
274  mmx_emms();
275 
276  if (dct->format == SCALE_PERM) {
277  for (i = 0; i < 64; i++) {
278  scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
279  block[i] = (block[i] * scale) >> AANSCALE_BITS;
280  }
281  }
282 
283  ref(block1);
284 
285  blockSumErr = 0;
286  for (i = 0; i < 64; i++) {
287  int err = block[i] - block1[i];
288  err_sum += err;
289  v = abs(err);
290  if (v > err_inf)
291  err_inf = v;
292  err2 += v * v;
293  sysErr[i] += block[i] - block1[i];
294  blockSumErr += v;
295  if (abs(block[i]) > maxout)
296  maxout = abs(block[i]);
297  }
298  if (blockSumErrMax < blockSumErr)
299  blockSumErrMax = blockSumErr;
300  }
301  for (i = 0; i < 64; i++)
302  sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
303 
304  for (i = 0; i < 64; i++) {
305  if (i % 8 == 0)
306  printf("\n");
307  printf("%7d ", (int) sysErr[i]);
308  }
309  printf("\n");
310 
311  omse = (double) err2 / NB_ITS / 64;
312  ome = (double) err_sum / NB_ITS / 64;
313 
314  spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
315 
316  printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
317  is_idct ? "IDCT" : "DCT", dct->name, err_inf,
318  omse, ome, (double) sysErrMax / NB_ITS,
319  maxout, blockSumErrMax);
320 
321  if (spec_err && !dct->nonspec)
322  return 1;
323 
324  if (!speed)
325  return 0;
326 
327  /* speed test */
328  init_block(block, test, is_idct, &prng);
329  permute(block1, block, dct->format);
330 
331  ti = gettime();
332  it1 = 0;
333  do {
334  for (it = 0; it < NB_ITS_SPEED; it++) {
335  memcpy(block, block1, sizeof(block));
336  dct->func(block);
337  }
338  it1 += NB_ITS_SPEED;
339  ti1 = gettime() - ti;
340  } while (ti1 < 1000000);
341  mmx_emms();
342 
343  printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
344  (double) it1 * 1000.0 / (double) ti1);
345 
346  return 0;
347 }
348 
349 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
350 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
351 
352 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
353 {
354  static int init;
355  static double c8[8][8];
356  static double c4[4][4];
357  double block1[64], block2[64], block3[64];
358  double s, sum, v;
359  int i, j, k;
360 
361  if (!init) {
362  init = 1;
363 
364  for (i = 0; i < 8; i++) {
365  sum = 0;
366  for (j = 0; j < 8; j++) {
367  s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
368  c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
369  sum += c8[i][j] * c8[i][j];
370  }
371  }
372 
373  for (i = 0; i < 4; i++) {
374  sum = 0;
375  for (j = 0; j < 4; j++) {
376  s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
377  c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
378  sum += c4[i][j] * c4[i][j];
379  }
380  }
381  }
382 
383  /* butterfly */
384  s = 0.5 * sqrt(2.0);
385  for (i = 0; i < 4; i++) {
386  for (j = 0; j < 8; j++) {
387  block1[8 * (2 * i) + j] =
388  (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
389  block1[8 * (2 * i + 1) + j] =
390  (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
391  }
392  }
393 
394  /* idct8 on lines */
395  for (i = 0; i < 8; i++) {
396  for (j = 0; j < 8; j++) {
397  sum = 0;
398  for (k = 0; k < 8; k++)
399  sum += c8[k][j] * block1[8 * i + k];
400  block2[8 * i + j] = sum;
401  }
402  }
403 
404  /* idct4 */
405  for (i = 0; i < 8; i++) {
406  for (j = 0; j < 4; j++) {
407  /* top */
408  sum = 0;
409  for (k = 0; k < 4; k++)
410  sum += c4[k][j] * block2[8 * (2 * k) + i];
411  block3[8 * (2 * j) + i] = sum;
412 
413  /* bottom */
414  sum = 0;
415  for (k = 0; k < 4; k++)
416  sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
417  block3[8 * (2 * j + 1) + i] = sum;
418  }
419  }
420 
421  /* clamp and store the result */
422  for (i = 0; i < 8; i++) {
423  for (j = 0; j < 8; j++) {
424  v = block3[8 * i + j];
425  if (v < 0) v = 0;
426  else if (v > 255) v = 255;
427  dest[i * linesize + j] = (int) rint(v);
428  }
429  }
430 }
431 
432 static void idct248_error(const char *name,
433  void (*idct248_put)(uint8_t *dest, int line_size,
434  int16_t *block),
435  int speed)
436 {
437  int it, i, it1, ti, ti1, err_max, v;
438  AVLFG prng;
439 
440  av_lfg_init(&prng, 1);
441 
442  /* just one test to see if code is correct (precision is less
443  important here) */
444  err_max = 0;
445  for (it = 0; it < NB_ITS; it++) {
446  /* XXX: use forward transform to generate values */
447  for (i = 0; i < 64; i++)
448  block1[i] = av_lfg_get(&prng) % 256 - 128;
449  block1[0] += 1024;
450 
451  for (i = 0; i < 64; i++)
452  block[i] = block1[i];
453  idct248_ref(img_dest1, 8, block);
454 
455  for (i = 0; i < 64; i++)
456  block[i] = block1[i];
457  idct248_put(img_dest, 8, block);
458 
459  for (i = 0; i < 64; i++) {
460  v = abs((int) img_dest[i] - (int) img_dest1[i]);
461  if (v == 255)
462  printf("%d %d\n", img_dest[i], img_dest1[i]);
463  if (v > err_max)
464  err_max = v;
465  }
466  }
467  printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
468 
469  if (!speed)
470  return;
471 
472  ti = gettime();
473  it1 = 0;
474  do {
475  for (it = 0; it < NB_ITS_SPEED; it++) {
476  for (i = 0; i < 64; i++)
477  block[i] = block1[i];
478  idct248_put(img_dest, 8, block);
479  }
480  it1 += NB_ITS_SPEED;
481  ti1 = gettime() - ti;
482  } while (ti1 < 1000000);
483  mmx_emms();
484 
485  printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
486  (double) it1 * 1000.0 / (double) ti1);
487 }
488 
489 static void help(void)
490 {
491  printf("dct-test [-i] [<test-number>]\n"
492  "test-number 0 -> test with random matrixes\n"
493  " 1 -> test with random sparse matrixes\n"
494  " 2 -> do 3. test from mpeg4 std\n"
495  "-i test IDCT implementations\n"
496  "-4 test IDCT248 implementations\n"
497  "-t speed test\n");
498 }
499 
500 int main(int argc, char **argv)
501 {
502  int test_idct = 0, test_248_dct = 0;
503  int c, i;
504  int test = 1;
505  int speed = 0;
506  int err = 0;
507 
509 
510  ff_ref_dct_init();
511  idct_mmx_init();
512 
513  for (;;) {
514  c = getopt(argc, argv, "ih4t");
515  if (c == -1)
516  break;
517  switch (c) {
518  case 'i':
519  test_idct = 1;
520  break;
521  case '4':
522  test_248_dct = 1;
523  break;
524  case 't':
525  speed = 1;
526  break;
527  default:
528  case 'h':
529  help();
530  return 0;
531  }
532  }
533 
534  if (optind < argc)
535  test = atoi(argv[optind]);
536 
537  printf("Libav DCT/IDCT test\n");
538 
539  if (test_248_dct) {
540  idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
541  } else {
542  const struct algo *algos = test_idct ? idct_tab : fdct_tab;
543  for (i = 0; algos[i].name; i++)
544  if (!(~cpu_flags & algos[i].mm_support)) {
545  err |= dct_error(&algos[i], test, test_idct, speed);
546  }
547  }
548 
549  return err;
550 }