79 #ifndef FAAN_POSTSCALE
80 #define FAAN_SCALE SCALE_PERM
82 #define FAAN_SCALE NO_PERM
152 #define AANSCALE_BITS 12
157 gettimeofday(&tv,
NULL);
158 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
162 #define NB_ITS_SPEED 50000
167 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
168 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
169 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
170 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
171 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
172 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
173 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
174 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
184 for (i = 0; i < 64; i++) {
185 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
196 __asm__
volatile (
"emms\n\t");
204 memset(block, 0, 64 *
sizeof(*block));
208 for (i = 0; i < 64; i++)
212 for (i = 0; i < 64; i++)
218 for (i = 0; i < j; i++)
223 block[63] = (block[0] & 1) ^ 1;
232 if (perm == MMX_PERM) {
233 for (i = 0; i < 64; i++)
235 }
else if (perm == MMX_SIMPLE_PERM) {
236 for (i = 0; i < 64; i++)
238 }
else if (perm == SSE2_PERM) {
239 for (i = 0; i < 64; i++)
241 }
else if (perm == PARTTRANS_PERM) {
242 for (i = 0; i < 64; i++)
243 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
245 for (i = 0; i < 64; i++)
250 static int dct_error(
const struct algo *dct,
int test,
int is_idct,
int speed)
255 int64_t err2, ti, ti1, it1, err_sum = 0;
256 int64_t sysErr[64], sysErrMax = 0;
258 int blockSumErrMax = 0, blockSumErr;
267 for (i = 0; i < 64; i++)
269 for (it = 0; it <
NB_ITS; it++) {
276 if (dct->
format == SCALE_PERM) {
277 for (i = 0; i < 64; i++) {
286 for (i = 0; i < 64; i++) {
293 sysErr[i] +=
block[i] - block1[i];
295 if (abs(
block[i]) > maxout)
296 maxout = abs(
block[i]);
298 if (blockSumErrMax < blockSumErr)
299 blockSumErrMax = blockSumErr;
301 for (i = 0; i < 64; i++)
302 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
304 for (i = 0; i < 64; i++) {
307 printf(
"%7d ", (
int) sysErr[i]);
311 omse = (double) err2 / NB_ITS / 64;
312 ome = (double) err_sum / NB_ITS / 64;
314 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
316 printf(
"%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
317 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
318 omse, ome, (
double) sysErrMax / NB_ITS,
319 maxout, blockSumErrMax);
340 }
while (ti1 < 1000000);
343 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
344 (
double) it1 * 1000.0 / (
double) ti1);
355 static double c8[8][8];
356 static double c4[4][4];
357 double block1[64], block2[64], block3[64];
364 for (i = 0; i < 8; i++) {
366 for (j = 0; j < 8; j++) {
367 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
368 c8[i][j] = s * cos(
M_PI * i * (j + 0.5) / 8.0);
369 sum += c8[i][j] * c8[i][j];
373 for (i = 0; i < 4; i++) {
375 for (j = 0; j < 4; j++) {
376 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
377 c4[i][j] = s * cos(
M_PI * i * (j + 0.5) / 4.0);
378 sum += c4[i][j] * c4[i][j];
385 for (i = 0; i < 4; i++) {
386 for (j = 0; j < 8; j++) {
387 block1[8 * (2 * i) + j] =
388 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
389 block1[8 * (2 * i + 1) + j] =
390 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
395 for (i = 0; i < 8; i++) {
396 for (j = 0; j < 8; j++) {
398 for (k = 0; k < 8; k++)
399 sum += c8[k][j] * block1[8 * i + k];
400 block2[8 * i + j] = sum;
405 for (i = 0; i < 8; i++) {
406 for (j = 0; j < 4; j++) {
409 for (k = 0; k < 4; k++)
410 sum += c4[k][j] * block2[8 * (2 * k) + i];
411 block3[8 * (2 * j) + i] = sum;
415 for (k = 0; k < 4; k++)
416 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
417 block3[8 * (2 * j + 1) + i] = sum;
422 for (i = 0; i < 8; i++) {
423 for (j = 0; j < 8; j++) {
424 v = block3[8 * i + j];
426 else if (v > 255) v = 255;
427 dest[i * linesize + j] = (int) rint(v);
433 void (*idct248_put)(uint8_t *dest,
int line_size,
437 int it, i, it1, ti, ti1, err_max,
v;
445 for (it = 0; it <
NB_ITS; it++) {
447 for (i = 0; i < 64; i++)
451 for (i = 0; i < 64; i++)
455 for (i = 0; i < 64; i++)
459 for (i = 0; i < 64; i++) {
467 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
476 for (i = 0; i < 64; i++)
482 }
while (ti1 < 1000000);
485 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
486 (
double) it1 * 1000.0 / (
double) ti1);
491 printf(
"dct-test [-i] [<test-number>]\n"
492 "test-number 0 -> test with random matrixes\n"
493 " 1 -> test with random sparse matrixes\n"
494 " 2 -> do 3. test from mpeg4 std\n"
495 "-i test IDCT implementations\n"
496 "-4 test IDCT248 implementations\n"
500 int main(
int argc,
char **argv)
502 int test_idct = 0, test_248_dct = 0;
514 c = getopt(argc, argv,
"ih4t");
535 test = atoi(argv[optind]);
537 printf(
"Libav DCT/IDCT test\n");
542 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
543 for (i = 0; algos[i].
name; i++)
545 err |=
dct_error(&algos[i], test, test_idct, speed);