84 #if HAVE_MMXEXT_INLINE
112 #if HAVE_MMXEXT_INLINE
133 #if HAVE_NEON && ARCH_ARM
140 #define AANSCALE_BITS 12
143 #define NB_ITS_SPEED 50000
148 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
149 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
150 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
151 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
152 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
153 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
154 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
155 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
165 for (i = 0; i < 64; i++) {
166 idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
177 memset(block, 0, 64 *
sizeof(*block));
181 for (i = 0; i < 64; i++)
185 for (i = 0; i < 64; i++)
191 for (i = 0; i < j; i++)
196 block[63] = (block[0] & 1) ^ 1;
201 static void permute(int16_t dst[64],
const int16_t src[64],
int perm)
206 for (i = 0; i < 64; i++)
209 for (i = 0; i < 64; i++)
212 for (i = 0; i < 64; i++)
215 for (i = 0; i < 64; i++)
216 dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
218 for (i = 0; i < 64; i++)
228 int64_t err2, ti, ti1, it1, err_sum = 0;
229 int64_t sysErr[64], sysErrMax = 0;
231 int blockSumErrMax = 0, blockSumErr;
240 for (i = 0; i < 64; i++)
242 for (it = 0; it <
NB_ITS; it++) {
250 for (i = 0; i < 64; i++) {
259 for (i = 0; i < 64; i++) {
266 sysErr[i] +=
block[i] - block1[i];
268 if (abs(
block[i]) > maxout)
269 maxout = abs(
block[i]);
271 if (blockSumErrMax < blockSumErr)
272 blockSumErrMax = blockSumErr;
274 for (i = 0; i < 64; i++)
275 sysErrMax =
FFMAX(sysErrMax,
FFABS(sysErr[i]));
277 for (i = 0; i < 64; i++) {
280 printf(
"%7d ", (
int) sysErr[i]);
284 omse = (double) err2 / NB_ITS / 64;
285 ome = (double) err_sum / NB_ITS / 64;
287 spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
289 printf(
"%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
290 is_idct ?
"IDCT" :
"DCT", dct->
name, err_inf,
291 omse, ome, (
double) sysErrMax / NB_ITS,
292 maxout, blockSumErrMax);
313 }
while (ti1 < 1000000);
316 printf(
"%s %s: %0.1f kdct/s\n", is_idct ?
"IDCT" :
"DCT", dct->
name,
317 (
double) it1 * 1000.0 / (
double) ti1);
328 static double c8[8][8];
329 static double c4[4][4];
330 double block1[64], block2[64], block3[64];
337 for (i = 0; i < 8; i++) {
339 for (j = 0; j < 8; j++) {
340 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
341 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
342 sum += c8[i][j] * c8[i][j];
346 for (i = 0; i < 4; i++) {
348 for (j = 0; j < 4; j++) {
349 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
350 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
351 sum += c4[i][j] * c4[i][j];
358 for (i = 0; i < 4; i++) {
359 for (j = 0; j < 8; j++) {
360 block1[8 * (2 * i) + j] =
361 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
362 block1[8 * (2 * i + 1) + j] =
363 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
368 for (i = 0; i < 8; i++) {
369 for (j = 0; j < 8; j++) {
371 for (k = 0; k < 8; k++)
372 sum += c8[k][j] * block1[8 * i + k];
373 block2[8 * i + j] = sum;
378 for (i = 0; i < 8; i++) {
379 for (j = 0; j < 4; j++) {
382 for (k = 0; k < 4; k++)
383 sum += c4[k][j] * block2[8 * (2 * k) + i];
384 block3[8 * (2 * j) + i] = sum;
388 for (k = 0; k < 4; k++)
389 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
390 block3[8 * (2 * j + 1) + i] = sum;
395 for (i = 0; i < 8; i++) {
396 for (j = 0; j < 8; j++) {
397 v = block3[8 * i + j];
399 else if (v > 255) v = 255;
400 dest[i * linesize + j] = (int)
rint(v);
406 void (*idct248_put)(
uint8_t *dest,
int line_size,
410 int it, i, it1, ti, ti1, err_max, v;
418 for (it = 0; it <
NB_ITS; it++) {
420 for (i = 0; i < 64; i++)
424 for (i = 0; i < 64; i++)
428 for (i = 0; i < 64; i++)
432 for (i = 0; i < 64; i++) {
440 printf(
"%s %s: err_inf=%d\n", 1 ?
"IDCT248" :
"DCT248", name, err_max);
449 for (i = 0; i < 64; i++)
455 }
while (ti1 < 1000000);
458 printf(
"%s %s: %0.1f kdct/s\n", 1 ?
"IDCT248" :
"DCT248", name,
459 (
double) it1 * 1000.0 / (
double) ti1);
464 printf(
"dct-test [-i] [<test-number>]\n"
465 "test-number 0 -> test with random matrixes\n"
466 " 1 -> test with random sparse matrixes\n"
467 " 2 -> do 3. test from mpeg4 std\n"
468 "-i test IDCT implementations\n"
469 "-4 test IDCT248 implementations\n"
477 int main(
int argc,
char **argv)
479 int test_idct = 0, test_248_dct = 0;
491 c =
getopt(argc, argv,
"ih4t");
512 test = atoi(argv[
optind]);
514 printf(
"Libav DCT/IDCT test\n");
519 const struct algo *algos = test_idct ? idct_tab :
fdct_tab;
520 for (i = 0; algos[i].
name; i++)
522 err |=
dct_error(&algos[i], test, test_idct, speed);
527 printf(
"Error: %d.\n", err);
#define AV_CPU_FLAG_ALTIVEC
standard
static double rint(double x)
void ff_fdct_ifast(int16_t *data)
static const struct algo idct_tab[]
void ff_simple_idct_neon(int16_t *data)
static uint8_t img_dest[64]
void ff_idct_xvid_sse2(short *block)
#define DECLARE_ALIGNED(n, t, v)
int main(int argc, char **argv)
av_cold void ff_ref_dct_init(void)
Initialize the double precision discrete cosine transform functions fdct & idct.
void ff_faanidct(int16_t block[64])
void ff_bfin_idct(int16_t *block)
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
void ff_simple_idct248_put(uint8_t *dest, int line_size, int16_t *block)
const uint16_t ff_aanscales[64]
void ff_fdct_mmxext(int16_t *block)
void ff_simple_idct_armv6(int16_t *data)
void ff_simple_idct_mmx(int16_t *block)
void ff_fdct_mmx(int16_t *block)
static const struct algo fdct_tab[]
#define AV_CPU_FLAG_ARMV5TE
void ff_fdct_sse2(int16_t *block)
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
static short idct_simple_mmx_perm[64]
static void test(const char *pattern, const char *host)
static void permute(int16_t dst[64], const int16_t src[64], int perm)
static short idct_mmx_perm[64]
int64_t av_gettime(void)
Get the current time in microseconds.
static void idct_mmx_init(void)
#define AV_CPU_FLAG_ARMV6
void ff_jpeg_fdct_islow_8(int16_t *data)
void ff_j_rev_dct(int16_t *data)
void ff_faandct(int16_t *data)
#define AV_CPU_FLAG_MMX
standard MMX
static void(WINAPI *cond_broadcast)(pthread_cond_t *cond)
static const uint8_t idct_sse2_row_perm[8]
static int getopt(int argc, char *argv[], char *opts)
static unsigned int av_lfg_get(AVLFG *c)
Get the next random unsigned 32-bit number using an ALFG.
void ff_bfin_fdct(int16_t *block)
void ff_ref_fdct(short *block)
Transform 8x8 block of data with a double precision forward DCT This is a reference implementation...
av_cold void av_lfg_init(AVLFG *c, unsigned int seed)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static int16_t block1[64]
static const uint16_t scale[4]
AAN (Arai Agui Nakajima) (I)DCT tables.
header for Xvid IDCT functions
static uint8_t img_dest1[64]
common internal and external API header
void ff_fdct_altivec(int16_t *block)
void ff_simple_idct_armv5te(int16_t *data)
static void idct248_error(const char *name, void(*idct248_put)(uint8_t *dest, int line_size, int16_t *block), int speed)
static av_cold int init(AVCodecParserContext *s)
void ff_ref_idct(short *block)
Transform 8x8 block of data with a double precision inverse DCT This is a reference implementation...
static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
enum algo::formattag format
void(* func)(int16_t *block)
void ff_idct_xvid_mmx(short *block)
void ff_idct_xvid_mmxext(short *block)
void ff_j_rev_dct_arm(int16_t *data)
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
void ff_simple_idct_arm(int16_t *data)
void ff_simple_idct_8(int16_t *block)