30 #ifndef AVCODEC_DSPUTIL_H
31 #define AVCODEC_DSPUTIL_H
/**
 * Declare the C prototypes for the H.264 inverse-transform family at a
 * given bit depth. Token pasting gives each function a _<depth>_c suffix.
 * Covers the 4x4 and 8x8 IDCT-and-add, the DC-only shortcuts, the
 * multi-block "add16"/"add4"/"add8" variants driven by block offsets and
 * the non-zero-count table nnzc[6*8], plus the luma/chroma DC dequant
 * helpers. Expanded once per supported depth elsewhere in this header.
 */
58 #define H264_IDCT(depth) \
59 void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
60 void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
61 void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
62 void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
63 void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
64 void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
65 void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
66 void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
67 void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
68 void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
69 void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);\
70 void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
/* Size of the negative-overshoot guard region of the pixel clipping
 * (crop) table; values below 0 by up to this amount clamp safely. */
86 #define MAX_NEG_CROP 1024
/**
 * Declare the put (copy) and avg (average with destination) block-copy
 * prototypes for 8x8 and 16x16 blocks at the given bit depth; token
 * pasting appends a _<depth>_c suffix to each name.
 */
92 #define PUTAVG_PIXELS(depth)\
93 void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
94 void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
95 void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
96 void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);
/* Convenience aliases: the unsuffixed names resolve to the 8-bit-depth
 * implementations declared by PUTAVG_PIXELS(8). */
102 #define ff_put_pixels8x8_c ff_put_pixels8x8_8_c
103 #define ff_avg_pixels8x8_c ff_avg_pixels8x8_8_c
104 #define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
105 #define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
/**
 * Global motion compensation, C reference implementation.
 * NOTE(review): parameter semantics are presumed from naming — ox/oy
 * appear to be the sub-pel start offsets, dxx..dyy the affine transform
 * coefficients, shift/r the rounding parameters, and width/height the
 * source picture bounds for edge clamping. Confirm against the
 * definition in the corresponding .c file.
 */
130 void ff_gmc_c(uint8_t *dst, uint8_t *src,
int stride,
int h,
int ox,
int oy,
131 int dxx,
int dxy,
int dyx,
int dyy,
int shift,
int r,
int width,
int height);
/* Motion-compensation callback operating on a w x h pixel block:
 * reads from pixels and writes into block, advancing line_size bytes
 * per row. Unlike the fixed-size put/avg functions above, width and
 * height are passed explicitly. */
158 typedef void (*
tpel_mc_func)(uint8_t *block,
const uint8_t *pixels,
int line_size,
int w,
int h);
/**
 * Declare the three legacy quarter-pel MC prototypes for one position
 * name: put (copy), put_no_rnd (copy without rounding) and avg
 * (average with destination).
 */
164 #define DEF_OLD_QPEL(name)\
165 void ff_put_ ## name (uint8_t *dst, uint8_t *src, int stride);\
166 void ff_put_no_rnd_ ## name (uint8_t *dst, uint8_t *src, int stride);\
167 void ff_avg_ ## name (uint8_t *dst, uint8_t *src, int stride);
182 #define CALL_2X_PIXELS(a, b, n)\
183 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
184 b(block , pixels , line_size, h);\
185 b(block+n, pixels+n, line_size, h);\
/* Motion-estimation comparison callback: returns an integer score for
 * the difference between blk1 and blk2 over h rows, stepping line_size
 * bytes per row. s is an opaque context pointer (presumably the codec
 * context — confirm at the call sites). */
191 typedef int (*
me_cmp_func)(
void *s, uint8_t *blk1, uint8_t *blk2,
int line_size,
int h);
208 int idct_permutation_type);
/**
 * Declare the per-bit-depth edge-emulation prototype: copies a
 * block_w x block_h block starting at (src_x, src_y) from src into buf,
 * replicating edge pixels for the parts that fall outside the w x h
 * picture (per the name; confirm against the definition).
 */
210 #define EMULATED_EDGE(depth) \
211 void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
212 int block_w, int block_h,\
213 int src_x, int src_y, int w, int h);
233 void (*get_pixels)(
DCTELEM *block,
const uint8_t *pixels,
int line_size);
235 void (*put_pixels_clamped)(
const DCTELEM *block, uint8_t *pixels,
int line_size);
236 void (*put_signed_pixels_clamped)(
const DCTELEM *block, uint8_t *pixels,
int line_size);
237 void (*add_pixels_clamped)(
const DCTELEM *block, uint8_t *pixels,
int line_size);
253 void (*emulated_edge_mc)(uint8_t *buf,
const uint8_t *src,
int linesize,
254 int block_w,
int block_h,
255 int src_x,
int src_y,
int w,
int h);
259 void (*gmc1)(uint8_t *dst, uint8_t *src,
int srcStride,
int h,
int x16,
int y16,
int rounder);
264 int dxx,
int dxy,
int dyx,
int dyy,
int shift,
int r,
int width,
int height);
267 int (*pix_sum)(uint8_t * pix,
int line_size);
268 int (*pix_norm1)(uint8_t * pix,
int line_size);
293 int (*ssd_int8_vs_int16)(
const int8_t *pix1,
const int16_t *pix2,
344 void (*put_no_rnd_pixels_l2[2])(uint8_t *block,
const uint8_t *a,
const uint8_t *
b,
int line_size,
int h);
380 void (*add_bytes)(uint8_t *dst, uint8_t *src,
int w);
381 void (*add_bytes_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int w);
382 void (*diff_bytes)(uint8_t *dst, uint8_t *src1, uint8_t *src2,
int w);
387 void (*sub_hfyu_median_prediction)(uint8_t *dst,
const uint8_t *src1,
const uint8_t *src2,
int w,
int *left,
int *left_top);
388 void (*add_hfyu_median_prediction)(uint8_t *dst,
const uint8_t *top,
const uint8_t *diff,
int w,
int *left,
int *left_top);
389 int (*add_hfyu_left_prediction)(uint8_t *dst,
const uint8_t *src,
int w,
int left);
390 void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst,
const uint8_t *src,
int w,
int *red,
int *green,
int *blue,
int *alpha);
392 void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top,
int w,
int bpp);
396 void (*h263_v_loop_filter)(uint8_t *src,
int stride,
int qscale);
397 void (*h263_h_loop_filter)(uint8_t *src,
int stride,
int qscale);
404 void (*vp3_idct_dc_add)(uint8_t *dest,
int line_size,
const DCTELEM *block);
405 void (*vp3_v_loop_filter)(uint8_t *src,
int stride,
int *bounding_values);
406 void (*vp3_h_loop_filter)(uint8_t *src,
int stride,
int *bounding_values);
410 void (*ac3_downmix)(float (*
samples)[256], float (*matrix)[2],
int out_ch,
int in_ch,
int len);
412 void (*vector_fmul)(
float *dst,
const float *src0,
const float *src1,
int len);
413 void (*vector_fmul_reverse)(
float *dst,
const float *src0,
const float *src1,
int len);
415 void (*vector_fmul_add)(
float *dst,
const float *src0,
const float *src1,
const float *src2,
int len);
417 void (*vector_fmul_window)(
float *dst,
const float *src0,
const float *src1,
const float *win,
int len);
419 void (*vector_clipf)(
float *dst ,
const float *src ,
float min,
float max,
int len );
428 void (*vector_fmul_scalar)(
float *dst,
const float *src,
float mul,
439 void (*vector_fmac_scalar)(
float *dst,
const float *src,
float mul,
447 float (*scalarproduct_float)(
const float *v1,
const float *v2,
int len);
470 void (*butterflies_float_interleave)(
float *dst,
const float *src0,
471 const float *src1,
int len);
505 uint8_t idct_permutation[64];
507 #define FF_NO_IDCT_PERM 1
508 #define FF_LIBMPEG2_IDCT_PERM 2
509 #define FF_SIMPLE_IDCT_PERM 3
510 #define FF_TRANSPOSE_IDCT_PERM 4
511 #define FF_PARTTRANS_IDCT_PERM 5
512 #define FF_SSE2_IDCT_PERM 6
514 int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t
basis[64],
int scale);
515 void (*add_8x8basis)(int16_t rem[64], int16_t basis[64],
int scale);
516 #define BASIS_SHIFT 16
517 #define RECON_SHIFT 6
520 #define EDGE_WIDTH 16
522 #define EDGE_BOTTOM 2
526 void (*shrink[4])(uint8_t *dst,
int dst_wrap,
const uint8_t *src,
int src_wrap,
int width,
int height);
529 void (*mlp_filter_channel)(int32_t *
state,
const int32_t *coeff,
530 int firorder,
int iirorder,
531 unsigned int filter_shift, int32_t
mask,
int blocksize,
532 int32_t *sample_buffer);
535 void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst,
int linesize);
537 int * range,
int * sum,
int edges);
544 int32_t (*scalarproduct_int16)(
const int16_t *v1,
const int16_t *v2,
int len,
int shift);
551 int32_t (*scalarproduct_and_madd_int16)(int16_t *v1,
const int16_t *v2,
const int16_t *v3,
int len,
int mul);
564 void (*apply_window_int16)(int16_t *output,
const int16_t *input,
565 const int16_t *window,
unsigned int len);
580 void (*vector_clip_int32)(int32_t *dst,
const int32_t *src, int32_t
min,
581 int32_t max,
unsigned int len);
/* Broadcast one byte value into every byte lane of a 32-/64-bit word
 * (SWAR helper: multiplying by 0x0101... replicates the byte). */
599 #define BYTE_VEC32(c) ((c)*0x01010101UL)
600 #define BYTE_VEC64(c) ((c)*0x0001000100010001UL)
604 return (a | b) - (((a ^
b) & ~
BYTE_VEC32(0x01)) >> 1);
609 return (a & b) + (((a ^
b) & ~
BYTE_VEC32(0x01)) >> 1);
614 return (a | b) - (((a ^
b) & ~
BYTE_VEC64(0x01)) >> 1);
619 return (a & b) + (((a ^
b) & ~
BYTE_VEC64(0x01)) >> 1);
628 return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
664 # define STRIDE_ALIGN 16
669 #define STRIDE_ALIGN 16
673 #define STRIDE_ALIGN 16
678 # define STRIDE_ALIGN 8
/* Declare a local array v of element type t with dimensions s o,
 * aligned to a bytes by hand: over-allocate a raw byte buffer by a
 * bytes and round the pointer up with FFALIGN. Used when the compiler
 * cannot guarantee stack alignment (see the HAVE_LOCAL_ALIGNED_*
 * selection that follows). */
681 #define LOCAL_ALIGNED_A(a, t, v, s, o, ...) \
682 uint8_t la_##v[sizeof(t s o) + (a)]; \
683 t (*v) o = (void *)FFALIGN((uintptr_t)la_##v, a)
685 #define LOCAL_ALIGNED_D(a, t, v, s, o, ...) DECLARE_ALIGNED(a, t, v) s o
687 #define LOCAL_ALIGNED(a, t, v, ...) LOCAL_ALIGNED_A(a, t, v, __VA_ARGS__,,)
689 #if HAVE_LOCAL_ALIGNED_8
690 # define LOCAL_ALIGNED_8(t, v, ...) LOCAL_ALIGNED_D(8, t, v, __VA_ARGS__,,)
692 # define LOCAL_ALIGNED_8(t, v, ...) LOCAL_ALIGNED(8, t, v, __VA_ARGS__)
695 #if HAVE_LOCAL_ALIGNED_16
696 # define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED_D(16, t, v, __VA_ARGS__,,)
698 # define LOCAL_ALIGNED_16(t, v, ...) LOCAL_ALIGNED(16, t, v, __VA_ARGS__)
701 #define WRAPPER8_16(name8, name16)\
702 static int name16(void *s, uint8_t *dst, uint8_t *src, int stride, int h){\
703 return name8(s, dst , src , stride, h)\
704 +name8(s, dst+8 , src+8 , stride, h);\
707 #define WRAPPER8_16_SQ(name8, name16)\
708 static int name16(void *s, uint8_t *dst, uint8_t *src, int stride, int h){\
710 score +=name8(s, dst , src , stride, 8);\
711 score +=name8(s, dst+8 , src+8 , stride, 8);\
715 score +=name8(s, dst , src , stride, 8);\
716 score +=name8(s, dst+8 , src+8 , stride, 8);\
722 static inline void copy_block2(uint8_t *dst,
const uint8_t *src,
int dstStride,
int srcStride,
int h)
733 static inline void copy_block4(uint8_t *dst,
const uint8_t *src,
int dstStride,
int srcStride,
int h)
744 static inline void copy_block8(uint8_t *dst,
const uint8_t *src,
int dstStride,
int srcStride,
int h)
756 static inline void copy_block9(uint8_t *dst,
const uint8_t *src,
int dstStride,
int srcStride,
int h)
769 static inline void copy_block16(uint8_t *dst,
const uint8_t *src,
int dstStride,
int srcStride,
int h)
783 static inline void copy_block17(uint8_t *dst,
const uint8_t *src,
int dstStride,
int srcStride,
int h)