vc1dsp_mmx.c File Reference
#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
#include "libavcodec/vc1dsp.h"

Go to the source code of this file.

Macros

#define OP_PUT(S, D)
#define OP_AVG(S, D)   "pavgb " #S ", " #D " \n\t"
#define NORMALIZE_MMX(SHIFT)
 Add the rounder from mm7 to mm3 and mm4, then arithmetic-shift both right by SHIFT (packing to the destination is done separately, e.g. by TRANSFER_DO_PACK).
#define TRANSFER_DO_PACK(OP)
#define TRANSFER_DONT_PACK(OP)
#define DO_UNPACK(reg)   "punpcklbw %%mm0, " reg "\n\t"
#define DONT_UNPACK(reg)
#define LOAD_ROUNDER_MMX(ROUND)
 Load the rounder (32-r or 8-r) and replicate it into every 16-bit word of mm7.
#define SHIFT2_LINE(OFF, R0, R1, R2, R3)
#define VC1_HOR_16b_SHIFT2(OP, OPNAME)
 Data is already unpacked, so some operations can be performed directly from memory.
#define VC1_SHIFT2(OP, OPNAME)
#define MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4)
#define MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4)
#define MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME)
#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME)
#define VC1_MSPEL_MC(OP)
#define DECLARE_FUNCTION(a, b)
#define LOOP_FILTER(EXT)
#define ASSIGN_LF(EXT)

Functions

static void vc1_put_ver_16b_shift2_mmx (int16_t *dst, const uint8_t *src, x86_reg stride, int rnd, int64_t shift)
 Sacrificing mm6 allows loads from src to be pipelined.
 VC1_HOR_16b_SHIFT2 (VC1_HOR_16b_SHIFT2(OP_PUT, put_)
static void vc1_inv_trans_4x8_dc_mmx2 (uint8_t *dest, int linesize, DCTELEM *block)
static void vc1_inv_trans_8x4_dc_mmx2 (uint8_t *dest, int linesize, DCTELEM *block)
static void vc1_inv_trans_8x8_dc_mmx2 (uint8_t *dest, int linesize, DCTELEM *block)
void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
void ff_avg_vc1_chroma_mc8_3dnow_nornd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
void ff_put_vc1_chroma_mc8_ssse3_nornd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
void ff_avg_vc1_chroma_mc8_ssse3_nornd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y)
void ff_vc1dsp_init_mmx (VC1DSPContext *dsp)

Macro Definition Documentation

#define ASSIGN_LF (   EXT)
Value:
dsp->vc1_v_loop_filter4 = ff_vc1_v_loop_filter4_ ## EXT; \
dsp->vc1_h_loop_filter4 = ff_vc1_h_loop_filter4_ ## EXT; \
dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_ ## EXT; \
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_ ## EXT; \
dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_ ## EXT; \
dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_ ## EXT

Referenced by ff_vc1dsp_init_mmx().

#define DECLARE_FUNCTION (   a,
  b 
)
Value:
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}\
static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}

Referenced by VC1_HOR_16b_SHIFT2().

#define DO_UNPACK (   reg)    "punpcklbw %%mm0, " reg "\n\t"
See Also
MSPEL_FILTER13_CORE for use as UNPACK macro

Definition at line 55 of file vc1dsp_mmx.c.

#define DONT_UNPACK (   reg)

Definition at line 56 of file vc1dsp_mmx.c.

#define LOAD_ROUNDER_MMX (   ROUND)
Value:
"movd "ROUND", %%mm7 \n\t" \
"punpcklwd %%mm7, %%mm7 \n\t" \
"punpckldq %%mm7, %%mm7 \n\t"

Load the rounder (32-r or 8-r) and replicate it into every 16-bit word of mm7.

Definition at line 59 of file vc1dsp_mmx.c.

Referenced by vc1_put_ver_16b_shift2_mmx().

#define LOOP_FILTER (   EXT)
Value:
void ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq); \
void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq); \
void ff_vc1_v_loop_filter8_ ## EXT(uint8_t *src, int stride, int pq); \
void ff_vc1_h_loop_filter8_ ## EXT(uint8_t *src, int stride, int pq); \
\
static void vc1_v_loop_filter16_ ## EXT(uint8_t *src, int stride, int pq) \
{ \
ff_vc1_v_loop_filter8_ ## EXT(src, stride, pq); \
ff_vc1_v_loop_filter8_ ## EXT(src+8, stride, pq); \
} \
\
static void vc1_h_loop_filter16_ ## EXT(uint8_t *src, int stride, int pq) \
{ \
ff_vc1_h_loop_filter8_ ## EXT(src, stride, pq); \
ff_vc1_h_loop_filter8_ ## EXT(src+8*stride, stride, pq); \
}

Definition at line 685 of file vc1dsp_mmx.c.

#define MSPEL_FILTER13_8B (   NAME,
  A1,
  A2,
  A3,
  A4,
  OP,
  OPNAME 
)
Value:
static void \
OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \
x86_reg stride, int rnd, x86_reg offset) \
{ \
int h = 8; \
src -= offset; \
rnd = 32-rnd; \
__asm__ volatile ( \
LOAD_ROUNDER_MMX("%6") \
"movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
"movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
".p2align 3 \n\t" \
"1: \n\t" \
MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \
NORMALIZE_MMX("$6") \
TRANSFER_DO_PACK(OP) \
"add %5, %1 \n\t" \
"add %5, %2 \n\t" \
"decl %0 \n\t" \
"jnz 1b \n\t" \
: "+r"(h), "+r" (src), "+r" (dst) \
: "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \
: "memory" \
); \
}

Referenced by VC1_HOR_16b_SHIFT2().

#define MSPEL_FILTER13_CORE (   UNPACK,
  MOVQ,
  A1,
  A2,
  A3,
  A4 
)
Value:
MOVQ "*0+"A1", %%mm1 \n\t" \
MOVQ "*4+"A1", %%mm2 \n\t" \
UNPACK("%%mm1") \
UNPACK("%%mm2") \
"pmullw "MANGLE(ff_pw_3)", %%mm1\n\t" \
"pmullw "MANGLE(ff_pw_3)", %%mm2\n\t" \
MOVQ "*0+"A2", %%mm3 \n\t" \
MOVQ "*4+"A2", %%mm4 \n\t" \
UNPACK("%%mm3") \
UNPACK("%%mm4") \
"pmullw %%mm6, %%mm3 \n\t" /* *18 */ \
"pmullw %%mm6, %%mm4 \n\t" /* *18 */ \
"psubw %%mm1, %%mm3 \n\t" /* 18,-3 */ \
"psubw %%mm2, %%mm4 \n\t" /* 18,-3 */ \
MOVQ "*0+"A4", %%mm1 \n\t" \
MOVQ "*4+"A4", %%mm2 \n\t" \
UNPACK("%%mm1") \
UNPACK("%%mm2") \
"psllw $2, %%mm1 \n\t" /* 4* */ \
"psllw $2, %%mm2 \n\t" /* 4* */ \
"psubw %%mm1, %%mm3 \n\t" /* -4,18,-3 */ \
"psubw %%mm2, %%mm4 \n\t" /* -4,18,-3 */ \
MOVQ "*0+"A3", %%mm1 \n\t" \
MOVQ "*4+"A3", %%mm2 \n\t" \
UNPACK("%%mm1") \
UNPACK("%%mm2") \
"pmullw %%mm5, %%mm1 \n\t" /* *53 */ \
"pmullw %%mm5, %%mm2 \n\t" /* *53 */ \
"paddw %%mm1, %%mm3 \n\t" /* 4,53,18,-3 */ \
"paddw %%mm2, %%mm4 \n\t" /* 4,53,18,-3 */
#define MSPEL_FILTER13_HOR_16B (   NAME,
  A1,
  A2,
  A3,
  A4,
  OP,
  OPNAME 
)
Value:
static void \
OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \
const int16_t *src, int rnd) \
{ \
int h = 8; \
src -= 1; \
rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \
__asm__ volatile( \
LOAD_ROUNDER_MMX("%4") \
"movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \
"movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \
".p2align 3 \n\t" \
"1: \n\t" \
MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4) \
NORMALIZE_MMX("$7") \
/* Remove bias */ \
"paddw "MANGLE(ff_pw_128)", %%mm3 \n\t" \
"paddw "MANGLE(ff_pw_128)", %%mm4 \n\t" \
TRANSFER_DO_PACK(OP) \
"add $24, %1 \n\t" \
"add %3, %2 \n\t" \
"decl %0 \n\t" \
"jnz 1b \n\t" \
: "+r"(h), "+r" (src), "+r" (dst) \
: "r"(stride), "m"(rnd) \
: "memory" \
); \
}
#define MSPEL_FILTER13_VER_16B (   NAME,
  A1,
  A2,
  A3,
  A4 
)
#define NORMALIZE_MMX (   SHIFT)
Value:
"paddw %%mm7, %%mm3 \n\t" /* +bias-r */ \
"paddw %%mm7, %%mm4 \n\t" /* +bias-r */ \
"psraw "SHIFT", %%mm3 \n\t" \
"psraw "SHIFT", %%mm4 \n\t"

Add the rounder from mm7 to mm3 and mm4, then arithmetic-shift both right by SHIFT (packing to the destination is done separately, e.g. by TRANSFER_DO_PACK).

Definition at line 37 of file vc1dsp_mmx.c.

#define OP_AVG (   S,
  D 
)    "pavgb " #S ", " #D " \n\t"

Definition at line 34 of file vc1dsp_mmx.c.

#define OP_PUT (   S,
  D 
)

Definition at line 33 of file vc1dsp_mmx.c.

#define SHIFT2_LINE (   OFF,
  R0,
  R1,
  R2,
  R3 
)
Value:
"paddw %%mm"#R2", %%mm"#R1" \n\t" \
"movd (%0,%3), %%mm"#R0" \n\t" \
"pmullw %%mm6, %%mm"#R1" \n\t" \
"punpcklbw %%mm0, %%mm"#R0" \n\t" \
"movd (%0,%2), %%mm"#R3" \n\t" \
"psubw %%mm"#R0", %%mm"#R1" \n\t" \
"punpcklbw %%mm0, %%mm"#R3" \n\t" \
"paddw %%mm7, %%mm"#R1" \n\t" \
"psubw %%mm"#R3", %%mm"#R1" \n\t" \
"psraw %4, %%mm"#R1" \n\t" \
"movq %%mm"#R1", "#OFF"(%1) \n\t" \
"add %2, %0 \n\t"

Definition at line 64 of file vc1dsp_mmx.c.

Referenced by vc1_put_ver_16b_shift2_mmx().

#define TRANSFER_DO_PACK (   OP)
Value:
"packuswb %%mm4, %%mm3 \n\t" \
OP((%2), %%mm3) \
"movq %%mm3, (%2) \n\t"

Definition at line 43 of file vc1dsp_mmx.c.

#define TRANSFER_DONT_PACK (   OP)
Value:
OP(0(%2), %%mm3) \
OP(8(%2), %%mm4) \
"movq %%mm3, 0(%2) \n\t" \
"movq %%mm4, 8(%2) \n\t"

Definition at line 48 of file vc1dsp_mmx.c.

#define VC1_HOR_16b_SHIFT2 (   OP,
  OPNAME 
)

Data is already unpacked, so some operations can be performed directly from memory.

Definition at line 116 of file vc1dsp_mmx.c.

#define VC1_MSPEL_MC (   OP)
#define VC1_SHIFT2 (   OP,
  OPNAME 
)

Referenced by VC1_HOR_16b_SHIFT2().

Function Documentation

void ff_avg_vc1_chroma_mc8_3dnow_nornd ( uint8_t *  dst,
uint8_t *  src,
int  stride,
int  h,
int  x,
int  y 
)

Referenced by ff_vc1dsp_init_mmx().

void ff_avg_vc1_chroma_mc8_mmx2_nornd ( uint8_t *  dst,
uint8_t *  src,
int  stride,
int  h,
int  x,
int  y 
)

Referenced by ff_vc1dsp_init_mmx().

void ff_avg_vc1_chroma_mc8_ssse3_nornd ( uint8_t *  dst,
uint8_t *  src,
int  stride,
int  h,
int  x,
int  y 
)

Referenced by ff_vc1dsp_init_mmx().

void ff_put_vc1_chroma_mc8_mmx_nornd ( uint8_t *  dst,
uint8_t *  src,
int  stride,
int  h,
int  x,
int  y 
)

Referenced by ff_vc1dsp_init_mmx().

void ff_put_vc1_chroma_mc8_ssse3_nornd ( uint8_t *  dst,
uint8_t *  src,
int  stride,
int  h,
int  x,
int  y 
)

Referenced by ff_vc1dsp_init_mmx().

void ff_vc1dsp_init_mmx ( VC1DSPContext dsp)

Definition at line 730 of file vc1dsp_mmx.c.

Referenced by ff_vc1dsp_init().

VC1_HOR_16b_SHIFT2 ( VC1_HOR_16b_SHIFT2(  OP_PUT,
put_   
)

Definition at line 156 of file vc1dsp_mmx.c.

static void vc1_inv_trans_4x8_dc_mmx2 ( uint8_t *  dest,
int  linesize,
DCTELEM block 
)
static

Definition at line 525 of file vc1dsp_mmx.c.

Referenced by ff_vc1dsp_init_mmx().

static void vc1_inv_trans_8x4_dc_mmx2 ( uint8_t *  dest,
int  linesize,
DCTELEM block 
)
static

Definition at line 586 of file vc1dsp_mmx.c.

Referenced by ff_vc1dsp_init_mmx().

static void vc1_inv_trans_8x8_dc_mmx2 ( uint8_t *  dest,
int  linesize,
DCTELEM block 
)
static

Definition at line 624 of file vc1dsp_mmx.c.

Referenced by ff_vc1dsp_init_mmx().

static void vc1_put_ver_16b_shift2_mmx ( int16_t *  dst,
const uint8_t *  src,
x86_reg  stride,
int  rnd,
int64_t  shift 
)
static

Sacrificing mm6 allows loads from src to be pipelined.

Definition at line 79 of file vc1dsp_mmx.c.