#define RV40_LOWPASS(OPNAME, OP) \
static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                                     const int h, const int C1, const int C2, const int SHIFT){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < h; i++)\
    {\
        OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
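/* vertical pass of the same 6-tap kernel: tap offsets step by srcStride instead of 1 */\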
static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                           const int w, const int C1, const int C2, const int SHIFT){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < w; i++)\
    {\
        const int srcB  = src[-2*srcStride];\
        const int srcA  = src[-1*srcStride];\
        const int src0  = src[0 *srcStride];\
        const int src1  = src[1 *srcStride];\
        const int src2  = src[2 *srcStride];\
        const int src3  = src[3 *srcStride];\
        const int src4  = src[4 *srcStride];\
        const int src5  = src[5 *srcStride];\
        const int src6  = src[6 *srcStride];\
        const int src7  = src[7 *srcStride];\
        const int src8  = src[8 *srcStride];\
        const int src9  = src[9 *srcStride];\
        const int src10 = src[10*srcStride];\
        OP(dst[0*dstStride], (srcB + src3  - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1*dstStride], (srcA + src4  - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2*dstStride], (src0 + src5  - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3*dstStride], (src1 + src6  - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4*dstStride], (src2 + src7  - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5*dstStride], (src3 + src8  - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6*dstStride], (src4 + src9  - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst++;\
        src++;\
    }\
}\
\
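/* 16-pixel variants: run the 8-pixel filter on each 8x8 quadrant of the block */\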
static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int w, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, w-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
}\
\
static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
                                            const int h, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, h-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
}
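
/*
 * RV40 quarter-pel motion compensation. The _mcXY suffix encodes the
 * fractional position: X is the horizontal and Y the vertical quarter-pel
 * offset. The (C1, C2, SHIFT) triples select the subpel phase of the
 * 6-tap filter: (52, 20, 6) for the 1/4 position, (20, 20, 5) for the
 * 1/2 position and (20, 52, 6) for the 3/4 position. Mixed positions
 * filter horizontally into a temporary buffer first, then vertically
 * out of it.
 */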
#define RV40_MC(OPNAME, SIZE) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}
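
/*
 * op_put clips the filtered value through the crop table; op_avg
 * additionally averages it with the pixel already in dst, rounding up.
 */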
#define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
#define op_put(a, b) a = cm[b]

RV40_LOWPASS(put_, op_put)
RV40_LOWPASS(avg_, op_avg)

#undef op_avg
#undef op_put

RV40_MC(put_, 8)
RV40_MC(put_, 16)
RV40_MC(avg_, 8)
RV40_MC(avg_, 16)

/* rounding bias for chroma MC, indexed by (y>>1, x>>1) */
static const int rv40_bias[4][4] = {
    {  0, 16, 32, 16 },
    { 32, 28, 32, 28 },
    {  0, 32, 16, 32 },
    { 32, 28, 32, 28 }
};
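
/*
 * Bilinear chroma motion compensation. A..D are the standard bilinear
 * weights for the fractional offsets x and y (A+B+C+D == 64, hence the
 * >>6 in the chroma op_* macros below); when D is zero the 2x2 kernel
 * collapses to a two-tap filter along one axis, selected by step.
 */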
#define RV40_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = ( x) * (8-y);\
    const int C = (8-x) * ( y);\
    const int D = ( x) * ( y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}\
\
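/* 8-pixel-wide rows; otherwise identical to the mc4 version */\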
static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A = (8-x) * (8-y);\
    const int B = ( x) * (8-y);\
    const int C = (8-x) * ( y);\
    const int D = ( x) * ( y);\
    int i;\
    int bias = rv40_bias[y>>1][x>>1];\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const int step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
            OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
            OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
            OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}
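
/*
 * The chroma ops divide out the 64 that the bilinear weights sum to;
 * op_avg again averages the result with the existing destination pixel.
 */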
#define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
#define op_put(a, b) a = ((b)>>6)

RV40_CHROMA_MC(put_, op_put)
RV40_CHROMA_MC(avg_, op_avg)
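
/*
 * Weighted bi-prediction: dst = w2*src1 + w1*src2, scaled back to 8 bits.
 * The two >>9 pre-shifts keep the intermediate products in range before
 * the final rounded >>5, for an effective division by 2^14.
 */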
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = (((w2 * src1[i]) >> 9) + ((w1 * src2[i]) >> 9) + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}

RV40_WEIGHT_FUNC(16)
RV40_WEIGHT_FUNC(8)
/** dither values for deblocking filter - left/top values */
static const uint8_t rv40_dither_l[16] = {
    0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
};

/** dither values for deblocking filter - right/bottom values */
static const uint8_t rv40_dither_r[16] = {
    0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
};
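
/*
 * Weak in-loop deblocking, very similar to the filter described in
 * section 4.4.2 of JVT-A003r1: p0/q0 are corrected by a clipped
 * difference term, and p1/q1 are conditionally smoothed when their
 * local gradients stay within beta.
 */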
#define CLIP_SYMM(a, b) av_clip(a, -(b), b)

static av_always_inline void rv40_weak_loop_filter(uint8_t *src,
                                                   const int step,
                                                   const int stride,
                                                   const int filter_p1,
                                                   const int filter_q1,
                                                   const int alpha,
                                                   const int beta,
                                                   const int lim_p0q0,
                                                   const int lim_q1,
                                                   const int lim_p1)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i, t, u, diff;

    for (i = 0; i < 4; i++, src += stride) {
        int diff_p1p0 = src[-2*step] - src[-1*step];
        int diff_q1q0 = src[ 1*step] - src[ 0*step];
        int diff_p1p2 = src[-2*step] - src[-3*step];
        int diff_q1q2 = src[ 1*step] - src[ 2*step];

        t = src[0*step] - src[-1*step];
        if (!t)
            continue;

        u = (alpha * FFABS(t)) >> 7;
        if (u > 3 - (filter_p1 && filter_q1))
            continue;

        t <<= 2;
        if (filter_p1 && filter_q1)
            t += diff_p1p2 - diff_q1q2;

        diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
        src[-1*step] = cm[src[-1*step] + diff];
        src[ 0*step] = cm[src[ 0*step] - diff];

        if (filter_p1 && FFABS(diff_p1p2) <= beta) {
            t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
            src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
        }

        if (filter_q1 && FFABS(diff_q1q2) <= beta) {
            t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
            src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
        }
    }
}
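
/*
 * The horizontal and vertical edge variants differ only in which
 * direction is the filtering step and which is the iteration stride.
 */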
static void rv40_h_weak_loop_filter(uint8_t *src, const int stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}

static void rv40_v_weak_loop_filter(uint8_t *src, const int stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
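
/*
 * Strong deblocking: p0/q0 and p1/q1 are rebuilt from a dithered
 * 25/26/26/26/25 smoothing kernel (sums to 128, hence >>7); for luma
 * the outermost pair p2/q2 is smoothed as well.
 */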
static av_always_inline void rv40_strong_loop_filter(uint8_t *src,
                                                     const int step,
                                                     const int stride,
                                                     const int alpha,
                                                     const int lims,
                                                     const int dmode,
                                                     const int chroma)
{
    int i;

    for(i = 0; i < 4; i++, src += stride){
        int sflag, p0, q0, p1, q1;
        int t = src[0*step] - src[-1*step];

        if (!t)
            continue;

        sflag = (alpha * FFABS(t)) >> 7;
        if (sflag > 1)
            continue;

        p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
              26*src[ 0*step] + 25*src[ 1*step] +
              rv40_dither_l[dmode + i]) >> 7;

        q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
              26*src[ 1*step] + 25*src[ 2*step] +
              rv40_dither_r[dmode + i]) >> 7;

        if (sflag) {
            p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
            q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
        }

        p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
              25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
        q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
              25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;

        if (sflag) {
            p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
            q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
        }

        src[-2*step] = p1;
        src[-1*step] = p0;
        src[ 0*step] = q0;
        src[ 1*step] = q1;

        if(!chroma){
            src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
                            51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
            src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
                            51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
        }
    }
}
static void rv40_h_strong_loop_filter(uint8_t *src, const int stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
}

static void rv40_v_strong_loop_filter(uint8_t *src, const int stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
}
static av_always_inline int rv40_loop_filter_strength(uint8_t *src,
                                                      int step, int stride,
                                                      int beta, int beta2,
                                                      int edge,
                                                      int *p1, int *q1)
{
    int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
    int strong0 = 0, strong1 = 0;
    uint8_t *ptr;
    int i;

    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
        sum_p1p0 += ptr[-2*step] - ptr[-1*step];
        sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
    }

    *p1 = FFABS(sum_p1p0) < (beta << 2);
    *q1 = FFABS(sum_q1q0) < (beta << 2);

    if (!edge)
        return 0;

    for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
        sum_p1p2 += ptr[-2*step] - ptr[-3*step];
        sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
    }

    strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
    strong1 = *q1 && (FFABS(sum_q1q2) < beta2);

    return strong0 && strong1;
}
static int rv40_h_loop_filter_strength(uint8_t *src, int stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}

static int rv40_v_loop_filter_strength(uint8_t *src, int stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
}