Libav
vp3dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 the ffmpeg project
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
27 #include "libavutil/attributes.h"
28 #include "libavutil/intreadwrite.h"
29 #include "libavutil/common.h"
30 #include "libavutil/intreadwrite.h"
31 
32 #include "avcodec.h"
33 #include "rnd_avg.h"
34 #include "vp3dsp.h"
35 
/* Rounding term of the DC-only shortcut in idct(): it is shifted left by 16
 * and added to the raw product before the final ">> 20". */
#define IdctAdjustBeforeShift 8

/* Cosine table scaled by 65536:
 * xC<n>S<8-n> == round(65536 * cos(n * pi / 16)). */
#define xC1S7 64277
#define xC2S6 60547
#define xC3S5 54491
#define xC4S4 46341
#define xC5S3 36410
#define xC6S2 25080
#define xC7S1 12785

/*
 * Fixed-point multiply: the product a * b with its low 16 bits dropped.
 *
 * The product is formed in unsigned arithmetic, where wrap-around is well
 * defined, and only then converted back to int for the arithmetic shift.
 * A plain signed multiply overflows (undefined behaviour) as soon as
 * |a * b| reaches 2^31, which idct() can provoke because it passes sums and
 * differences of earlier products as b.  Whenever the signed product fits
 * in an int, the value is identical to the signed formulation.
 */
#define M(a, b) ((int)((unsigned)(a) * (b)) >> 16)
46 
47 static av_always_inline void idct(uint8_t *dst, int stride,
48  int16_t *input, int type)
49 {
50  int16_t *ip = input;
51 
52  int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
53  int Ed, Gd, Add, Bdd, Fd, Hd;
54 
55  int i;
56 
57  /* Inverse DCT on the rows now */
58  for (i = 0; i < 8; i++) {
59  /* Check for non-zero values */
60  if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
61  ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) {
62  A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
63  B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
64  C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
65  D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]);
66 
67  Ad = M(xC4S4, (A - C));
68  Bd = M(xC4S4, (B - D));
69 
70  Cd = A + C;
71  Dd = B + D;
72 
73  E = M(xC4S4, (ip[0 * 8] + ip[4 * 8]));
74  F = M(xC4S4, (ip[0 * 8] - ip[4 * 8]));
75 
76  G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]);
77  H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]);
78 
79  Ed = E - G;
80  Gd = E + G;
81 
82  Add = F + Ad;
83  Bdd = Bd - H;
84 
85  Fd = F - Ad;
86  Hd = Bd + H;
87 
88  /* Final sequence of operations over-write original inputs. */
89  ip[0 * 8] = Gd + Cd;
90  ip[7 * 8] = Gd - Cd;
91 
92  ip[1 * 8] = Add + Hd;
93  ip[2 * 8] = Add - Hd;
94 
95  ip[3 * 8] = Ed + Dd;
96  ip[4 * 8] = Ed - Dd;
97 
98  ip[5 * 8] = Fd + Bdd;
99  ip[6 * 8] = Fd - Bdd;
100  }
101 
102  ip += 1; /* next row */
103  }
104 
105  ip = input;
106 
107  for (i = 0; i < 8; i++) {
108  /* Check for non-zero values (bitwise or faster than ||) */
109  if (ip[1] | ip[2] | ip[3] |
110  ip[4] | ip[5] | ip[6] | ip[7]) {
111  A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
112  B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
113  C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
114  D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
115 
116  Ad = M(xC4S4, (A - C));
117  Bd = M(xC4S4, (B - D));
118 
119  Cd = A + C;
120  Dd = B + D;
121 
122  E = M(xC4S4, (ip[0] + ip[4])) + 8;
123  F = M(xC4S4, (ip[0] - ip[4])) + 8;
124 
125  if (type == 1) { // HACK
126  E += 16 * 128;
127  F += 16 * 128;
128  }
129 
130  G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
131  H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
132 
133  Ed = E - G;
134  Gd = E + G;
135 
136  Add = F + Ad;
137  Bdd = Bd - H;
138 
139  Fd = F - Ad;
140  Hd = Bd + H;
141 
142  /* Final sequence of operations over-write original inputs. */
143  if (type == 1) {
144  dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
145  dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
146 
147  dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
148  dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
149 
150  dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
151  dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
152 
153  dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
154  dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
155  } else {
156  dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
157  dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
158 
159  dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
160  dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
161 
162  dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
163  dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
164 
165  dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
166  dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
167  }
168  } else {
169  if (type == 1) {
170  dst[0*stride] =
171  dst[1*stride] =
172  dst[2*stride] =
173  dst[3*stride] =
174  dst[4*stride] =
175  dst[5*stride] =
176  dst[6*stride] =
177  dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20));
178  } else {
179  if (ip[0]) {
180  int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20;
181  dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v);
182  dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v);
183  dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v);
184  dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v);
185  dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v);
186  dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v);
187  dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v);
188  dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v);
189  }
190  }
191  }
192 
193  ip += 8; /* next column */
194  dst++;
195  }
196 }
197 
/*
 * Inverse-transform a block of 64 coefficients and store the pixels in
 * dest, then reset the coefficient block to all zeros.
 *
 * dest      : top-left pixel of the 8x8 destination area
 * line_size : distance in bytes between two destination lines
 * block     : 64 coefficients, cleared on return
 */
static void vp3_idct_put_c(uint8_t *dest /* align 8 */, int line_size,
                           int16_t *block /* align 16 */)
{
    /* type 1 makes idct() overwrite dest rather than add to it */
    idct(dest, line_size, block, 1);

    memset(block, 0, 64 * sizeof(block[0]));
}
204 
/*
 * Inverse-transform a block of 64 coefficients and add the result to the
 * pixels already present in dest, then reset the coefficient block to all
 * zeros.
 *
 * dest      : top-left pixel of the 8x8 destination area
 * line_size : distance in bytes between two destination lines
 * block     : 64 coefficients, cleared on return
 */
static void vp3_idct_add_c(uint8_t *dest /* align 8 */, int line_size,
                           int16_t *block /* align 16 */)
{
    /* any type other than 1 makes idct() add to dest */
    idct(dest, line_size, block, 2);

    memset(block, 0, 64 * sizeof(block[0]));
}
211 
/*
 * Shortcut for a block in which only block[0] is used: add one constant,
 * derived from block[0], to every pixel of an 8x8 area and clear block[0].
 *
 * dest      : top-left pixel of the 8x8 area
 * line_size : distance in bytes between two lines
 * block     : only block[0] is read; it is set to 0 on return
 */
static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size,
                              int16_t *block /* align 16 */)
{
    const int dc = (block[0] + 15) >> 5;
    int row, col;

    for (row = 0; row < 8; row++, dest += line_size)
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);

    block[0] = 0;
}
230 
/*
 * Filter across the boundary between the line at first_pixel and the line
 * directly above it, for eight neighbouring pixels.
 *
 * first_pixel     : leftmost pixel of the lower of the two boundary lines;
 *                   the two lines above and the one line below are read
 * stride          : distance in bytes between two lines
 * bounding_values : lookup table mapping the raw edge measure to the
 *                   correction that is applied.  The index runs from -127
 *                   to 128, so the pointer must refer to the interior of
 *                   the table.
 */
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
                                int *bounding_values)
{
    int x;

    for (x = 0; x < 8; x++) {
        uint8_t *below = first_pixel + x;
        uint8_t *above = below - stride;
        const int edge = (above[-stride] - below[stride]) +
                         (below[0] - above[0]) * 3;
        const int adjust = bounding_values[(edge + 4) >> 3];

        /* move the two pixels next to the boundary in opposite directions */
        *above = av_clip_uint8(*above + adjust);
        *below = av_clip_uint8(*below - adjust);
    }
}
247 
/*
 * Filter across the boundary between the pixel at first_pixel and the
 * pixel to its left, on eight consecutive lines.
 *
 * first_pixel     : pixel right of the boundary on the first line; two
 *                   pixels to the left and one to the right are read
 * stride          : distance in bytes between two lines
 * bounding_values : lookup table mapping the raw edge measure to the
 *                   correction that is applied.  The index runs from -127
 *                   to 128, so the pointer must refer to the interior of
 *                   the table.
 */
static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride,
                                int *bounding_values)
{
    uint8_t *const stop = first_pixel + 8 * stride;

    /* Terminating on pointer inequality keeps the walk correct for
     * negative strides as well. */
    while (first_pixel != stop) {
        const int edge = (first_pixel[-2] - first_pixel[1]) +
                         (first_pixel[0] - first_pixel[-1]) * 3;
        const int adjust = bounding_values[(edge + 4) >> 3];

        /* move the two pixels next to the boundary in opposite directions */
        first_pixel[-1] = av_clip_uint8(first_pixel[-1] + adjust);
        first_pixel[0]  = av_clip_uint8(first_pixel[0] - adjust);

        first_pixel += stride;
    }
}
263 
/*
 * Combine two 8-pixel-wide sources into dst with no_rnd_avg32(), four
 * bytes at a time.
 *
 * dst    : destination; written with AV_WN32A
 * src1   : first source, read with AV_RN32
 * src2   : second source, read with AV_RN32
 * stride : distance in bytes between two lines, shared by all three buffers
 * h      : number of lines to process
 */
static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
                                 const uint8_t *src2, ptrdiff_t stride, int h)
{
    int row, off;

    for (row = 0; row < h; row++) {
        const ptrdiff_t line = row * stride;

        /* each line is handled as two 32-bit halves */
        for (off = 0; off < 8; off += 4) {
            const uint32_t p = AV_RN32(src1 + line + off);
            const uint32_t q = AV_RN32(src2 + line + off);

            AV_WN32A(dst + line + off, no_rnd_avg32(p, q));
        }
    }
}
280 
282 {
284 
290 
291  if (ARCH_ARM)
292  ff_vp3dsp_init_arm(c, flags);
293  if (ARCH_PPC)
294  ff_vp3dsp_init_ppc(c, flags);
295  if (ARCH_X86)
296  ff_vp3dsp_init_x86(c, flags);
297 }
#define G
Definition: huffyuv.h:50
void(* put_no_rnd_pixels_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h)
Copy 8xH pixels from source to destination buffer using a bilinear filter with no rounding (i...
Definition: vp3dsp.h:36
void(* idct_put)(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.h:41
void(* h_loop_filter)(uint8_t *src, int stride, int *bounding_values)
Definition: vp3dsp.h:45
#define F
Definition: tiny_psnr.c:33
#define xC4S4
Definition: vp3dsp.c:40
static void vp3_idct_put_c(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.c:198
static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: vp3dsp.c:264
int stride
Definition: mace.c:144
#define AV_WN32A(p, v)
Definition: intreadwrite.h:458
Macro definitions for various function/variable attributes.
void(* idct_dc_add)(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.h:43
#define xC1S7
Definition: vp3dsp.c:37
#define M(a, b)
Definition: vp3dsp.c:45
uint8_t
#define av_cold
Definition: attributes.h:66
#define H
Definition: swscale-test.c:340
#define b
Definition: input.c:52
#define ARCH_X86
Definition: config.h:33
static int flags
Definition: log.c:44
static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
Definition: vp3dsp.c:231
#define B
Definition: huffyuv.h:49
#define xC6S2
Definition: vp3dsp.c:42
#define xC2S6
Definition: vp3dsp.c:38
#define xC7S1
Definition: vp3dsp.c:43
static uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
Definition: rnd_avg.h:33
static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
Definition: vp3dsp.c:47
static int filter_value(int in, int rrp[8], int v[9])
void(* idct_add)(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.h:42
#define E
Definition: options_table.h:38
static void vp3_idct_dc_add_c(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.c:212
Definition: vf_drawbox.c:37
Libavcodec external API header.
av_cold void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags)
static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
Definition: vp3dsp.c:248
static void vp3_idct_add_c(uint8_t *dest, int line_size, int16_t *block)
Definition: vp3dsp.c:205
void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
Definition: vp3dsp_init.c:42
#define D
Definition: options_table.h:39
#define AV_RN32(p)
Definition: intreadwrite.h:326
#define xC3S5
Definition: vp3dsp.c:39
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(constuint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(constint16_t *) pi >>8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t,*(constint16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(constint32_t *) pi >>24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t,*(constint32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(constfloat *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(constfloat *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(constfloat *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(constdouble *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(constdouble *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(constdouble *) pi *(1U<< 31))))#defineSET_CONV_FUNC_GROUP(ofmt, ifmt) staticvoidset_generic_function(AudioConvert *ac){}voidff_audio_convert_free(AudioConvert **ac){if(!*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);}AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enumAVSampleFormatout_fmt, enumAVSampleFormatin_fmt, intchannels, intsample_rate, 
intapply_map){AudioConvert *ac;intin_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) returnNULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method!=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt)>2){ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc){av_free(ac);returnNULL;}returnac;}in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar){ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar?ac->channels:1;}elseif(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;elseac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);returnac;}intff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in){intuse_generic=1;intlen=in->nb_samples;intp;if(ac->dc){av_dlog(ac->avr,"%dsamples-audio_convert:%sto%s(dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));returnff_convert_dither(ac-> dc
common internal and external API header
void(* v_loop_filter)(uint8_t *src, int stride, int *bounding_values)
Definition: vp3dsp.h:44
av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags)
#define ARCH_ARM
Definition: config.h:14
#define av_always_inline
Definition: attributes.h:40
#define IdctAdjustBeforeShift
Definition: vp3dsp.c:36
av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
Definition: vp3dsp.c:281
#define xC5S3
Definition: vp3dsp.c:41
#define ARCH_PPC
Definition: config.h:24
static int16_t block[64]
Definition: dct-test.c:88