dwt.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of Libav.
5  *
6  * Libav is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * Libav is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with Libav; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/attributes.h"
22 #include "dsputil.h"
23 #include "dwt.h"
24 
25 void ff_slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
26 {
27  int i;
28 
29  buf->base_buffer = base_buffer;
30  buf->line_count = line_count;
31  buf->line_width = line_width;
32  buf->data_count = max_allocated_lines;
33  buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
34  buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
35 
36  for(i = 0; i < max_allocated_lines; i++){
37  buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
38  }
39 
40  buf->data_stack_top = max_allocated_lines - 1;
41 }
42 
44 {
45  IDWTELEM * buffer;
46 
47  assert(buf->data_stack_top >= 0);
48 // assert(!buf->line[line]);
49  if (buf->line[line])
50  return buf->line[line];
51 
52  buffer = buf->data_stack[buf->data_stack_top];
53  buf->data_stack_top--;
54  buf->line[line] = buffer;
55 
56  return buffer;
57 }
58 
60 {
61  IDWTELEM * buffer;
62 
63  assert(line >= 0 && line < buf->line_count);
64  assert(buf->line[line]);
65 
66  buffer = buf->line[line];
67  buf->data_stack_top++;
68  buf->data_stack[buf->data_stack_top] = buffer;
69  buf->line[line] = NULL;
70 }
71 
73 {
74  int i;
75  for(i = 0; i < buf->line_count; i++){
76  if (buf->line[i])
78  }
79 }
80 
82 {
83  int i;
85 
86  for(i = buf->data_count - 1; i >= 0; i--){
87  av_freep(&buf->data_stack[i]);
88  }
89  av_freep(&buf->data_stack);
90  av_freep(&buf->line);
91 }
92 
93 static inline int mirror(int v, int m){
94  while((unsigned)v > (unsigned)m){
95  v=-v;
96  if(v<0) v+= 2*m;
97  }
98  return v;
99 }
100 
101 static av_always_inline void
102 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
103  int dst_step, int src_step, int ref_step,
104  int width, int mul, int add, int shift,
105  int highpass, int inverse){
106  const int mirror_left= !highpass;
107  const int mirror_right= (width&1) ^ highpass;
108  const int w= (width>>1) - 1 + (highpass & width);
109  int i;
110 
111 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
112  if(mirror_left){
113  dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
114  dst += dst_step;
115  src += src_step;
116  }
117 
118  for(i=0; i<w; i++){
119  dst[i*dst_step] =
120  LIFT(src[i*src_step],
121  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
122  inverse);
123  }
124 
125  if(mirror_right){
126  dst[w*dst_step] =
127  LIFT(src[w*src_step],
128  ((mul*2*ref[w*ref_step]+add)>>shift),
129  inverse);
130  }
131 }
132 
133 static av_always_inline void
135  int dst_step, int src_step, int ref_step,
136  int width, int mul, int add, int shift,
137  int highpass, int inverse){
138  const int mirror_left= !highpass;
139  const int mirror_right= (width&1) ^ highpass;
140  const int w= (width>>1) - 1 + (highpass & width);
141  int i;
142 
143 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
144  if(mirror_left){
145  dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
146  dst += dst_step;
147  src += src_step;
148  }
149 
150  for(i=0; i<w; i++){
151  dst[i*dst_step] =
152  LIFT(src[i*src_step],
153  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
154  inverse);
155  }
156 
157  if(mirror_right){
158  dst[w*dst_step] =
159  LIFT(src[w*src_step],
160  ((mul*2*ref[w*ref_step]+add)>>shift),
161  inverse);
162  }
163 }
164 
165 #ifndef liftS
166 static av_always_inline void
167 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
168  int dst_step, int src_step, int ref_step,
169  int width, int mul, int add, int shift,
170  int highpass, int inverse){
171  const int mirror_left= !highpass;
172  const int mirror_right= (width&1) ^ highpass;
173  const int w= (width>>1) - 1 + (highpass & width);
174  int i;
175 
176  assert(shift == 4);
177 #define LIFTS(src, ref, inv) \
178  ((inv) ? \
179  (src) + (((ref) + 4*(src))>>shift): \
180  -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
181  if(mirror_left){
182  dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
183  dst += dst_step;
184  src += src_step;
185  }
186 
187  for(i=0; i<w; i++){
188  dst[i*dst_step] =
189  LIFTS(src[i*src_step],
190  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
191  inverse);
192  }
193 
194  if(mirror_right){
195  dst[w*dst_step] =
196  LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
197  }
198 }
199 static av_always_inline void
200 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
201  int dst_step, int src_step, int ref_step,
202  int width, int mul, int add, int shift,
203  int highpass, int inverse){
204  const int mirror_left= !highpass;
205  const int mirror_right= (width&1) ^ highpass;
206  const int w= (width>>1) - 1 + (highpass & width);
207  int i;
208 
209  assert(shift == 4);
210 #define LIFTS(src, ref, inv) \
211  ((inv) ? \
212  (src) + (((ref) + 4*(src))>>shift): \
213  -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
214  if(mirror_left){
215  dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
216  dst += dst_step;
217  src += src_step;
218  }
219 
220  for(i=0; i<w; i++){
221  dst[i*dst_step] =
222  LIFTS(src[i*src_step],
223  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
224  inverse);
225  }
226 
227  if(mirror_right){
228  dst[w*dst_step] =
229  LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
230  }
231 }
232 #endif /* ! liftS */
233 
234 static void horizontal_decompose53i(DWTELEM *b, int width){
235  DWTELEM temp[width];
236  const int width2= width>>1;
237  int x;
238  const int w2= (width+1)>>1;
239 
240  for(x=0; x<width2; x++){
241  temp[x ]= b[2*x ];
242  temp[x+w2]= b[2*x + 1];
243  }
244  if(width&1)
245  temp[x ]= b[2*x ];
246 #if 0
247  {
248  int A1,A2,A3,A4;
249  A2= temp[1 ];
250  A4= temp[0 ];
251  A1= temp[0+width2];
252  A1 -= (A2 + A4)>>1;
253  A4 += (A1 + 1)>>1;
254  b[0+width2] = A1;
255  b[0 ] = A4;
256  for(x=1; x+1<width2; x+=2){
257  A3= temp[x+width2];
258  A4= temp[x+1 ];
259  A3 -= (A2 + A4)>>1;
260  A2 += (A1 + A3 + 2)>>2;
261  b[x+width2] = A3;
262  b[x ] = A2;
263 
264  A1= temp[x+1+width2];
265  A2= temp[x+2 ];
266  A1 -= (A2 + A4)>>1;
267  A4 += (A1 + A3 + 2)>>2;
268  b[x+1+width2] = A1;
269  b[x+1 ] = A4;
270  }
271  A3= temp[width-1];
272  A3 -= A2;
273  A2 += (A1 + A3 + 2)>>2;
274  b[width -1] = A3;
275  b[width2-1] = A2;
276  }
277 #else
278  lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
279  lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
280 #endif /* 0 */
281 }
282 
283 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
284  int i;
285 
286  for(i=0; i<width; i++){
287  b1[i] -= (b0[i] + b2[i])>>1;
288  }
289 }
290 
291 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
292  int i;
293 
294  for(i=0; i<width; i++){
295  b1[i] += (b0[i] + b2[i] + 2)>>2;
296  }
297 }
298 
299 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
300  int y;
301  DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
302  DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
303 
304  for(y=-2; y<height; y+=2){
305  DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
306  DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
307 
308  if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
309  if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
310 
311  if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
312  if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
313 
314  b0=b2;
315  b1=b3;
316  }
317 }
318 
319 static void horizontal_decompose97i(DWTELEM *b, int width){
320  DWTELEM temp[width];
321  const int w2= (width+1)>>1;
322 
323  lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
324  liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
325  lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
326  lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
327 }
328 
329 
330 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
331  int i;
332 
333  for(i=0; i<width; i++){
334  b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
335  }
336 }
337 
338 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
339  int i;
340 
341  for(i=0; i<width; i++){
342  b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
343  }
344 }
345 
346 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
347  int i;
348 
349  for(i=0; i<width; i++){
350 #ifdef liftS
351  b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
352 #else
353  b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
354 #endif
355  }
356 }
357 
358 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
359  int i;
360 
361  for(i=0; i<width; i++){
362  b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
363  }
364 }
365 
366 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
367  int y;
368  DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
369  DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
370  DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
371  DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
372 
373  for(y=-4; y<height; y+=2){
374  DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
375  DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
376 
377  if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
378  if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
379 
380  if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
381  if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
382  if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
383  if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
384 
385  b0=b2;
386  b1=b3;
387  b2=b4;
388  b3=b5;
389  }
390 }
391 
392 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
393  int level;
394 
395  for(level=0; level<decomposition_count; level++){
396  switch(type){
397  case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
398  case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
399  }
400  }
401 }
402 
403 static void horizontal_compose53i(IDWTELEM *b, int width){
404  IDWTELEM temp[width];
405  const int width2= width>>1;
406  const int w2= (width+1)>>1;
407  int x;
408 
409  for(x=0; x<width2; x++){
410  temp[2*x ]= b[x ];
411  temp[2*x + 1]= b[x+w2];
412  }
413  if(width&1)
414  temp[2*x ]= b[x ];
415 
416  b[0] = temp[0] - ((temp[1]+1)>>1);
417  for(x=2; x<width-1; x+=2){
418  b[x ] = temp[x ] - ((temp[x-1] + temp[x+1]+2)>>2);
419  b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1);
420  }
421  if(width&1){
422  b[x ] = temp[x ] - ((temp[x-1]+1)>>1);
423  b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1);
424  }else
425  b[x-1] = temp[x-1] + b[x-2];
426 }
427 
428 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
429  int i;
430 
431  for(i=0; i<width; i++){
432  b1[i] += (b0[i] + b2[i])>>1;
433  }
434 }
435 
436 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
437  int i;
438 
439  for(i=0; i<width; i++){
440  b1[i] -= (b0[i] + b2[i] + 2)>>2;
441  }
442 }
443 
444 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
445  cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
446  cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
447  cs->y = -1;
448 }
449 
451  cs->b0 = buffer + mirror(-1-1, height-1)*stride;
452  cs->b1 = buffer + mirror(-1 , height-1)*stride;
453  cs->y = -1;
454 }
455 
456 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
457  int y= cs->y;
458 
459  IDWTELEM *b0= cs->b0;
460  IDWTELEM *b1= cs->b1;
461  IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
462  IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
463 
464  if(y+1<(unsigned)height && y<(unsigned)height){
465  int x;
466 
467  for(x=0; x<width; x++){
468  b2[x] -= (b1[x] + b3[x] + 2)>>2;
469  b1[x] += (b0[x] + b2[x])>>1;
470  }
471  }else{
472  if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
473  if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
474  }
475 
476  if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
477  if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
478 
479  cs->b0 = b2;
480  cs->b1 = b3;
481  cs->y += 2;
482 }
483 
484 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
485  int y= cs->y;
486  IDWTELEM *b0= cs->b0;
487  IDWTELEM *b1= cs->b1;
488  IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
489  IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
490 
491  if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
492  if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
493 
494  if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
495  if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
496 
497  cs->b0 = b2;
498  cs->b1 = b3;
499  cs->y += 2;
500 }
501 
502 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
503  DWTCompose cs;
504  spatial_compose53i_init(&cs, buffer, height, stride);
505  while(cs.y <= height)
506  spatial_compose53i_dy(&cs, buffer, width, height, stride);
507 }
508 
509 
511  IDWTELEM temp[width];
512  const int w2= (width+1)>>1;
513 
514 #if 0 //maybe more understadable but slower
515  inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
516  inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1);
517 
518  inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1);
519  inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0);
520 #else
521  int x;
522  temp[0] = b[0] - ((3*b[w2]+2)>>2);
523  for(x=1; x<(width>>1); x++){
524  temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3);
525  temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
526  }
527  if(width&1){
528  temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2);
529  temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
530  }else
531  temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
532 
533  b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
534  for(x=2; x<width-1; x+=2){
535  b[x ] = temp[x ] + ((4*temp[x ] + temp[x-1] + temp[x+1]+8)>>4);
536  b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
537  }
538  if(width&1){
539  b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3);
540  b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
541  }else
542  b[x-1] = temp[x-1] + 3*b [x-2];
543 #endif
544 }
545 
546 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
547  int i;
548 
549  for(i=0; i<width; i++){
550  b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
551  }
552 }
553 
554 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
555  int i;
556 
557  for(i=0; i<width; i++){
558  b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
559  }
560 }
561 
562 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
563  int i;
564 
565  for(i=0; i<width; i++){
566 #ifdef liftS
567  b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
568 #else
569  b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
570 #endif
571  }
572 }
573 
574 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
575  int i;
576 
577  for(i=0; i<width; i++){
578  b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
579  }
580 }
581 
582 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
583  int i;
584 
585  for(i=0; i<width; i++){
586  b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
587  b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
588 #ifdef liftS
589  b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
590 #else
591  b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
592 #endif
593  b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
594  }
595 }
596 
597 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
598  cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
599  cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
600  cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
601  cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
602  cs->y = -3;
603 }
604 
606  cs->b0 = buffer + mirror(-3-1, height-1)*stride;
607  cs->b1 = buffer + mirror(-3 , height-1)*stride;
608  cs->b2 = buffer + mirror(-3+1, height-1)*stride;
609  cs->b3 = buffer + mirror(-3+2, height-1)*stride;
610  cs->y = -3;
611 }
612 
613 static void spatial_compose97i_dy_buffered(DWTContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
614  int y = cs->y;
615 
616  IDWTELEM *b0= cs->b0;
617  IDWTELEM *b1= cs->b1;
618  IDWTELEM *b2= cs->b2;
619  IDWTELEM *b3= cs->b3;
620  IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
621  IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
622 
623  if(y>0 && y+4<height){
624  dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
625  }else{
626  if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
627  if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
628  if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
629  if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
630  }
631 
632  if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
633  if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
634 
635  cs->b0=b2;
636  cs->b1=b3;
637  cs->b2=b4;
638  cs->b3=b5;
639  cs->y += 2;
640 }
641 
642 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
643  int y = cs->y;
644  IDWTELEM *b0= cs->b0;
645  IDWTELEM *b1= cs->b1;
646  IDWTELEM *b2= cs->b2;
647  IDWTELEM *b3= cs->b3;
648  IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
649  IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
650 
651  if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
652  if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
653  if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
654  if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
655 
656  if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
657  if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
658 
659  cs->b0=b2;
660  cs->b1=b3;
661  cs->b2=b4;
662  cs->b3=b5;
663  cs->y += 2;
664 }
665 
666 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
667  DWTCompose cs;
668  spatial_compose97i_init(&cs, buffer, height, stride);
669  while(cs.y <= height)
670  spatial_compose97i_dy(&cs, buffer, width, height, stride);
671 }
672 
673 void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
674  int level;
675  for(level=decomposition_count-1; level>=0; level--){
676  switch(type){
677  case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
678  case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
679  }
680  }
681 }
682 
683 void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
684  const int support = type==1 ? 3 : 5;
685  int level;
686  if(type==2) return;
687 
688  for(level=decomposition_count-1; level>=0; level--){
689  while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
690  switch(type){
691  case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
692  break;
693  case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
694  break;
695  }
696  }
697  }
698 }
699 
700 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
701  int level;
702  for(level=decomposition_count-1; level>=0; level--){
703  switch(type){
704  case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
705  case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
706  }
707  }
708 }
709 
710 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
711  const int support = type==1 ? 3 : 5;
712  int level;
713  if(type==2) return;
714 
715  for(level=decomposition_count-1; level>=0; level--){
716  while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
717  switch(type){
718  case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
719  break;
720  case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
721  break;
722  }
723  }
724  }
725 }
726 
727 void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
729  int y;
730  ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
731  for(y=0; y<height; y+=4)
732  ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
733 }
734 
735 static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
736  int s, i, j;
737  const int dec_count= w==8 ? 3 : 4;
738  int tmp[32*32];
739  int level, ori;
740  static const int scale[2][2][4][4]={
741  {
742  {
743  // 9/7 8x8 dec=3
744  {268, 239, 239, 213},
745  { 0, 224, 224, 152},
746  { 0, 135, 135, 110},
747  },{
748  // 9/7 16x16 or 32x32 dec=4
749  {344, 310, 310, 280},
750  { 0, 320, 320, 228},
751  { 0, 175, 175, 136},
752  { 0, 129, 129, 102},
753  }
754  },{
755  {
756  // 5/3 8x8 dec=3
757  {275, 245, 245, 218},
758  { 0, 230, 230, 156},
759  { 0, 138, 138, 113},
760  },{
761  // 5/3 16x16 or 32x32 dec=4
762  {352, 317, 317, 286},
763  { 0, 328, 328, 233},
764  { 0, 180, 180, 140},
765  { 0, 132, 132, 105},
766  }
767  }
768  };
769 
770  for (i = 0; i < h; i++) {
771  for (j = 0; j < w; j+=4) {
772  tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
773  tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
774  tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
775  tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
776  }
777  pix1 += line_size;
778  pix2 += line_size;
779  }
780 
781  ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
782 
783  s=0;
784  assert(w==h);
785  for(level=0; level<dec_count; level++){
786  for(ori= level ? 1 : 0; ori<4; ori++){
787  int size= w>>(dec_count-level);
788  int sx= (ori&1) ? size : 0;
789  int stride= 32<<(dec_count-level);
790  int sy= (ori&2) ? stride>>1 : 0;
791 
792  for(i=0; i<size; i++){
793  for(j=0; j<size; j++){
794  int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
795  s += FFABS(v);
796  }
797  }
798  }
799  }
800  assert(s>=0);
801  return s>>9;
802 }
803 
804 static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
805  return w_c(v, pix1, pix2, line_size, 8, h, 1);
806 }
807 
808 static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
809  return w_c(v, pix1, pix2, line_size, 8, h, 0);
810 }
811 
812 static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
813  return w_c(v, pix1, pix2, line_size, 16, h, 1);
814 }
815 
816 static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
817  return w_c(v, pix1, pix2, line_size, 16, h, 0);
818 }
819 
820 int ff_w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
821  return w_c(v, pix1, pix2, line_size, 32, h, 1);
822 }
823 
824 int ff_w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
825  return w_c(v, pix1, pix2, line_size, 32, h, 0);
826 }
827 
829 {
830  c->w53[0]= w53_16_c;
831  c->w53[1]= w53_8_c;
832  c->w97[0]= w97_16_c;
833  c->w97[1]= w97_8_c;
834 }
835 
837 {
841 
842  if (HAVE_MMX) ff_dwt_init_x86(c);
843 }