h264_cavlc.c
Go to the documentation of this file.
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
28 #define CABAC 0
29 
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37 
38 //#undef NDEBUG
39 #include <assert.h>
40 
/**
 * Maps the ue-golomb coded_block_pattern code of a non-intra4x4 macroblock
 * to the 4-bit luma cbp when no chroma is decoded (the caller rejects
 * codes above 15 before indexing).
 */
static const uint8_t golomb_to_inter_cbp_gray[16] = {
     0,  1,  2,  4,  8,  3,  5, 10,
    12, 15,  7, 11, 13, 14,  6,  9,
};
44 
/**
 * Maps the ue-golomb coded_block_pattern code of an intra4x4 macroblock
 * to the 4-bit luma cbp when no chroma is decoded (the caller rejects
 * codes above 15 before indexing).
 */
static const uint8_t golomb_to_intra4x4_cbp_gray[16] = {
    15,  0,  7, 11, 13, 14,  3,  5,
    10, 12,  1,  2,  4,  8,  6,  9,
};
48 
/**
 * Code lengths of the chroma DC coeff_token VLC (4 coefficients max).
 *
 * The decoder splits the decoded symbol into total_coeff = sym >> 2 and
 * trailing_ones = sym & 3, so each row below corresponds to one
 * total_coeff value (0..4) and each column to a trailing-ones count
 * (assuming the symbol equals the table index).
 * NOTE(review): a length of 0 presumably marks an unused combination.
 */
static const uint8_t chroma_dc_coeff_token_len[4*5] = {
    2, 0, 0, 0,     /* total_coeff 0 */
    6, 1, 0, 0,     /* total_coeff 1 */
    6, 6, 3, 0,     /* total_coeff 2 */
    6, 7, 7, 6,     /* total_coeff 3 */
    6, 8, 8, 7,     /* total_coeff 4 */
};

/**
 * Code words matching chroma_dc_coeff_token_len, same layout.
 */
static const uint8_t chroma_dc_coeff_token_bits[4*5] = {
    1, 0, 0, 0,     /* total_coeff 0 */
    7, 1, 0, 0,     /* total_coeff 1 */
    4, 6, 1, 0,     /* total_coeff 2 */
    3, 3, 2, 5,     /* total_coeff 3 */
    2, 3, 2, 0,     /* total_coeff 4 */
};
64 
/**
 * Code lengths of the 4:2:2 chroma DC coeff_token VLC (8 coefficients max).
 *
 * One row per total_coeff value (0..8), one column per trailing-ones
 * count (0..3); the decoder recovers both as sym >> 2 and sym & 3
 * (assuming the symbol equals the table index).
 * NOTE(review): a length of 0 presumably marks an unused combination.
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9] = {
     1,  0,  0,  0,     /* total_coeff 0 */
     7,  2,  0,  0,     /* total_coeff 1 */
     7,  7,  3,  0,     /* total_coeff 2 */
     9,  7,  7,  5,     /* total_coeff 3 */
     9,  9,  7,  6,     /* total_coeff 4 */
    10, 10,  9,  7,     /* total_coeff 5 */
    11, 11, 10,  7,     /* total_coeff 6 */
    12, 12, 11, 10,     /* total_coeff 7 */
    13, 12, 12, 11,     /* total_coeff 8 */
};

/**
 * Code words matching chroma422_dc_coeff_token_len, same layout.
 */
static const uint8_t chroma422_dc_coeff_token_bits[4*9] = {
     1,  0,  0,  0,     /* total_coeff 0 */
    15,  1,  0,  0,     /* total_coeff 1 */
    14, 13,  1,  0,     /* total_coeff 2 */
     7, 12, 11,  1,     /* total_coeff 3 */
     6,  5, 10,  1,     /* total_coeff 4 */
     7,  6,  4,  9,     /* total_coeff 5 */
     7,  6,  5,  8,     /* total_coeff 6 */
     7,  6,  5,  4,     /* total_coeff 7 */
     7,  5,  4,  4,     /* total_coeff 8 */
};
88 
/**
 * Code lengths of the four luma/AC coeff_token VLCs.
 *
 * The first index selects the VLC (chosen by the decoder from the
 * predicted non-zero count); within a table, entries are grouped in
 * fours: group k holds total_coeff == k, the position inside the group
 * is the trailing-ones count (the decoder uses sym >> 2 and sym & 3,
 * assuming the symbol equals the table index).
 * NOTE(review): a length of 0 presumably marks an unused combination.
 */
static const uint8_t coeff_token_len[4][4*17] = {
    {
         1,  0,  0,  0,
         6,  2,  0,  0,      8,  6,  3,  0,      9,  8,  7,  5,     10,  9,  8,  6,
        11, 10,  9,  7,     13, 11, 10,  8,     13, 13, 11,  9,     13, 13, 13, 10,
        14, 14, 13, 11,     14, 14, 14, 13,     15, 15, 14, 14,     15, 15, 15, 14,
        16, 15, 15, 15,     16, 16, 16, 15,     16, 16, 16, 16,     16, 16, 16, 16,
    },
    {
         2,  0,  0,  0,
         6,  2,  0,  0,      6,  5,  3,  0,      7,  6,  6,  4,      8,  6,  6,  4,
         8,  7,  7,  5,      9,  8,  8,  6,     11,  9,  9,  6,     11, 11, 11,  7,
        12, 11, 11,  9,     12, 12, 12, 11,     12, 12, 12, 11,     13, 13, 13, 12,
        13, 13, 13, 13,     13, 14, 13, 13,     14, 14, 14, 13,     14, 14, 14, 14,
    },
    {
         4,  0,  0,  0,
         6,  4,  0,  0,      6,  5,  4,  0,      6,  5,  5,  4,      7,  5,  5,  4,
         7,  5,  5,  4,      7,  6,  6,  4,      7,  6,  6,  4,      8,  7,  7,  5,
         8,  8,  7,  6,      9,  8,  8,  7,      9,  9,  8,  8,      9,  9,  9,  8,
        10,  9,  9,  9,     10, 10, 10, 10,     10, 10, 10, 10,     10, 10, 10, 10,
    },
    {
         6,  0,  0,  0,
         6,  6,  0,  0,      6,  6,  6,  0,      6,  6,  6,  6,      6,  6,  6,  6,
         6,  6,  6,  6,      6,  6,  6,  6,      6,  6,  6,  6,      6,  6,  6,  6,
         6,  6,  6,  6,      6,  6,  6,  6,      6,  6,  6,  6,      6,  6,  6,  6,
         6,  6,  6,  6,      6,  6,  6,  6,      6,  6,  6,  6,      6,  6,  6,  6,
    }
};

/**
 * Code words matching coeff_token_len, same layout.
 */
static const uint8_t coeff_token_bits[4][4*17] = {
    {
         1,  0,  0,  0,
         5,  1,  0,  0,      7,  4,  1,  0,      7,  6,  5,  3,      7,  6,  5,  3,
         7,  6,  5,  4,     15,  6,  5,  4,     11, 14,  5,  4,      8, 10, 13,  4,
        15, 14,  9,  4,     11, 10, 13, 12,     15, 14,  9, 12,     11, 10, 13,  8,
        15,  1,  9, 12,     11, 14, 13,  8,      7, 10,  9, 12,      4,  6,  5,  8,
    },
    {
         3,  0,  0,  0,
        11,  2,  0,  0,      7,  7,  3,  0,      7, 10,  9,  5,      7,  6,  5,  4,
         4,  6,  5,  6,      7,  6,  5,  8,     15,  6,  5,  4,     11, 14, 13,  4,
        15, 10,  9,  4,     11, 14, 13, 12,      8, 10,  9,  8,     15, 14, 13, 12,
        11, 10,  9, 12,      7, 11,  6,  8,      9,  8, 10,  1,      7,  6,  5,  4,
    },
    {
        15,  0,  0,  0,
        15, 14,  0,  0,     11, 15, 13,  0,      8, 12, 14, 12,     15, 10, 11, 11,
        11,  8,  9, 10,      9, 14, 13,  9,      8, 10,  9,  8,     15, 14, 13, 13,
        11, 14, 10, 12,     15, 10, 13, 12,     11, 14,  9, 12,      8, 10, 13,  8,
        13,  7,  9, 12,      9, 12, 11, 10,      5,  8,  7,  6,      1,  4,  3,  2,
    },
    {
         3,  0,  0,  0,
         0,  1,  0,  0,      4,  5,  6,  0,      8,  9, 10, 11,     12, 13, 14, 15,
        16, 17, 18, 19,     20, 21, 22, 23,     24, 25, 26, 27,     28, 29, 30, 31,
        32, 33, 34, 35,     36, 37, 38, 39,     40, 41, 42, 43,     44, 45, 46, 47,
        48, 49, 50, 51,     52, 53, 54, 55,     56, 57, 58, 59,     60, 61, 62, 63,
    }
};
150 
/**
 * Code lengths of the total_zeros VLCs for blocks of up to 16 coefficients.
 *
 * Row r is the table used when total_coeff == r + 1 (the decoder indexes
 * its VLC array with total_coeff - 1); the column is the total_zeros
 * value. Only 15 rows are given; the 16th row and all unlisted entries
 * are zero-initialized.
 */
static const uint8_t total_zeros_len[16][16] = {
    { 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9 },
    { 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6 },
    { 4, 3, 3, 3, 4, 4, 3, 3, 4, 5, 5, 6, 5, 6 },
    { 5, 3, 4, 4, 3, 3, 3, 4, 3, 4, 5, 5, 5 },
    { 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 4, 5 },
    { 6, 5, 3, 3, 3, 3, 3, 3, 4, 3, 6 },
    { 6, 5, 3, 3, 3, 2, 3, 4, 3, 6 },
    { 6, 4, 5, 3, 2, 2, 3, 3, 6 },
    { 6, 6, 4, 2, 2, 3, 2, 5 },
    { 5, 5, 3, 2, 2, 2, 4 },
    { 4, 4, 3, 3, 1, 3 },
    { 4, 4, 2, 1, 3 },
    { 3, 3, 1, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

/**
 * Code words matching total_zeros_len, same layout.
 */
static const uint8_t total_zeros_bits[16][16] = {
    { 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1 },
    { 7, 6, 5, 4, 3, 5, 4, 3, 2, 3, 2, 3, 2, 1, 0 },
    { 5, 7, 6, 5, 4, 3, 4, 3, 2, 3, 2, 1, 1, 0 },
    { 3, 7, 5, 4, 6, 5, 4, 3, 3, 2, 2, 1, 0 },
    { 5, 4, 3, 7, 6, 5, 4, 3, 2, 1, 1, 0 },
    { 1, 1, 7, 6, 5, 4, 3, 2, 1, 1, 0 },
    { 1, 1, 5, 4, 3, 3, 2, 1, 1, 0 },
    { 1, 1, 1, 3, 3, 2, 2, 1, 0 },
    { 1, 0, 1, 3, 2, 1, 1, 1 },
    { 1, 0, 1, 3, 2, 1, 1 },
    { 0, 1, 1, 2, 1, 3 },
    { 0, 1, 1, 1, 1 },
    { 0, 1, 1, 1 },
    { 0, 1, 1 },
    { 0, 1 },
};
186 
/**
 * Code lengths of the chroma DC total_zeros VLCs (4 coefficients max).
 *
 * Row r is used when total_coeff == r + 1; the column is the
 * total_zeros value.
 */
static const uint8_t chroma_dc_total_zeros_len[3][4] = {
    { 1, 2, 3, 3 },
    { 1, 2, 2, 0 },
    { 1, 1, 0, 0 },
};

/**
 * Code words matching chroma_dc_total_zeros_len, same layout.
 */
static const uint8_t chroma_dc_total_zeros_bits[3][4] = {
    { 1, 1, 1, 0 },
    { 1, 1, 0, 0 },
    { 1, 0, 0, 0 },
};
198 
/**
 * Code lengths of the 4:2:2 chroma DC total_zeros VLCs (8 coefficients max).
 *
 * Row r is used when total_coeff == r + 1; the column is the
 * total_zeros value. Unlisted trailing entries are zero-initialized.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8] = {
    { 1, 3, 3, 4, 4, 4, 5, 5 },
    { 3, 2, 3, 3, 3, 3, 3 },
    { 3, 3, 2, 2, 3, 3 },
    { 3, 2, 2, 2, 3 },
    { 2, 2, 2, 2 },
    { 2, 2, 1 },
    { 1, 1 },
};

/**
 * Code words matching chroma422_dc_total_zeros_len, same layout.
 */
static const uint8_t chroma422_dc_total_zeros_bits[7][8] = {
    { 1, 2, 3, 2, 3, 1, 1, 0 },
    { 0, 1, 1, 4, 5, 6, 7 },
    { 0, 1, 1, 2, 6, 7 },
    { 6, 0, 1, 2, 7 },
    { 0, 1, 2, 3 },
    { 0, 1, 1 },
    { 0, 1 },
};
218 
/**
 * Code lengths of the run_before VLCs.
 *
 * Rows 0..5 feed the tables the decoder uses while zeros_left is 1..6
 * (row = zeros_left - 1); row 6 feeds the separate table used once
 * zeros_left is 7 or more. The column is the run_before value.
 * Unlisted trailing entries are zero-initialized.
 */
static const uint8_t run_len[7][16] = {
    { 1, 1 },
    { 1, 2, 2 },
    { 2, 2, 2, 2 },
    { 2, 2, 2, 3, 3 },
    { 2, 2, 3, 3, 3, 3 },
    { 2, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
};

/**
 * Code words matching run_len, same layout.
 */
static const uint8_t run_bits[7][16] = {
    { 1, 0 },
    { 1, 1, 0 },
    { 3, 2, 1, 0 },
    { 3, 2, 1, 1, 0 },
    { 3, 2, 3, 2, 1, 0 },
    { 3, 0, 1, 3, 2, 5, 4 },
    { 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
};
238 
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242 
246 
250 
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
254 
258 
262 
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
266 
267 static VLC run7_vlc;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
270 
/* Number of bitstream bits peeked at to look up a level in cavlc_level_tab. */
#define LEVEL_TAB_BITS 8

/*
 * Level lookup table indexed by [suffix_length][next LEVEL_TAB_BITS bits].
 * Element [0] is either the decoded signed level or, when it is >= 100,
 * an escape carrying 100 + level_prefix; element [1] is the number of
 * bits the decoder has to skip for that entry.
 */
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];

/* Index width (in bits) of the first-level lookup of each VLC. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS      8
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS   13
#define COEFF_TOKEN_VLC_BITS                8
#define TOTAL_ZEROS_VLC_BITS                9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS      3
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS   5
#define RUN_VLC_BITS                        3
#define RUN7_VLC_BITS                       6
282 
287 static inline int pred_non_zero_count(H264Context *h, int n){
288  const int index8= scan8[n];
289  const int left= h->non_zero_count_cache[index8 - 1];
290  const int top = h->non_zero_count_cache[index8 - 8];
291  int i= left + top;
292 
293  if(i<64) i= (i+1)>>1;
294 
295  tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296 
297  return i&31;
298 }
299 
300 static av_cold void init_cavlc_level_tab(void){
301  int suffix_length;
302  unsigned int i;
303 
304  for(suffix_length=0; suffix_length<7; suffix_length++){
305  for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306  int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307 
308  if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309  int level_code = (prefix << suffix_length) +
310  (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311  int mask = -(level_code&1);
312  level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313  cavlc_level_tab[suffix_length][i][0]= level_code;
314  cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315  }else if(prefix + 1 <= LEVEL_TAB_BITS){
316  cavlc_level_tab[suffix_length][i][0]= prefix+100;
317  cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318  }else{
319  cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320  cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321  }
322  }
323  }
324 }
325 
327  static int done = 0;
328 
329  if (!done) {
330  int i;
331  int offset;
332  done = 1;
333 
334  chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335  chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336  init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337  &chroma_dc_coeff_token_len [0], 1, 1,
338  &chroma_dc_coeff_token_bits[0], 1, 1,
340 
341  chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342  chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343  init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344  &chroma422_dc_coeff_token_len [0], 1, 1,
347 
348  offset = 0;
349  for(i=0; i<4; i++){
350  coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351  coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352  init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353  &coeff_token_len [i][0], 1, 1,
354  &coeff_token_bits[i][0], 1, 1,
356  offset += coeff_token_vlc_tables_size[i];
357  }
358  /*
359  * This is a one time safety check to make sure that
360  * the packed static coeff_token_vlc table sizes
361  * were initialized correctly.
362  */
363  assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364 
365  for(i=0; i<3; i++){
366  chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367  chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368  init_vlc(&chroma_dc_total_zeros_vlc[i],
370  &chroma_dc_total_zeros_len [i][0], 1, 1,
371  &chroma_dc_total_zeros_bits[i][0], 1, 1,
373  }
374 
375  for(i=0; i<7; i++){
376  chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377  chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378  init_vlc(&chroma422_dc_total_zeros_vlc[i],
380  &chroma422_dc_total_zeros_len [i][0], 1, 1,
381  &chroma422_dc_total_zeros_bits[i][0], 1, 1,
383  }
384 
385  for(i=0; i<15; i++){
386  total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387  total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388  init_vlc(&total_zeros_vlc[i],
390  &total_zeros_len [i][0], 1, 1,
391  &total_zeros_bits[i][0], 1, 1,
393  }
394 
395  for(i=0; i<6; i++){
396  run_vlc[i].table = run_vlc_tables[i];
398  init_vlc(&run_vlc[i],
399  RUN_VLC_BITS, 7,
400  &run_len [i][0], 1, 1,
401  &run_bits[i][0], 1, 1,
403  }
404  run7_vlc.table = run7_vlc_table,
406  init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407  &run_len [6][0], 1, 1,
408  &run_bits[6][0], 1, 1,
410 
412  }
413 }
414 
418 static inline int get_level_prefix(GetBitContext *gb){
419  unsigned int buf;
420  int log;
421 
422  OPEN_READER(re, gb);
423  UPDATE_CACHE(re, gb);
424  buf=GET_CACHE(re, gb);
425 
426  log= 32 - av_log2(buf);
427 #ifdef TRACE
428  print_bin(buf>>(32-log), log);
429  av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431 
432  LAST_SKIP_BITS(re, gb, log);
433  CLOSE_READER(re, gb);
434 
435  return log-1;
436 }
437 
445 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446  MpegEncContext * const s = &h->s;
447  static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
448  int level[16];
449  int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
450 
451  //FIXME put trailing_onex into the context
452 
453  if(max_coeff <= 8){
454  if (max_coeff == 4)
455  coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
456  else
457  coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
458  total_coeff= coeff_token>>2;
459  }else{
460  if(n >= LUMA_DC_BLOCK_INDEX){
461  total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
462  coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
463  total_coeff= coeff_token>>2;
464  }else{
465  total_coeff= pred_non_zero_count(h, n);
466  coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
467  total_coeff= coeff_token>>2;
468  }
469  }
470  h->non_zero_count_cache[ scan8[n] ]= total_coeff;
471 
472  //FIXME set last_non_zero?
473 
474  if(total_coeff==0)
475  return 0;
476  if(total_coeff > (unsigned)max_coeff) {
477  av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
478  return -1;
479  }
480 
481  trailing_ones= coeff_token&3;
482  tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
483  assert(total_coeff<=16);
484 
485  i = show_bits(gb, 3);
486  skip_bits(gb, trailing_ones);
487  level[0] = 1-((i&4)>>1);
488  level[1] = 1-((i&2) );
489  level[2] = 1-((i&1)<<1);
490 
491  if(trailing_ones<total_coeff) {
492  int mask, prefix;
493  int suffix_length = total_coeff > 10 & trailing_ones < 3;
494  int bitsi= show_bits(gb, LEVEL_TAB_BITS);
495  int level_code= cavlc_level_tab[suffix_length][bitsi][0];
496 
497  skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
498  if(level_code >= 100){
499  prefix= level_code - 100;
500  if(prefix == LEVEL_TAB_BITS)
501  prefix += get_level_prefix(gb);
502 
503  //first coefficient has suffix_length equal to 0 or 1
504  if(prefix<14){ //FIXME try to build a large unified VLC table for all this
505  if(suffix_length)
506  level_code= (prefix<<1) + get_bits1(gb); //part
507  else
508  level_code= prefix; //part
509  }else if(prefix==14){
510  if(suffix_length)
511  level_code= (prefix<<1) + get_bits1(gb); //part
512  else
513  level_code= prefix + get_bits(gb, 4); //part
514  }else{
515  level_code= 30 + get_bits(gb, prefix-3); //part
516  if(prefix>=16){
517  if(prefix > 25+3){
518  av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
519  return -1;
520  }
521  level_code += (1<<(prefix-3))-4096;
522  }
523  }
524 
525  if(trailing_ones < 3) level_code += 2;
526 
527  suffix_length = 2;
528  mask= -(level_code&1);
529  level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
530  }else{
531  level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
532 
533  suffix_length = 1 + (level_code + 3U > 6U);
534  level[trailing_ones]= level_code;
535  }
536 
537  //remaining coefficients have suffix_length > 0
538  for(i=trailing_ones+1;i<total_coeff;i++) {
539  static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540  int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541  level_code= cavlc_level_tab[suffix_length][bitsi][0];
542 
543  skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544  if(level_code >= 100){
545  prefix= level_code - 100;
546  if(prefix == LEVEL_TAB_BITS){
547  prefix += get_level_prefix(gb);
548  }
549  if(prefix<15){
550  level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551  }else{
552  level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
553  if(prefix>=16)
554  level_code += (1<<(prefix-3))-4096;
555  }
556  mask= -(level_code&1);
557  level_code= (((2+level_code)>>1) ^ mask) - mask;
558  }
559  level[i]= level_code;
560  suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
561  }
562  }
563 
564  if(total_coeff == max_coeff)
565  zeros_left=0;
566  else{
567  if (max_coeff <= 8) {
568  if (max_coeff == 4)
569  zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
571  else
572  zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
574  } else {
575  zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
576  }
577  }
578 
579 #define STORE_BLOCK(type) \
580  scantable += zeros_left + total_coeff - 1; \
581  if(n >= LUMA_DC_BLOCK_INDEX){ \
582  ((type*)block)[*scantable] = level[0]; \
583  for(i=1;i<total_coeff && zeros_left > 0;i++) { \
584  if(zeros_left < 7) \
585  run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
586  else \
587  run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
588  zeros_left -= run_before; \
589  scantable -= 1 + run_before; \
590  ((type*)block)[*scantable]= level[i]; \
591  } \
592  for(;i<total_coeff;i++) { \
593  scantable--; \
594  ((type*)block)[*scantable]= level[i]; \
595  } \
596  }else{ \
597  ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
598  for(i=1;i<total_coeff && zeros_left > 0;i++) { \
599  if(zeros_left < 7) \
600  run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
601  else \
602  run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
603  zeros_left -= run_before; \
604  scantable -= 1 + run_before; \
605  ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
606  } \
607  for(;i<total_coeff;i++) { \
608  scantable--; \
609  ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
610  } \
611  }
612 
613  if (h->pixel_shift) {
614  STORE_BLOCK(int32_t)
615  } else {
616  STORE_BLOCK(int16_t)
617  }
618 
619  if(zeros_left<0){
620  av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
621  return -1;
622  }
623 
624  return 0;
625 }
626 
627 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
628  int i4x4, i8x8;
629  MpegEncContext * const s = &h->s;
630  int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
631  if(IS_INTRA16x16(mb_type)){
632  AV_ZERO128(h->mb_luma_dc[p]+0);
633  AV_ZERO128(h->mb_luma_dc[p]+8);
634  AV_ZERO128(h->mb_luma_dc[p]+16);
635  AV_ZERO128(h->mb_luma_dc[p]+24);
636  if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
637  return -1; //FIXME continue if partitioned and other return -1 too
638  }
639 
640  assert((cbp&15) == 0 || (cbp&15) == 15);
641 
642  if(cbp&15){
643  for(i8x8=0; i8x8<4; i8x8++){
644  for(i4x4=0; i4x4<4; i4x4++){
645  const int index= i4x4 + 4*i8x8 + p*16;
646  if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
647  index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
648  return -1;
649  }
650  }
651  }
652  return 0xf;
653  }else{
654  fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
655  return 0;
656  }
657  }else{
658  int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
659  /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
660  int new_cbp = 0;
661  for(i8x8=0; i8x8<4; i8x8++){
662  if(cbp & (1<<i8x8)){
663  if(IS_8x8DCT(mb_type)){
664  DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
665  uint8_t *nnz;
666  for(i4x4=0; i4x4<4; i4x4++){
667  const int index= i4x4 + 4*i8x8 + p*16;
668  if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
669  h->dequant8_coeff[cqm][qscale], 16) < 0 )
670  return -1;
671  }
672  nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
673  nnz[0] += nnz[1] + nnz[8] + nnz[9];
674  new_cbp |= !!nnz[0] << i8x8;
675  }else{
676  for(i4x4=0; i4x4<4; i4x4++){
677  const int index= i4x4 + 4*i8x8 + p*16;
678  if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
679  scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
680  return -1;
681  }
682  new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
683  }
684  }
685  }else{
686  uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
687  nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
688  }
689  }
690  return new_cbp;
691  }
692 }
693 
695  MpegEncContext * const s = &h->s;
696  int mb_xy;
697  int partition_count;
698  unsigned int mb_type, cbp;
699  int dct8x8_allowed= h->pps.transform_8x8_mode;
700  int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
701  const int pixel_shift = h->pixel_shift;
702 
703  mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
704 
705  tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
706  cbp = 0; /* avoid warning. FIXME: find a solution without slowing
707  down the code */
709  if(s->mb_skip_run==-1)
710  s->mb_skip_run= get_ue_golomb(&s->gb);
711 
712  if (s->mb_skip_run--) {
713  if(FRAME_MBAFF && (s->mb_y&1) == 0){
714  if(s->mb_skip_run==0)
716  }
717  decode_mb_skip(h);
718  return 0;
719  }
720  }
721  if(FRAME_MBAFF){
722  if( (s->mb_y&1) == 0 )
724  }
725 
726  h->prev_mb_skipped= 0;
727 
728  mb_type= get_ue_golomb(&s->gb);
730  if(mb_type < 23){
731  partition_count= b_mb_type_info[mb_type].partition_count;
732  mb_type= b_mb_type_info[mb_type].type;
733  }else{
734  mb_type -= 23;
735  goto decode_intra_mb;
736  }
737  }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
738  if(mb_type < 5){
739  partition_count= p_mb_type_info[mb_type].partition_count;
740  mb_type= p_mb_type_info[mb_type].type;
741  }else{
742  mb_type -= 5;
743  goto decode_intra_mb;
744  }
745  }else{
746  assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
747  if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
748  mb_type--;
749 decode_intra_mb:
750  if(mb_type > 25){
751  av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
752  return -1;
753  }
754  partition_count=0;
755  cbp= i_mb_type_info[mb_type].cbp;
757  mb_type= i_mb_type_info[mb_type].type;
758  }
759 
760  if(MB_FIELD)
761  mb_type |= MB_TYPE_INTERLACED;
762 
763  h->slice_table[ mb_xy ]= h->slice_num;
764 
765  if(IS_INTRA_PCM(mb_type)){
766  unsigned int x;
767  static const uint16_t mb_sizes[4] = {256,384,512,768};
768  const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
769 
770  // We assume these blocks are very rare so we do not optimize it.
771  align_get_bits(&s->gb);
772  if (get_bits_left(&s->gb) < mb_size) {
773  av_log(s->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
774  return AVERROR_INVALIDDATA;
775  }
776 
777  // The pixels are stored in the same order as levels in h->mb array.
778  for(x=0; x < mb_size; x++){
779  ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
780  }
781 
782  // In deblocking, the quantizer is 0
784  // All coeffs are present
785  memset(h->non_zero_count[mb_xy], 16, 48);
786 
787  s->current_picture.f.mb_type[mb_xy] = mb_type;
788  return 0;
789  }
790 
791  if(MB_MBAFF){
792  h->ref_count[0] <<= 1;
793  h->ref_count[1] <<= 1;
794  }
795 
796  fill_decode_neighbors(h, mb_type);
797  fill_decode_caches(h, mb_type);
798 
799  //mb_pred
800  if(IS_INTRA(mb_type)){
801  int pred_mode;
802 // init_top_left_availability(h);
803  if(IS_INTRA4x4(mb_type)){
804  int i;
805  int di = 1;
806  if(dct8x8_allowed && get_bits1(&s->gb)){
807  mb_type |= MB_TYPE_8x8DCT;
808  di = 4;
809  }
810 
811 // fill_intra4x4_pred_table(h);
812  for(i=0; i<16; i+=di){
813  int mode= pred_intra_mode(h, i);
814 
815  if(!get_bits1(&s->gb)){
816  const int rem_mode= get_bits(&s->gb, 3);
817  mode = rem_mode + (rem_mode >= mode);
818  }
819 
820  if(di==4)
821  fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
822  else
823  h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
824  }
827  return -1;
828  }else{
830  if(h->intra16x16_pred_mode < 0)
831  return -1;
832  }
833  if(decode_chroma){
834  pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb), 1);
835  if(pred_mode < 0)
836  return -1;
837  h->chroma_pred_mode= pred_mode;
838  } else {
840  }
841  }else if(partition_count==4){
842  int i, j, sub_partition_count[4], list, ref[2][4];
843 
845  for(i=0; i<4; i++){
846  h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
847  if(h->sub_mb_type[i] >=13){
848  av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
849  return -1;
850  }
851  sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
853  }
854  if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
855  ff_h264_pred_direct_motion(h, &mb_type);
856  h->ref_cache[0][scan8[4]] =
857  h->ref_cache[1][scan8[4]] =
858  h->ref_cache[0][scan8[12]] =
859  h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
860  }
861  }else{
862  assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
863  for(i=0; i<4; i++){
864  h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
865  if(h->sub_mb_type[i] >=4){
866  av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
867  return -1;
868  }
869  sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
871  }
872  }
873 
874  for(list=0; list<h->list_count; list++){
875  int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
876  for(i=0; i<4; i++){
877  if(IS_DIRECT(h->sub_mb_type[i])) continue;
878  if(IS_DIR(h->sub_mb_type[i], 0, list)){
879  unsigned int tmp;
880  if(ref_count == 1){
881  tmp= 0;
882  }else if(ref_count == 2){
883  tmp= get_bits1(&s->gb)^1;
884  }else{
885  tmp= get_ue_golomb_31(&s->gb);
886  if(tmp>=ref_count){
887  av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
888  return -1;
889  }
890  }
891  ref[list][i]= tmp;
892  }else{
893  //FIXME
894  ref[list][i] = -1;
895  }
896  }
897  }
898 
899  if(dct8x8_allowed)
900  dct8x8_allowed = get_dct8x8_allowed(h);
901 
902  for(list=0; list<h->list_count; list++){
903  for(i=0; i<4; i++){
904  if(IS_DIRECT(h->sub_mb_type[i])) {
905  h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
906  continue;
907  }
908  h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
909  h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
910 
911  if(IS_DIR(h->sub_mb_type[i], 0, list)){
912  const int sub_mb_type= h->sub_mb_type[i];
913  const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
914  for(j=0; j<sub_partition_count[i]; j++){
915  int mx, my;
916  const int index= 4*i + block_width*j;
917  int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
918  pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
919  mx += get_se_golomb(&s->gb);
920  my += get_se_golomb(&s->gb);
921  tprintf(s->avctx, "final mv:%d %d\n", mx, my);
922 
923  if(IS_SUB_8X8(sub_mb_type)){
924  mv_cache[ 1 ][0]=
925  mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
926  mv_cache[ 1 ][1]=
927  mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
928  }else if(IS_SUB_8X4(sub_mb_type)){
929  mv_cache[ 1 ][0]= mx;
930  mv_cache[ 1 ][1]= my;
931  }else if(IS_SUB_4X8(sub_mb_type)){
932  mv_cache[ 8 ][0]= mx;
933  mv_cache[ 8 ][1]= my;
934  }
935  mv_cache[ 0 ][0]= mx;
936  mv_cache[ 0 ][1]= my;
937  }
938  }else{
939  uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
940  p[0] = p[1]=
941  p[8] = p[9]= 0;
942  }
943  }
944  }
945  }else if(IS_DIRECT(mb_type)){
946  ff_h264_pred_direct_motion(h, &mb_type);
947  dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
948  }else{
949  int list, mx, my, i;
950  //FIXME we should set ref_idx_l? to 0 if we use that later ...
951  if(IS_16X16(mb_type)){
952  for(list=0; list<h->list_count; list++){
953  unsigned int val;
954  if(IS_DIR(mb_type, 0, list)){
955  if(h->ref_count[list]==1){
956  val= 0;
957  }else if(h->ref_count[list]==2){
958  val= get_bits1(&s->gb)^1;
959  }else{
960  val= get_ue_golomb_31(&s->gb);
961  if(val >= h->ref_count[list]){
962  av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
963  return -1;
964  }
965  }
966  fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
967  }
968  }
969  for(list=0; list<h->list_count; list++){
970  if(IS_DIR(mb_type, 0, list)){
971  pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
972  mx += get_se_golomb(&s->gb);
973  my += get_se_golomb(&s->gb);
974  tprintf(s->avctx, "final mv:%d %d\n", mx, my);
975 
976  fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
977  }
978  }
979  }
980  else if(IS_16X8(mb_type)){
981  for(list=0; list<h->list_count; list++){
982  for(i=0; i<2; i++){
983  unsigned int val;
984  if(IS_DIR(mb_type, i, list)){
985  if(h->ref_count[list] == 1){
986  val= 0;
987  }else if(h->ref_count[list] == 2){
988  val= get_bits1(&s->gb)^1;
989  }else{
990  val= get_ue_golomb_31(&s->gb);
991  if(val >= h->ref_count[list]){
992  av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
993  return -1;
994  }
995  }
996  }else
997  val= LIST_NOT_USED&0xFF;
998  fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
999  }
1000  }
1001  for(list=0; list<h->list_count; list++){
1002  for(i=0; i<2; i++){
1003  unsigned int val;
1004  if(IS_DIR(mb_type, i, list)){
1005  pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1006  mx += get_se_golomb(&s->gb);
1007  my += get_se_golomb(&s->gb);
1008  tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1009 
1010  val= pack16to32(mx,my);
1011  }else
1012  val=0;
1013  fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1014  }
1015  }
1016  }else{
1017  assert(IS_8X16(mb_type));
1018  for(list=0; list<h->list_count; list++){
1019  for(i=0; i<2; i++){
1020  unsigned int val;
1021  if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1022  if(h->ref_count[list]==1){
1023  val= 0;
1024  }else if(h->ref_count[list]==2){
1025  val= get_bits1(&s->gb)^1;
1026  }else{
1027  val= get_ue_golomb_31(&s->gb);
1028  if(val >= h->ref_count[list]){
1029  av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1030  return -1;
1031  }
1032  }
1033  }else
1034  val= LIST_NOT_USED&0xFF;
1035  fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1036  }
1037  }
1038  for(list=0; list<h->list_count; list++){
1039  for(i=0; i<2; i++){
1040  unsigned int val;
1041  if(IS_DIR(mb_type, i, list)){
1042  pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1043  mx += get_se_golomb(&s->gb);
1044  my += get_se_golomb(&s->gb);
1045  tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1046 
1047  val= pack16to32(mx,my);
1048  }else
1049  val=0;
1050  fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1051  }
1052  }
1053  }
1054  }
1055 
1056  if(IS_INTER(mb_type))
1057  write_back_motion(h, mb_type);
1058 
1059  if(!IS_INTRA16x16(mb_type)){
1060  cbp= get_ue_golomb(&s->gb);
1061 
1062  if(decode_chroma){
1063  if(cbp > 47){
1064  av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1065  return -1;
1066  }
1067  if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1068  else cbp= golomb_to_inter_cbp [cbp];
1069  }else{
1070  if(cbp > 15){
1071  av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1072  return -1;
1073  }
1074  if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1075  else cbp= golomb_to_inter_cbp_gray[cbp];
1076  }
1077  }
1078 
1079  if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1080  mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1081  }
1082  h->cbp=
1083  h->cbp_table[mb_xy]= cbp;
1084  s->current_picture.f.mb_type[mb_xy] = mb_type;
1085 
1086  if(cbp || IS_INTRA16x16(mb_type)){
1087  int i4x4, i8x8, chroma_idx;
1088  int dquant;
1089  int ret;
1090  GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1091  const uint8_t *scan, *scan8x8;
1092  const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1093 
1094  if(IS_INTERLACED(mb_type)){
1095  scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1096  scan= s->qscale ? h->field_scan : h->field_scan_q0;
1097  }else{
1098  scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1099  scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1100  }
1101 
1102  dquant= get_se_golomb(&s->gb);
1103 
1104  s->qscale += dquant;
1105 
1106  if(((unsigned)s->qscale) > max_qp){
1107  if(s->qscale<0) s->qscale+= max_qp+1;
1108  else s->qscale-= max_qp+1;
1109  if(((unsigned)s->qscale) > max_qp){
1110  av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1111  return -1;
1112  }
1113  }
1114 
1115  h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1116  h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1117 
1118  if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1119  return -1;
1120  }
1121  h->cbp_table[mb_xy] |= ret << 12;
1122  if(CHROMA444){
1123  if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1124  return -1;
1125  }
1126  if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1127  return -1;
1128  }
1129  } else if (CHROMA422) {
1130  if(cbp&0x30){
1131  for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1132  if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1134  NULL, 8) < 0) {
1135  return -1;
1136  }
1137  }
1138 
1139  if(cbp&0x20){
1140  for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1141  const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1142  DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1143  for (i8x8 = 0; i8x8 < 2; i8x8++) {
1144  for (i4x4 = 0; i4x4 < 4; i4x4++) {
1145  const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1146  if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1147  return -1;
1148  mb += 16 << pixel_shift;
1149  }
1150  }
1151  }
1152  }else{
1153  fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1154  fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1155  }
1156  } else /* yuv420 */ {
1157  if(cbp&0x30){
1158  for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1159  if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1160  return -1;
1161  }
1162  }
1163 
1164  if(cbp&0x20){
1165  for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1166  const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1167  for(i4x4=0; i4x4<4; i4x4++){
1168  const int index= 16 + 16*chroma_idx + i4x4;
1169  if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1170  return -1;
1171  }
1172  }
1173  }
1174  }else{
1175  fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1176  fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1177  }
1178  }
1179  }else{
1180  fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1181  fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1182  fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1183  }
1186 
1187  if(MB_MBAFF){
1188  h->ref_count[0] >>= 1;
1189  h->ref_count[1] >>= 1;
1190  }
1191 
1192  return 0;
1193 }
1194