h264_cavlc.c
Go to the documentation of this file.
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
28 #define CABAC 0
29 
30 #include "internal.h"
31 #include "avcodec.h"
32 #include "mpegvideo.h"
33 #include "h264.h"
34 #include "h264data.h" // FIXME FIXME FIXME
35 #include "h264_mvpred.h"
36 #include "golomb.h"
37 
38 //#undef NDEBUG
39 #include <assert.h>
40 
/* Coded block pattern lookup for macroblocks that are not Intra4x4 when chroma
 * residual is not decoded: indexed by the Exp-Golomb cbp code number, which the
 * macroblock decoder in this file limits to 0..15 before the lookup. */
41 static const uint8_t golomb_to_inter_cbp_gray[16]={
42  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
43 };
44 
/* Coded block pattern lookup for Intra4x4 macroblocks when chroma residual is
 * not decoded: indexed by the Exp-Golomb cbp code number, which the macroblock
 * decoder in this file limits to 0..15 before the lookup. */
45 static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
46 15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
47 };
48 
/* Code lengths of the coeff_token VLC that decode_residual() reads when
 * max_coeff == 4. Laid out as 5 rows of 4: decode_residual() splits the decoded
 * symbol into total_coeff = symbol >> 2 and trailing_ones = symbol & 3.
 * NOTE(review): a length of 0 presumably marks a combination that has no code
 * (trailing_ones > total_coeff) -- confirm against init_vlc(). */
49 static const uint8_t chroma_dc_coeff_token_len[4*5]={
50  2, 0, 0, 0,
51  6, 1, 0, 0,
52  6, 6, 3, 0,
53  6, 7, 7, 6,
54  6, 8, 8, 7,
55 };
56 
/* Codeword values of the max_coeff == 4 coeff_token VLC, paired entry for entry
 * with chroma_dc_coeff_token_len (same 5 rows of 4 layout). */
57 static const uint8_t chroma_dc_coeff_token_bits[4*5]={
58  1, 0, 0, 0,
59  7, 1, 0, 0,
60  4, 6, 1, 0,
61  3, 3, 2, 5,
62  2, 3, 2, 0,
63 };
64 
/* Code lengths of the coeff_token VLC that decode_residual() reads when
 * max_coeff <= 8 but != 4 (the visible caller passes 8 for 4:2:2 chroma DC).
 * Laid out as 9 rows of 4: the decoded symbol is split into
 * total_coeff = symbol >> 2 and trailing_ones = symbol & 3.
 * NOTE(review): a length of 0 presumably marks a combination that has no code
 * -- confirm against init_vlc(). */
65 static const uint8_t chroma422_dc_coeff_token_len[4*9]={
66  1, 0, 0, 0,
67  7, 2, 0, 0,
68  7, 7, 3, 0,
69  9, 7, 7, 5,
70  9, 9, 7, 6,
71  10, 10, 9, 7,
72  11, 11, 10, 7,
73  12, 12, 11, 10,
74  13, 12, 12, 11,
75 };
76 
/* Codeword values of the 4:2:2 chroma DC coeff_token VLC, paired entry for
 * entry with chroma422_dc_coeff_token_len (same 9 rows of 4 layout). */
77 static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
78  1, 0, 0, 0,
79  15, 1, 0, 0,
80  14, 13, 1, 0,
81  7, 12, 11, 1,
82  6, 5, 10, 1,
83  7, 6, 4, 9,
84  7, 6, 5, 8,
85  7, 6, 5, 4,
86  7, 5, 4, 4,
87 };
88 
/* Code lengths of the four general coeff_token VLCs (one per first index).
 * decode_residual() picks the table through coeff_token_table_index[], indexed
 * by the predicted non-zero count of the block. Each table holds 17 groups of
 * 4: the decoded symbol is split into total_coeff = symbol >> 2 and
 * trailing_ones = symbol & 3.
 * NOTE(review): a length of 0 presumably marks a combination that has no code
 * -- confirm against init_vlc(). */
89 static const uint8_t coeff_token_len[4][4*17]={
90 {
91  1, 0, 0, 0,
92  6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
93  11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
94  14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
95  16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
96 },
97 {
98  2, 0, 0, 0,
99  6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
100  8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
101  12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
102  13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
103 },
104 {
105  4, 0, 0, 0,
106  6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
107  7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
108  8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
109  10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
110 },
111 {
112  6, 0, 0, 0,
113  6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
114  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
115  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
116  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
117 }
118 };
119 
/* Codeword values of the four general coeff_token VLCs, paired entry for entry
 * with coeff_token_len (same [table][4*total_coeff + trailing_ones] layout as
 * read back by decode_residual()). */
120 static const uint8_t coeff_token_bits[4][4*17]={
121 {
122  1, 0, 0, 0,
123  5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
124  7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
125  15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
126  15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
127 },
128 {
129  3, 0, 0, 0,
130  11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
131  4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
132  15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
133  11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
134 },
135 {
136  15, 0, 0, 0,
137  15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
138  11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
139  11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
140  13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
141 },
142 {
143  3, 0, 0, 0,
144  0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
145  16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
146  32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
147  48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
148 }
149 };
150 
/* Code lengths of the total_zeros VLCs for blocks with max_coeff > 8.
 * Only 15 of the 16 declared rows are initialized here and only rows 0..14 are
 * turned into VLCs by the init loop in this file. decode_residual() selects a
 * VLC with (total_zeros_vlc-1)[total_coeff], so row r serves total_coeff == r+1.
 * NOTE(review): the column index is presumably the decoded total_zeros value
 * -- confirm against init_vlc(). */
151 static const uint8_t total_zeros_len[16][16]= {
152  {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
153  {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
154  {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
155  {5,3,4,4,3,3,3,4,3,4,5,5,5},
156  {4,4,4,3,3,3,3,3,4,5,4,5},
157  {6,5,3,3,3,3,3,3,4,3,6},
158  {6,5,3,3,3,2,3,4,3,6},
159  {6,4,5,3,2,2,3,3,6},
160  {6,6,4,2,2,3,2,5},
161  {5,5,3,2,2,2,4},
162  {4,4,3,3,1,3},
163  {4,4,2,1,3},
164  {3,3,1,2},
165  {2,2,1},
166  {1,1},
167 };
168 
/* Codeword values of the total_zeros VLCs for blocks with max_coeff > 8,
 * paired entry for entry with total_zeros_len (row r serves
 * total_coeff == r+1; only 15 rows are initialized). */
169 static const uint8_t total_zeros_bits[16][16]= {
170  {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
171  {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
172  {5,7,6,5,4,3,4,3,2,3,2,1,1,0},
173  {3,7,5,4,6,5,4,3,3,2,2,1,0},
174  {5,4,3,7,6,5,4,3,2,1,1,0},
175  {1,1,7,6,5,4,3,2,1,1,0},
176  {1,1,5,4,3,3,2,1,1,0},
177  {1,1,1,3,3,2,2,1,0},
178  {1,0,1,3,2,1,1,1},
179  {1,0,1,3,2,1,1},
180  {0,1,1,2,1,3},
181  {0,1,1,1,1},
182  {0,1,1,1},
183  {0,1,1},
184  {0,1},
185 };
186 
/* Code lengths of the total_zeros VLCs used when max_coeff == 4.
 * decode_residual() selects a VLC with (chroma_dc_total_zeros_vlc-1)[total_coeff],
 * so row r serves total_coeff == r+1. The longest code is 3 bits, matching
 * CHROMA_DC_TOTAL_ZEROS_VLC_BITS. */
187 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
188  { 1, 2, 3, 3,},
189  { 1, 2, 2, 0,},
190  { 1, 1, 0, 0,},
191 };
192 
/* Codeword values of the max_coeff == 4 total_zeros VLCs, paired entry for
 * entry with chroma_dc_total_zeros_len (row r serves total_coeff == r+1). */
193 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
194  { 1, 1, 1, 0,},
195  { 1, 1, 0, 0,},
196  { 1, 0, 0, 0,},
197 };
198 
/* Code lengths of the total_zeros VLCs used when max_coeff <= 8 but != 4.
 * decode_residual() selects a VLC with
 * (chroma422_dc_total_zeros_vlc-1)[total_coeff], so row r serves
 * total_coeff == r+1. The longest code is 5 bits, matching
 * CHROMA422_DC_TOTAL_ZEROS_VLC_BITS. */
199 static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
200  { 1, 3, 3, 4, 4, 4, 5, 5 },
201  { 3, 2, 3, 3, 3, 3, 3 },
202  { 3, 3, 2, 2, 3, 3 },
203  { 3, 2, 2, 2, 3 },
204  { 2, 2, 2, 2 },
205  { 2, 2, 1 },
206  { 1, 1 },
207 };
208 
/* Codeword values of the 4:2:2 chroma DC total_zeros VLCs, paired entry for
 * entry with chroma422_dc_total_zeros_len (row r serves total_coeff == r+1). */
209 static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
210  { 1, 2, 3, 2, 3, 1, 1, 0 },
211  { 0, 1, 1, 4, 5, 6, 7 },
212  { 0, 1, 1, 2, 6, 7 },
213  { 6, 0, 1, 2, 7 },
214  { 0, 1, 2, 3 },
215  { 0, 1, 1 },
216  { 0, 1 },
217 };
218 
/* Code lengths of the run_before VLCs. Rows 0..5 are turned into run_vlc[0..5]
 * (7 entries each) and row 6 into run7_vlc (16 entries) by the init code in
 * this file. The STORE_BLOCK macro reads (run_vlc-1)[zeros_left] while
 * zeros_left < 7, so row r serves zeros_left == r+1, and run7_vlc otherwise. */
219 static const uint8_t run_len[7][16]={
220  {1,1},
221  {1,2,2},
222  {2,2,2,2},
223  {2,2,2,3,3},
224  {2,2,3,3,3,3},
225  {2,3,3,3,3,3,3},
226  {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
227 };
228 
/* Codeword values of the run_before VLCs, paired entry for entry with run_len
 * (rows 0..5 feed run_vlc[0..5], row 6 feeds run7_vlc). */
229 static const uint8_t run_bits[7][16]={
230  {1,0},
231  {1,1,0},
232  {3,2,1,0},
233  {3,2,1,1,0},
234  {3,2,3,2,1,0},
235  {3,0,1,3,2,5,4},
236  {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
237 };
238 
240 static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
241 static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
242 
246 
250 
251 static VLC total_zeros_vlc[15];
252 static VLC_TYPE total_zeros_vlc_tables[15][512][2];
253 static const int total_zeros_vlc_tables_size = 512;
254 
258 
262 
263 static VLC run_vlc[6];
264 static VLC_TYPE run_vlc_tables[6][8][2];
265 static const int run_vlc_tables_size = 8;
266 
267 static VLC run7_vlc;
268 static VLC_TYPE run7_vlc_table[96][2];
269 static const int run7_vlc_table_size = 96;
270 
271 #define LEVEL_TAB_BITS 8
272 static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
273 
274 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
275 #define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13
276 #define COEFF_TOKEN_VLC_BITS 8
277 #define TOTAL_ZEROS_VLC_BITS 9
278 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
279 #define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5
280 #define RUN_VLC_BITS 3
281 #define RUN7_VLC_BITS 6
282 
287 static inline int pred_non_zero_count(H264Context *h, int n){
288  const int index8= scan8[n];
289  const int left= h->non_zero_count_cache[index8 - 1];
290  const int top = h->non_zero_count_cache[index8 - 8];
291  int i= left + top;
292 
293  if(i<64) i= (i+1)>>1;
294 
295  tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
296 
297  return i&31;
298 }
299 
300 static av_cold void init_cavlc_level_tab(void){
301  int suffix_length;
302  unsigned int i;
303 
304  for(suffix_length=0; suffix_length<7; suffix_length++){
305  for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306  int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307 
308  if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309  int level_code = (prefix << suffix_length) +
310  (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311  int mask = -(level_code&1);
312  level_code = (((2 + level_code) >> 1) ^ mask) - mask;
313  cavlc_level_tab[suffix_length][i][0]= level_code;
314  cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
315  }else if(prefix + 1 <= LEVEL_TAB_BITS){
316  cavlc_level_tab[suffix_length][i][0]= prefix+100;
317  cavlc_level_tab[suffix_length][i][1]= prefix + 1;
318  }else{
319  cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
320  cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
321  }
322  }
323  }
324 }
325 
327  static int done = 0;
328 
329  if (!done) {
330  int i;
331  int offset;
332  done = 1;
333 
334  chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
335  chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
336  init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
337  &chroma_dc_coeff_token_len [0], 1, 1,
338  &chroma_dc_coeff_token_bits[0], 1, 1,
340 
341  chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table;
342  chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size;
343  init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9,
344  &chroma422_dc_coeff_token_len [0], 1, 1,
347 
348  offset = 0;
349  for(i=0; i<4; i++){
350  coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
351  coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
352  init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
353  &coeff_token_len [i][0], 1, 1,
354  &coeff_token_bits[i][0], 1, 1,
356  offset += coeff_token_vlc_tables_size[i];
357  }
358  /*
359  * This is a one time safety check to make sure that
360  * the packed static coeff_token_vlc table sizes
361  * were initialized correctly.
362  */
363  assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
364 
365  for(i=0; i<3; i++){
366  chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
367  chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
368  init_vlc(&chroma_dc_total_zeros_vlc[i],
370  &chroma_dc_total_zeros_len [i][0], 1, 1,
371  &chroma_dc_total_zeros_bits[i][0], 1, 1,
373  }
374 
375  for(i=0; i<7; i++){
376  chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i];
377  chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size;
378  init_vlc(&chroma422_dc_total_zeros_vlc[i],
380  &chroma422_dc_total_zeros_len [i][0], 1, 1,
381  &chroma422_dc_total_zeros_bits[i][0], 1, 1,
383  }
384 
385  for(i=0; i<15; i++){
386  total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
387  total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
388  init_vlc(&total_zeros_vlc[i],
390  &total_zeros_len [i][0], 1, 1,
391  &total_zeros_bits[i][0], 1, 1,
393  }
394 
395  for(i=0; i<6; i++){
396  run_vlc[i].table = run_vlc_tables[i];
398  init_vlc(&run_vlc[i],
399  RUN_VLC_BITS, 7,
400  &run_len [i][0], 1, 1,
401  &run_bits[i][0], 1, 1,
403  }
404  run7_vlc.table = run7_vlc_table,
406  init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
407  &run_len [6][0], 1, 1,
408  &run_bits[6][0], 1, 1,
410 
412  }
413 }
414 
418 static inline int get_level_prefix(GetBitContext *gb){
419  unsigned int buf;
420  int log;
421 
422  OPEN_READER(re, gb);
423  UPDATE_CACHE(re, gb);
424  buf=GET_CACHE(re, gb);
425 
426  log= 32 - av_log2(buf);
427 #ifdef TRACE
428  print_bin(buf>>(32-log), log);
429  av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
430 #endif
431 
432  LAST_SKIP_BITS(re, gb, log);
433  CLOSE_READER(re, gb);
434 
435  return log-1;
436 }
437 
445 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
446  MpegEncContext * const s = &h->s;
447  static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
448  int level[16];
449  int zeros_left, coeff_token, total_coeff, i, trailing_ones, run_before;
450 
451  //FIXME put trailing_onex into the context
452 
453  if(max_coeff <= 8){
454  if (max_coeff == 4)
455  coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
456  else
457  coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1);
458  total_coeff= coeff_token>>2;
459  }else{
460  if(n >= LUMA_DC_BLOCK_INDEX){
461  total_coeff= pred_non_zero_count(h, (n - LUMA_DC_BLOCK_INDEX)*16);
462  coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
463  total_coeff= coeff_token>>2;
464  }else{
465  total_coeff= pred_non_zero_count(h, n);
466  coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
467  total_coeff= coeff_token>>2;
468  }
469  }
470  h->non_zero_count_cache[ scan8[n] ]= total_coeff;
471 
472  //FIXME set last_non_zero?
473 
474  if(total_coeff==0)
475  return 0;
476  if(total_coeff > (unsigned)max_coeff) {
477  av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
478  return -1;
479  }
480 
481  trailing_ones= coeff_token&3;
482  tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
483  assert(total_coeff<=16);
484 
485  i = show_bits(gb, 3);
486  skip_bits(gb, trailing_ones);
487  level[0] = 1-((i&4)>>1);
488  level[1] = 1-((i&2) );
489  level[2] = 1-((i&1)<<1);
490 
491  if(trailing_ones<total_coeff) {
492  int mask, prefix;
493  int suffix_length = total_coeff > 10 & trailing_ones < 3;
494  int bitsi= show_bits(gb, LEVEL_TAB_BITS);
495  int level_code= cavlc_level_tab[suffix_length][bitsi][0];
496 
497  skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
498  if(level_code >= 100){
499  prefix= level_code - 100;
500  if(prefix == LEVEL_TAB_BITS)
501  prefix += get_level_prefix(gb);
502 
503  //first coefficient has suffix_length equal to 0 or 1
504  if(prefix<14){ //FIXME try to build a large unified VLC table for all this
505  if(suffix_length)
506  level_code= (prefix<<1) + get_bits1(gb); //part
507  else
508  level_code= prefix; //part
509  }else if(prefix==14){
510  if(suffix_length)
511  level_code= (prefix<<1) + get_bits1(gb); //part
512  else
513  level_code= prefix + get_bits(gb, 4); //part
514  }else{
515  level_code= 30 + get_bits(gb, prefix-3); //part
516  if(prefix>=16){
517  if(prefix > 25+3){
518  av_log(h->s.avctx, AV_LOG_ERROR, "Invalid level prefix\n");
519  return -1;
520  }
521  level_code += (1<<(prefix-3))-4096;
522  }
523  }
524 
525  if(trailing_ones < 3) level_code += 2;
526 
527  suffix_length = 2;
528  mask= -(level_code&1);
529  level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
530  }else{
531  level_code += ((level_code>>31)|1) & -(trailing_ones < 3);
532 
533  suffix_length = 1 + (level_code + 3U > 6U);
534  level[trailing_ones]= level_code;
535  }
536 
537  //remaining coefficients have suffix_length > 0
538  for(i=trailing_ones+1;i<total_coeff;i++) {
539  static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
540  int bitsi= show_bits(gb, LEVEL_TAB_BITS);
541  level_code= cavlc_level_tab[suffix_length][bitsi][0];
542 
543  skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
544  if(level_code >= 100){
545  prefix= level_code - 100;
546  if(prefix == LEVEL_TAB_BITS){
547  prefix += get_level_prefix(gb);
548  }
549  if(prefix<15){
550  level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
551  }else{
552  level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
553  if(prefix>=16)
554  level_code += (1<<(prefix-3))-4096;
555  }
556  mask= -(level_code&1);
557  level_code= (((2+level_code)>>1) ^ mask) - mask;
558  }
559  level[i]= level_code;
560  suffix_length+= suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length];
561  }
562  }
563 
564  if(total_coeff == max_coeff)
565  zeros_left=0;
566  else{
567  if (max_coeff <= 8) {
568  if (max_coeff == 4)
569  zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
571  else
572  zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
574  } else {
575  zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
576  }
577  }
578 
579 #define STORE_BLOCK(type) \
580  scantable += zeros_left + total_coeff - 1; \
581  if(n >= LUMA_DC_BLOCK_INDEX){ \
582  ((type*)block)[*scantable] = level[0]; \
583  for(i=1;i<total_coeff && zeros_left > 0;i++) { \
584  if(zeros_left < 7) \
585  run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
586  else \
587  run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
588  zeros_left -= run_before; \
589  scantable -= 1 + run_before; \
590  ((type*)block)[*scantable]= level[i]; \
591  } \
592  for(;i<total_coeff;i++) { \
593  scantable--; \
594  ((type*)block)[*scantable]= level[i]; \
595  } \
596  }else{ \
597  ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
598  for(i=1;i<total_coeff && zeros_left > 0;i++) { \
599  if(zeros_left < 7) \
600  run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
601  else \
602  run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
603  zeros_left -= run_before; \
604  scantable -= 1 + run_before; \
605  ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
606  } \
607  for(;i<total_coeff;i++) { \
608  scantable--; \
609  ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
610  } \
611  }
612 
613  if (h->pixel_shift) {
614  STORE_BLOCK(int32_t)
615  } else {
616  STORE_BLOCK(int16_t)
617  }
618 
619  if(zeros_left<0){
620  av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
621  return -1;
622  }
623 
624  return 0;
625 }
626 
627 static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
628  int i4x4, i8x8;
629  MpegEncContext * const s = &h->s;
630  int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
631  if(IS_INTRA16x16(mb_type)){
632  AV_ZERO128(h->mb_luma_dc[p]+0);
633  AV_ZERO128(h->mb_luma_dc[p]+8);
634  AV_ZERO128(h->mb_luma_dc[p]+16);
635  AV_ZERO128(h->mb_luma_dc[p]+24);
636  if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
637  return -1; //FIXME continue if partitioned and other return -1 too
638  }
639 
640  assert((cbp&15) == 0 || (cbp&15) == 15);
641 
642  if(cbp&15){
643  for(i8x8=0; i8x8<4; i8x8++){
644  for(i4x4=0; i4x4<4; i4x4++){
645  const int index= i4x4 + 4*i8x8 + p*16;
646  if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
647  index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
648  return -1;
649  }
650  }
651  }
652  return 0xf;
653  }else{
654  fill_rectangle(&h->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
655  return 0;
656  }
657  }else{
658  int cqm = (IS_INTRA( mb_type ) ? 0:3)+p;
659  /* For CAVLC 4:4:4, we need to keep track of the luma 8x8 CBP for deblocking nnz purposes. */
660  int new_cbp = 0;
661  for(i8x8=0; i8x8<4; i8x8++){
662  if(cbp & (1<<i8x8)){
663  if(IS_8x8DCT(mb_type)){
664  DCTELEM *buf = &h->mb[64*i8x8+256*p << pixel_shift];
665  uint8_t *nnz;
666  for(i4x4=0; i4x4<4; i4x4++){
667  const int index= i4x4 + 4*i8x8 + p*16;
668  if( decode_residual(h, gb, buf, index, scan8x8+16*i4x4,
669  h->dequant8_coeff[cqm][qscale], 16) < 0 )
670  return -1;
671  }
672  nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
673  nnz[0] += nnz[1] + nnz[8] + nnz[9];
674  new_cbp |= !!nnz[0] << i8x8;
675  }else{
676  for(i4x4=0; i4x4<4; i4x4++){
677  const int index= i4x4 + 4*i8x8 + p*16;
678  if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index,
679  scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
680  return -1;
681  }
682  new_cbp |= h->non_zero_count_cache[ scan8[index] ] << i8x8;
683  }
684  }
685  }else{
686  uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8+p*16] ];
687  nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
688  }
689  }
690  return new_cbp;
691  }
692 }
693 
695  MpegEncContext * const s = &h->s;
696  int mb_xy;
697  int partition_count;
698  unsigned int mb_type, cbp;
699  int dct8x8_allowed= h->pps.transform_8x8_mode;
700  int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
701  const int pixel_shift = h->pixel_shift;
702 
703  mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
704 
705  tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
706  cbp = 0; /* avoid warning. FIXME: find a solution without slowing
707  down the code */
709  if(s->mb_skip_run==-1)
710  s->mb_skip_run= get_ue_golomb(&s->gb);
711 
712  if (s->mb_skip_run--) {
713  if(FRAME_MBAFF && (s->mb_y&1) == 0){
714  if(s->mb_skip_run==0)
716  }
717  decode_mb_skip(h);
718  return 0;
719  }
720  }
721  if(FRAME_MBAFF){
722  if( (s->mb_y&1) == 0 )
724  }
725 
726  h->prev_mb_skipped= 0;
727 
728  mb_type= get_ue_golomb(&s->gb);
730  if(mb_type < 23){
731  partition_count= b_mb_type_info[mb_type].partition_count;
732  mb_type= b_mb_type_info[mb_type].type;
733  }else{
734  mb_type -= 23;
735  goto decode_intra_mb;
736  }
737  }else if(h->slice_type_nos == AV_PICTURE_TYPE_P){
738  if(mb_type < 5){
739  partition_count= p_mb_type_info[mb_type].partition_count;
740  mb_type= p_mb_type_info[mb_type].type;
741  }else{
742  mb_type -= 5;
743  goto decode_intra_mb;
744  }
745  }else{
746  assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
747  if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
748  mb_type--;
749 decode_intra_mb:
750  if(mb_type > 25){
751  av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(h->slice_type), s->mb_x, s->mb_y);
752  return -1;
753  }
754  partition_count=0;
755  cbp= i_mb_type_info[mb_type].cbp;
757  mb_type= i_mb_type_info[mb_type].type;
758  }
759 
760  if(MB_FIELD)
761  mb_type |= MB_TYPE_INTERLACED;
762 
763  h->slice_table[ mb_xy ]= h->slice_num;
764 
765  if(IS_INTRA_PCM(mb_type)){
766  unsigned int x;
767  static const uint16_t mb_sizes[4] = {256,384,512,768};
768  const int mb_size = mb_sizes[h->sps.chroma_format_idc]*h->sps.bit_depth_luma >> 3;
769 
770  // We assume these blocks are very rare so we do not optimize it.
771  align_get_bits(&s->gb);
772 
773  // The pixels are stored in the same order as levels in h->mb array.
774  for(x=0; x < mb_size; x++){
775  ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
776  }
777 
778  // In deblocking, the quantizer is 0
780  // All coeffs are present
781  memset(h->non_zero_count[mb_xy], 16, 48);
782 
783  s->current_picture.f.mb_type[mb_xy] = mb_type;
784  return 0;
785  }
786 
787  if(MB_MBAFF){
788  h->ref_count[0] <<= 1;
789  h->ref_count[1] <<= 1;
790  }
791 
792  fill_decode_neighbors(h, mb_type);
793  fill_decode_caches(h, mb_type);
794 
795  //mb_pred
796  if(IS_INTRA(mb_type)){
797  int pred_mode;
798 // init_top_left_availability(h);
799  if(IS_INTRA4x4(mb_type)){
800  int i;
801  int di = 1;
802  if(dct8x8_allowed && get_bits1(&s->gb)){
803  mb_type |= MB_TYPE_8x8DCT;
804  di = 4;
805  }
806 
807 // fill_intra4x4_pred_table(h);
808  for(i=0; i<16; i+=di){
809  int mode= pred_intra_mode(h, i);
810 
811  if(!get_bits1(&s->gb)){
812  const int rem_mode= get_bits(&s->gb, 3);
813  mode = rem_mode + (rem_mode >= mode);
814  }
815 
816  if(di==4)
817  fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
818  else
819  h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
820  }
823  return -1;
824  }else{
826  if(h->intra16x16_pred_mode < 0)
827  return -1;
828  }
829  if(decode_chroma){
830  pred_mode= ff_h264_check_intra_pred_mode(h, get_ue_golomb_31(&s->gb), 1);
831  if(pred_mode < 0)
832  return -1;
833  h->chroma_pred_mode= pred_mode;
834  } else {
836  }
837  }else if(partition_count==4){
838  int i, j, sub_partition_count[4], list, ref[2][4];
839 
841  for(i=0; i<4; i++){
842  h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
843  if(h->sub_mb_type[i] >=13){
844  av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
845  return -1;
846  }
847  sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
849  }
850  if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
851  ff_h264_pred_direct_motion(h, &mb_type);
852  h->ref_cache[0][scan8[4]] =
853  h->ref_cache[1][scan8[4]] =
854  h->ref_cache[0][scan8[12]] =
855  h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
856  }
857  }else{
858  assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
859  for(i=0; i<4; i++){
860  h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
861  if(h->sub_mb_type[i] >=4){
862  av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
863  return -1;
864  }
865  sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
867  }
868  }
869 
870  for(list=0; list<h->list_count; list++){
871  int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
872  for(i=0; i<4; i++){
873  if(IS_DIRECT(h->sub_mb_type[i])) continue;
874  if(IS_DIR(h->sub_mb_type[i], 0, list)){
875  unsigned int tmp;
876  if(ref_count == 1){
877  tmp= 0;
878  }else if(ref_count == 2){
879  tmp= get_bits1(&s->gb)^1;
880  }else{
881  tmp= get_ue_golomb_31(&s->gb);
882  if(tmp>=ref_count){
883  av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
884  return -1;
885  }
886  }
887  ref[list][i]= tmp;
888  }else{
889  //FIXME
890  ref[list][i] = -1;
891  }
892  }
893  }
894 
895  if(dct8x8_allowed)
896  dct8x8_allowed = get_dct8x8_allowed(h);
897 
898  for(list=0; list<h->list_count; list++){
899  for(i=0; i<4; i++){
900  if(IS_DIRECT(h->sub_mb_type[i])) {
901  h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
902  continue;
903  }
904  h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
905  h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
906 
907  if(IS_DIR(h->sub_mb_type[i], 0, list)){
908  const int sub_mb_type= h->sub_mb_type[i];
909  const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
910  for(j=0; j<sub_partition_count[i]; j++){
911  int mx, my;
912  const int index= 4*i + block_width*j;
913  int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
914  pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
915  mx += get_se_golomb(&s->gb);
916  my += get_se_golomb(&s->gb);
917  tprintf(s->avctx, "final mv:%d %d\n", mx, my);
918 
919  if(IS_SUB_8X8(sub_mb_type)){
920  mv_cache[ 1 ][0]=
921  mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
922  mv_cache[ 1 ][1]=
923  mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
924  }else if(IS_SUB_8X4(sub_mb_type)){
925  mv_cache[ 1 ][0]= mx;
926  mv_cache[ 1 ][1]= my;
927  }else if(IS_SUB_4X8(sub_mb_type)){
928  mv_cache[ 8 ][0]= mx;
929  mv_cache[ 8 ][1]= my;
930  }
931  mv_cache[ 0 ][0]= mx;
932  mv_cache[ 0 ][1]= my;
933  }
934  }else{
935  uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
936  p[0] = p[1]=
937  p[8] = p[9]= 0;
938  }
939  }
940  }
941  }else if(IS_DIRECT(mb_type)){
942  ff_h264_pred_direct_motion(h, &mb_type);
943  dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
944  }else{
945  int list, mx, my, i;
946  //FIXME we should set ref_idx_l? to 0 if we use that later ...
947  if(IS_16X16(mb_type)){
948  for(list=0; list<h->list_count; list++){
949  unsigned int val;
950  if(IS_DIR(mb_type, 0, list)){
951  if(h->ref_count[list]==1){
952  val= 0;
953  }else if(h->ref_count[list]==2){
954  val= get_bits1(&s->gb)^1;
955  }else{
956  val= get_ue_golomb_31(&s->gb);
957  if(val >= h->ref_count[list]){
958  av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
959  return -1;
960  }
961  }
962  fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
963  }
964  }
965  for(list=0; list<h->list_count; list++){
966  if(IS_DIR(mb_type, 0, list)){
967  pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
968  mx += get_se_golomb(&s->gb);
969  my += get_se_golomb(&s->gb);
970  tprintf(s->avctx, "final mv:%d %d\n", mx, my);
971 
972  fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
973  }
974  }
975  }
976  else if(IS_16X8(mb_type)){
977  for(list=0; list<h->list_count; list++){
978  for(i=0; i<2; i++){
979  unsigned int val;
980  if(IS_DIR(mb_type, i, list)){
981  if(h->ref_count[list] == 1){
982  val= 0;
983  }else if(h->ref_count[list] == 2){
984  val= get_bits1(&s->gb)^1;
985  }else{
986  val= get_ue_golomb_31(&s->gb);
987  if(val >= h->ref_count[list]){
988  av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
989  return -1;
990  }
991  }
992  }else
993  val= LIST_NOT_USED&0xFF;
994  fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
995  }
996  }
997  for(list=0; list<h->list_count; list++){
998  for(i=0; i<2; i++){
999  unsigned int val;
1000  if(IS_DIR(mb_type, i, list)){
1001  pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
1002  mx += get_se_golomb(&s->gb);
1003  my += get_se_golomb(&s->gb);
1004  tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1005 
1006  val= pack16to32(mx,my);
1007  }else
1008  val=0;
1009  fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
1010  }
1011  }
1012  }else{
1013  assert(IS_8X16(mb_type));
1014  for(list=0; list<h->list_count; list++){
1015  for(i=0; i<2; i++){
1016  unsigned int val;
1017  if(IS_DIR(mb_type, i, list)){ //FIXME optimize
1018  if(h->ref_count[list]==1){
1019  val= 0;
1020  }else if(h->ref_count[list]==2){
1021  val= get_bits1(&s->gb)^1;
1022  }else{
1023  val= get_ue_golomb_31(&s->gb);
1024  if(val >= h->ref_count[list]){
1025  av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
1026  return -1;
1027  }
1028  }
1029  }else
1030  val= LIST_NOT_USED&0xFF;
1031  fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
1032  }
1033  }
1034  for(list=0; list<h->list_count; list++){
1035  for(i=0; i<2; i++){
1036  unsigned int val;
1037  if(IS_DIR(mb_type, i, list)){
1038  pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
1039  mx += get_se_golomb(&s->gb);
1040  my += get_se_golomb(&s->gb);
1041  tprintf(s->avctx, "final mv:%d %d\n", mx, my);
1042 
1043  val= pack16to32(mx,my);
1044  }else
1045  val=0;
1046  fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
1047  }
1048  }
1049  }
1050  }
1051 
1052  if(IS_INTER(mb_type))
1053  write_back_motion(h, mb_type);
1054 
1055  if(!IS_INTRA16x16(mb_type)){
1056  cbp= get_ue_golomb(&s->gb);
1057 
1058  if(decode_chroma){
1059  if(cbp > 47){
1060  av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1061  return -1;
1062  }
1063  if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
1064  else cbp= golomb_to_inter_cbp [cbp];
1065  }else{
1066  if(cbp > 15){
1067  av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
1068  return -1;
1069  }
1070  if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
1071  else cbp= golomb_to_inter_cbp_gray[cbp];
1072  }
1073  }
1074 
1075  if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
1076  mb_type |= MB_TYPE_8x8DCT*get_bits1(&s->gb);
1077  }
1078  h->cbp=
1079  h->cbp_table[mb_xy]= cbp;
1080  s->current_picture.f.mb_type[mb_xy] = mb_type;
1081 
1082  if(cbp || IS_INTRA16x16(mb_type)){
1083  int i4x4, i8x8, chroma_idx;
1084  int dquant;
1085  int ret;
1086  GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
1087  const uint8_t *scan, *scan8x8;
1088  const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
1089 
1090  if(IS_INTERLACED(mb_type)){
1091  scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
1092  scan= s->qscale ? h->field_scan : h->field_scan_q0;
1093  }else{
1094  scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
1095  scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
1096  }
1097 
1098  dquant= get_se_golomb(&s->gb);
1099 
1100  s->qscale += dquant;
1101 
1102  if(((unsigned)s->qscale) > max_qp){
1103  if(s->qscale<0) s->qscale+= max_qp+1;
1104  else s->qscale-= max_qp+1;
1105  if(((unsigned)s->qscale) > max_qp){
1106  av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
1107  return -1;
1108  }
1109  }
1110 
1111  h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
1112  h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
1113 
1114  if( (ret = decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ){
1115  return -1;
1116  }
1117  h->cbp_table[mb_xy] |= ret << 12;
1118  if(CHROMA444){
1119  if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 1) < 0 ){
1120  return -1;
1121  }
1122  if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
1123  return -1;
1124  }
1125  } else if (CHROMA422) {
1126  if(cbp&0x30){
1127  for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1128  if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
1130  NULL, 8) < 0) {
1131  return -1;
1132  }
1133  }
1134 
1135  if(cbp&0x20){
1136  for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1137  const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1138  DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
1139  for (i8x8 = 0; i8x8 < 2; i8x8++) {
1140  for (i4x4 = 0; i4x4 < 4; i4x4++) {
1141  const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
1142  if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
1143  return -1;
1144  mb += 16 << pixel_shift;
1145  }
1146  }
1147  }
1148  }else{
1149  fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1150  fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1151  }
1152  } else /* yuv420 */ {
1153  if(cbp&0x30){
1154  for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1155  if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1156  return -1;
1157  }
1158  }
1159 
1160  if(cbp&0x20){
1161  for(chroma_idx=0; chroma_idx<2; chroma_idx++){
1162  const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1163  for(i4x4=0; i4x4<4; i4x4++){
1164  const int index= 16 + 16*chroma_idx + i4x4;
1165  if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
1166  return -1;
1167  }
1168  }
1169  }
1170  }else{
1171  fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1172  fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1173  }
1174  }
1175  }else{
1176  fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
1177  fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
1178  fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
1179  }
1182 
1183  if(MB_MBAFF){
1184  h->ref_count[0] >>= 1;
1185  h->ref_count[1] >>= 1;
1186  }
1187 
1188  return 0;
1189 }
1190