Libav
vp9block.c
Go to the documentation of this file.
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of Libav.
8  *
9  * Libav is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * Libav is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with Libav; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "libavutil/avassert.h"
25 
26 #include "avcodec.h"
27 #include "get_bits.h"
28 #include "internal.h"
29 #include "videodsp.h"
30 #include "vp56.h"
31 #include "vp9.h"
32 #include "vp9data.h"
33 
// Per-block-size width/height lookup, indexed [scale][block size][0=w, 1=h].
// Row [0] holds twice the values of row [1]; decode_mode() uses [1] to clamp
// w4/h4 against s->cols/s->rows and [0] for the per-mode context memsets,
// while decode_coeffs()/intra_recon() shift [1] left by one to get the
// 4x4-unit dimensions. (NOTE(review): exact units of s->cols/s->rows are not
// visible in this chunk — presumably 8x8 blocks; confirm against vp9.h.)
34 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
35  {
36  { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
37  { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
38  }, {
39  { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
40  { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
41  }
42 };
43 
44 // differential forward probability updates
// Decode all per-block mode information for block b: segment id, skip flag,
// intra/inter decision, transform size, intra prediction modes (or inter
// modes, reference frames and motion vectors), then write the results back
// into the left/above context arrays and the frame-wide segmentation and
// motion-vector maps.
//
// NOTE(review): this text is a doxygen scrape; the scraper dropped a number
// of original source lines (the embedded line numbers jump). Each gap is
// flagged inline below. The dropped lines are mostly vp8_rac_get_tree()
// calls that doxygen had hyperlinked — the function is NOT compilable as
// reproduced here until they are restored from the original vp9block.c.
45 static void decode_mode(VP9Context *s, VP9Block *const b)
46 {
// Partition context bitmasks written into left/above_partition_ctx per
// block size (values taken verbatim from the table below).
47  static const uint8_t left_ctx[N_BS_SIZES] = {
48  0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
49  };
50  static const uint8_t above_ctx[N_BS_SIZES] = {
51  0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
52  };
53  static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
// [review] scrape gap: original lines 54-55 (the table contents) are missing.
56  };
57  int row = b->row, col = b->col, row7 = b->row7;
58  enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block dimensions clamped against the frame edge.
59  int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
60  int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]);
// Neighbour availability: above exists within the frame, left within the tile.
61  int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
62  int y;
63 
// --- segment id ---
64  if (!s->segmentation.enabled) {
65  b->seg_id = 0;
66  } else if (s->keyframe || s->intraonly) {
// [review] scrape gap: original lines 67-68 missing (seg_id decode for
// key/intra-only frames, likely a vp8_rac_get_tree() call).
69  } else if (!s->segmentation.update_map ||
70  (s->segmentation.temporal &&
// [review] scrape gap: original line 71 missing (start of the
// vp56_rac_get_prob() call whose argument continues below).
72  s->prob.segpred[s->above_segpred_ctx[col] +
73  s->left_segpred_ctx[row7]]))) {
// Temporally predicted segment id: take the minimum id found in the
// co-located area of the previous segmentation map.
74  int pred = 8, x;
75 
76  for (y = 0; y < h4; y++)
77  for (x = 0; x < w4; x++)
78  pred = FFMIN(pred,
79  s->segmentation_map[(y + row) * 8 * s->sb_cols + x + col]);
80  b->seg_id = pred;
81 
82  memset(&s->above_segpred_ctx[col], 1, w4);
83  memset(&s->left_segpred_ctx[row7], 1, h4);
84  } else {
// [review] scrape gap: original line 85 missing (b->seg_id decode, likely
// vp8_rac_get_tree() using the s->prob.seg probabilities below).
86  s->prob.seg);
87 
88  memset(&s->above_segpred_ctx[col], 0, w4);
89  memset(&s->left_segpred_ctx[row7], 0, h4);
90  }
// Propagate the decoded segment id into the frame-wide map.
91  if ((s->segmentation.enabled && s->segmentation.update_map) || s->keyframe) {
92  for (y = 0; y < h4; y++)
93  memset(&s->segmentation_map[(y + row) * 8 * s->sb_cols + col],
94  b->seg_id, w4);
95  }
96 
// --- skip flag (forced by segment feature, else coded) ---
97  b->skip = s->segmentation.enabled &&
98  s->segmentation.feat[b->seg_id].skip_enabled;
99  if (!b->skip) {
100  int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
101  b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
102  s->counts.skip[c][b->skip]++;
103  }
104 
// --- intra/inter decision ---
105  if (s->keyframe || s->intraonly) {
106  b->intra = 1;
107  } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
108  b->intra = !s->segmentation.feat[b->seg_id].ref_val;
109  } else {
110  int c, bit;
111 
112  if (have_a && have_l) {
113  c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
114  c += (c == 2);
115  } else {
116  c = have_a ? 2 * s->above_intra_ctx[col] :
117  have_l ? 2 * s->left_intra_ctx[row7] : 0;
118  }
119  bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
120  s->counts.intra[c][bit]++;
121  b->intra = !bit;
122  }
123 
// --- transform size (coded only when the block carries coefficients) ---
124  if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
125  int c;
126  if (have_a) {
127  if (have_l) {
128  c = (s->above_skip_ctx[col] ? max_tx :
129  s->above_txfm_ctx[col]) +
130  (s->left_skip_ctx[row7] ? max_tx :
131  s->left_txfm_ctx[row7]) > max_tx;
132  } else {
133  c = s->above_skip_ctx[col] ? 1 :
134  (s->above_txfm_ctx[col] * 2 > max_tx);
135  }
136  } else if (have_l) {
137  c = s->left_skip_ctx[row7] ? 1 :
138  (s->left_txfm_ctx[row7] * 2 > max_tx);
139  } else {
140  c = 1;
141  }
// Unary-style coding: each extra bit enlarges b->tx up to max_tx.
142  switch (max_tx) {
143  case TX_32X32:
144  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
145  if (b->tx) {
146  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
147  if (b->tx == 2)
148  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
149  }
150  s->counts.tx32p[c][b->tx]++;
151  break;
152  case TX_16X16:
153  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
154  if (b->tx)
155  b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
156  s->counts.tx16p[c][b->tx]++;
157  break;
158  case TX_8X8:
159  b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
160  s->counts.tx8p[c][b->tx]++;
161  break;
162  case TX_4X4:
163  b->tx = TX_4X4;
164  break;
165  }
166  } else {
167  b->tx = FFMIN(max_tx, s->txfmmode);
168  }
169 
// --- keyframe / intra-only: intra modes from default kf probabilities ---
170  if (s->keyframe || s->intraonly) {
171  uint8_t *a = &s->above_mode_ctx[col * 2];
172  uint8_t *l = &s->left_mode_ctx[(row7) << 1];
173 
174  b->comp = 0;
175  if (b->bs > BS_8x8) {
176  // FIXME the memory storage intermediates here aren't really
177  // necessary, they're just there to make the code slightly
178  // simpler for now
179  b->mode[0] =
// [review] scrape gap: original line 180 missing (vp8_rac_get_tree() call).
181  ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
182  if (b->bs != BS_8x4) {
// [review] scrape gap: original line 183 missing (b->mode[1] decode).
184  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
185  l[0] =
186  a[1] = b->mode[1];
187  } else {
188  l[0] =
189  a[1] =
190  b->mode[1] = b->mode[0];
191  }
192  if (b->bs != BS_4x8) {
193  b->mode[2] =
// [review] scrape gap: original line 194 missing (vp8_rac_get_tree() call).
195  ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
196  if (b->bs != BS_8x4) {
// [review] scrape gap: original line 197 missing (b->mode[3] decode).
198  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
199  l[1] =
200  a[1] = b->mode[3];
201  } else {
202  l[1] =
203  a[1] =
204  b->mode[3] = b->mode[2];
205  }
206  } else {
207  b->mode[2] = b->mode[0];
208  l[1] =
209  a[1] =
210  b->mode[3] = b->mode[1];
211  }
212  } else {
// [review] scrape gap: original lines 213-214 missing (b->mode[0] decode).
215  b->mode[3] =
216  b->mode[2] =
217  b->mode[1] = b->mode[0];
218  // FIXME this can probably be optimized
219  memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
220  memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
221  }
// [review] scrape gap: original lines 222-223 missing (b->uvmode decode).
224  } else if (b->intra) {
// --- inter frame, intra block: modes from adaptive probabilities ---
225  b->comp = 0;
226  if (b->bs > BS_8x8) {
// [review] scrape gap: original line 227 missing (b->mode[0] decode).
228  s->prob.p.y_mode[0]);
229  s->counts.y_mode[0][b->mode[0]]++;
230  if (b->bs != BS_8x4) {
// [review] scrape gap: original line 231 missing (b->mode[1] decode).
232  s->prob.p.y_mode[0]);
233  s->counts.y_mode[0][b->mode[1]]++;
234  } else {
235  b->mode[1] = b->mode[0];
236  }
237  if (b->bs != BS_4x8) {
// [review] scrape gap: original line 238 missing (b->mode[2] decode).
239  s->prob.p.y_mode[0]);
240  s->counts.y_mode[0][b->mode[2]]++;
241  if (b->bs != BS_8x4) {
// [review] scrape gap: original line 242 missing (b->mode[3] decode).
243  s->prob.p.y_mode[0]);
244  s->counts.y_mode[0][b->mode[3]]++;
245  } else {
246  b->mode[3] = b->mode[2];
247  }
248  } else {
249  b->mode[2] = b->mode[0];
250  b->mode[3] = b->mode[1];
251  }
252  } else {
253  static const uint8_t size_group[10] = {
254  3, 3, 3, 3, 2, 2, 2, 1, 1, 1
255  };
256  int sz = size_group[b->bs];
257 
// [review] scrape gap: original line 258 missing (b->mode[0] decode).
259  s->prob.p.y_mode[sz]);
260  b->mode[1] =
261  b->mode[2] =
262  b->mode[3] = b->mode[0];
263  s->counts.y_mode[sz][b->mode[3]]++;
264  }
// [review] scrape gap: original line 265 missing (b->uvmode decode).
266  s->prob.p.uv_mode[b->mode[3]]);
267  s->counts.uv_mode[b->mode[3]][b->uvmode]++;
268  } else {
// --- inter block: references, inter mode, filter, motion vectors ---
// Context LUT indexed by the above/left neighbour modes.
269  static const uint8_t inter_mode_ctx_lut[14][14] = {
270  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
271  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
272  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
273  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
274  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
275  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
276  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
277  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
278  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
279  { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
280  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
281  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
282  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
283  { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
284  };
285 
286  if (s->segmentation.feat[b->seg_id].ref_enabled) {
// Segment forces the reference; no compound prediction in that case.
287  av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
288  b->comp = 0;
289  b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
290  } else {
291  // read comp_pred flag
292  if (s->comppredmode != PRED_SWITCHABLE) {
293  b->comp = s->comppredmode == PRED_COMPREF;
294  } else {
295  int c;
296 
297  // FIXME add intra as ref=0xff (or -1) to make these easier?
298  if (have_a) {
299  if (have_l) {
300  if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
301  c = 4;
302  } else if (s->above_comp_ctx[col]) {
303  c = 2 + (s->left_intra_ctx[row7] ||
304  s->left_ref_ctx[row7] == s->fixcompref);
305  } else if (s->left_comp_ctx[row7]) {
306  c = 2 + (s->above_intra_ctx[col] ||
307  s->above_ref_ctx[col] == s->fixcompref);
308  } else {
309  c = (!s->above_intra_ctx[col] &&
310  s->above_ref_ctx[col] == s->fixcompref) ^
// NOTE(review): 'row & 7' here vs. 'row7' everywhere else — verify
// against the reference; row7 is likely intended to equal row & 7.
311  (!s->left_intra_ctx[row7] &&
312  s->left_ref_ctx[row & 7] == s->fixcompref);
313  }
314  } else {
315  c = s->above_comp_ctx[col] ? 3 :
316  (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
317  }
318  } else if (have_l) {
319  c = s->left_comp_ctx[row7] ? 3 :
320  (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
321  } else {
322  c = 1;
323  }
324  b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
325  s->counts.comp[c][b->comp]++;
326  }
327 
328  // read actual references
329  // FIXME probably cache a few variables here to prevent repetitive
330  // memory accesses below
331  if (b->comp) { /* two references */
332  int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
333 
334  b->ref[fix_idx] = s->fixcompref;
335  // FIXME can this codeblob be replaced by some sort of LUT?
336  if (have_a) {
337  if (have_l) {
338  if (s->above_intra_ctx[col]) {
339  if (s->left_intra_ctx[row7]) {
340  c = 2;
341  } else {
342  c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
343  }
344  } else if (s->left_intra_ctx[row7]) {
345  c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
346  } else {
347  int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
348 
349  if (refl == refa && refa == s->varcompref[1]) {
350  c = 0;
351  } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
352  if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
353  (refl == s->fixcompref && refa == s->varcompref[0])) {
354  c = 4;
355  } else {
356  c = (refa == refl) ? 3 : 1;
357  }
358  } else if (!s->left_comp_ctx[row7]) {
359  if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
360  c = 1;
361  } else {
362  c = (refl == s->varcompref[1] &&
363  refa != s->varcompref[1]) ? 2 : 4;
364  }
365  } else if (!s->above_comp_ctx[col]) {
366  if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
367  c = 1;
368  } else {
369  c = (refa == s->varcompref[1] &&
370  refl != s->varcompref[1]) ? 2 : 4;
371  }
372  } else {
373  c = (refl == refa) ? 4 : 2;
374  }
375  }
376  } else {
377  if (s->above_intra_ctx[col]) {
378  c = 2;
379  } else if (s->above_comp_ctx[col]) {
380  c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
381  } else {
382  c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
383  }
384  }
385  } else if (have_l) {
386  if (s->left_intra_ctx[row7]) {
387  c = 2;
388  } else if (s->left_comp_ctx[row7]) {
389  c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
390  } else {
391  c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
392  }
393  } else {
394  c = 2;
395  }
396  bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
397  b->ref[var_idx] = s->varcompref[bit];
398  s->counts.comp_ref[c][bit]++;
399  } else { /* single reference */
400  int bit, c;
401 
402  if (have_a && !s->above_intra_ctx[col]) {
403  if (have_l && !s->left_intra_ctx[row7]) {
404  if (s->left_comp_ctx[row7]) {
405  if (s->above_comp_ctx[col]) {
406  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
407  !s->above_ref_ctx[col]);
408  } else {
409  c = (3 * !s->above_ref_ctx[col]) +
410  (!s->fixcompref || !s->left_ref_ctx[row7]);
411  }
412  } else if (s->above_comp_ctx[col]) {
413  c = (3 * !s->left_ref_ctx[row7]) +
414  (!s->fixcompref || !s->above_ref_ctx[col]);
415  } else {
416  c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
417  }
418  } else if (s->above_intra_ctx[col]) {
419  c = 2;
420  } else if (s->above_comp_ctx[col]) {
421  c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
422  } else {
423  c = 4 * (!s->above_ref_ctx[col]);
424  }
425  } else if (have_l && !s->left_intra_ctx[row7]) {
// NOTE(review): the inner left_intra_ctx test below can never be true
// (the enclosing condition already requires it false) — dead branch,
// likely inherited from the reference implementation; confirm upstream.
426  if (s->left_intra_ctx[row7]) {
427  c = 2;
428  } else if (s->left_comp_ctx[row7]) {
429  c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
430  } else {
431  c = 4 * (!s->left_ref_ctx[row7]);
432  }
433  } else {
434  c = 2;
435  }
436  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
437  s->counts.single_ref[c][0][bit]++;
438  if (!bit) {
439  b->ref[0] = 0;
440  } else {
441  // FIXME can this codeblob be replaced by some sort of LUT?
442  if (have_a) {
443  if (have_l) {
444  if (s->left_intra_ctx[row7]) {
445  if (s->above_intra_ctx[col]) {
446  c = 2;
447  } else if (s->above_comp_ctx[col]) {
448  c = 1 + 2 * (s->fixcompref == 1 ||
449  s->above_ref_ctx[col] == 1);
450  } else if (!s->above_ref_ctx[col]) {
451  c = 3;
452  } else {
453  c = 4 * (s->above_ref_ctx[col] == 1);
454  }
455  } else if (s->above_intra_ctx[col]) {
456  if (s->left_intra_ctx[row7]) {
457  c = 2;
458  } else if (s->left_comp_ctx[row7]) {
459  c = 1 + 2 * (s->fixcompref == 1 ||
460  s->left_ref_ctx[row7] == 1);
461  } else if (!s->left_ref_ctx[row7]) {
462  c = 3;
463  } else {
464  c = 4 * (s->left_ref_ctx[row7] == 1);
465  }
466  } else if (s->above_comp_ctx[col]) {
467  if (s->left_comp_ctx[row7]) {
468  if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
469  c = 3 * (s->fixcompref == 1 ||
470  s->left_ref_ctx[row7] == 1);
471  } else {
472  c = 2;
473  }
474  } else if (!s->left_ref_ctx[row7]) {
475  c = 1 + 2 * (s->fixcompref == 1 ||
476  s->above_ref_ctx[col] == 1);
477  } else {
478  c = 3 * (s->left_ref_ctx[row7] == 1) +
479  (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
480  }
481  } else if (s->left_comp_ctx[row7]) {
482  if (!s->above_ref_ctx[col]) {
483  c = 1 + 2 * (s->fixcompref == 1 ||
484  s->left_ref_ctx[row7] == 1);
485  } else {
486  c = 3 * (s->above_ref_ctx[col] == 1) +
487  (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
488  }
489  } else if (!s->above_ref_ctx[col]) {
490  if (!s->left_ref_ctx[row7]) {
491  c = 3;
492  } else {
493  c = 4 * (s->left_ref_ctx[row7] == 1);
494  }
495  } else if (!s->left_ref_ctx[row7]) {
496  c = 4 * (s->above_ref_ctx[col] == 1);
497  } else {
498  c = 2 * (s->left_ref_ctx[row7] == 1) +
499  2 * (s->above_ref_ctx[col] == 1);
500  }
501  } else {
502  if (s->above_intra_ctx[col] ||
503  (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
504  c = 2;
505  } else if (s->above_comp_ctx[col]) {
506  c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
507  } else {
508  c = 4 * (s->above_ref_ctx[col] == 1);
509  }
510  }
511  } else if (have_l) {
512  if (s->left_intra_ctx[row7] ||
513  (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
514  c = 2;
515  } else if (s->left_comp_ctx[row7]) {
516  c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
517  } else {
518  c = 4 * (s->left_ref_ctx[row7] == 1);
519  }
520  } else {
521  c = 2;
522  }
523  bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
524  s->counts.single_ref[c][1][bit]++;
525  b->ref[0] = 1 + bit;
526  }
527  }
528  }
529 
// --- inter mode for 8x8-and-smaller blocks (one mode for all four) ---
530  if (b->bs <= BS_8x8) {
531  if (s->segmentation.feat[b->seg_id].skip_enabled) {
532  b->mode[0] =
533  b->mode[1] =
534  b->mode[2] =
535  b->mode[3] = ZEROMV;
536  } else {
537  static const uint8_t off[10] = {
538  3, 0, 0, 1, 0, 0, 0, 0, 0, 0
539  };
540 
541  // FIXME this needs to use the LUT tables from find_ref_mvs
542  // because not all are -1,0/0,-1
543  int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
544  [s->left_mode_ctx[row7 + off[b->bs]]];
// [review] scrape gap: original lines 545-546 missing (b->mode[0] decode).
547  s->prob.p.mv_mode[c]);
548  b->mode[1] =
549  b->mode[2] =
550  b->mode[3] = b->mode[0];
551  s->counts.mv_mode[c][b->mode[0] - 10]++;
552  }
553  }
554 
// --- interpolation filter ---
555  if (s->filtermode == FILTER_SWITCHABLE) {
556  int c;
557 
558  if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
559  if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
560  c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
561  s->left_filter_ctx[row7] : 3;
562  } else {
563  c = s->above_filter_ctx[col];
564  }
565  } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
566  c = s->left_filter_ctx[row7];
567  } else {
568  c = 3;
569  }
570 
// [review] scrape gap: original line 571 missing (b->filter decode).
572  s->prob.p.filter[c]);
573  s->counts.filter[c][b->filter]++;
574  } else {
575  b->filter = s->filtermode;
576  }
577 
// --- per-sub-block inter modes + MVs for blocks larger than 8x8 splits ---
578  if (b->bs > BS_8x8) {
579  int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
580 
// [review] scrape gap: original line 581 missing (b->mode[0] decode).
582  s->prob.p.mv_mode[c]);
583  s->counts.mv_mode[c][b->mode[0] - 10]++;
584  ff_vp9_fill_mv(s, b->mv[0], b->mode[0], 0);
585 
586  if (b->bs != BS_8x4) {
// [review] scrape gap: original line 587 missing (b->mode[1] decode).
588  s->prob.p.mv_mode[c]);
589  s->counts.mv_mode[c][b->mode[1] - 10]++;
590  ff_vp9_fill_mv(s, b->mv[1], b->mode[1], 1);
591  } else {
592  b->mode[1] = b->mode[0];
593  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
594  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
595  }
596 
597  if (b->bs != BS_4x8) {
// [review] scrape gap: original line 598 missing (b->mode[2] decode).
599  s->prob.p.mv_mode[c]);
600  s->counts.mv_mode[c][b->mode[2] - 10]++;
601  ff_vp9_fill_mv(s, b->mv[2], b->mode[2], 2);
602 
603  if (b->bs != BS_8x4) {
// [review] scrape gap: original line 604 missing (b->mode[3] decode).
605  s->prob.p.mv_mode[c]);
606  s->counts.mv_mode[c][b->mode[3] - 10]++;
607  ff_vp9_fill_mv(s, b->mv[3], b->mode[3], 3);
608  } else {
609  b->mode[3] = b->mode[2];
610  AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
611  AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
612  }
613  } else {
614  b->mode[2] = b->mode[0];
615  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
616  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
617  b->mode[3] = b->mode[1];
618  AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
619  AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
620  }
621  } else {
// Single MV for the whole block, replicated to all four sub-slots.
622  ff_vp9_fill_mv(s, b->mv[0], b->mode[0], -1);
623  AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
624  AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
625  AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
626  AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
627  AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
628  AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
629  }
630  }
631 
// --- write back entropy contexts for the blocks to the right/below ---
632  // FIXME this can probably be optimized
633  memset(&s->above_skip_ctx[col], b->skip, w4);
634  memset(&s->left_skip_ctx[row7], b->skip, h4);
635  memset(&s->above_txfm_ctx[col], b->tx, w4);
636  memset(&s->left_txfm_ctx[row7], b->tx, h4);
637  memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
638  memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
639  if (!s->keyframe && !s->intraonly) {
640  memset(&s->above_intra_ctx[col], b->intra, w4);
641  memset(&s->left_intra_ctx[row7], b->intra, h4);
642  memset(&s->above_comp_ctx[col], b->comp, w4);
643  memset(&s->left_comp_ctx[row7], b->comp, h4);
644  memset(&s->above_mode_ctx[col], b->mode[3], w4);
645  memset(&s->left_mode_ctx[row7], b->mode[3], h4);
646  if (s->filtermode == FILTER_SWITCHABLE && !b->intra) {
647  memset(&s->above_filter_ctx[col], b->filter, w4);
648  memset(&s->left_filter_ctx[row7], b->filter, h4);
// [review] scrape gap: original line 649 missing (closing brace of the
// filtermode if-block above).
650  }
651  if (b->bs > BS_8x8) {
652  int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
653 
654  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
655  AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
656  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
657  AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
658  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
659  AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
660  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
661  AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
662  } else {
663  int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
664 
665  for (n = 0; n < w4 * 2; n++) {
666  AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
667  AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
668  }
669  for (n = 0; n < h4 * 2; n++) {
670  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
671  AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
672  }
673  }
674 
675  if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
676  // as a direct check in above branches
677  int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
678 
679  memset(&s->above_ref_ctx[col], vref, w4);
680  memset(&s->left_ref_ctx[row7], vref, h4);
681  }
682  }
683 
// --- write refs/MVs into the frame-wide MV map used by later frames ---
684  // FIXME kinda ugly
685  for (y = 0; y < h4; y++) {
686  int x, o = (row + y) * s->sb_cols * 8 + col;
687 
688  if (b->intra) {
689  for (x = 0; x < w4; x++) {
690  s->mv[0][o + x].ref[0] =
691  s->mv[0][o + x].ref[1] = -1;
692  }
693  } else if (b->comp) {
694  for (x = 0; x < w4; x++) {
695  s->mv[0][o + x].ref[0] = b->ref[0];
696  s->mv[0][o + x].ref[1] = b->ref[1];
697  AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
698  AV_COPY32(&s->mv[0][o + x].mv[1], &b->mv[3][1]);
699  }
700  } else {
701  for (x = 0; x < w4; x++) {
702  s->mv[0][o + x].ref[0] = b->ref[0];
703  s->mv[0][o + x].ref[1] = -1;
704  AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
705  }
706  }
707  }
708 }
709 
710 // FIXME remove tx argument, and merge cnt/eob arguments?
/**
 * Decode the coefficient tokens of a single transform block.
 *
 * @param c       range coder to read from
 * @param coef    output array of dequantized coefficients (indexed by scan
 *                position rc)
 * @param n_coeffs total number of coefficients in this transform size
 * @param tx      transform size (TX_32X32 gets an extra /2 on dequant)
 * @param cnt     token count stats [band][nnz-ctx][token-class], updated here
 * @param eob     end-of-block count stats [band][nnz-ctx][bit], updated here
 * @param p       token probabilities [band][nnz-ctx][token]; entries 3..10
 *                are lazily filled from ff_vp9_model_pareto8 on first use
 * @param nnz     initial nonzero context (from above+left)
 * @param scan    scan-order table mapping token index -> coefficient position
 * @param nb      per-position pair of already-scanned neighbour positions,
 *                used to derive the next nonzero context
 * @param band_counts number of coefficients in each probability band
 * @param qmul    dequant factors: qmul[0] for the DC (i == 0), qmul[1] for AC
 * @return number of decoded coefficients (the end-of-block position)
 */
711 static int decode_block_coeffs(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
712  enum TxfmMode tx, unsigned (*cnt)[6][3],
713  unsigned (*eob)[6][2], uint8_t(*p)[6][11],
714  int nnz, const int16_t *scan,
715  const int16_t(*nb)[2],
716  const int16_t *band_counts, const int16_t *qmul)
717 {
718  int i = 0, band = 0, band_left = band_counts[band];
719  uint8_t *tp = p[0][nnz];
// cache[] records a small magnitude class (0..5) per coefficient position;
// it is read back through nb[] to compute the nonzero context for the next
// token. NOTE(review): cache is not zero-initialized — this assumes nb[i]
// only ever references positions already written by the scan; confirm
// against the scan/neighbour tables in vp9data.
720  uint8_t cache[1024];
721 
722  do {
723  int val, rc;
724 
725  val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
726  eob[band][nnz][val]++;
727  if (!val)
728  break;
729 
// Zero-run loop: consecutive zero coefficients skip the EOB check.
730 skip_eob:
731  if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
732  cnt[band][nnz][0]++;
733  if (!--band_left)
734  band_left = band_counts[++band];
735  cache[scan[i]] = 0;
736  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
737  tp = p[band][nnz];
738  if (++i == n_coeffs)
739  break; //invalid input; blocks should end with EOB
740  goto skip_eob;
741  }
742 
743  rc = scan[i];
744  if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
745  cnt[band][nnz][1]++;
746  val = 1;
747  cache[rc] = 1;
748  } else {
749  // fill in p[3-10] (model fill) - only once per frame for each pos
750  if (!tp[3])
751  memcpy(&tp[3], ff_vp9_model_pareto8[tp[2]], 8);
752 
753  cnt[band][nnz][2]++;
754  if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
755  if (!vp56_rac_get_prob_branchy(c, tp[4])) {
756  cache[rc] = val = 2;
757  } else {
758  val = 3 + vp56_rac_get_prob(c, tp[5]);
759  cache[rc] = 3;
760  }
761  } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
762  cache[rc] = 4;
763  if (!vp56_rac_get_prob_branchy(c, tp[7])) {
// cat1: 5..6
764  val = vp56_rac_get_prob(c, 159) + 5;
765  } else {
// cat2: 7..10
766  val = (vp56_rac_get_prob(c, 165) << 1) + 7;
767  val += vp56_rac_get_prob(c, 145);
768  }
769  } else { // cat 3-6
770  cache[rc] = 5;
771  if (!vp56_rac_get_prob_branchy(c, tp[8])) {
772  if (!vp56_rac_get_prob_branchy(c, tp[9])) {
// cat3: 11..18
773  val = (vp56_rac_get_prob(c, 173) << 2) + 11;
774  val += (vp56_rac_get_prob(c, 148) << 1);
775  val += vp56_rac_get_prob(c, 140);
776  } else {
// cat4: 19..34
777  val = (vp56_rac_get_prob(c, 176) << 3) + 19;
778  val += (vp56_rac_get_prob(c, 155) << 2);
779  val += (vp56_rac_get_prob(c, 140) << 1);
780  val += vp56_rac_get_prob(c, 135);
781  }
782  } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
// cat5: 35..66
783  val = (vp56_rac_get_prob(c, 180) << 4) + 35;
784  val += (vp56_rac_get_prob(c, 157) << 3);
785  val += (vp56_rac_get_prob(c, 141) << 2);
786  val += (vp56_rac_get_prob(c, 134) << 1);
787  val += vp56_rac_get_prob(c, 130);
788  } else {
// cat6: 67.. (14 extra bits)
789  val = (vp56_rac_get_prob(c, 254) << 13) + 67;
790  val += (vp56_rac_get_prob(c, 254) << 12);
791  val += (vp56_rac_get_prob(c, 254) << 11);
792  val += (vp56_rac_get_prob(c, 252) << 10);
793  val += (vp56_rac_get_prob(c, 249) << 9);
794  val += (vp56_rac_get_prob(c, 243) << 8);
795  val += (vp56_rac_get_prob(c, 230) << 7);
796  val += (vp56_rac_get_prob(c, 196) << 6);
797  val += (vp56_rac_get_prob(c, 177) << 5);
798  val += (vp56_rac_get_prob(c, 153) << 4);
799  val += (vp56_rac_get_prob(c, 140) << 3);
800  val += (vp56_rac_get_prob(c, 133) << 2);
801  val += (vp56_rac_get_prob(c, 130) << 1);
802  val += vp56_rac_get_prob(c, 129);
803  }
804  }
805  }
806  if (!--band_left)
807  band_left = band_counts[++band];
// Sign bit, dequant (DC uses qmul[0], AC qmul[1]); 32x32 halves the result.
808  if (tx == TX_32X32) // FIXME slow
809  coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
810  else
811  coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
812  nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
813  tp = p[band][nnz];
814  } while (++i < n_coeffs);
815 
816  return i;
817 }
818 
/**
 * Decode all coefficients of the current block (luma, then both chroma
 * planes), updating the above/left nonzero-context arrays and per-block
 * EOB arrays (s->eob, s->uveob) as it goes.
 *
 * @return 0 on success, a negative value propagated from
 *         decode_block_coeffs() on error.
 */
819 static int decode_coeffs(AVCodecContext *avctx)
820 {
821  VP9Context *s = avctx->priv_data;
822  VP9Block *const b = &s->b;
823  int row = b->row, col = b->col;
// Probability/count tables are selected per tx size, plane (y/uv) and
// intra/inter.
824  uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
825  unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
826  unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
// Dimensions in 4x4 units, clamped to the visible frame area.
827  int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
828  int end_x = FFMIN(2 * (s->cols - col), w4);
829  int end_y = FFMIN(2 * (s->rows - row), h4);
// step1d: transform size in 4x4 units along one axis; step: per-transform
// coefficient-group advance.
830  int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
831  int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), ret;
832  int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// Lossless mode selects the WHT scan tables (offset by 4).
833  int tx = 4 * s->lossless + b->tx;
834  const int16_t **yscans = ff_vp9_scans[tx];
835  const int16_t (**ynbs)[2] = ff_vp9_scans_nb[tx];
836  const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
837  const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
838  uint8_t *a = &s->above_y_nnz_ctx[col * 2];
839  uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// Coefficient counts per probability band, per tx size; the trailing 0
// terminates the band walk in decode_block_coeffs().
840  static const int16_t band_counts[4][8] = {
841  { 1, 2, 3, 4, 3, 16 - 13, 0 },
842  { 1, 2, 3, 4, 11, 64 - 21, 0 },
843  { 1, 2, 3, 4, 11, 256 - 21, 0 },
844  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
845  };
846  const int16_t *y_band_counts = band_counts[b->tx];
847  const int16_t *uv_band_counts = band_counts[b->uvtx];
848 
849  /* y tokens */
// For tx > 4x4, fold the 4x4-granular nnz contexts of each transform into
// its first entry before decoding...
850  if (b->tx > TX_4X4) { // FIXME slow
851  for (y = 0; y < end_y; y += step1d)
852  for (x = 1; x < step1d; x++)
853  l[y] |= l[y + x];
854  for (x = 0; x < end_x; x += step1d)
855  for (y = 1; y < step1d; y++)
856  a[x] |= a[x + y];
857  }
858  for (n = 0, y = 0; y < end_y; y += step1d) {
859  for (x = 0; x < end_x; x += step1d, n += step) {
// Intra 4x4 sub-8x8 blocks pick the txfm type from the per-sub-block mode.
860  enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
861  b->bs > BS_8x8 ?
862  n : 0]];
863  int nnz = a[x] + l[y];
864  if ((ret = decode_block_coeffs(&s->c, s->block + 16 * n, 16 * step,
865  b->tx, c, e, p, nnz, yscans[txtp],
866  ynbs[txtp], y_band_counts,
867  qmul[0])) < 0)
868  return ret;
869  a[x] = l[y] = !!ret;
// EOB is stored 16-bit for large transforms (can exceed 255).
870  if (b->tx > TX_8X8)
871  AV_WN16A(&s->eob[n], ret);
872  else
873  s->eob[n] = ret;
874  }
875  }
// ...then replicate the resulting context across the covered 4x4 units.
876  if (b->tx > TX_4X4) { // FIXME slow
877  for (y = 0; y < end_y; y += step1d)
878  memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, step1d - 1));
879  for (x = 0; x < end_x; x += step1d)
880  memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, step1d - 1));
881  }
882 
// /* uv tokens */ — same procedure with halved dimensions, per plane.
883  p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
884  c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
885  e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
886  w4 >>= 1;
887  h4 >>= 1;
888  end_x >>= 1;
889  end_y >>= 1;
890  for (pl = 0; pl < 2; pl++) {
891  a = &s->above_uv_nnz_ctx[pl][col];
892  l = &s->left_uv_nnz_ctx[pl][row & 7];
893  if (b->uvtx > TX_4X4) { // FIXME slow
894  for (y = 0; y < end_y; y += uvstep1d)
895  for (x = 1; x < uvstep1d; x++)
896  l[y] |= l[y + x];
897  for (x = 0; x < end_x; x += uvstep1d)
898  for (y = 1; y < uvstep1d; y++)
899  a[x] |= a[x + y];
900  }
901  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
902  for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
903  int nnz = a[x] + l[y];
904  if ((ret = decode_block_coeffs(&s->c, s->uvblock[pl] + 16 * n,
905  16 * uvstep, b->uvtx, c, e, p,
906  nnz, uvscan, uvnb,
907  uv_band_counts, qmul[1])) < 0)
908  return ret;
909  a[x] = l[y] = !!ret;
910  if (b->uvtx > TX_8X8)
911  AV_WN16A(&s->uveob[pl][n], ret);
912  else
913  s->uveob[pl][n] = ret;
914  }
915  }
916  if (b->uvtx > TX_4X4) { // FIXME slow
917  for (y = 0; y < end_y; y += uvstep1d)
918  memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, uvstep1d - 1));
919  for (x = 0; x < end_x; x += uvstep1d)
920  memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, uvstep1d - 1));
921  }
922  }
923 
924  return 0;
925 }
926 
928  uint8_t **a,
929  uint8_t *dst_edge,
930  ptrdiff_t stride_edge,
931  uint8_t *dst_inner,
932  ptrdiff_t stride_inner,
933  uint8_t *l, int col, int x, int w,
934  int row, int y, enum TxfmMode tx,
935  int p)
936 {
937  int have_top = row > 0 || y > 0;
938  int have_left = col > s->tiling.tile_col_start || x > 0;
939  int have_right = x < w - 1;
940  static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
941  [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
942  { DC_127_PRED, VERT_PRED } },
943  [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
944  { HOR_PRED, HOR_PRED } },
945  [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
946  { LEFT_DC_PRED, DC_PRED } },
958  { HOR_UP_PRED, HOR_UP_PRED } },
959  [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
960  { HOR_PRED, TM_VP8_PRED } },
961  };
962  static const struct {
963  uint8_t needs_left:1;
964  uint8_t needs_top:1;
965  uint8_t needs_topleft:1;
966  uint8_t needs_topright:1;
967  } edges[N_INTRA_PRED_MODES] = {
968  [VERT_PRED] = { .needs_top = 1 },
969  [HOR_PRED] = { .needs_left = 1 },
970  [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
971  [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
972  [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
973  .needs_topleft = 1 },
974  [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
975  .needs_topleft = 1 },
976  [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1,
977  .needs_topleft = 1 },
978  [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
979  [HOR_UP_PRED] = { .needs_left = 1 },
980  [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1,
981  .needs_topleft = 1 },
982  [LEFT_DC_PRED] = { .needs_left = 1 },
983  [TOP_DC_PRED] = { .needs_top = 1 },
984  [DC_128_PRED] = { 0 },
985  [DC_127_PRED] = { 0 },
986  [DC_129_PRED] = { 0 }
987  };
988 
989  av_assert2(mode >= 0 && mode < 10);
990  mode = mode_conv[mode][have_left][have_top];
991  if (edges[mode].needs_top) {
992  uint8_t *top = NULL, *topleft = NULL;
993  int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
994  int n_px_need_tr = 0;
995 
996  if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
997  n_px_need_tr = 4;
998 
999  // if top of sb64-row, use s->intra_pred_data[] instead of
1000  // dst[-stride] for intra prediction (it contains pre- instead of
1001  // post-loopfilter data)
1002  if (have_top) {
1003  top = !(row & 7) && !y ?
1004  s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
1005  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
1006  if (have_left)
1007  topleft = !(row & 7) && !y ?
1008  s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
1009  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
1010  &dst_inner[-stride_inner];
1011  }
1012 
1013  if (have_top &&
1014  (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
1015  (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
1016  n_px_need + n_px_need_tr <= n_px_have) {
1017  *a = top;
1018  } else {
1019  if (have_top) {
1020  if (n_px_need <= n_px_have) {
1021  memcpy(*a, top, n_px_need);
1022  } else {
1023  memcpy(*a, top, n_px_have);
1024  memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
1025  n_px_need - n_px_have);
1026  }
1027  } else {
1028  memset(*a, 127, n_px_need);
1029  }
1030  if (edges[mode].needs_topleft) {
1031  if (have_left && have_top)
1032  (*a)[-1] = topleft[-1];
1033  else
1034  (*a)[-1] = have_top ? 129 : 127;
1035  }
1036  if (tx == TX_4X4 && edges[mode].needs_topright) {
1037  if (have_top && have_right &&
1038  n_px_need + n_px_need_tr <= n_px_have) {
1039  memcpy(&(*a)[4], &top[4], 4);
1040  } else {
1041  memset(&(*a)[4], (*a)[3], 4);
1042  }
1043  }
1044  }
1045  }
1046  if (edges[mode].needs_left) {
1047  if (have_left) {
1048  int i;
1049  int n_px_need = 4 << tx;
1050  int n_px_have = (((s->rows - row) << !p) - y) * 4;
1051  uint8_t *dst = x == 0 ? dst_edge : dst_inner;
1052  ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
1053 
1054  if (n_px_need <= n_px_have) {
1055  for (i = 0; i < n_px_need; i++)
1056  l[i] = dst[i * stride - 1];
1057  } else {
1058  for (i = 0; i < n_px_have; i++)
1059  l[i] = dst[i * stride - 1];
1060  memset(&l[i], l[i - 1], n_px_need - n_px_have);
1061  }
1062  } else {
1063  memset(l, 129, 4 << tx);
1064  }
1065  }
1066 
1067  return mode;
1068 }
1069 
// Reconstruct one intra-coded block.
//
// For every luma transform block inside the visible frame area, build the
// edge pixel arrays (top in a[], left in l[]) via check_intra_mode(), run
// the selected intra predictor, and, when the transform block has non-zero
// coefficients (eob != 0), add the inverse transform of the residual.
// The same is then repeated for both chroma planes at half resolution,
// always with the DCT_DCT transform type.
//
// y_off/uv_off: byte offsets of this block into the current frame's
// luma/chroma planes.  b->dst[] may point at temporary buffers when edge
// emulation is active (see ff_vp9_decode_block); ptr_r always addresses
// the real frame, which check_intra_mode needs for edge pixels.
1070 static void intra_recon(AVCodecContext *avctx, ptrdiff_t y_off, ptrdiff_t uv_off)
1071 {
1072  VP9Context *s = avctx->priv_data;
1073  VP9Block *const b = &s->b;
1074  int row = b->row, col = b->col;
// w4/h4: block size in 4px units; step1d: transform size in 4px units;
// step: number of 4x4 coefficient units covered by one transform block
1075  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
1076  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// clip the loops to the visible part of the frame
1077  int end_x = FFMIN(2 * (s->cols - col), w4);
1078  int end_y = FFMIN(2 * (s->rows - row), h4);
// lossless mode selects the itxfm_add entries stored after the 4 regular sizes
1079  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
1080  int uvstep1d = 1 << b->uvtx, p;
1081  uint8_t *dst = b->dst[0], *dst_r = s->cur_frame->data[0] + y_off;
1082 
1083  for (n = 0, y = 0; y < end_y; y += step1d) {
1084  uint8_t *ptr = dst, *ptr_r = dst_r;
1085  for (x = 0; x < end_x;
1086  x += step1d, ptr += 4 * step1d, ptr_r += 4 * step1d, n += step) {
// sub-8x8 blocks with 4x4 transforms carry one mode per 4x4 unit;
// otherwise a single mode covers the whole block
1087  int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
1088  y * 2 + x : 0];
1089  LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
1090  uint8_t *a = &a_buf[16], l[32];
1091  enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
// eob is stored per 4x4 unit; transforms larger than 8x8 use 16-bit counts
1092  int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
1093 
// fix up edge pixels (and possibly the prediction mode) at frame borders
1094  mode = check_intra_mode(s, mode, &a, ptr_r,
1095  s->cur_frame->linesize[0],
1096  ptr, b->y_stride, l,
1097  col, x, w4, row, y, b->tx, 0);
1098  s->dsp.intra_pred[b->tx][mode](ptr, b->y_stride, l, a);
1099  if (eob)
1100  s->dsp.itxfm_add[tx][txtp](ptr, b->y_stride,
1101  s->block + 16 * n, eob);
1102  }
1103  dst_r += 4 * s->cur_frame->linesize[0] * step1d;
1104  dst += 4 * b->y_stride * step1d;
1105  }
1106 
1107  // U/V
// chroma is subsampled: halve all dimensions and repeat the same loop
// for both chroma planes
1108  h4 >>= 1;
1109  w4 >>= 1;
1110  end_x >>= 1;
1111  end_y >>= 1;
1112  step = 1 << (b->uvtx * 2);
1113  for (p = 0; p < 2; p++) {
1114  dst = b->dst[1 + p];
1115  dst_r = s->cur_frame->data[1 + p] + uv_off;
1116  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
1117  uint8_t *ptr = dst, *ptr_r = dst_r;
1118  for (x = 0; x < end_x;
1119  x += uvstep1d, ptr += 4 * uvstep1d,
1120  ptr_r += 4 * uvstep1d, n += step) {
1121  int mode = b->uvmode;
1122  LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
1123  uint8_t *a = &a_buf[16], l[32];
1124  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n])
1125  : s->uveob[p][n];
1126 
1127  mode = check_intra_mode(s, mode, &a, ptr_r,
1128  s->cur_frame->linesize[1],
1129  ptr, b->uv_stride, l,
1130  col, x, w4, row, y, b->uvtx, p + 1);
1131  s->dsp.intra_pred[b->uvtx][mode](ptr, b->uv_stride, l, a);
1132  if (eob)
1133  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
1134  s->uvblock[p] + 16 * n,
1135  eob);
1136  }
1137  dst_r += 4 * uvstep1d * s->cur_frame->linesize[1];
1138  dst += 4 * uvstep1d * b->uv_stride;
1139  }
1140  }
1141 }
1142 
1144  uint8_t *dst, ptrdiff_t dst_stride,
1145  const uint8_t *ref,
1146  ptrdiff_t ref_stride,
1147  ptrdiff_t y, ptrdiff_t x,
1148  const VP56mv *mv,
1149  int bw, int bh, int w, int h)
1150 {
1151  int mx = mv->x, my = mv->y;
1152 
1153  y += my >> 3;
1154  x += mx >> 3;
1155  ref += y * ref_stride + x;
1156  mx &= 7;
1157  my &= 7;
1158  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
1159  if (x < !!mx * 3 || y < !!my * 3 ||
1160  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
1162  ref - !!my * 3 * ref_stride - !!mx * 3,
1163  80,
1164  ref_stride,
1165  bw + !!mx * 7, bh + !!my * 7,
1166  x - !!mx * 3, y - !!my * 3, w, h);
1167  ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
1168  ref_stride = 80;
1169  }
1170  mc[!!mx][!!my](dst, ref, dst_stride, ref_stride, bh, mx << 1, my << 1);
1171 }
1172 
1174  uint8_t *dst_u, uint8_t *dst_v,
1175  ptrdiff_t dst_stride,
1176  const uint8_t *ref_u,
1177  ptrdiff_t src_stride_u,
1178  const uint8_t *ref_v,
1179  ptrdiff_t src_stride_v,
1180  ptrdiff_t y, ptrdiff_t x,
1181  const VP56mv *mv,
1182  int bw, int bh, int w, int h)
1183 {
1184  int mx = mv->x, my = mv->y;
1185 
1186  y += my >> 4;
1187  x += mx >> 4;
1188  ref_u += y * src_stride_u + x;
1189  ref_v += y * src_stride_v + x;
1190  mx &= 15;
1191  my &= 15;
1192  // FIXME bilinear filter only needs 0/1 pixels, not 3/4
1193  if (x < !!mx * 3 || y < !!my * 3 ||
1194  x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
1196  ref_u - !!my * 3 * src_stride_u - !!mx * 3,
1197  80,
1198  src_stride_u,
1199  bw + !!mx * 7, bh + !!my * 7,
1200  x - !!mx * 3, y - !!my * 3, w, h);
1201  ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
1202  mc[!!mx][!!my](dst_u, ref_u, dst_stride, 80, bh, mx, my);
1203 
1205  ref_v - !!my * 3 * src_stride_v - !!mx * 3,
1206  80,
1207  src_stride_v,
1208  bw + !!mx * 7, bh + !!my * 7,
1209  x - !!mx * 3, y - !!my * 3, w, h);
1210  ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
1211  mc[!!mx][!!my](dst_v, ref_v, dst_stride, 80, bh, mx, my);
1212  } else {
1213  mc[!!mx][!!my](dst_u, ref_u, dst_stride, src_stride_u, bh, mx, my);
1214  mc[!!mx][!!my](dst_v, ref_v, dst_stride, src_stride_v, bh, mx, my);
1215  }
1216 }
1217 
// Reconstruct one inter-coded block.
//
// Performs motion compensation for luma (one call per sub-block for the
// sub-8x8 sizes BS_8x4/BS_4x8/BS_4x4, otherwise a single call) and for
// chroma (with the MV averaged over the four sub-block MVs when
// bs > BS_8x8), first for reference 0 and then, when b->comp is set, for
// reference 1 (compound prediction).  If the block is not skipped, the
// inverse-transformed residual is then added, as in intra_recon().
//
// Returns 0 on success, or AVERROR_INVALIDDATA if a required reference
// frame has no data.
1218 static int inter_recon(AVCodecContext *avctx)
1219 {
// index into s->dsp.mc[] (MC block-size class) per block size;
// row [0] is used for luma, row [1] for subsampled chroma
1220  static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
1221  { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
1222  { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
1223  };
1224  VP9Context *s = avctx->priv_data;
1225  VP9Block *const b = &s->b;
1226  int row = b->row, col = b->col;
1227  AVFrame *ref1 = s->refs[s->refidx[b->ref[0]]];
1228  AVFrame *ref2 = b->comp ? s->refs[s->refidx[b->ref[1]]] : NULL;
1229  int w = avctx->width, h = avctx->height;
1230  ptrdiff_t ls_y = b->y_stride, ls_uv = b->uv_stride;
1231 
// fail cleanly if the bitstream references a frame we never decoded
1232  if (!ref1->data[0] || (b->comp && !ref2->data[0]))
1233  return AVERROR_INVALIDDATA;
1234 
1235  // y inter pred
1236  if (b->bs > BS_8x8) {
1237  if (b->bs == BS_8x4) {
// two 8x4 halves, each with its own MV (mv[0] top, mv[2] bottom)
1238  mc_luma_dir(s, s->dsp.mc[3][b->filter][0], b->dst[0], ls_y,
1239  ref1->data[0], ref1->linesize[0],
1240  row << 3, col << 3, &b->mv[0][0], 8, 4, w, h);
1241  mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
1242  b->dst[0] + 4 * ls_y, ls_y,
1243  ref1->data[0], ref1->linesize[0],
1244  (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w, h);
1245 
1246  if (b->comp) {
1247  mc_luma_dir(s, s->dsp.mc[3][b->filter][1], b->dst[0], ls_y,
1248  ref2->data[0], ref2->linesize[0],
1249  row << 3, col << 3, &b->mv[0][1], 8, 4, w, h);
1250  mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
1251  b->dst[0] + 4 * ls_y, ls_y,
1252  ref2->data[0], ref2->linesize[0],
1253  (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w, h);
1254  }
1255  } else if (b->bs == BS_4x8) {
// two 4x8 halves (mv[0] left, mv[1] right)
1256  mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
1257  ref1->data[0], ref1->linesize[0],
1258  row << 3, col << 3, &b->mv[0][0], 4, 8, w, h);
1259  mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
1260  ref1->data[0], ref1->linesize[0],
1261  row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w, h);
1262 
1263  if (b->comp) {
1264  mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
1265  ref2->data[0], ref2->linesize[0],
1266  row << 3, col << 3, &b->mv[0][1], 4, 8, w, h);
1267  mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
1268  ref2->data[0], ref2->linesize[0],
1269  row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w, h);
1270  }
1271  } else {
1272  av_assert2(b->bs == BS_4x4);
1273 
1274  // FIXME if two horizontally adjacent blocks have the same MV,
1275  // do a w8 instead of a w4 call
// four 4x4 quadrants, one MV each (0 TL, 1 TR, 2 BL, 3 BR)
1276  mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
1277  ref1->data[0], ref1->linesize[0],
1278  row << 3, col << 3, &b->mv[0][0], 4, 4, w, h);
1279  mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
1280  ref1->data[0], ref1->linesize[0],
1281  row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w, h);
1282  mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
1283  b->dst[0] + 4 * ls_y, ls_y,
1284  ref1->data[0], ref1->linesize[0],
1285  (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w, h);
1286  mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
1287  b->dst[0] + 4 * ls_y + 4, ls_y,
1288  ref1->data[0], ref1->linesize[0],
1289  (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w, h);
1290 
1291  if (b->comp) {
1292  mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
1293  ref2->data[0], ref2->linesize[0],
1294  row << 3, col << 3, &b->mv[0][1], 4, 4, w, h);
1295  mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
1296  ref2->data[0], ref2->linesize[0],
1297  row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w, h);
1298  mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
1299  b->dst[0] + 4 * ls_y, ls_y,
1300  ref2->data[0], ref2->linesize[0],
1301  (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w, h);
1302  mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
1303  b->dst[0] + 4 * ls_y + 4, ls_y,
1304  ref2->data[0], ref2->linesize[0],
1305  (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w, h);
1306  }
1307  }
1308  } else {
// >= 8x8: a single MC call of the block's full size
1309  int bwl = bwlog_tab[0][b->bs];
1310  int bw = bwh_tab[0][b->bs][0] * 4;
1311  int bh = bwh_tab[0][b->bs][1] * 4;
1312 
1313  mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], b->dst[0], ls_y,
1314  ref1->data[0], ref1->linesize[0],
1315  row << 3, col << 3, &b->mv[0][0], bw, bh, w, h);
1316 
1317  if (b->comp)
1318  mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], b->dst[0], ls_y,
1319  ref2->data[0], ref2->linesize[0],
1320  row << 3, col << 3, &b->mv[0][1], bw, bh, w, h);
1321  }
1322 
1323  // uv inter pred
1324  {
1325  int bwl = bwlog_tab[1][b->bs];
1326  int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
1327  VP56mv mvuv;
1328 
// chroma planes are half-size, so halve the clipping bounds
1329  w = (w + 1) >> 1;
1330  h = (h + 1) >> 1;
1331  if (b->bs > BS_8x8) {
// sub-8x8: use the rounded average of the four sub-block MVs
1332  mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x +
1333  b->mv[2][0].x + b->mv[3][0].x, 4);
1334  mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y +
1335  b->mv[2][0].y + b->mv[3][0].y, 4);
1336  } else {
1337  mvuv = b->mv[0][0];
1338  }
1339 
1340  mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
1341  b->dst[1], b->dst[2], ls_uv,
1342  ref1->data[1], ref1->linesize[1],
1343  ref1->data[2], ref1->linesize[2],
1344  row << 2, col << 2, &mvuv, bw, bh, w, h);
1345 
1346  if (b->comp) {
1347  if (b->bs > BS_8x8) {
1348  mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x +
1349  b->mv[2][1].x + b->mv[3][1].x, 4);
1350  mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y +
1351  b->mv[2][1].y + b->mv[3][1].y, 4);
1352  } else {
1353  mvuv = b->mv[0][1];
1354  }
1355  mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
1356  b->dst[1], b->dst[2], ls_uv,
1357  ref2->data[1], ref2->linesize[1],
1358  ref2->data[2], ref2->linesize[2],
1359  row << 2, col << 2, &mvuv, bw, bh, w, h);
1360  }
1361  }
1362 
1363  if (!b->skip) {
1364  /* mostly copied from intra_recon() */
1365 
1366  int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
1367  int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
1368  int end_x = FFMIN(2 * (s->cols - col), w4);
1369  int end_y = FFMIN(2 * (s->rows - row), h4);
1370  int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
1371  int uvstep1d = 1 << b->uvtx, p;
1372  uint8_t *dst = b->dst[0];
1373 
1374  // y itxfm add
1375  for (n = 0, y = 0; y < end_y; y += step1d) {
1376  uint8_t *ptr = dst;
1377  for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
1378  int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
1379 
// inter residuals always use DCT_DCT (no mode-dependent txfm type)
1380  if (eob)
1381  s->dsp.itxfm_add[tx][DCT_DCT](ptr, b->y_stride,
1382  s->block + 16 * n, eob);
1383  }
1384  dst += 4 * b->y_stride * step1d;
1385  }
1386 
1387  // uv itxfm add
1388  h4 >>= 1;
1389  w4 >>= 1;
1390  end_x >>= 1;
1391  end_y >>= 1;
1392  step = 1 << (b->uvtx * 2);
1393  for (p = 0; p < 2; p++) {
1394  dst = b->dst[p + 1];
1395  for (n = 0, y = 0; y < end_y; y += uvstep1d) {
1396  uint8_t *ptr = dst;
1397  for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
1398  int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n])
1399  : s->uveob[p][n];
1400  if (eob)
1401  s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
1402  s->uvblock[p] + 16 * n, eob);
1403  }
1404  dst += 4 * uvstep1d * b->uv_stride;
1405  }
1406  }
1407  }
1408  return 0;
1409 }
1410 
// Record which edges of the current block need loopfiltering, and at which
// filter width, into lflvl->mask[is_uv][dir][y][mask_id] (dir 0 = row
// edges, dir 1 = col edges, per the inline comments below; a smaller
// mask_id selects a wider filter -- index 0 is only set for tx > TX_8X8,
// index 1 for 8px, index 2 for 4px edges).
//
// row_and_7/col_and_7: the block's position within its 64x64 superblock.
// w/h: block extent in the same units as the bit positions of the masks
// (one bit per 8px column/row for luma -- see t = 1 << col_and_7 below).
// col_end/row_end: non-zero when the superblock is clipped at the
// right/bottom frame edge (passed by ff_vp9_decode_block for UV only).
// skip_inter: set for skipped inter blocks, in which case only the block's
// outer edges -- not transform-internal edges -- are marked.
1411 static av_always_inline void mask_edges(VP9Filter *lflvl, int is_uv,
1412  int row_and_7, int col_and_7,
1413  int w, int h, int col_end, int row_end,
1414  enum TxfmMode tx, int skip_inter)
1415 {
1416  // FIXME I'm pretty sure all loops can be replaced by a single LUT if
1417  // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
1418  // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
1419  // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
1420 
1421  // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
1422  // edges. This means that for UV, we work on two subsampled blocks at
1423  // a time, and we only use the topleft block's mode information to set
1424  // things like block strength. Thus, for any block size smaller than
1425  // 16x16, ignore the odd portion of the block.
1426  if (tx == TX_4X4 && is_uv) {
1427  if (h == 1) {
1428  if (row_and_7 & 1)
1429  return;
1430  if (!row_end)
1431  h += 1;
1432  }
1433  if (w == 1) {
1434  if (col_and_7 & 1)
1435  return;
1436  if (!col_end)
1437  w += 1;
1438  }
1439  }
1440 
1441  if (tx == TX_4X4 && !skip_inter) {
// t: bit for this block's first column; m_col: bits for all its columns
1442  int t = 1 << col_and_7, m_col = (t << w) - t, y;
1443  int m_col_odd = (t << (w - 1)) - t;
1444 
1445  // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
1446  if (is_uv) {
1447  int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
1448 
1449  for (y = row_and_7; y < h + row_and_7; y++) {
1450  int col_mask_id = 2 - !(y & 7);
1451 
1452  lflvl->mask[is_uv][0][y][1] |= m_row_8;
1453  lflvl->mask[is_uv][0][y][2] |= m_row_4;
1454  // for odd lines, if the odd col is not being filtered,
1455  // skip odd row also:
1456  // .---. <-- a
1457  // | |
1458  // |___| <-- b
1459  // ^ ^
1460  // c d
1461  //
1462  // if a/c are even row/col and b/d are odd, and d is skipped,
1463  // e.g. right edge of size-66x66.webm, then skip b also (bug)
1464  if ((col_end & 1) && (y & 1)) {
1465  lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
1466  } else {
1467  lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
1468  }
1469  }
1470  } else {
1471  int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
1472 
1473  for (y = row_and_7; y < h + row_and_7; y++) {
1474  int col_mask_id = 2 - !(y & 3);
1475 
1476  lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
1477  lflvl->mask[is_uv][0][y][2] |= m_row_4;
1478  lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
1479  lflvl->mask[is_uv][0][y][3] |= m_col;
1480  lflvl->mask[is_uv][1][y][3] |= m_col;
1481  }
1482  }
1483  } else {
1484  int y, t = 1 << col_and_7, m_col = (t << w) - t;
1485 
1486  if (!skip_inter) {
1487  int mask_id = (tx == TX_8X8);
1488  int l2 = tx + is_uv - 1, step1d = 1 << l2;
1489  static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
// keep only the bits that fall on transform-size-aligned columns
1490  int m_row = m_col & masks[l2];
1491 
1492  // at odd UV col/row edges tx16/tx32 loopfilter edges, force
1493  // 8wd loopfilter to prevent going off the visible edge.
1494  if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
1495  int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
1496  int m_row_8 = m_row - m_row_16;
1497 
1498  for (y = row_and_7; y < h + row_and_7; y++) {
1499  lflvl->mask[is_uv][0][y][0] |= m_row_16;
1500  lflvl->mask[is_uv][0][y][1] |= m_row_8;
1501  }
1502  } else {
1503  for (y = row_and_7; y < h + row_and_7; y++)
1504  lflvl->mask[is_uv][0][y][mask_id] |= m_row;
1505  }
1506 
1507  if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
1508  for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
1509  lflvl->mask[is_uv][1][y][0] |= m_col;
// last row of an odd-height block: demote to the 8px filter
1510  if (y - row_and_7 == h - 1)
1511  lflvl->mask[is_uv][1][y][1] |= m_col;
1512  } else {
1513  for (y = row_and_7; y < h + row_and_7; y += step1d)
1514  lflvl->mask[is_uv][1][y][mask_id] |= m_col;
1515  }
1516  }
1517  } else if (tx != TX_4X4) {
// skipped inter block, tx >= 8x8: only the block's outer edges
1518  int mask_id;
1519 
1520  mask_id = (tx == TX_8X8) || (is_uv && h == 1);
1521  lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
1522  mask_id = (tx == TX_8X8) || (is_uv && w == 1);
1523  for (y = row_and_7; y < h + row_and_7; y++)
1524  lflvl->mask[is_uv][0][y][mask_id] |= t;
1525  } else if (is_uv) {
// skipped inter block, UV, tx 4x4: outer edges only
1526  int t8 = t & 0x01, t4 = t - t8;
1527 
1528  for (y = row_and_7; y < h + row_and_7; y++) {
1529  lflvl->mask[is_uv][0][y][2] |= t4;
1530  lflvl->mask[is_uv][0][y][1] |= t8;
1531  }
1532  lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
1533  } else {
// skipped inter block, luma, tx 4x4: outer edges only
1534  int t8 = t & 0x11, t4 = t - t8;
1535 
1536  for (y = row_and_7; y < h + row_and_7; y++) {
1537  lflvl->mask[is_uv][0][y][2] |= t4;
1538  lflvl->mask[is_uv][0][y][1] |= t8;
1539  }
1540  lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
1541  }
1542  }
1543 }
1543 
// Decode and reconstruct a single leaf block of the partition tree.
//
// Steps: set the block position and MV clamping range, decode the block
// mode (decode_mode), decode residual coefficients unless the block is
// skipped (in which case the non-zero-coefficient contexts are cleared),
// select destination pointers -- falling back to the temporary buffers
// s->tmp_y / s->tmp_uv when the block overhangs the frame stride ("emu") --
// run intra or inter reconstruction, copy any emulated overhang back into
// the frame, and finally compute the loopfilter level and edge masks.
//
// row/col: block position (in 8px units, matching s->rows/s->cols);
// yoff/uvoff: byte offsets of the block into the frame planes;
// bl/bp: partition-tree level and partition, combined into the block size.
// Returns 0 on success or a negative AVERROR from coefficient decoding or
// inter reconstruction.
1544 int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
1545  VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
1546  enum BlockLevel bl, enum BlockPartition bp)
1547 {
1548  VP9Context *s = avctx->priv_data;
1549  VP9Block *const b = &s->b;
1550  enum BlockSize bs = bl * 3 + bp;
1551  int ret, y, w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
1552  int emu[2];
1553 
1554  b->row = row;
1555  b->row7 = row & 7;
1556  b->col = col;
1557  b->col7 = col & 7;
1558 
// MV range (1/8-pel) keeping the prediction within 128px of the frame
1559  s->min_mv.x = -(128 + col * 64);
1560  s->min_mv.y = -(128 + row * 64);
1561  s->max_mv.x = 128 + (s->cols - col - w4) * 64;
1562  s->max_mv.y = 128 + (s->rows - row - h4) * 64;
1563 
1564  b->bs = bs;
1565  decode_mode(s, b);
// shrink the chroma tx size by one step when the luma tx size spans the
// full block width or height
1566  b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
1567 
1568  if (!b->skip) {
1569  if ((ret = decode_coeffs(avctx)) < 0)
1570  return ret;
1571  } else {
// skipped block: no coefficients, so zero the nnz contexts directly
1572  int pl;
1573 
1574  memset(&s->above_y_nnz_ctx[col * 2], 0, w4 * 2);
1575  memset(&s->left_y_nnz_ctx[(row & 7) << 1], 0, h4 * 2);
1576  for (pl = 0; pl < 2; pl++) {
1577  memset(&s->above_uv_nnz_ctx[pl][col], 0, w4);
1578  memset(&s->left_uv_nnz_ctx[pl][row & 7], 0, h4);
1579  }
1580  }
1581 
1582  /* Emulated overhangs if the stride of the target buffer can't hold.
1583  * This allows to support emu-edge and so on even if we have large
1584  * block overhangs. */
1585  emu[0] = (col + w4) * 8 > s->cur_frame->linesize[0] ||
1586  (row + h4) > s->rows;
1587  emu[1] = (col + w4) * 4 > s->cur_frame->linesize[1] ||
1588  (row + h4) > s->rows;
1589  if (emu[0]) {
// reconstruct into the 64x64 scratch buffer instead of the frame
1590  b->dst[0] = s->tmp_y;
1591  b->y_stride = 64;
1592  } else {
1593  b->dst[0] = s->cur_frame->data[0] + yoff;
1594  b->y_stride = s->cur_frame->linesize[0];
1595  }
1596  if (emu[1]) {
1597  b->dst[1] = s->tmp_uv[0];
1598  b->dst[2] = s->tmp_uv[1];
1599  b->uv_stride = 32;
1600  } else {
1601  b->dst[1] = s->cur_frame->data[1] + uvoff;
1602  b->dst[2] = s->cur_frame->data[2] + uvoff;
1603  b->uv_stride = s->cur_frame->linesize[1];
1604  }
1605  if (b->intra) {
1606  intra_recon(avctx, yoff, uvoff);
1607  } else {
1608  if ((ret = inter_recon(avctx)) < 0)
1609  return ret;
1610  }
// copy the visible part back from the scratch buffers to the frame,
// in power-of-two-wide strips (64/32/16/8/4 for luma)
1611  if (emu[0]) {
1612  int w = FFMIN(s->cols - col, w4) * 8;
1613  int h = FFMIN(s->rows - row, h4) * 8;
1614  int n, o = 0;
1615 
1616  for (n = 0; o < w; n++) {
1617  int bw = 64 >> n;
1618 
1619  av_assert2(n <= 4);
1620  if (w & bw) {
// mc[n][0][0][0][0] is the plain (no-subpel) copy for width 64 >> n
1621  s->dsp.mc[n][0][0][0][0](s->cur_frame->data[0] + yoff + o,
1622  s->tmp_y + o,
1623  s->cur_frame->linesize[0],
1624  64, h, 0, 0);
1625  o += bw;
1626  }
1627  }
1628  }
1629  if (emu[1]) {
1630  int w = FFMIN(s->cols - col, w4) * 4;
1631  int h = FFMIN(s->rows - row, h4) * 4;
1632  int n, o = 0;
1633 
// chroma blocks are at most 32 wide, so start at n = 1 (width 32)
1634  for (n = 1; o < w; n++) {
1635  int bw = 64 >> n;
1636 
1637  av_assert2(n <= 4);
1638  if (w & bw) {
1639  s->dsp.mc[n][0][0][0][0](s->cur_frame->data[1] + uvoff + o,
1640  s->tmp_uv[0] + o,
1641  s->cur_frame->linesize[1],
1642  32, h, 0, 0);
1643  s->dsp.mc[n][0][0][0][0](s->cur_frame->data[2] + uvoff + o,
1644  s->tmp_uv[1] + o,
1645  s->cur_frame->linesize[2],
1646  32, h, 0, 0);
1647  o += bw;
1648  }
1649  }
1650  }
1651 
1652  // pick filter level and find edges to apply filter to
1653  if (s->filter.level &&
1654  (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
1655  [b->mode[3] != ZEROMV]) > 0) {
1656  int x_end = FFMIN(s->cols - col, w4);
1657  int y_end = FFMIN(s->rows - row, h4);
1658  int skip_inter = !b->intra && b->skip;
1659 
1660  for (y = 0; y < h4; y++)
1661  memset(&lflvl->level[((row & 7) + y) * 8 + (col & 7)], lvl, w4);
1662  mask_edges(lflvl, 0, row & 7, col & 7, x_end, y_end, 0, 0, b->tx, skip_inter);
1663  mask_edges(lflvl, 1, row & 7, col & 7, x_end, y_end,
1664  s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
1665  s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
1666  b->uvtx, skip_inter);
1667 
// lazily fill the filter-limit LUT entries for this filter level
1668  if (!s->filter.lim_lut[lvl]) {
1669  int sharp = s->filter.sharpness;
1670  int limit = lvl;
1671 
1672  if (sharp > 0) {
1673  limit >>= (sharp + 3) >> 2;
1674  limit = FFMIN(limit, 9 - sharp);
1675  }
1676  limit = FFMAX(limit, 1);
1677 
1678  s->filter.lim_lut[lvl] = limit;
1679  s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
1680  }
1681  }
1682 
1683  return 0;
1684 }
Definition: vp9.h:88
int col7
Definition: vp9.h:259
Definition: vp9.h:55
uint8_t lossless
Definition: vp9.h:310
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:54
static void decode_mode(VP9Context *s, VP9Block *const b)
Definition: vp9block.c:45
VP56mv min_mv
Definition: vp9.h:402
This structure describes decoded (raw) audio or video data.
Definition: frame.h:135
int row7
Definition: vp9.h:259
unsigned comp_ref[5][2]
Definition: vp9.h:352
uint8_t mblim_lut[64]
Definition: vp9.h:301
uint8_t left_segpred_ctx[8]
Definition: vp9.h:383
VP5 and VP6 compatible video decoder (common features)
struct VP9Context::@72 filter
static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a, uint8_t *dst_edge, ptrdiff_t stride_edge, uint8_t *dst_inner, ptrdiff_t stride_inner, uint8_t *l, int col, int x, int w, int row, int y, enum TxfmMode tx, int p)
Definition: vp9block.c:927
uint8_t * above_y_nnz_ctx
Definition: vp9.h:379
struct VP9Context::@74 segmentation
static av_always_inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t(*tree)[2], const uint8_t *probs)
Definition: vp56.h:386
VideoDSPContext vdsp
Definition: vp9.h:266
BlockPartition
Definition: vp9.h:80
ProbContext p
Definition: vp9.h:335
int16_t uvblock[2][1024]
Definition: vp9.h:399
uint8_t tx32p[2][3]
Definition: vp9.h:110
uint8_t left_uv_nnz_ctx[2][8]
Definition: vp9.h:380
unsigned skip[3][2]
Definition: vp9.h:356
BlockLevel
Definition: vp9.h:228
int row
Definition: vp9.h:259
ptrdiff_t y_stride
Definition: vp9.h:261
void(* intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
Definition: vp9.h:145
Definition: vp9.h:252
uint8_t * above_partition_ctx
Definition: vp9.h:376
unsigned cols
Definition: vp9.h:333
uint8_t ref[2]
Definition: vp9.h:253
int stride
Definition: mace.c:144
uint8_t comp_ref[5]
Definition: vp9.h:109
const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9]
Definition: vp9data.c:77
#define AV_WN32A(p, v)
Definition: intreadwrite.h:458
#define AV_COPY32(d, s)
Definition: intreadwrite.h:506
uint8_t * above_ref_ctx
Definition: vp9.h:386
uint8_t update_map
Definition: vp9.h:315
Definition: vp9.h:37
uint8_t * intra_pred_data[3]
Definition: vp9.h:391
const uint8_t ff_vp9_default_kf_uvmode_probs[10][9]
Definition: vp9data.c:191
void(* vp9_mc_func)(uint8_t *dst, const uint8_t *ref, ptrdiff_t dst_stride, ptrdiff_t ref_stride, int h, int mx, int my)
Definition: vp9.h:128
uint8_t * above_comp_ctx
Definition: vp9.h:385
uint8_t varcompref[2]
Definition: vp9.h:293
#define AV_RN32A(p)
Definition: intreadwrite.h:446
unsigned uv_mode[10][10]
Definition: vp9.h:346
vp9_mc_func mc[5][4][2][2][2]
Definition: vp9.h:208
int16_t y
Definition: vp56.h:67
uint8_t coef[4][2][2][6][6][3]
Definition: vp9.h:336
uint8_t
#define av_assert2(cond)
assert() equivalent, that does lie in speed critical code.
Definition: avassert.h:63
static int inter_recon(AVCodecContext *avctx)
Definition: vp9block.c:1218
unsigned y_mode[4][10]
Definition: vp9.h:345
TxfmType
Definition: vp9.h:45
uint8_t * above_mode_ctx
Definition: vp9.h:377
int col
Definition: vp9.h:259
#define b
Definition: input.c:52
Definition: vp9.h:54
uint8_t uveob[2][64]
Definition: vp9.h:401
static av_always_inline void mask_edges(VP9Filter *lflvl, int is_uv, int row_and_7, int col_and_7, int w, int h, int col_end, int row_end, enum TxfmMode tx, int skip_inter)
Definition: vp9block.c:1411
static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2], uint8_t *dst_u, uint8_t *dst_v, ptrdiff_t dst_stride, const uint8_t *ref_u, ptrdiff_t src_stride_u, const uint8_t *ref_v, ptrdiff_t src_stride_v, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h)
Definition: vp9block.c:1173
static void intra_recon(AVCodecContext *avctx, ptrdiff_t y_off, ptrdiff_t uv_off)
Definition: vp9block.c:1070
uint8_t * above_segpred_ctx
Definition: vp9.h:383
AVFrame * cur_frame
Definition: vp9.h:295
uint8_t skip[3]
Definition: vp9.h:113
bitstream reader API header.
uint8_t tmp_y[64 *64]
Definition: vp9.h:403
VP9DSPContext dsp
Definition: vp9.h:265
uint8_t lim_lut[64]
Definition: vp9.h:300
Definition: vp9.h:46
uint8_t mode[4]
Definition: vp9.h:253
Definition: vp9.h:222
uint8_t left_ref_ctx[8]
Definition: vp9.h:386
Definition: vp9.h:38
uint8_t uv_mode[10][9]
Definition: vp9.h:103
static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func(*mc)[2], uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *ref, ptrdiff_t ref_stride, ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, int bw, int bh, int w, int h)
Definition: vp9block.c:1143
#define ROUNDED_DIV(a, b)
Definition: common.h:51
uint8_t fixcompref
Definition: vp9.h:287
uint8_t mask[2][2][8][4]
Definition: vp9.h:225
Definition: vp9.h:36
uint8_t keyframe
Definition: vp9.h:275
int8_t sharpness
Definition: vp9.h:299
unsigned mv_mode[7][4]
Definition: vp9.h:348
enum CompPredMode comppredmode
Definition: vp9.h:373
uint8_t left_partition_ctx[8]
Definition: vp9.h:376
const int8_t ff_vp9_intramode_tree[9][2]
Definition: vp9data.c:65
uint8_t intra
Definition: vp9.h:253
Definition: vp9.h:247
TxfmMode
Definition: vp9.h:35
simple assert() macros that are a bit more flexible than ISO C assert().
uint8_t refidx[3]
Definition: vp9.h:291
uint8_t * above_txfm_ctx
Definition: vp9.h:382
uint8_t intra[4]
Definition: vp9.h:106
unsigned comp[5][2]
Definition: vp9.h:350
unsigned tx8p[2][2]
Definition: vp9.h:355
const int16_t(*[5][4] ff_vp9_scans_nb)[2]
Definition: vp9data.c:1043
#define FFMAX(a, b)
Definition: common.h:55
static int decode_coeffs(AVCodecContext *avctx)
Definition: vp9block.c:819
const uint8_t ff_vp9_model_pareto8[256][8]
Definition: vp9data.c:1062
uint8_t * above_filter_ctx
Definition: vp9.h:387
uint8_t comp[5]
Definition: vp9.h:107
const int16_t * ff_vp9_scans[5][4]
Definition: vp9data.c:486
#define FFMIN(a, b)
Definition: common.h:57
VP56mv left_mv_ctx[16][2]
Definition: vp9.h:388
uint8_t left_y_nnz_ctx[16]
Definition: vp9.h:379
struct VP9Context::@74::@79 feat[8]
const int8_t ff_vp9_inter_mode_tree[3][2]
Definition: vp9data.c:204
uint8_t level[8 *8]
Definition: vp9.h:223
int width
picture width / height.
Definition: avcodec.h:1218
uint8_t left_mode_ctx[16]
Definition: vp9.h:377
unsigned eob[4][2][2][6][6][2]
Definition: vp9.h:370
uint8_t tx16p[2][2]
Definition: vp9.h:111
unsigned tx32p[2][4]
Definition: vp9.h:353
unsigned tx16p[2][3]
Definition: vp9.h:354
enum FilterMode filtermode
Definition: vp9.h:285
const int8_t ff_vp9_segmentation_tree[7][2]
Definition: vp9data.c:55
uint8_t * dst[3]
Definition: vp9.h:260
uint8_t uvmode
Definition: vp9.h:253
enum FilterMode ff_vp9_filter_lut[3]
Definition: vp9data.c:215
uint8_t left_comp_ctx[8]
Definition: vp9.h:385
#define AV_WN16A(p, v)
Definition: intreadwrite.h:454
uint8_t mv_mode[7][3]
Definition: vp9.h:105
int16_t block[4096]
Definition: vp9.h:398
#define vp56_rac_get_prob
Definition: vp56.h:243
static const uint8_t bwh_tab[2][N_BS_SIZES][2]
Definition: vp9block.c:34
unsigned tile_col_start
Definition: vp9.h:331
unsigned intra[4][2]
Definition: vp9.h:349
if(ac->has_optimized_func)
static const float pred[4]
Definition: siprdata.h:259
unsigned rows
Definition: vp9.h:333
Definition: vp9.h:90
unsigned sb_cols
Definition: vp9.h:333
VP56mv mv[2]
Definition: vp9.h:218
Definition: vp9.h:246
static const int8_t mv[256][2]
Definition: 4xm.c:75
uint8_t enabled
Definition: vp9.h:304
static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
Definition: vp56.h:260
NULL
Definition: eval.c:55
VP56mv(* above_mv_ctx)[2]
Definition: vp9.h:388
Libavcodec external API header.
uint8_t filter[4][2]
Definition: vp9.h:104
uint8_t level
Definition: vp9.h:298
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:153
BlockSize
Definition: vp9.h:235
uint8_t left_skip_ctx[8]
Definition: vp9.h:381
main external API structure.
Definition: avcodec.h:1044
uint8_t left_txfm_ctx[8]
Definition: vp9.h:382
enum TxfmType ff_vp9_intra_txfm_type[14]
Definition: vp9data.c:291
enum TxfmMode tx uvtx
Definition: vp9.h:257
unsigned single_ref[5][2][2]
Definition: vp9.h:351
int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col, VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl, enum BlockPartition bp)
Definition: vp9block.c:1544
void ff_vp9_fill_mv(VP9Context *s, VP56mv *mv, int mode, int sb)
Definition: vp9mvs.c:280
uint8_t temporal
Definition: vp9.h:313
uint8_t tx8p[2]
Definition: vp9.h:112
uint8_t seg_id
Definition: vp9.h:253
uint8_t y_mode[4][9]
Definition: vp9.h:102
uint8_t left_filter_ctx[8]
Definition: vp9.h:387
uint8_t intraonly
Definition: vp9.h:281
static int step
Definition: avplay.c:247
uint8_t signbias[3]
Definition: vp9.h:292
enum BlockSize bs
Definition: vp9.h:256
uint8_t tmp_uv[2][32 *32]
Definition: vp9.h:404
VP56mv mv[4][2]
Definition: vp9.h:255
uint8_t * above_skip_ctx
Definition: vp9.h:381
enum TxfmMode txfmmode
Definition: vp9.h:372
uint8_t single_ref[5][2]
Definition: vp9.h:108
Definition: vp56.h:65
Definition: vp9.h:248
uint8_t comp
Definition: vp9.h:253
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:141
Definition: vp9.h:56
ptrdiff_t uv_stride
Definition: vp9.h:261
uint8_t * segmentation_map
Definition: vp9.h:392
void(* itxfm_add[N_TXFM_SIZES+1][N_TXFM_TYPES])(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob)
Definition: vp9.h:164
uint8_t seg[7]
Definition: vp9.h:341
int16_t x
Definition: vp56.h:66
common internal api header.
uint8_t segpred[3]
Definition: vp9.h:342
static av_always_inline int vp8_rac_get(VP56RangeCoder *c)
Definition: vp56.h:297
Core video DSP helper functions.
uint8_t edge_emu_buffer[71 *80]
Definition: vp9.h:395
int8_t ref[2]
Definition: vp9.h:219
void * priv_data
Definition: avcodec.h:1086
VP56mv max_mv
Definition: vp9.h:402
VP9Block b
Definition: vp9.h:271
enum FilterMode filter
Definition: vp9.h:254
Definition: vp9.h:39
const int8_t ff_vp9_filter_tree[2][2]
Definition: vp9data.c:210
#define AV_RN16A(p)
Definition: intreadwrite.h:442
uint8_t * above_intra_ctx
Definition: vp9.h:384
struct VP9Context::@77 prob
struct VP9Context::@78 counts
#define LOCAL_ALIGNED_16(t, v,...)
Definition: internal.h:114
struct VP9Context::@75 tiling
static int decode_block_coeffs(VP56RangeCoder *c, int16_t *coef, int n_coeffs, enum TxfmMode tx, unsigned(*cnt)[6][3], unsigned(*eob)[6][2], uint8_t(*p)[6][11], int nnz, const int16_t *scan, const int16_t(*nb)[2], const int16_t *band_counts, const int16_t *qmul)
Definition: vp9block.c:711
#define av_always_inline
Definition: attributes.h:40
Definition: vp9.h:245
uint8_t skip
Definition: vp9.h:253
VP9MVRefPair * mv[2]
Definition: vp9.h:393
uint8_t left_intra_ctx[8]
Definition: vp9.h:384
uint8_t * above_uv_nnz_ctx[2]
Definition: vp9.h:380
void(* emulated_edge_mc)(uint8_t *buf, const uint8_t *src, ptrdiff_t buf_linesize, ptrdiff_t src_linesize, int block_w, int block_h, int src_x, int src_y, int w, int h)
Copy a rectangular area of samples to a temporary buffer and replicate the border samples...
Definition: videodsp.h:52
AVFrame * refs[8]
Definition: vp9.h:294
VP56RangeCoder c
Definition: vp9.h:268