ffmpeg / libavcodec / vp8.c @ f311208c
History  View  Annotate  Download (58.2 KB)
/**
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avcodec.h"
#include "vp56.h"
#include "vp8data.h"
#include "vp8dsp.h"
#include "h264pred.h"
#include "rectangle.h"

/**
 * Loop filter strength values. VP8Context::filter_strength is an array of
 * these with mb_stride entries (see update_dimensions()).
 */
typedef struct {
    uint8_t filter_level;
    uint8_t inner_limit;
    uint8_t inner_filter;
} VP8FilterStrength;

38 
typedef struct { 
39 
uint8_t skip; 
40 
// todo: make it possible to check for at least (i4x4 or split_mv)

41 
// in one op. are others needed?

42 
uint8_t mode; 
43 
uint8_t ref_frame; 
44 
uint8_t partitioning; 
45 
VP56mv mv; 
46 
VP56mv bmv[16];

47 
} VP8Macroblock; 
48  
49 
typedef struct { 
50 
AVCodecContext *avctx; 
51 
DSPContext dsp; 
52 
VP8DSPContext vp8dsp; 
53 
H264PredContext hpc; 
54 
vp8_mc_func put_pixels_tab[3][3][3]; 
55 
AVFrame frames[4];

56 
AVFrame *framep[4];

57 
uint8_t *edge_emu_buffer; 
58 
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors

59 
int profile;

60  
61 
int mb_width; /* number of horizontal MB */ 
62 
int mb_height; /* number of vertical MB */ 
63 
int linesize;

64 
int uvlinesize;

65  
66 
int keyframe;

67 
int invisible;

68 
int update_last; ///< update VP56_FRAME_PREVIOUS with the current one 
69 
int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so 
70 
int update_altref;

71 
int deblock_filter;

72  
73 
/**

74 
* If this flag is not set, all the probability updates

75 
* are discarded after this frame is decoded.

76 
*/

77 
int update_probabilities;

78  
79 
/**

80 
* All coefficients are contained in separate arith coding contexts.

81 
* There can be 1, 2, 4, or 8 of these after the header context.

82 
*/

83 
int num_coeff_partitions;

84 
VP56RangeCoder coeff_partition[8];

85  
86 
VP8Macroblock *macroblocks; 
87 
VP8Macroblock *macroblocks_base; 
88 
VP8FilterStrength *filter_strength; 
89 
int mb_stride;

90  
91 
uint8_t *intra4x4_pred_mode_top; 
92 
uint8_t intra4x4_pred_mode_left[4];

93 
uint8_t *segmentation_map; 
94 
int b4_stride;

95  
96 
/**

97 
* Cache of the top row needed for intra prediction

98 
* 16 for luma, 8 for each chroma plane

99 
*/

100 
uint8_t (*top_border)[16+8+8]; 
101  
102 
/**

103 
* For coeff decode, we need to know whether the above block had nonzero

104 
* coefficients. This means for each macroblock, we need data for 4 luma

105 
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9

106 
* per macroblock. We keep the last row in top_nnz.

107 
*/

108 
uint8_t (*top_nnz)[9];

109 
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; 
110  
111 
/**

112 
* This is the index plus one of the last nonzero coeff

113 
* for each of the blocks in the current macroblock.

114 
* So, 0 > no coeffs

115 
* 1 > dconly (special transform)

116 
* 2+> full transform

117 
*/

118 
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; 
119 
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; 
120 
DECLARE_ALIGNED(16, DCTELEM, block_dc)[16]; 
121 
uint8_t intra4x4_pred_mode_mb[16];

122  
123 
int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock 
124 
int segment; ///< segment of the current macroblock 
125  
126 
int mbskip_enabled;

127 
int sign_bias[4]; ///< one state [0, 1] per ref frame type 
128 
int ref_count[3]; 
129  
130 
/**

131 
* Base parameters for segmentation, i.e. permacroblock parameters.

132 
* These must be kept unchanged even if segmentation is not used for

133 
* a frame, since the values persist between interframes.

134 
*/

135 
struct {

136 
int enabled;

137 
int absolute_vals;

138 
int update_map;

139 
int8_t base_quant[4];

140 
int8_t filter_level[4]; ///< base loop filter level 
141 
} segmentation; 
142  
143 
/**

144 
* Macroblocks can have one of 4 different quants in a frame when

145 
* segmentation is enabled.

146 
* If segmentation is disabled, only the first segment's values are used.

147 
*/

148 
struct {

149 
// [0]  DC qmul [1]  AC qmul

150 
int16_t luma_qmul[2];

151 
int16_t luma_dc_qmul[2]; ///< luma dconly block quant 
152 
int16_t chroma_qmul[2];

153 
} qmat[4];

154  
155 
struct {

156 
int simple;

157 
int level;

158 
int sharpness;

159 
} filter; 
160  
161 
struct {

162 
int enabled; ///< whether each mb can have a different strength based on mode/ref 
163  
164 
/**

165 
* filter strength adjustment for the following macroblock modes:

166 
* [0]  i4x4

167 
* [1]  zero mv

168 
* [2]  inter modes except for zero or split mv

169 
* [3]  split mv

170 
* i16x16 modes never have any adjustment

171 
*/

172 
int8_t mode[4];

173  
174 
/**

175 
* filter strength adjustment for macroblocks that reference:

176 
* [0]  intra / VP56_FRAME_CURRENT

177 
* [1]  VP56_FRAME_PREVIOUS

178 
* [2]  VP56_FRAME_GOLDEN

179 
* [3]  altref / VP56_FRAME_GOLDEN2

180 
*/

181 
int8_t ref[4];

182 
} lf_delta; 
183  
184 
/**

185 
* These are all of the updatable probabilities for binary decisions.

186 
* They are only implictly reset on keyframes, making it quite likely

187 
* for an interframe to desync if a prior frame's header was corrupt

188 
* or missing outright!

189 
*/

190 
struct {

191 
uint8_t segmentid[3];

192 
uint8_t mbskip; 
193 
uint8_t intra; 
194 
uint8_t last; 
195 
uint8_t golden; 
196 
uint8_t pred16x16[4];

197 
uint8_t pred8x8c[3];

198 
uint8_t token[4][8][3][NUM_DCT_TOKENS1]; 
199 
uint8_t mvc[2][19]; 
200 
} prob[2];

201 
} VP8Context; 
202  
203 
static void vp8_decode_flush(AVCodecContext *avctx) 
204 
{ 
205 
VP8Context *s = avctx>priv_data; 
206 
int i;

207  
208 
for (i = 0; i < 4; i++) 
209 
if (s>frames[i].data[0]) 
210 
avctx>release_buffer(avctx, &s>frames[i]); 
211 
memset(s>framep, 0, sizeof(s>framep)); 
212  
213 
av_freep(&s>macroblocks_base); 
214 
av_freep(&s>filter_strength); 
215 
av_freep(&s>intra4x4_pred_mode_top); 
216 
av_freep(&s>top_nnz); 
217 
av_freep(&s>edge_emu_buffer); 
218 
av_freep(&s>top_border); 
219 
av_freep(&s>segmentation_map); 
220  
221 
s>macroblocks = NULL;

222 
} 
223  
224 
static int update_dimensions(VP8Context *s, int width, int height) 
225 
{ 
226 
if (avcodec_check_dimensions(s>avctx, width, height))

227 
return AVERROR_INVALIDDATA;

228  
229 
vp8_decode_flush(s>avctx); 
230  
231 
avcodec_set_dimensions(s>avctx, width, height); 
232  
233 
s>mb_width = (s>avctx>coded_width +15) / 16; 
234 
s>mb_height = (s>avctx>coded_height+15) / 16; 
235  
236 
// we allocate a border around the top/left of intra4x4 modes

237 
// this is 4 blocks for intra4x4 to keep 4byte alignment for fill_rectangle

238 
s>mb_stride = s>mb_width+1;

239 
s>b4_stride = 4*s>mb_stride;

240  
241 
s>macroblocks_base = av_mallocz((s>mb_stride+s>mb_height*2+2)*sizeof(*s>macroblocks)); 
242 
s>filter_strength = av_mallocz(s>mb_stride*sizeof(*s>filter_strength));

243 
s>intra4x4_pred_mode_top = av_mallocz(s>b4_stride*4);

244 
s>top_nnz = av_mallocz(s>mb_width*sizeof(*s>top_nnz));

245 
s>top_border = av_mallocz((s>mb_width+1)*sizeof(*s>top_border)); 
246 
s>segmentation_map = av_mallocz(s>mb_stride*s>mb_height); 
247  
248 
if (!s>macroblocks_base  !s>filter_strength  !s>intra4x4_pred_mode_top 

249 
!s>top_nnz  !s>top_border  !s>segmentation_map) 
250 
return AVERROR(ENOMEM);

251  
252 
s>macroblocks = s>macroblocks_base + 1;

253  
254 
return 0; 
255 
} 
256  
257 
static void parse_segment_info(VP8Context *s) 
258 
{ 
259 
VP56RangeCoder *c = &s>c; 
260 
int i;

261  
262 
s>segmentation.update_map = vp8_rac_get(c); 
263  
264 
if (vp8_rac_get(c)) { // update segment feature data 
265 
s>segmentation.absolute_vals = vp8_rac_get(c); 
266  
267 
for (i = 0; i < 4; i++) 
268 
s>segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

269  
270 
for (i = 0; i < 4; i++) 
271 
s>segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);

272 
} 
273 
if (s>segmentation.update_map)

274 
for (i = 0; i < 3; i++) 
275 
s>prob>segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; 
276 
} 
277  
278 
static void update_lf_deltas(VP8Context *s) 
279 
{ 
280 
VP56RangeCoder *c = &s>c; 
281 
int i;

282  
283 
for (i = 0; i < 4; i++) 
284 
s>lf_delta.ref[i] = vp8_rac_get_sint(c, 6);

285  
286 
for (i = 0; i < 4; i++) 
287 
s>lf_delta.mode[i] = vp8_rac_get_sint(c, 6);

288 
} 
289  
290 
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) 
291 
{ 
292 
const uint8_t *sizes = buf;

293 
int i;

294  
295 
s>num_coeff_partitions = 1 << vp8_rac_get_uint(&s>c, 2); 
296  
297 
buf += 3*(s>num_coeff_partitions1); 
298 
buf_size = 3*(s>num_coeff_partitions1); 
299 
if (buf_size < 0) 
300 
return 1; 
301  
302 
for (i = 0; i < s>num_coeff_partitions1; i++) { 
303 
int size = AV_RL24(sizes + 3*i); 
304 
if (buf_size  size < 0) 
305 
return 1; 
306  
307 
vp56_init_range_decoder(&s>coeff_partition[i], buf, size); 
308 
buf += size; 
309 
buf_size = size; 
310 
} 
311 
vp56_init_range_decoder(&s>coeff_partition[i], buf, buf_size); 
312  
313 
return 0; 
314 
} 
315  
316 
static void get_quants(VP8Context *s) 
317 
{ 
318 
VP56RangeCoder *c = &s>c; 
319 
int i, base_qi;

320  
321 
int yac_qi = vp8_rac_get_uint(c, 7); 
322 
int ydc_delta = vp8_rac_get_sint(c, 4); 
323 
int y2dc_delta = vp8_rac_get_sint(c, 4); 
324 
int y2ac_delta = vp8_rac_get_sint(c, 4); 
325 
int uvdc_delta = vp8_rac_get_sint(c, 4); 
326 
int uvac_delta = vp8_rac_get_sint(c, 4); 
327  
328 
for (i = 0; i < 4; i++) { 
329 
if (s>segmentation.enabled) {

330 
base_qi = s>segmentation.base_quant[i]; 
331 
if (!s>segmentation.absolute_vals)

332 
base_qi += yac_qi; 
333 
} else

334 
base_qi = yac_qi; 
335  
336 
s>qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)]; 
337 
s>qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)]; 
338 
s>qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)]; 
339 
s>qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100; 
340 
s>qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)]; 
341 
s>qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)]; 
342  
343 
s>qmat[i].luma_dc_qmul[1] = FFMAX(s>qmat[i].luma_dc_qmul[1], 8); 
344 
s>qmat[i].chroma_qmul[0] = FFMIN(s>qmat[i].chroma_qmul[0], 132); 
345 
} 
346 
} 
347  
348 
/**

349 
* Determine which buffers golden and altref should be updated with after this frame.

350 
* The spec isn't clear here, so I'm going by my understanding of what libvpx does

351 
*

352 
* Intra frames update all 3 references

353 
* Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set

354 
* If the update (goldenaltref) flag is set, it's updated with the current frame

355 
* if update_last is set, and VP56_FRAME_PREVIOUS otherwise.

356 
* If the flag is not set, the number read means:

357 
* 0: no update

358 
* 1: VP56_FRAME_PREVIOUS

359 
* 2: update golden with altref, or update altref with golden

360 
*/

361 
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) 
362 
{ 
363 
VP56RangeCoder *c = &s>c; 
364  
365 
if (update)

366 
return VP56_FRAME_CURRENT;

367  
368 
switch (vp8_rac_get_uint(c, 2)) { 
369 
case 1: 
370 
return VP56_FRAME_PREVIOUS;

371 
case 2: 
372 
return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;

373 
} 
374 
return VP56_FRAME_NONE;

375 
} 
376  
377 
static void update_refs(VP8Context *s) 
378 
{ 
379 
VP56RangeCoder *c = &s>c; 
380  
381 
int update_golden = vp8_rac_get(c);

382 
int update_altref = vp8_rac_get(c);

383  
384 
s>update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); 
385 
s>update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); 
386 
} 
387  
388 
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) 
389 
{ 
390 
VP56RangeCoder *c = &s>c; 
391 
int header_size, hscale, vscale, i, j, k, l, ret;

392 
int width = s>avctx>width;

393 
int height = s>avctx>height;

394  
395 
s>keyframe = !(buf[0] & 1); 
396 
s>profile = (buf[0]>>1) & 7; 
397 
s>invisible = !(buf[0] & 0x10); 
398 
header_size = AV_RL24(buf) >> 5;

399 
buf += 3;

400 
buf_size = 3;

401  
402 
if (s>profile > 3) 
403 
av_log(s>avctx, AV_LOG_WARNING, "Unknown profile %d\n", s>profile);

404  
405 
if (!s>profile)

406 
memcpy(s>put_pixels_tab, s>vp8dsp.put_vp8_epel_pixels_tab, sizeof(s>put_pixels_tab));

407 
else // profile 13 use bilinear, 4+ aren't defined so whatever 
408 
memcpy(s>put_pixels_tab, s>vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s>put_pixels_tab));

409  
410 
if (header_size > buf_size  7*s>keyframe) { 
411 
av_log(s>avctx, AV_LOG_ERROR, "Header size larger than data provided\n");

412 
return AVERROR_INVALIDDATA;

413 
} 
414  
415 
if (s>keyframe) {

416 
if (AV_RL24(buf) != 0x2a019d) { 
417 
av_log(s>avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));

418 
return AVERROR_INVALIDDATA;

419 
} 
420 
width = AV_RL16(buf+3) & 0x3fff; 
421 
height = AV_RL16(buf+5) & 0x3fff; 
422 
hscale = buf[4] >> 6; 
423 
vscale = buf[6] >> 6; 
424 
buf += 7;

425 
buf_size = 7;

426  
427 
if (hscale  vscale)

428 
av_log_missing_feature(s>avctx, "Upscaling", 1); 
429  
430 
s>update_golden = s>update_altref = VP56_FRAME_CURRENT; 
431 
memcpy(s>prob>token , vp8_token_default_probs , sizeof(s>prob>token));

432 
memcpy(s>prob>pred16x16, vp8_pred16x16_prob_inter, sizeof(s>prob>pred16x16));

433 
memcpy(s>prob>pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s>prob>pred8x8c));

434 
memcpy(s>prob>mvc , vp8_mv_default_prob , sizeof(s>prob>mvc));

435 
memset(&s>segmentation, 0, sizeof(s>segmentation)); 
436 
} 
437  
438 
if (!s>macroblocks_base  /* first frame */ 
439 
width != s>avctx>width  height != s>avctx>height) { 
440 
if ((ret = update_dimensions(s, width, height) < 0)) 
441 
return ret;

442 
} 
443  
444 
vp56_init_range_decoder(c, buf, header_size); 
445 
buf += header_size; 
446 
buf_size = header_size; 
447  
448 
if (s>keyframe) {

449 
if (vp8_rac_get(c))

450 
av_log(s>avctx, AV_LOG_WARNING, "Unspecified colorspace\n");

451 
vp8_rac_get(c); // whether we can skip clamping in dsp functions

452 
} 
453  
454 
if ((s>segmentation.enabled = vp8_rac_get(c)))

455 
parse_segment_info(s); 
456 
else

457 
s>segmentation.update_map = 0; // FIXME: move this to some init function? 
458  
459 
s>filter.simple = vp8_rac_get(c); 
460 
s>filter.level = vp8_rac_get_uint(c, 6);

461 
s>filter.sharpness = vp8_rac_get_uint(c, 3);

462  
463 
if ((s>lf_delta.enabled = vp8_rac_get(c)))

464 
if (vp8_rac_get(c))

465 
update_lf_deltas(s); 
466  
467 
if (setup_partitions(s, buf, buf_size)) {

468 
av_log(s>avctx, AV_LOG_ERROR, "Invalid partitions\n");

469 
return AVERROR_INVALIDDATA;

470 
} 
471  
472 
get_quants(s); 
473  
474 
if (!s>keyframe) {

475 
update_refs(s); 
476 
s>sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); 
477 
s>sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);

478 
} 
479  
480 
// if we aren't saving this frame's probabilities for future frames,

481 
// make a copy of the current probabilities

482 
if (!(s>update_probabilities = vp8_rac_get(c)))

483 
s>prob[1] = s>prob[0]; 
484  
485 
s>update_last = s>keyframe  vp8_rac_get(c); 
486  
487 
for (i = 0; i < 4; i++) 
488 
for (j = 0; j < 8; j++) 
489 
for (k = 0; k < 3; k++) 
490 
for (l = 0; l < NUM_DCT_TOKENS1; l++) 
491 
if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l]))

492 
s>prob>token[i][j][k][l] = vp8_rac_get_uint(c, 8);

493  
494 
if ((s>mbskip_enabled = vp8_rac_get(c)))

495 
s>prob>mbskip = vp8_rac_get_uint(c, 8);

496  
497 
if (!s>keyframe) {

498 
s>prob>intra = vp8_rac_get_uint(c, 8);

499 
s>prob>last = vp8_rac_get_uint(c, 8);

500 
s>prob>golden = vp8_rac_get_uint(c, 8);

501  
502 
if (vp8_rac_get(c))

503 
for (i = 0; i < 4; i++) 
504 
s>prob>pred16x16[i] = vp8_rac_get_uint(c, 8);

505 
if (vp8_rac_get(c))

506 
for (i = 0; i < 3; i++) 
507 
s>prob>pred8x8c[i] = vp8_rac_get_uint(c, 8);

508  
509 
// 17.2 MV probability update

510 
for (i = 0; i < 2; i++) 
511 
for (j = 0; j < 19; j++) 
512 
if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))

513 
s>prob>mvc[i][j] = vp8_rac_get_nn(c); 
514 
} 
515  
516 
return 0; 
517 
} 
518  
519 
static av_always_inline

520 
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) 
521 
{ 
522 
#define MARGIN (16 << 2) 
523 
dst>x = av_clip(src>x, ((mb_x << 6) + MARGIN),

524 
((s>mb_width  1  mb_x) << 6) + MARGIN); 
525 
dst>y = av_clip(src>y, ((mb_y << 6) + MARGIN),

526 
((s>mb_height  1  mb_y) << 6) + MARGIN); 
527 
} 
528  
529 
static av_always_inline

530 
void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 
531 
VP56mv near[2], VP56mv *best, uint8_t cnt[4]) 
532 
{ 
533 
VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, 
534 
mb  1 /* left */, 
535 
mb + 1 /* topleft */ }; 
536 
enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };

537 
VP56mv near_mv[4] = {{ 0 }}; 
538 
enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };

539 
int idx = CNT_ZERO;

540 
int best_idx = CNT_ZERO;

541 
int cur_sign_bias = s>sign_bias[mb>ref_frame];

542 
int *sign_bias = s>sign_bias;

543  
544 
/* Process MB on top, left and topleft */

545 
#define MV_EDGE_CHECK(n)\

546 
{\ 
547 
VP8Macroblock *edge = mb_edge[n];\ 
548 
int edge_ref = edge>ref_frame;\

549 
if (edge_ref != VP56_FRAME_CURRENT) {\

550 
uint32_t mv = AV_RN32A(&edge>mv);\ 
551 
if (mv) {\

552 
if (cur_sign_bias != sign_bias[edge_ref]) {\

553 
/* SWAR negate of the values in mv. */\

554 
mv = ~mv;\ 
555 
mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ 
556 
}\ 
557 
if (!n  mv != AV_RN32A(&near_mv[idx]))\

558 
AV_WN32A(&near_mv[++idx], mv);\ 
559 
cnt[idx] += 1 + (n != 2);\ 
560 
} else\

561 
cnt[CNT_ZERO] += 1 + (n != 2);\ 
562 
}\ 
563 
} 
564 
MV_EDGE_CHECK(0)

565 
MV_EDGE_CHECK(1)

566 
MV_EDGE_CHECK(2)

567  
568 
/* If we have three distinct MVs, merge first and last if they're the same */

569 
if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT])) 
570 
cnt[CNT_NEAREST] += 1;

571  
572 
cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]>mode == VP8_MVMODE_SPLIT) + 
573 
(mb_edge[EDGE_TOP]>mode == VP8_MVMODE_SPLIT)) * 2 +

574 
(mb_edge[EDGE_TOPLEFT]>mode == VP8_MVMODE_SPLIT); 
575  
576 
/* Swap near and nearest if necessary */

577 
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {

578 
FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); 
579 
FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); 
580 
} 
581  
582 
/* Choose the best mv out of 0,0 and the nearest mv */

583 
if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])

584 
best_idx = CNT_NEAREST; 
585  
586 
mb>mv = near_mv[best_idx]; 
587 
near[0] = near_mv[CNT_NEAREST];

588 
near[1] = near_mv[CNT_NEAR];

589 
} 
590  
591 
/**

592 
* Motion vector coding, 17.1.

593 
*/

594 
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) 
595 
{ 
596 
int bit, x = 0; 
597  
598 
if (vp56_rac_get_prob_branchy(c, p[0])) { 
599 
int i;

600  
601 
for (i = 0; i < 3; i++) 
602 
x += vp56_rac_get_prob(c, p[9 + i]) << i;

603 
for (i = 9; i > 3; i) 
604 
x += vp56_rac_get_prob(c, p[9 + i]) << i;

605 
if (!(x & 0xFFF0)  vp56_rac_get_prob(c, p[12])) 
606 
x += 8;

607 
} else {

608 
// small_mvtree

609 
const uint8_t *ps = p+2; 
610 
bit = vp56_rac_get_prob(c, *ps); 
611 
ps += 1 + 3*bit; 
612 
x += 4*bit;

613 
bit = vp56_rac_get_prob(c, *ps); 
614 
ps += 1 + bit;

615 
x += 2*bit;

616 
x += vp56_rac_get_prob(c, *ps); 
617 
} 
618  
619 
return (x && vp56_rac_get_prob(c, p[1])) ? x : x; 
620 
} 
621  
622 
static av_always_inline

623 
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)

624 
{ 
625 
if (left == top)

626 
return vp8_submv_prob[4!!left]; 
627 
if (!top)

628 
return vp8_submv_prob[2]; 
629 
return vp8_submv_prob[1!!left]; 
630 
} 
631  
632 
/**

633 
* Split motion vector prediction, 16.4.

634 
* @returns the number of motion vectors parsed (2, 4 or 16)

635 
*/

636 
static av_always_inline

637 
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)

638 
{ 
639 
int part_idx = mb>partitioning =

640 
vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); 
641 
int n, num = vp8_mbsplit_count[part_idx];

642 
VP8Macroblock *top_mb = &mb[2];

643 
VP8Macroblock *left_mb = &mb[1];

644 
const uint8_t *mbsplits_left = vp8_mbsplits[left_mb>partitioning],

645 
*mbsplits_top = vp8_mbsplits[top_mb>partitioning], 
646 
*mbsplits_cur = vp8_mbsplits[part_idx], 
647 
*firstidx = vp8_mbfirstidx[part_idx]; 
648 
VP56mv *top_mv = top_mb>bmv; 
649 
VP56mv *left_mv = left_mb>bmv; 
650 
VP56mv *cur_mv = mb>bmv; 
651  
652 
for (n = 0; n < num; n++) { 
653 
int k = firstidx[n];

654 
uint32_t left, above; 
655 
const uint8_t *submv_prob;

656  
657 
if (!(k & 3)) 
658 
left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);

659 
else

660 
left = AV_RN32A(&cur_mv[mbsplits_cur[k  1]]);

661 
if (k <= 3) 
662 
above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);

663 
else

664 
above = AV_RN32A(&cur_mv[mbsplits_cur[k  4]]);

665  
666 
submv_prob = get_submv_prob(left, above); 
667  
668 
switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) {

669 
case VP8_SUBMVMODE_NEW4X4:

670 
mb>bmv[n].y = mb>mv.y + read_mv_component(c, s>prob>mvc[0]);

671 
mb>bmv[n].x = mb>mv.x + read_mv_component(c, s>prob>mvc[1]);

672 
break;

673 
case VP8_SUBMVMODE_ZERO4X4:

674 
AV_ZERO32(&mb>bmv[n]); 
675 
break;

676 
case VP8_SUBMVMODE_LEFT4X4:

677 
AV_WN32A(&mb>bmv[n], left); 
678 
break;

679 
case VP8_SUBMVMODE_TOP4X4:

680 
AV_WN32A(&mb>bmv[n], above); 
681 
break;

682 
} 
683 
} 
684  
685 
return num;

686 
} 
687  
688 
static av_always_inline

689 
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,

690 
int mb_x, int keyframe) 
691 
{ 
692 
uint8_t *intra4x4 = s>intra4x4_pred_mode_mb; 
693 
if (keyframe) {

694 
int x, y;

695 
uint8_t* const top = s>intra4x4_pred_mode_top + 4 * mb_x; 
696 
uint8_t* const left = s>intra4x4_pred_mode_left;

697 
for (y = 0; y < 4; y++) { 
698 
for (x = 0; x < 4; x++) { 
699 
const uint8_t *ctx;

700 
ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; 
701 
*intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); 
702 
left[y] = top[x] = *intra4x4; 
703 
intra4x4++; 
704 
} 
705 
} 
706 
} else {

707 
int i;

708 
for (i = 0; i < 16; i++) 
709 
intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); 
710 
} 
711 
} 
712  
713 
static av_always_inline

714 
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment) 
715 
{ 
716 
VP56RangeCoder *c = &s>c; 
717  
718 
if (s>segmentation.update_map)

719 
*segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s>prob>segmentid); 
720 
s>segment = *segment; 
721  
722 
mb>skip = s>mbskip_enabled ? vp56_rac_get_prob(c, s>prob>mbskip) : 0;

723  
724 
if (s>keyframe) {

725 
mb>mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); 
726  
727 
if (mb>mode == MODE_I4x4) {

728 
decode_intra4x4_modes(s, c, mb_x, 1);

729 
} else {

730 
const uint32_t modes = vp8_pred4x4_mode[mb>mode] * 0x01010101u; 
731 
AV_WN32A(s>intra4x4_pred_mode_top + 4 * mb_x, modes);

732 
AV_WN32A(s>intra4x4_pred_mode_left, modes); 
733 
} 
734  
735 
s>chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); 
736 
mb>ref_frame = VP56_FRAME_CURRENT; 
737 
} else if (vp56_rac_get_prob_branchy(c, s>prob>intra)) { 
738 
VP56mv near[2], best;

739 
uint8_t cnt[4] = { 0 }; 
740 
uint8_t p[4];

741  
742 
// inter MB, 16.2

743 
if (vp56_rac_get_prob_branchy(c, s>prob>last))

744 
mb>ref_frame = vp56_rac_get_prob(c, s>prob>golden) ? 
745 
VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;

746 
else

747 
mb>ref_frame = VP56_FRAME_PREVIOUS; 
748 
s>ref_count[mb>ref_frame1]++;

749  
750 
// motion vectors, 16.3

751 
find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt); 
752 
p[0] = vp8_mode_contexts[cnt[0]][0]; 
753 
p[1] = vp8_mode_contexts[cnt[1]][1]; 
754 
p[2] = vp8_mode_contexts[cnt[2]][2]; 
755 
p[3] = vp8_mode_contexts[cnt[3]][3]; 
756 
mb>mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p); 
757 
switch (mb>mode) {

758 
case VP8_MVMODE_SPLIT:

759 
clamp_mv(s, &mb>mv, &mb>mv, mb_x, mb_y); 
760 
mb>mv = mb>bmv[decode_splitmvs(s, c, mb)  1];

761 
break;

762 
case VP8_MVMODE_ZERO:

763 
AV_ZERO32(&mb>mv); 
764 
break;

765 
case VP8_MVMODE_NEAREST:

766 
clamp_mv(s, &mb>mv, &near[0], mb_x, mb_y);

767 
break;

768 
case VP8_MVMODE_NEAR:

769 
clamp_mv(s, &mb>mv, &near[1], mb_x, mb_y);

770 
break;

771 
case VP8_MVMODE_NEW:

772 
clamp_mv(s, &mb>mv, &mb>mv, mb_x, mb_y); 
773 
mb>mv.y += + read_mv_component(c, s>prob>mvc[0]);

774 
mb>mv.x += + read_mv_component(c, s>prob>mvc[1]);

775 
break;

776 
} 
777 
if (mb>mode != VP8_MVMODE_SPLIT) {

778 
mb>partitioning = VP8_SPLITMVMODE_NONE; 
779 
mb>bmv[0] = mb>mv;

780 
} 
781 
} else {

782 
// intra MB, 16.1

783 
mb>mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s>prob>pred16x16); 
784  
785 
if (mb>mode == MODE_I4x4)

786 
decode_intra4x4_modes(s, c, mb_x, 0);

787  
788 
s>chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s>prob>pred8x8c); 
789 
mb>ref_frame = VP56_FRAME_CURRENT; 
790 
mb>partitioning = VP8_SPLITMVMODE_NONE; 
791 
AV_ZERO32(&mb>bmv[0]);

792 
} 
793 
} 
794  
795 
/**

796 
* @param c arithmetic bitstream reader context

797 
* @param block destination for block coefficients

798 
* @param probs probabilities to use when reading trees from the bitstream

799 
* @param i initial coeff index, 0 unless a separate DC block is coded

800 
* @param zero_nhood the initial prediction context for number of surrounding

801 
* allzero blocks (only left/top, so 02)

802 
* @param qmul array holding the dc/ac dequant factor at position 0/1

803 
* @return 0 if no coeffs were decoded

804 
* otherwise, the index of the last coeff decoded plus one

805 
*/

806 
static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], 
807 
uint8_t probs[8][3][NUM_DCT_TOKENS1], 
808 
int i, int zero_nhood, int16_t qmul[2]) 
809 
{ 
810 
uint8_t *token_prob = probs[vp8_coeff_band[i]][zero_nhood]; 
811 
int nonzero = 0; 
812 
int coeff;

813  
814 
do {

815 
if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB 
816 
return nonzero;

817  
818 
skip_eob:

819 
if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 
820 
if (++i == 16) 
821 
return nonzero; // invalid input; blocks should end with EOB 
822 
token_prob = probs[vp8_coeff_band[i]][0];

823 
goto skip_eob;

824 
} 
825  
826 
if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 
827 
coeff = 1;

828 
token_prob = probs[vp8_coeff_band[i+1]][1]; 
829 
} else {

830 
if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 
831 
coeff = vp56_rac_get_prob(c, token_prob[4]);

832 
if (coeff)

833 
coeff += vp56_rac_get_prob(c, token_prob[5]);

834 
coeff += 2;

835 
} else {

836 
// DCT_CAT*

837 
if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { 
838 
if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 
839 
coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); 
840 
} else { // DCT_CAT2 
841 
coeff = 7;

842 
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; 
843 
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);

844 
} 
845 
} else { // DCT_CAT3 and up 
846 
int a = vp56_rac_get_prob(c, token_prob[8]); 
847 
int b = vp56_rac_get_prob(c, token_prob[9+a]); 
848 
int cat = (a<<1) + b; 
849 
coeff = 3 + (8<<cat); 
850 
coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); 
851 
} 
852 
} 
853 
token_prob = probs[vp8_coeff_band[i+1]][2]; 
854 
} 
855  
856 
// todo: full [16] qmat? load into register?

857 
block[zigzag_scan[i]] = (vp8_rac_get(c) ? coeff : coeff) * qmul[!!i]; 
858 
nonzero = ++i; 
859 
} while (i < 16); 
860  
861 
return nonzero;

862 
} 
863  
864 
static av_always_inline

865 
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,

866 
uint8_t t_nnz[9], uint8_t l_nnz[9]) 
867 
{ 
868 
int i, x, y, luma_start = 0, luma_ctx = 3; 
869 
int nnz_pred, nnz, nnz_total = 0; 
870 
int segment = s>segment;

871 
int block_dc = 0; 
872  
873 
if (mb>mode != MODE_I4x4 && mb>mode != VP8_MVMODE_SPLIT) {

874 
nnz_pred = t_nnz[8] + l_nnz[8]; 
875  
876 
// decode DC values and do hadamard

877 
nnz = decode_block_coeffs(c, s>block_dc, s>prob>token[1], 0, nnz_pred, 
878 
s>qmat[segment].luma_dc_qmul); 
879 
l_nnz[8] = t_nnz[8] = !!nnz; 
880 
if (nnz) {

881 
nnz_total += nnz; 
882 
block_dc = 1;

883 
if (nnz == 1) 
884 
s>vp8dsp.vp8_luma_dc_wht_dc(s>block, s>block_dc); 
885 
else

886 
s>vp8dsp.vp8_luma_dc_wht(s>block, s>block_dc); 
887 
} 
888 
luma_start = 1;

889 
luma_ctx = 0;

890 
} 
891  
892 
// luma blocks

893 
for (y = 0; y < 4; y++) 
894 
for (x = 0; x < 4; x++) { 
895 
nnz_pred = l_nnz[y] + t_nnz[x]; 
896 
nnz = decode_block_coeffs(c, s>block[y][x], s>prob>token[luma_ctx], luma_start, 
897 
nnz_pred, s>qmat[segment].luma_qmul); 
898 
// nnz+block_dc may be one more than the actual last index, but we don't care

899 
s>non_zero_count_cache[y][x] = nnz + block_dc; 
900 
t_nnz[x] = l_nnz[y] = !!nnz; 
901 
nnz_total += nnz; 
902 
} 
903  
904 
// chroma blocks

905 
// TODO: what to do about dimensions? 2nd dim for luma is x,

906 
// but for chroma it's (y<<1)x

907 
for (i = 4; i < 6; i++) 
908 
for (y = 0; y < 2; y++) 
909 
for (x = 0; x < 2; x++) { 
910 
nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; 
911 
nnz = decode_block_coeffs(c, s>block[i][(y<<1)+x], s>prob>token[2], 0, 
912 
nnz_pred, s>qmat[segment].chroma_qmul); 
913 
s>non_zero_count_cache[i][(y<<1)+x] = nnz;

914 
t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; 
915 
nnz_total += nnz; 
916 
} 
917  
918 
// if there were no coded coeffs despite the macroblock not being marked skip,

919 
// we MUST not do the inner loop filter and should not do IDCT

920 
// Since skip isn't used for bitstream prediction, just manually set it.

921 
if (!nnz_total)

922 
mb>skip = 1;

923 
} 
924  
925 
static av_always_inline

926 
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,

927 
int linesize, int uvlinesize, int simple) 
928 
{ 
929 
AV_COPY128(top_border, src_y + 15*linesize);

930 
if (!simple) {

931 
AV_COPY64(top_border+16, src_cb + 7*uvlinesize); 
932 
AV_COPY64(top_border+24, src_cr + 7*uvlinesize); 
933 
} 
934 
} 
935  
936 
static av_always_inline

937 
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,

938 
int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, 
939 
int simple, int xchg) 
940 
{ 
941 
uint8_t *top_border_m1 = top_border32; // for TL prediction 
942 
src_y = linesize; 
943 
src_cb = uvlinesize; 
944 
src_cr = uvlinesize; 
945  
946 
#define XCHG(a,b,xchg) do { \ 
947 
if (xchg) AV_SWAP64(b,a); \

948 
else AV_COPY64(b,a); \

949 
} while (0) 
950  
951 
XCHG(top_border_m1+8, src_y8, xchg); 
952 
XCHG(top_border, src_y, xchg); 
953 
XCHG(top_border+8, src_y+8, 1); 
954 
if (mb_x < mb_width1) 
955 
XCHG(top_border+32, src_y+16, 1); 
956  
957 
// only copy chroma for normal loop filter

958 
// or to initialize the top row to 127

959 
if (!simple  !mb_y) {

960 
XCHG(top_border_m1+16, src_cb8, xchg); 
961 
XCHG(top_border_m1+24, src_cr8, xchg); 
962 
XCHG(top_border+16, src_cb, 1); 
963 
XCHG(top_border+24, src_cr, 1); 
964 
} 
965 
} 
966  
967 
static av_always_inline

968 
int check_intra_pred_mode(int mode, int mb_x, int mb_y) 
969 
{ 
970 
if (mode == DC_PRED8x8) {

971 
if (!mb_x) {

972 
mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; 
973 
} else if (!mb_y) { 
974 
mode = LEFT_DC_PRED8x8; 
975 
} 
976 
} 
977 
return mode;

978 
} 
979  
980 
static av_always_inline

981 
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 
982 
int mb_x, int mb_y) 
983 
{ 
984 
int x, y, mode, nnz, tr;

985  
986 
// for the first row, we need to run xchg_mb_border to init the top edge to 127

987 
// otherwise, skip it if we aren't going to deblock

988 
if (s>deblock_filter  !mb_y)

989 
xchg_mb_border(s>top_border[mb_x+1], dst[0], dst[1], dst[2], 
990 
s>linesize, s>uvlinesize, mb_x, mb_y, s>mb_width, 
991 
s>filter.simple, 1);

992  
993 
if (mb>mode < MODE_I4x4) {

994 
mode = check_intra_pred_mode(mb>mode, mb_x, mb_y); 
995 
s>hpc.pred16x16[mode](dst[0], s>linesize);

996 
} else {

997 
uint8_t *ptr = dst[0];

998 
uint8_t *intra4x4 = s>intra4x4_pred_mode_mb; 
999  
1000 
// all blocks on the right edge of the macroblock use bottom edge

1001 
// the top macroblock for their topright edge

1002 
uint8_t *tr_right = ptr  s>linesize + 16;

1003  
1004 
// if we're on the right edge of the frame, said edge is extended

1005 
// from the top macroblock

1006 
if (mb_x == s>mb_width1) { 
1007 
tr = tr_right[1]*0x01010101; 
1008 
tr_right = (uint8_t *)&tr; 
1009 
} 
1010  
1011 
if (mb>skip)

1012 
AV_ZERO128(s>non_zero_count_cache); 
1013  
1014 
for (y = 0; y < 4; y++) { 
1015 
uint8_t *topright = ptr + 4  s>linesize;

1016 
for (x = 0; x < 4; x++) { 
1017 
if (x == 3) 
1018 
topright = tr_right; 
1019  
1020 
s>hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s>linesize);

1021  
1022 
nnz = s>non_zero_count_cache[y][x]; 
1023 
if (nnz) {

1024 
if (nnz == 1) 
1025 
s>vp8dsp.vp8_idct_dc_add(ptr+4*x, s>block[y][x], s>linesize);

1026 
else

1027 
s>vp8dsp.vp8_idct_add(ptr+4*x, s>block[y][x], s>linesize);

1028 
} 
1029 
topright += 4;

1030 
} 
1031  
1032 
ptr += 4*s>linesize;

1033 
intra4x4 += 4;

1034 
} 
1035 
} 
1036  
1037 
mode = check_intra_pred_mode(s>chroma_pred_mode, mb_x, mb_y); 
1038 
s>hpc.pred8x8[mode](dst[1], s>uvlinesize);

1039 
s>hpc.pred8x8[mode](dst[2], s>uvlinesize);

1040  
1041 
if (s>deblock_filter  !mb_y)

1042 
xchg_mb_border(s>top_border[mb_x+1], dst[0], dst[1], dst[2], 
1043 
s>linesize, s>uvlinesize, mb_x, mb_y, s>mb_width, 
1044 
s>filter.simple, 0);

1045 
} 
1046  
1047 
/**

1048 
* Generic MC function.

1049 
*

1050 
* @param s VP8 decoding context

1051 
* @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes

1052 
* @param dst target buffer for block data at block position

1053 
* @param src reference picture buffer at origin (0, 0)

1054 
* @param mv motion vector (relative to block position) to get pixel data from

1055 
* @param x_off horizontal position of block from origin (0, 0)

1056 
* @param y_off vertical position of block from origin (0, 0)

1057 
* @param block_w width of block (16, 8 or 4)

1058 
* @param block_h height of block (always same as block_w)

1059 
* @param width width of src/dst plane data

1060 
* @param height height of src/dst plane data

1061 
* @param linesize size of a single line of plane data, including padding

1062 
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)

1063 
*/

1064 
static av_always_inline

1065 
void vp8_mc(VP8Context *s, int luma, 
1066 
uint8_t *dst, uint8_t *src, const VP56mv *mv,

1067 
int x_off, int y_off, int block_w, int block_h, 
1068 
int width, int height, int linesize, 
1069 
vp8_mc_func mc_func[3][3]) 
1070 
{ 
1071 
if (AV_RN32A(mv)) {

1072 
static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; 
1073 
int mx = (mv>x << luma)&7, mx_idx = idx[mx]; 
1074 
int my = (mv>y << luma)&7, my_idx = idx[my]; 
1075  
1076 
x_off += mv>x >> (3  luma);

1077 
y_off += mv>y >> (3  luma);

1078  
1079 
// edge emulation

1080 
src += y_off * linesize + x_off; 
1081 
if (x_off < 2  x_off >= width  block_w  3  
1082 
y_off < 2  y_off >= height  block_h  3) { 
1083 
ff_emulated_edge_mc(s>edge_emu_buffer, src  2 * linesize  2, linesize, 
1084 
block_w + 5, block_h + 5, 
1085 
x_off  2, y_off  2, width, height); 
1086 
src = s>edge_emu_buffer + 2 + linesize * 2; 
1087 
} 
1088 
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); 
1089 
} else

1090 
mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); 
1091 
} 
1092  
1093 
static av_always_inline

1094 
void vp8_mc_part(VP8Context *s, uint8_t *dst[3], 
1095 
AVFrame *ref_frame, int x_off, int y_off, 
1096 
int bx_off, int by_off, 
1097 
int block_w, int block_h, 
1098 
int width, int height, VP56mv *mv) 
1099 
{ 
1100 
VP56mv uvmv = *mv; 
1101  
1102 
/* Y */

1103 
vp8_mc(s, 1, dst[0] + by_off * s>linesize + bx_off, 
1104 
ref_frame>data[0], mv, x_off + bx_off, y_off + by_off,

1105 
block_w, block_h, width, height, s>linesize, 
1106 
s>put_pixels_tab[block_w == 8]);

1107  
1108 
/* U/V */

1109 
if (s>profile == 3) { 
1110 
uvmv.x &= ~7;

1111 
uvmv.y &= ~7;

1112 
} 
1113 
x_off >>= 1; y_off >>= 1; 
1114 
bx_off >>= 1; by_off >>= 1; 
1115 
width >>= 1; height >>= 1; 
1116 
block_w >>= 1; block_h >>= 1; 
1117 
vp8_mc(s, 0, dst[1] + by_off * s>uvlinesize + bx_off, 
1118 
ref_frame>data[1], &uvmv, x_off + bx_off, y_off + by_off,

1119 
block_w, block_h, width, height, s>uvlinesize, 
1120 
s>put_pixels_tab[1 + (block_w == 4)]); 
1121 
vp8_mc(s, 0, dst[2] + by_off * s>uvlinesize + bx_off, 
1122 
ref_frame>data[2], &uvmv, x_off + bx_off, y_off + by_off,

1123 
block_w, block_h, width, height, s>uvlinesize, 
1124 
s>put_pixels_tab[1 + (block_w == 4)]); 
1125 
} 
1126  
1127 
/* Fetch pixels for estimated mv 4 macroblocks ahead.

1128 
* Optimized for 64byte cache lines. Inspired by ffh264 prefetch_motion. */

1129 
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) 
1130 
{ 
1131 
/* Don't prefetch refs that haven't been used very often this frame. */

1132 
if (s>ref_count[ref1] > (mb_xy >> 5)) { 
1133 
int x_off = mb_x << 4, y_off = mb_y << 4; 
1134 
int mx = mb>mv.x + x_off + 8; 
1135 
int my = mb>mv.y + y_off;

1136 
uint8_t **src= s>framep[ref]>data; 
1137 
int off= mx + (my + (mb_x&3)*4)*s>linesize + 64; 
1138 
s>dsp.prefetch(src[0]+off, s>linesize, 4); 
1139 
off= (mx>>1) + ((my>>1) + (mb_x&7))*s>uvlinesize + 64; 
1140 
s>dsp.prefetch(src[1]+off, src[2]src[1], 2); 
1141 
} 
1142 
} 
1143  
1144 
/**

1145 
* Apply motion vectors to prediction buffer, chapter 18.

1146 
*/

1147 
static av_always_inline

1148 
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 
1149 
int mb_x, int mb_y) 
1150 
{ 
1151 
int x_off = mb_x << 4, y_off = mb_y << 4; 
1152 
int width = 16*s>mb_width, height = 16*s>mb_height; 
1153 
AVFrame *ref = s>framep[mb>ref_frame]; 
1154 
VP56mv *bmv = mb>bmv; 
1155  
1156 
if (mb>mode < VP8_MVMODE_SPLIT) {

1157 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1158 
0, 0, 16, 16, width, height, &mb>mv); 
1159 
} else switch (mb>partitioning) { 
1160 
case VP8_SPLITMVMODE_4x4: {

1161 
int x, y;

1162 
VP56mv uvmv; 
1163  
1164 
/* Y */

1165 
for (y = 0; y < 4; y++) { 
1166 
for (x = 0; x < 4; x++) { 
1167 
vp8_mc(s, 1, dst[0] + 4*y*s>linesize + x*4, 
1168 
ref>data[0], &bmv[4*y + x], 
1169 
4*x + x_off, 4*y + y_off, 4, 4, 
1170 
width, height, s>linesize, 
1171 
s>put_pixels_tab[2]);

1172 
} 
1173 
} 
1174  
1175 
/* U/V */

1176 
x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; 
1177 
for (y = 0; y < 2; y++) { 
1178 
for (x = 0; x < 2; x++) { 
1179 
uvmv.x = mb>bmv[ 2*y * 4 + 2*x ].x + 
1180 
mb>bmv[ 2*y * 4 + 2*x+1].x + 
1181 
mb>bmv[(2*y+1) * 4 + 2*x ].x + 
1182 
mb>bmv[(2*y+1) * 4 + 2*x+1].x; 
1183 
uvmv.y = mb>bmv[ 2*y * 4 + 2*x ].y + 
1184 
mb>bmv[ 2*y * 4 + 2*x+1].y + 
1185 
mb>bmv[(2*y+1) * 4 + 2*x ].y + 
1186 
mb>bmv[(2*y+1) * 4 + 2*x+1].y; 
1187 
uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT1))) >> 2; 
1188 
uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT1))) >> 2; 
1189 
if (s>profile == 3) { 
1190 
uvmv.x &= ~7;

1191 
uvmv.y &= ~7;

1192 
} 
1193 
vp8_mc(s, 0, dst[1] + 4*y*s>uvlinesize + x*4, 
1194 
ref>data[1], &uvmv,

1195 
4*x + x_off, 4*y + y_off, 4, 4, 
1196 
width, height, s>uvlinesize, 
1197 
s>put_pixels_tab[2]);

1198 
vp8_mc(s, 0, dst[2] + 4*y*s>uvlinesize + x*4, 
1199 
ref>data[2], &uvmv,

1200 
4*x + x_off, 4*y + y_off, 4, 4, 
1201 
width, height, s>uvlinesize, 
1202 
s>put_pixels_tab[2]);

1203 
} 
1204 
} 
1205 
break;

1206 
} 
1207 
case VP8_SPLITMVMODE_16x8:

1208 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1209 
0, 0, 16, 8, width, height, &bmv[0]); 
1210 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1211 
0, 8, 16, 8, width, height, &bmv[1]); 
1212 
break;

1213 
case VP8_SPLITMVMODE_8x16:

1214 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1215 
0, 0, 8, 16, width, height, &bmv[0]); 
1216 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1217 
8, 0, 8, 16, width, height, &bmv[1]); 
1218 
break;

1219 
case VP8_SPLITMVMODE_8x8:

1220 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1221 
0, 0, 8, 8, width, height, &bmv[0]); 
1222 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1223 
8, 0, 8, 8, width, height, &bmv[1]); 
1224 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1225 
0, 8, 8, 8, width, height, &bmv[2]); 
1226 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1227 
8, 8, 8, 8, width, height, &bmv[3]); 
1228 
break;

1229 
} 
1230 
} 
1231  
1232 
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb) 
1233 
{ 
1234 
int x, y, ch;

1235  
1236 
if (mb>mode != MODE_I4x4) {

1237 
uint8_t *y_dst = dst[0];

1238 
for (y = 0; y < 4; y++) { 
1239 
uint32_t nnz4 = AV_RN32A(s>non_zero_count_cache[y]); 
1240 
if (nnz4) {

1241 
if (nnz4&~0x01010101) { 
1242 
for (x = 0; x < 4; x++) { 
1243 
int nnz = s>non_zero_count_cache[y][x];

1244 
if (nnz) {

1245 
if (nnz == 1) 
1246 
s>vp8dsp.vp8_idct_dc_add(y_dst+4*x, s>block[y][x], s>linesize);

1247 
else

1248 
s>vp8dsp.vp8_idct_add(y_dst+4*x, s>block[y][x], s>linesize);

1249 
} 
1250 
} 
1251 
} else {

1252 
s>vp8dsp.vp8_idct_dc_add4y(y_dst, s>block[y], s>linesize); 
1253 
} 
1254 
} 
1255 
y_dst += 4*s>linesize;

1256 
} 
1257 
} 
1258  
1259 
for (ch = 0; ch < 2; ch++) { 
1260 
uint32_t nnz4 = AV_RN32A(s>non_zero_count_cache[4+ch]);

1261 
if (nnz4) {

1262 
uint8_t *ch_dst = dst[1+ch];

1263 
if (nnz4&~0x01010101) { 
1264 
for (y = 0; y < 2; y++) { 
1265 
for (x = 0; x < 2; x++) { 
1266 
int nnz = s>non_zero_count_cache[4+ch][(y<<1)+x]; 
1267 
if (nnz) {

1268 
if (nnz == 1) 
1269 
s>vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s>block[4+ch][(y<<1)+x], s>uvlinesize); 
1270 
else

1271 
s>vp8dsp.vp8_idct_add(ch_dst+4*x, s>block[4+ch][(y<<1)+x], s>uvlinesize); 
1272 
} 
1273 
} 
1274 
ch_dst += 4*s>uvlinesize;

1275 
} 
1276 
} else {

1277 
s>vp8dsp.vp8_idct_dc_add4uv(ch_dst, s>block[4+ch], s>uvlinesize);

1278 
} 
1279 
} 
1280 
} 
1281 
} 
1282  
1283 
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) 
1284 
{ 
1285 
int interior_limit, filter_level;

1286  
1287 
if (s>segmentation.enabled) {

1288 
filter_level = s>segmentation.filter_level[s>segment]; 
1289 
if (!s>segmentation.absolute_vals)

1290 
filter_level += s>filter.level; 
1291 
} else

1292 
filter_level = s>filter.level; 
1293  
1294 
if (s>lf_delta.enabled) {

1295 
filter_level += s>lf_delta.ref[mb>ref_frame]; 
1296  
1297 
if (mb>ref_frame == VP56_FRAME_CURRENT) {

1298 
if (mb>mode == MODE_I4x4)

1299 
filter_level += s>lf_delta.mode[0];

1300 
} else {

1301 
if (mb>mode == VP8_MVMODE_ZERO)

1302 
filter_level += s>lf_delta.mode[1];

1303 
else if (mb>mode == VP8_MVMODE_SPLIT) 
1304 
filter_level += s>lf_delta.mode[3];

1305 
else

1306 
filter_level += s>lf_delta.mode[2];

1307 
} 
1308 
} 
1309 
filter_level = av_clip(filter_level, 0, 63); 
1310  
1311 
interior_limit = filter_level; 
1312 
if (s>filter.sharpness) {

1313 
interior_limit >>= s>filter.sharpness > 4 ? 2 : 1; 
1314 
interior_limit = FFMIN(interior_limit, 9  s>filter.sharpness);

1315 
} 
1316 
interior_limit = FFMAX(interior_limit, 1);

1317  
1318 
f>filter_level = filter_level; 
1319 
f>inner_limit = interior_limit; 
1320 
f>inner_filter = !mb>skip  mb>mode == MODE_I4x4  mb>mode == VP8_MVMODE_SPLIT; 
1321 
} 
1322  
1323 
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) 
1324 
{ 
1325 
int mbedge_lim, bedge_lim, hev_thresh;

1326 
int filter_level = f>filter_level;

1327 
int inner_limit = f>inner_limit;

1328 
int inner_filter = f>inner_filter;

1329 
int linesize = s>linesize;

1330 
int uvlinesize = s>uvlinesize;

1331  
1332 
if (!filter_level)

1333 
return;

1334  
1335 
mbedge_lim = 2*(filter_level+2) + inner_limit; 
1336 
bedge_lim = 2* filter_level + inner_limit;

1337 
hev_thresh = filter_level >= 15;

1338  
1339 
if (s>keyframe) {

1340 
if (filter_level >= 40) 
1341 
hev_thresh = 2;

1342 
} else {

1343 
if (filter_level >= 40) 
1344 
hev_thresh = 3;

1345 
else if (filter_level >= 20) 
1346 
hev_thresh = 2;

1347 
} 
1348  
1349 
if (mb_x) {

1350 
s>vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,

1351 
mbedge_lim, inner_limit, hev_thresh); 
1352 
s>vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, 
1353 
mbedge_lim, inner_limit, hev_thresh); 
1354 
} 
1355  
1356 
if (inner_filter) {

1357 
s>vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim, 
1358 
inner_limit, hev_thresh); 
1359 
s>vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim, 
1360 
inner_limit, hev_thresh); 
1361 
s>vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim, 
1362 
inner_limit, hev_thresh); 
1363 
s>vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, 
1364 
uvlinesize, bedge_lim, 
1365 
inner_limit, hev_thresh); 
1366 
} 
1367  
1368 
if (mb_y) {

1369 
s>vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,

1370 
mbedge_lim, inner_limit, hev_thresh); 
1371 
s>vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, 
1372 
mbedge_lim, inner_limit, hev_thresh); 
1373 
} 
1374  
1375 
if (inner_filter) {

1376 
s>vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize, 
1377 
linesize, bedge_lim, 
1378 
inner_limit, hev_thresh); 
1379 
s>vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize, 
1380 
linesize, bedge_lim, 
1381 
inner_limit, hev_thresh); 
1382 
s>vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize, 
1383 
linesize, bedge_lim, 
1384 
inner_limit, hev_thresh); 
1385 
s>vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, 
1386 
dst[2] + 4 * uvlinesize, 
1387 
uvlinesize, bedge_lim, 
1388 
inner_limit, hev_thresh); 
1389 
} 
1390 
} 
1391  
1392 
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) 
1393 
{ 
1394 
int mbedge_lim, bedge_lim;

1395 
int filter_level = f>filter_level;

1396 
int inner_limit = f>inner_limit;

1397 
int inner_filter = f>inner_filter;

1398 
int linesize = s>linesize;

1399  
1400 
if (!filter_level)

1401 
return;

1402  
1403 
mbedge_lim = 2*(filter_level+2) + inner_limit; 
1404 
bedge_lim = 2* filter_level + inner_limit;

1405  
1406 
if (mb_x)

1407 
s>vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); 
1408 
if (inner_filter) {

1409 
s>vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);

1410 
s>vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);

1411 
s>vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);

1412 
} 
1413  
1414 
if (mb_y)

1415 
s>vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); 
1416 
if (inner_filter) {

1417 
s>vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);

1418 
s>vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);

1419 
s>vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);

1420 
} 
1421 
} 
1422  
1423 
static void filter_mb_row(VP8Context *s, int mb_y) 
1424 
{ 
1425 
VP8FilterStrength *f = s>filter_strength; 
1426 
uint8_t *dst[3] = {

1427 
s>framep[VP56_FRAME_CURRENT]>data[0] + 16*mb_y*s>linesize, 
1428 
s>framep[VP56_FRAME_CURRENT]>data[1] + 8*mb_y*s>uvlinesize, 
1429 
s>framep[VP56_FRAME_CURRENT]>data[2] + 8*mb_y*s>uvlinesize 
1430 
}; 
1431 
int mb_x;

1432  
1433 
for (mb_x = 0; mb_x < s>mb_width; mb_x++) { 
1434 
backup_mb_border(s>top_border[mb_x+1], dst[0], dst[1], dst[2], s>linesize, s>uvlinesize, 0); 
1435 
filter_mb(s, dst, f++, mb_x, mb_y); 
1436 
dst[0] += 16; 
1437 
dst[1] += 8; 
1438 
dst[2] += 8; 
1439 
} 
1440 
} 
1441  
1442 
static void filter_mb_row_simple(VP8Context *s, int mb_y) 
1443 
{ 
1444 
VP8FilterStrength *f = s>filter_strength; 
1445 
uint8_t *dst = s>framep[VP56_FRAME_CURRENT]>data[0] + 16*mb_y*s>linesize; 
1446 
int mb_x;

1447  
1448 
for (mb_x = 0; mb_x < s>mb_width; mb_x++) { 
1449 
backup_mb_border(s>top_border[mb_x+1], dst, NULL, NULL, s>linesize, 0, 1); 
1450 
filter_mb_simple(s, dst, f++, mb_x, mb_y); 
1451 
dst += 16;

1452 
} 
1453 
} 
1454  
1455 
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, 
1456 
AVPacket *avpkt) 
1457 
{ 
1458 
VP8Context *s = avctx>priv_data; 
1459 
int ret, mb_x, mb_y, i, y, referenced;

1460 
enum AVDiscard skip_thresh;

1461 
AVFrame *av_uninit(curframe); 
1462  
1463 
if ((ret = decode_frame_header(s, avpkt>data, avpkt>size)) < 0) 
1464 
return ret;

1465  
1466 
referenced = s>update_last  s>update_golden == VP56_FRAME_CURRENT 
1467 
 s>update_altref == VP56_FRAME_CURRENT; 
1468  
1469 
skip_thresh = !referenced ? AVDISCARD_NONREF : 
1470 
!s>keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; 
1471  
1472 
if (avctx>skip_frame >= skip_thresh) {

1473 
s>invisible = 1;

1474 
goto skip_decode;

1475 
} 
1476 
s>deblock_filter = s>filter.level && avctx>skip_loop_filter < skip_thresh; 
1477  
1478 
for (i = 0; i < 4; i++) 
1479 
if (&s>frames[i] != s>framep[VP56_FRAME_PREVIOUS] &&

1480 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN] && 
1481 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN2]) { 
1482 
curframe = s>framep[VP56_FRAME_CURRENT] = &s>frames[i]; 
1483 
break;

1484 
} 
1485 
if (curframe>data[0]) 
1486 
avctx>release_buffer(avctx, curframe); 
1487  
1488 
curframe>key_frame = s>keyframe; 
1489 
curframe>pict_type = s>keyframe ? FF_I_TYPE : FF_P_TYPE; 
1490 
curframe>reference = referenced ? 3 : 0; 
1491 
if ((ret = avctx>get_buffer(avctx, curframe))) {

1492 
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");

1493 
return ret;

1494 
} 
1495  
1496 
// Given that arithmetic probabilities are updated every frame, it's quite likely

1497 
// that the values we have on a random interframe are complete junk if we didn't

1498 
// start decode on a keyframe. So just don't display anything rather than junk.

1499 
if (!s>keyframe && (!s>framep[VP56_FRAME_PREVIOUS] 

1500 
!s>framep[VP56_FRAME_GOLDEN]  
1501 
!s>framep[VP56_FRAME_GOLDEN2])) { 
1502 
av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");

1503 
return AVERROR_INVALIDDATA;

1504 
} 
1505  
1506 
s>linesize = curframe>linesize[0];

1507 
s>uvlinesize = curframe>linesize[1];

1508  
1509 
if (!s>edge_emu_buffer)

1510 
s>edge_emu_buffer = av_malloc(21*s>linesize);

1511  
1512 
memset(s>top_nnz, 0, s>mb_width*sizeof(*s>top_nnz)); 
1513  
1514 
/* Zero macroblock structures for top/left prediction from outside the frame. */

1515 
memset(s>macroblocks, 0, (s>mb_width + s>mb_height*2)*sizeof(*s>macroblocks)); 
1516  
1517 
// top edge of 127 for intra prediction

1518 
memset(s>top_border, 127, (s>mb_width+1)*sizeof(*s>top_border)); 
1519 
memset(s>ref_count, 0, sizeof(s>ref_count)); 
1520 
if (s>keyframe)

1521 
memset(s>intra4x4_pred_mode_top, DC_PRED, s>b4_stride*4);

1522  
1523 
for (mb_y = 0; mb_y < s>mb_height; mb_y++) { 
1524 
VP56RangeCoder *c = &s>coeff_partition[mb_y & (s>num_coeff_partitions1)];

1525 
VP8Macroblock *mb = s>macroblocks + (s>mb_height  mb_y  1)*2; 
1526 
uint8_t *segment_map = s>segmentation_map + mb_y*s>mb_stride; 
1527 
int mb_xy = mb_y * s>mb_stride;

1528 
uint8_t *dst[3] = {

1529 
curframe>data[0] + 16*mb_y*s>linesize, 
1530 
curframe>data[1] + 8*mb_y*s>uvlinesize, 
1531 
curframe>data[2] + 8*mb_y*s>uvlinesize 
1532 
}; 
1533  
1534 
memset(s>left_nnz, 0, sizeof(s>left_nnz)); 
1535 
AV_WN32A(s>intra4x4_pred_mode_left, DC_PRED*0x01010101);

1536  
1537 
// left edge of 129 for intra prediction

1538 
if (!(avctx>flags & CODEC_FLAG_EMU_EDGE))

1539 
for (i = 0; i < 3; i++) 
1540 
for (y = 0; y < 16>>!!i; y++) 
1541 
dst[i][y*curframe>linesize[i]1] = 129; 
1542 
if (mb_y)

1543 
memset(s>top_border, 129, sizeof(*s>top_border)); 
1544  
1545 
for (mb_x = 0; mb_x < s>mb_width; mb_x++, mb_xy++, mb++) { 
1546 
uint8_t *segment_mb = segment_map+mb_x; 
1547  
1548 
/* Prefetch the current frame, 4 MBs ahead */

1549 
s>dsp.prefetch(dst[0] + (mb_x&3)*4*s>linesize + 64, s>linesize, 4); 
1550 
s>dsp.prefetch(dst[1] + (mb_x&7)*s>uvlinesize + 64, dst[2]  dst[1], 2); 
1551  
1552 
decode_mb_mode(s, mb, mb_x, mb_y, segment_mb); 
1553  
1554 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); 
1555  
1556 
if (!mb>skip)

1557 
decode_mb_coeffs(s, c, mb, s>top_nnz[mb_x], s>left_nnz); 
1558  
1559 
if (mb>mode <= MODE_I4x4)

1560 
intra_predict(s, dst, mb, mb_x, mb_y); 
1561 
else

1562 
inter_predict(s, dst, mb, mb_x, mb_y); 
1563  
1564 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); 
1565  
1566 
if (!mb>skip) {

1567 
idct_mb(s, dst, mb); 
1568 
} else {

1569 
AV_ZERO64(s>left_nnz); 
1570 
AV_WN64(s>top_nnz[mb_x], 0); // array of 9, so unaligned 
1571  
1572 
// Reset DC block predictors if they would exist if the mb had coefficients

1573 
if (mb>mode != MODE_I4x4 && mb>mode != VP8_MVMODE_SPLIT) {

1574 
s>left_nnz[8] = 0; 
1575 
s>top_nnz[mb_x][8] = 0; 
1576 
} 
1577 
} 
1578  
1579 
if (s>deblock_filter)

1580 
filter_level_for_mb(s, mb, &s>filter_strength[mb_x]); 
1581  
1582 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); 
1583  
1584 
dst[0] += 16; 
1585 
dst[1] += 8; 
1586 
dst[2] += 8; 
1587 
} 
1588 
if (s>deblock_filter) {

1589 
if (s>filter.simple)

1590 
filter_mb_row_simple(s, mb_y); 
1591 
else

1592 
filter_mb_row(s, mb_y); 
1593 
} 
1594 
} 
1595  
1596 
skip_decode:

1597 
// if future frames don't use the updated probabilities,

1598 
// reset them to the values we saved

1599 
if (!s>update_probabilities)

1600 
s>prob[0] = s>prob[1]; 
1601  
1602 
// check if golden and altref are swapped

1603 
if (s>update_altref == VP56_FRAME_GOLDEN &&

1604 
s>update_golden == VP56_FRAME_GOLDEN2) 
1605 
FFSWAP(AVFrame *, s>framep[VP56_FRAME_GOLDEN], s>framep[VP56_FRAME_GOLDEN2]); 
1606 
else {

1607 
if (s>update_altref != VP56_FRAME_NONE)

1608 
s>framep[VP56_FRAME_GOLDEN2] = s>framep[s>update_altref]; 
1609  
1610 
if (s>update_golden != VP56_FRAME_NONE)

1611 
s>framep[VP56_FRAME_GOLDEN] = s>framep[s>update_golden]; 
1612 
} 
1613  
1614 
if (s>update_last) // move cur>prev 
1615 
s>framep[VP56_FRAME_PREVIOUS] = s>framep[VP56_FRAME_CURRENT]; 
1616  
1617 
// release no longer referenced frames

1618 
for (i = 0; i < 4; i++) 
1619 
if (s>frames[i].data[0] && 
1620 
&s>frames[i] != s>framep[VP56_FRAME_CURRENT] && 
1621 
&s>frames[i] != s>framep[VP56_FRAME_PREVIOUS] && 
1622 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN] && 
1623 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN2]) 
1624 
avctx>release_buffer(avctx, &s>frames[i]); 
1625  
1626 
if (!s>invisible) {

1627 
*(AVFrame*)data = *s>framep[VP56_FRAME_CURRENT]; 
1628 
*data_size = sizeof(AVFrame);

1629 
} 
1630  
1631 
return avpkt>size;

1632 
} 
1633  
1634 
static av_cold int vp8_decode_init(AVCodecContext *avctx) 
1635 
{ 
1636 
VP8Context *s = avctx>priv_data; 
1637  
1638 
s>avctx = avctx; 
1639 
avctx>pix_fmt = PIX_FMT_YUV420P; 
1640  
1641 
dsputil_init(&s>dsp, avctx); 
1642 
ff_h264_pred_init(&s>hpc, CODEC_ID_VP8); 
1643 
ff_vp8dsp_init(&s>vp8dsp); 
1644  
1645 
// intra pred needs edge emulation among other things

1646 
if (avctx>flags&CODEC_FLAG_EMU_EDGE) {

1647 
av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n");

1648 
return AVERROR_PATCHWELCOME;

1649 
} 
1650  
1651 
return 0; 
1652 
} 
1653  
1654 
static av_cold int vp8_decode_free(AVCodecContext *avctx) 
1655 
{ 
1656 
vp8_decode_flush(avctx); 
1657 
return 0; 
1658 
} 
1659  
1660 
AVCodec vp8_decoder = { 
1661 
"vp8",

1662 
AVMEDIA_TYPE_VIDEO, 
1663 
CODEC_ID_VP8, 
1664 
sizeof(VP8Context),

1665 
vp8_decode_init, 
1666 
NULL,

1667 
vp8_decode_free, 
1668 
vp8_decode_frame, 
1669 
CODEC_CAP_DR1, 
1670 
.flush = vp8_decode_flush, 
1671 
.long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),

1672 
}; 