Line data Source code
1 : /*
2 : * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include <assert.h>
12 : #include <math.h>
13 :
14 : #include "./vp9_rtcd.h"
15 : #include "./vpx_dsp_rtcd.h"
16 :
17 : #include "vpx_dsp/vpx_dsp_common.h"
18 : #include "vpx_mem/vpx_mem.h"
19 : #include "vpx_ports/mem.h"
20 : #include "vpx_ports/system_state.h"
21 :
22 : #include "vp9/common/vp9_common.h"
23 : #include "vp9/common/vp9_entropy.h"
24 : #include "vp9/common/vp9_entropymode.h"
25 : #include "vp9/common/vp9_idct.h"
26 : #include "vp9/common/vp9_mvref_common.h"
27 : #include "vp9/common/vp9_pred_common.h"
28 : #include "vp9/common/vp9_quant_common.h"
29 : #include "vp9/common/vp9_reconinter.h"
30 : #include "vp9/common/vp9_reconintra.h"
31 : #include "vp9/common/vp9_scan.h"
32 : #include "vp9/common/vp9_seg_common.h"
33 :
34 : #include "vp9/encoder/vp9_cost.h"
35 : #include "vp9/encoder/vp9_encodemb.h"
36 : #include "vp9/encoder/vp9_encodemv.h"
37 : #include "vp9/encoder/vp9_encoder.h"
38 : #include "vp9/encoder/vp9_mcomp.h"
39 : #include "vp9/encoder/vp9_quantize.h"
40 : #include "vp9/encoder/vp9_ratectrl.h"
41 : #include "vp9/encoder/vp9_rd.h"
42 : #include "vp9/encoder/vp9_rdopt.h"
43 : #include "vp9/encoder/vp9_aq_variance.h"
44 :
// Reference-frame bitmasks: bit n stands for MV_REFERENCE_FRAME value n.
// Each *_MODE_MASK has a bit set for INTRA_FRAME and for the two inter
// reference frames other than the one named by the macro.
#define LAST_FRAME_MODE_MASK \
  ((1 << INTRA_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << ALTREF_FRAME))
#define ALT_REF_MODE_MASK \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << GOLDEN_FRAME))

// Bit 0 together with the ALTREF_FRAME bit.
#define SECOND_REF_FRAME_MASK (0x01 | (1 << ALTREF_FRAME))

// Tuning constants; their consumers are outside this part of the file.
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
56 :
57 : typedef struct {
58 : PREDICTION_MODE mode;
59 : MV_REFERENCE_FRAME ref_frame[2];
60 : } MODE_DEFINITION;
61 :
62 : typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
63 :
64 : struct rdcost_block_args {
65 : const VP9_COMP *cpi;
66 : MACROBLOCK *x;
67 : ENTROPY_CONTEXT t_above[16];
68 : ENTROPY_CONTEXT t_left[16];
69 : int this_rate;
70 : int64_t this_dist;
71 : int64_t this_sse;
72 : int64_t this_rd;
73 : int64_t best_rd;
74 : int exit_early;
75 : int use_fast_coef_costing;
76 : const scan_order *so;
77 : uint8_t skippable;
78 : };
79 :
80 : #define LAST_NEW_MV_INDEX 6
81 : static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
82 : { NEARESTMV, { LAST_FRAME, NONE } },
83 : { NEARESTMV, { ALTREF_FRAME, NONE } },
84 : { NEARESTMV, { GOLDEN_FRAME, NONE } },
85 :
86 : { DC_PRED, { INTRA_FRAME, NONE } },
87 :
88 : { NEWMV, { LAST_FRAME, NONE } },
89 : { NEWMV, { ALTREF_FRAME, NONE } },
90 : { NEWMV, { GOLDEN_FRAME, NONE } },
91 :
92 : { NEARMV, { LAST_FRAME, NONE } },
93 : { NEARMV, { ALTREF_FRAME, NONE } },
94 : { NEARMV, { GOLDEN_FRAME, NONE } },
95 :
96 : { ZEROMV, { LAST_FRAME, NONE } },
97 : { ZEROMV, { GOLDEN_FRAME, NONE } },
98 : { ZEROMV, { ALTREF_FRAME, NONE } },
99 :
100 : { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
101 : { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
102 :
103 : { TM_PRED, { INTRA_FRAME, NONE } },
104 :
105 : { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
106 : { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
107 : { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
108 : { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
109 :
110 : { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
111 : { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
112 :
113 : { H_PRED, { INTRA_FRAME, NONE } },
114 : { V_PRED, { INTRA_FRAME, NONE } },
115 : { D135_PRED, { INTRA_FRAME, NONE } },
116 : { D207_PRED, { INTRA_FRAME, NONE } },
117 : { D153_PRED, { INTRA_FRAME, NONE } },
118 : { D63_PRED, { INTRA_FRAME, NONE } },
119 : { D117_PRED, { INTRA_FRAME, NONE } },
120 : { D45_PRED, { INTRA_FRAME, NONE } },
121 : };
122 :
123 : static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
124 : { { LAST_FRAME, NONE } }, { { GOLDEN_FRAME, NONE } },
125 : { { ALTREF_FRAME, NONE } }, { { LAST_FRAME, ALTREF_FRAME } },
126 : { { GOLDEN_FRAME, ALTREF_FRAME } }, { { INTRA_FRAME, NONE } },
127 : };
128 :
129 0 : static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n,
130 : int min_plane, int max_plane) {
131 : int i;
132 :
133 0 : for (i = min_plane; i < max_plane; ++i) {
134 0 : struct macroblock_plane *const p = &x->plane[i];
135 0 : struct macroblockd_plane *const pd = &x->e_mbd.plane[i];
136 :
137 0 : p->coeff = ctx->coeff_pbuf[i][m];
138 0 : p->qcoeff = ctx->qcoeff_pbuf[i][m];
139 0 : pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
140 0 : p->eobs = ctx->eobs_pbuf[i][m];
141 :
142 0 : ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n];
143 0 : ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n];
144 0 : ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
145 0 : ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n];
146 :
147 0 : ctx->coeff_pbuf[i][n] = p->coeff;
148 0 : ctx->qcoeff_pbuf[i][n] = p->qcoeff;
149 0 : ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
150 0 : ctx->eobs_pbuf[i][n] = p->eobs;
151 : }
152 0 : }
153 :
154 0 : static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
155 : MACROBLOCKD *xd, int *out_rate_sum,
156 : int64_t *out_dist_sum, int *skip_txfm_sb,
157 : int64_t *skip_sse_sb) {
158 : // Note our transform coeffs are 8 times an orthogonal transform.
159 : // Hence quantizer step is also 8 times. To get effective quantizer
160 : // we need to divide by 8 before sending to modeling function.
161 : int i;
162 0 : int64_t rate_sum = 0;
163 0 : int64_t dist_sum = 0;
164 0 : const int ref = xd->mi[0]->ref_frame[0];
165 : unsigned int sse;
166 0 : unsigned int var = 0;
167 0 : unsigned int sum_sse = 0;
168 0 : int64_t total_sse = 0;
169 0 : int skip_flag = 1;
170 0 : const int shift = 6;
171 : int rate;
172 : int64_t dist;
173 0 : const int dequant_shift =
174 : #if CONFIG_VP9_HIGHBITDEPTH
175 : (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
176 : #endif // CONFIG_VP9_HIGHBITDEPTH
177 : 3;
178 :
179 0 : x->pred_sse[ref] = 0;
180 :
181 0 : for (i = 0; i < MAX_MB_PLANE; ++i) {
182 0 : struct macroblock_plane *const p = &x->plane[i];
183 0 : struct macroblockd_plane *const pd = &xd->plane[i];
184 0 : const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
185 0 : const TX_SIZE max_tx_size = max_txsize_lookup[bs];
186 0 : const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
187 0 : const int64_t dc_thr = p->quant_thred[0] >> shift;
188 0 : const int64_t ac_thr = p->quant_thred[1] >> shift;
189 : // The low thresholds are used to measure if the prediction errors are
190 : // low enough so that we can skip the mode search.
191 0 : const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
192 0 : const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
193 0 : int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
194 0 : int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
195 : int idx, idy;
196 0 : int lw = b_width_log2_lookup[unit_size] + 2;
197 0 : int lh = b_height_log2_lookup[unit_size] + 2;
198 :
199 0 : sum_sse = 0;
200 :
201 0 : for (idy = 0; idy < bh; ++idy) {
202 0 : for (idx = 0; idx < bw; ++idx) {
203 0 : uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
204 0 : uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
205 0 : int block_idx = (idy << 1) + idx;
206 0 : int low_err_skip = 0;
207 :
208 0 : var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst, pd->dst.stride,
209 : &sse);
210 0 : x->bsse[(i << 2) + block_idx] = sse;
211 0 : sum_sse += sse;
212 :
213 0 : x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
214 0 : if (!x->select_tx_size) {
215 : // Check if all ac coefficients can be quantized to zero.
216 0 : if (var < ac_thr || var == 0) {
217 0 : x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;
218 :
219 : // Check if dc coefficient can be quantized to zero.
220 0 : if (sse - var < dc_thr || sse == var) {
221 0 : x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;
222 :
223 0 : if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
224 0 : low_err_skip = 1;
225 : }
226 : }
227 : }
228 :
229 0 : if (skip_flag && !low_err_skip) skip_flag = 0;
230 :
231 0 : if (i == 0) x->pred_sse[ref] += sse;
232 : }
233 : }
234 :
235 0 : total_sse += sum_sse;
236 :
237 : // Fast approximate the modelling function.
238 0 : if (cpi->sf.simple_model_rd_from_var) {
239 : int64_t rate;
240 0 : const int64_t square_error = sum_sse;
241 0 : int quantizer = (pd->dequant[1] >> dequant_shift);
242 :
243 0 : if (quantizer < 120)
244 0 : rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT);
245 : else
246 0 : rate = 0;
247 0 : dist = (square_error * quantizer) >> 8;
248 0 : rate_sum += rate;
249 0 : dist_sum += dist;
250 : } else {
251 0 : vp9_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
252 0 : pd->dequant[1] >> dequant_shift, &rate,
253 : &dist);
254 0 : rate_sum += rate;
255 0 : dist_sum += dist;
256 : }
257 : }
258 :
259 0 : *skip_txfm_sb = skip_flag;
260 0 : *skip_sse_sb = total_sse << 4;
261 0 : *out_rate_sum = (int)rate_sum;
262 0 : *out_dist_sum = dist_sum << 4;
263 0 : }
264 :
265 : #if CONFIG_VP9_HIGHBITDEPTH
266 : int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
267 : const tran_low_t *dqcoeff, intptr_t block_size,
268 : int64_t *ssz, int bd) {
269 : int i;
270 : int64_t error = 0, sqcoeff = 0;
271 : int shift = 2 * (bd - 8);
272 : int rounding = shift > 0 ? 1 << (shift - 1) : 0;
273 :
274 : for (i = 0; i < block_size; i++) {
275 : const int64_t diff = coeff[i] - dqcoeff[i];
276 : error += diff * diff;
277 : sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
278 : }
279 : assert(error >= 0 && sqcoeff >= 0);
280 : error = (error + rounding) >> shift;
281 : sqcoeff = (sqcoeff + rounding) >> shift;
282 :
283 : *ssz = sqcoeff;
284 : return error;
285 : }
286 :
287 : int64_t vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
288 : const tran_low_t *dqcoeff,
289 : intptr_t block_size, int64_t *ssz) {
290 : // Note that the C versions of these 2 functions (vp9_block_error and
291 : // vp9_highbd_block_error_8bit are the same, but the optimized assembly
292 : // routines are not compatible in the non high bitdepth configuration, so
293 : // they still cannot share the same name.
294 : return vp9_block_error_c(coeff, dqcoeff, block_size, ssz);
295 : }
296 :
297 : static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff,
298 : const tran_low_t *dqcoeff,
299 : intptr_t block_size,
300 : int64_t *ssz, int bd) {
301 : if (bd == 8) {
302 : return vp9_highbd_block_error_8bit(coeff, dqcoeff, block_size, ssz);
303 : } else {
304 : return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd);
305 : }
306 : }
307 : #endif // CONFIG_VP9_HIGHBITDEPTH
308 :
309 0 : int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
310 : intptr_t block_size, int64_t *ssz) {
311 : int i;
312 0 : int64_t error = 0, sqcoeff = 0;
313 :
314 0 : for (i = 0; i < block_size; i++) {
315 0 : const int diff = coeff[i] - dqcoeff[i];
316 0 : error += diff * diff;
317 0 : sqcoeff += coeff[i] * coeff[i];
318 : }
319 :
320 0 : *ssz = sqcoeff;
321 0 : return error;
322 : }
323 :
// Returns the sum of squared differences between coeff[] and dqcoeff[] over
// block_size entries (0 when block_size <= 0).
//
// The difference of two int16_t values can reach +/-65535, whose square does
// not fit in a 32-bit int, so the difference is widened to int64_t before it
// is squared.
int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
336 :
337 : /* The trailing '0' is a terminator which is used inside cost_coeffs() to
338 : * decide whether to include cost of a trailing EOB node or not (i.e. we
339 : * can skip this if the last coefficient in this transform block, e.g. the
340 : * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
341 : * were non-zero). */
342 : static const int16_t band_counts[TX_SIZES][8] = {
343 : { 1, 2, 3, 4, 3, 16 - 13, 0 },
344 : { 1, 2, 3, 4, 11, 64 - 21, 0 },
345 : { 1, 2, 3, 4, 11, 256 - 21, 0 },
346 : { 1, 2, 3, 4, 11, 1024 - 21, 0 },
347 : };
348 0 : static int cost_coeffs(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
349 : int pt, const int16_t *scan, const int16_t *nb,
350 : int use_fast_coef_costing) {
351 0 : MACROBLOCKD *const xd = &x->e_mbd;
352 0 : MODE_INFO *mi = xd->mi[0];
353 0 : const struct macroblock_plane *p = &x->plane[plane];
354 0 : const PLANE_TYPE type = get_plane_type(plane);
355 0 : const int16_t *band_count = &band_counts[tx_size][1];
356 0 : const int eob = p->eobs[block];
357 0 : const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
358 0 : unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
359 0 : x->token_costs[tx_size][type][is_inter_block(mi)];
360 : uint8_t token_cache[32 * 32];
361 : int c, cost;
362 : #if CONFIG_VP9_HIGHBITDEPTH
363 : const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
364 : #else
365 0 : const int *cat6_high_cost = vp9_get_high_cost_table(8);
366 : #endif
367 :
368 : // Check for consistency of tx_size with mode info
369 0 : assert(type == PLANE_TYPE_Y
370 : ? mi->tx_size == tx_size
371 : : get_uv_tx_size(mi, &xd->plane[plane]) == tx_size);
372 :
373 0 : if (eob == 0) {
374 : // single eob token
375 0 : cost = token_costs[0][0][pt][EOB_TOKEN];
376 0 : c = 0;
377 : } else {
378 0 : if (use_fast_coef_costing) {
379 0 : int band_left = *band_count++;
380 :
381 : // dc token
382 0 : int v = qcoeff[0];
383 : int16_t prev_t;
384 0 : cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost);
385 0 : cost += (*token_costs)[0][pt][prev_t];
386 :
387 0 : token_cache[0] = vp9_pt_energy_class[prev_t];
388 0 : ++token_costs;
389 :
390 : // ac tokens
391 0 : for (c = 1; c < eob; c++) {
392 0 : const int rc = scan[c];
393 : int16_t t;
394 :
395 0 : v = qcoeff[rc];
396 0 : cost += vp9_get_token_cost(v, &t, cat6_high_cost);
397 0 : cost += (*token_costs)[!prev_t][!prev_t][t];
398 0 : prev_t = t;
399 0 : if (!--band_left) {
400 0 : band_left = *band_count++;
401 0 : ++token_costs;
402 : }
403 : }
404 :
405 : // eob token
406 0 : if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
407 :
408 : } else { // !use_fast_coef_costing
409 0 : int band_left = *band_count++;
410 :
411 : // dc token
412 0 : int v = qcoeff[0];
413 : int16_t tok;
414 : unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
415 0 : cost = vp9_get_token_cost(v, &tok, cat6_high_cost);
416 0 : cost += (*token_costs)[0][pt][tok];
417 :
418 0 : token_cache[0] = vp9_pt_energy_class[tok];
419 0 : ++token_costs;
420 :
421 0 : tok_cost_ptr = &((*token_costs)[!tok]);
422 :
423 : // ac tokens
424 0 : for (c = 1; c < eob; c++) {
425 0 : const int rc = scan[c];
426 :
427 0 : v = qcoeff[rc];
428 0 : cost += vp9_get_token_cost(v, &tok, cat6_high_cost);
429 0 : pt = get_coef_context(nb, token_cache, c);
430 0 : cost += (*tok_cost_ptr)[pt][tok];
431 0 : token_cache[rc] = vp9_pt_energy_class[tok];
432 0 : if (!--band_left) {
433 0 : band_left = *band_count++;
434 0 : ++token_costs;
435 : }
436 0 : tok_cost_ptr = &((*token_costs)[!tok]);
437 : }
438 :
439 : // eob token
440 0 : if (band_left) {
441 0 : pt = get_coef_context(nb, token_cache, c);
442 0 : cost += (*token_costs)[0][pt][EOB_TOKEN];
443 : }
444 : }
445 : }
446 :
447 0 : return cost;
448 : }
449 :
450 0 : static INLINE int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim,
451 : int subsampling_dim, int blk_dim) {
452 0 : return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim;
453 : }
454 :
455 : // Compute the pixel domain sum square error on all visible 4x4s in the
456 : // transform block.
457 0 : static unsigned pixel_sse(const VP9_COMP *const cpi, const MACROBLOCKD *xd,
458 : const struct macroblockd_plane *const pd,
459 : const uint8_t *src, const int src_stride,
460 : const uint8_t *dst, const int dst_stride, int blk_row,
461 : int blk_col, const BLOCK_SIZE plane_bsize,
462 : const BLOCK_SIZE tx_bsize) {
463 0 : unsigned int sse = 0;
464 0 : const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
465 0 : const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
466 0 : const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
467 0 : const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
468 0 : int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
469 : pd->subsampling_x, blk_col);
470 0 : int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
471 : pd->subsampling_y, blk_row);
472 0 : if (tx_bsize == BLOCK_4X4 ||
473 0 : (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
474 0 : cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
475 : } else {
476 0 : const vpx_variance_fn_t vf_4x4 = cpi->fn_ptr[BLOCK_4X4].vf;
477 : int r, c;
478 0 : unsigned this_sse = 0;
479 0 : int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
480 0 : int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
481 0 : sse = 0;
482 : // if we are in the unrestricted motion border.
483 0 : for (r = 0; r < max_r; ++r) {
484 : // Skip visiting the sub blocks that are wholly within the UMV.
485 0 : for (c = 0; c < max_c; ++c) {
486 0 : vf_4x4(src + r * src_stride * 4 + c * 4, src_stride,
487 0 : dst + r * dst_stride * 4 + c * 4, dst_stride, &this_sse);
488 0 : sse += this_sse;
489 : }
490 : }
491 : }
492 0 : return sse;
493 : }
494 :
495 : // Compute the squares sum squares on all visible 4x4s in the transform block.
496 0 : static int64_t sum_squares_visible(const MACROBLOCKD *xd,
497 : const struct macroblockd_plane *const pd,
498 : const int16_t *diff, const int diff_stride,
499 : int blk_row, int blk_col,
500 : const BLOCK_SIZE plane_bsize,
501 : const BLOCK_SIZE tx_bsize) {
502 : int64_t sse;
503 0 : const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
504 0 : const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
505 0 : const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
506 0 : const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
507 0 : int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
508 : pd->subsampling_x, blk_col);
509 0 : int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
510 : pd->subsampling_y, blk_row);
511 0 : if (tx_bsize == BLOCK_4X4 ||
512 0 : (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
513 0 : sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, tx_bsize);
514 : } else {
515 : int r, c;
516 0 : int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
517 0 : int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
518 0 : sse = 0;
519 : // if we are in the unrestricted motion border.
520 0 : for (r = 0; r < max_r; ++r) {
521 : // Skip visiting the sub blocks that are wholly within the UMV.
522 0 : for (c = 0; c < max_c; ++c) {
523 0 : sse += (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, BLOCK_4X4);
524 : }
525 : }
526 : }
527 0 : return sse;
528 : }
529 :
530 0 : static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
531 : BLOCK_SIZE plane_bsize, int block, int blk_row,
532 : int blk_col, TX_SIZE tx_size, int64_t *out_dist,
533 : int64_t *out_sse) {
534 0 : MACROBLOCKD *const xd = &x->e_mbd;
535 0 : const struct macroblock_plane *const p = &x->plane[plane];
536 0 : const struct macroblockd_plane *const pd = &xd->plane[plane];
537 :
538 0 : if (x->block_tx_domain) {
539 0 : const int ss_txfrm_size = tx_size << 1;
540 : int64_t this_sse;
541 0 : const int shift = tx_size == TX_32X32 ? 0 : 2;
542 0 : const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
543 0 : const tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
544 : #if CONFIG_VP9_HIGHBITDEPTH
545 : const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
546 : *out_dist = vp9_highbd_block_error_dispatch(
547 : coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, bd) >>
548 : shift;
549 : #else
550 0 : *out_dist =
551 0 : vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >>
552 : shift;
553 : #endif // CONFIG_VP9_HIGHBITDEPTH
554 0 : *out_sse = this_sse >> shift;
555 :
556 0 : if (x->skip_encode && !is_inter_block(xd->mi[0])) {
557 : // TODO(jingning): tune the model to better capture the distortion.
558 0 : const int64_t p =
559 0 : (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >>
560 : #if CONFIG_VP9_HIGHBITDEPTH
561 : (shift + 2 + (bd - 8) * 2);
562 : #else
563 0 : (shift + 2);
564 : #endif // CONFIG_VP9_HIGHBITDEPTH
565 0 : *out_dist += (p >> 4);
566 0 : *out_sse += p;
567 : }
568 : } else {
569 0 : const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
570 0 : const int bs = 4 * num_4x4_blocks_wide_lookup[tx_bsize];
571 0 : const int src_stride = p->src.stride;
572 0 : const int dst_stride = pd->dst.stride;
573 0 : const int src_idx = 4 * (blk_row * src_stride + blk_col);
574 0 : const int dst_idx = 4 * (blk_row * dst_stride + blk_col);
575 0 : const uint8_t *src = &p->src.buf[src_idx];
576 0 : const uint8_t *dst = &pd->dst.buf[dst_idx];
577 0 : const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
578 0 : const uint16_t *eob = &p->eobs[block];
579 : unsigned int tmp;
580 :
581 0 : tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row,
582 : blk_col, plane_bsize, tx_bsize);
583 0 : *out_sse = (int64_t)tmp * 16;
584 :
585 0 : if (*eob) {
586 : #if CONFIG_VP9_HIGHBITDEPTH
587 : DECLARE_ALIGNED(16, uint16_t, recon16[1024]);
588 : uint8_t *recon = (uint8_t *)recon16;
589 : #else
590 : DECLARE_ALIGNED(16, uint8_t, recon[1024]);
591 : #endif // CONFIG_VP9_HIGHBITDEPTH
592 :
593 : #if CONFIG_VP9_HIGHBITDEPTH
594 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
595 : recon = CONVERT_TO_BYTEPTR(recon);
596 : vpx_highbd_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0,
597 : bs, bs, xd->bd);
598 : if (xd->lossless) {
599 : vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
600 : } else {
601 : switch (tx_size) {
602 : case TX_4X4:
603 : vp9_highbd_idct4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
604 : break;
605 : case TX_8X8:
606 : vp9_highbd_idct8x8_add(dqcoeff, recon, 32, *eob, xd->bd);
607 : break;
608 : case TX_16X16:
609 : vp9_highbd_idct16x16_add(dqcoeff, recon, 32, *eob, xd->bd);
610 : break;
611 : case TX_32X32:
612 : vp9_highbd_idct32x32_add(dqcoeff, recon, 32, *eob, xd->bd);
613 : break;
614 : default: assert(0 && "Invalid transform size");
615 : }
616 : }
617 : } else {
618 : #endif // CONFIG_VP9_HIGHBITDEPTH
619 0 : vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0, bs, bs);
620 0 : switch (tx_size) {
621 0 : case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break;
622 0 : case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break;
623 0 : case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, *eob); break;
624 : case TX_4X4:
625 : // this is like vp9_short_idct4x4 but has a special case around
626 : // eob<=1, which is significant (not just an optimization) for
627 : // the lossless case.
628 0 : x->itxm_add(dqcoeff, recon, 32, *eob);
629 0 : break;
630 0 : default: assert(0 && "Invalid transform size"); break;
631 : }
632 : #if CONFIG_VP9_HIGHBITDEPTH
633 : }
634 : #endif // CONFIG_VP9_HIGHBITDEPTH
635 :
636 0 : tmp = pixel_sse(cpi, xd, pd, src, src_stride, recon, 32, blk_row, blk_col,
637 : plane_bsize, tx_bsize);
638 : }
639 :
640 0 : *out_dist = (int64_t)tmp * 16;
641 : }
642 0 : }
643 :
644 0 : static int rate_block(int plane, int block, TX_SIZE tx_size, int coeff_ctx,
645 : struct rdcost_block_args *args) {
646 0 : return cost_coeffs(args->x, plane, block, tx_size, coeff_ctx, args->so->scan,
647 0 : args->so->neighbors, args->use_fast_coef_costing);
648 : }
649 :
650 0 : static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
651 : BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
652 0 : struct rdcost_block_args *args = arg;
653 0 : MACROBLOCK *const x = args->x;
654 0 : MACROBLOCKD *const xd = &x->e_mbd;
655 0 : MODE_INFO *const mi = xd->mi[0];
656 : int64_t rd1, rd2, rd;
657 : int rate;
658 : int64_t dist;
659 : int64_t sse;
660 0 : const int coeff_ctx =
661 0 : combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]);
662 :
663 0 : if (args->exit_early) return;
664 :
665 0 : if (!is_inter_block(mi)) {
666 0 : struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above,
667 0 : args->t_left, &mi->skip };
668 0 : vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
669 : &intra_arg);
670 0 : if (x->block_tx_domain) {
671 0 : dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
672 : tx_size, &dist, &sse);
673 : } else {
674 0 : const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
675 0 : const struct macroblock_plane *const p = &x->plane[plane];
676 0 : const struct macroblockd_plane *const pd = &xd->plane[plane];
677 0 : const int src_stride = p->src.stride;
678 0 : const int dst_stride = pd->dst.stride;
679 0 : const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
680 0 : const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
681 0 : const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
682 0 : const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
683 : unsigned int tmp;
684 0 : sse = sum_squares_visible(xd, pd, diff, diff_stride, blk_row, blk_col,
685 : plane_bsize, tx_bsize);
686 : #if CONFIG_VP9_HIGHBITDEPTH
687 : if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->bd > 8))
688 : sse = ROUND64_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
689 : #endif // CONFIG_VP9_HIGHBITDEPTH
690 0 : sse = sse * 16;
691 0 : tmp = pixel_sse(args->cpi, xd, pd, src, src_stride, dst, dst_stride,
692 : blk_row, blk_col, plane_bsize, tx_bsize);
693 0 : dist = (int64_t)tmp * 16;
694 : }
695 0 : } else if (max_txsize_lookup[plane_bsize] == tx_size) {
696 0 : if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
697 : SKIP_TXFM_NONE) {
698 : // full forward transform and quantization
699 0 : vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
700 0 : if (x->block_qcoeff_opt)
701 0 : vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
702 0 : dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
703 : tx_size, &dist, &sse);
704 0 : } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
705 : SKIP_TXFM_AC_ONLY) {
706 : // compute DC coefficient
707 0 : tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
708 0 : tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
709 0 : vp9_xform_quant_dc(x, plane, block, blk_row, blk_col, plane_bsize,
710 : tx_size);
711 0 : sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
712 0 : dist = sse;
713 0 : if (x->plane[plane].eobs[block]) {
714 0 : const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
715 0 : const int64_t resd_sse = coeff[0] - dqcoeff[0];
716 0 : int64_t dc_correct = orig_sse - resd_sse * resd_sse;
717 : #if CONFIG_VP9_HIGHBITDEPTH
718 : dc_correct >>= ((xd->bd - 8) * 2);
719 : #endif
720 0 : if (tx_size != TX_32X32) dc_correct >>= 2;
721 :
722 0 : dist = VPXMAX(0, sse - dc_correct);
723 : }
724 : } else {
725 : // SKIP_TXFM_AC_DC
726 : // skip forward transform
727 0 : x->plane[plane].eobs[block] = 0;
728 0 : sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
729 0 : dist = sse;
730 : }
731 : } else {
732 : // full forward transform and quantization
733 0 : vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
734 0 : if (x->block_qcoeff_opt)
735 0 : vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
736 0 : dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
737 : tx_size, &dist, &sse);
738 : }
739 :
740 0 : rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
741 0 : if (args->this_rd + rd > args->best_rd) {
742 0 : args->exit_early = 1;
743 0 : return;
744 : }
745 :
746 0 : rate = rate_block(plane, block, tx_size, coeff_ctx, args);
747 0 : args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
748 0 : args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
749 0 : rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
750 0 : rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);
751 :
752 : // TODO(jingning): temporarily enabled only for luma component
753 0 : rd = VPXMIN(rd1, rd2);
754 0 : if (plane == 0)
755 0 : x->zcoeff_blk[tx_size][block] =
756 0 : !x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless);
757 :
758 0 : args->this_rate += rate;
759 0 : args->this_dist += dist;
760 0 : args->this_sse += sse;
761 0 : args->this_rd += rd;
762 :
763 0 : if (args->this_rd > args->best_rd) {
764 0 : args->exit_early = 1;
765 0 : return;
766 : }
767 :
768 0 : args->skippable &= !x->plane[plane].eobs[block];
769 : }
770 :
771 0 : static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
772 : int64_t *distortion, int *skippable, int64_t *sse,
773 : int64_t ref_best_rd, int plane, BLOCK_SIZE bsize,
774 : TX_SIZE tx_size, int use_fast_coef_casting) {
775 0 : MACROBLOCKD *const xd = &x->e_mbd;
776 0 : const struct macroblockd_plane *const pd = &xd->plane[plane];
777 : struct rdcost_block_args args;
778 0 : vp9_zero(args);
779 0 : args.cpi = cpi;
780 0 : args.x = x;
781 0 : args.best_rd = ref_best_rd;
782 0 : args.use_fast_coef_costing = use_fast_coef_casting;
783 0 : args.skippable = 1;
784 :
785 0 : if (plane == 0) xd->mi[0]->tx_size = tx_size;
786 :
787 0 : vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
788 :
789 0 : args.so = get_scan(xd, tx_size, get_plane_type(plane), 0);
790 :
791 0 : vp9_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
792 : &args);
793 0 : if (args.exit_early) {
794 0 : *rate = INT_MAX;
795 0 : *distortion = INT64_MAX;
796 0 : *sse = INT64_MAX;
797 0 : *skippable = 0;
798 : } else {
799 0 : *distortion = args.this_dist;
800 0 : *rate = args.this_rate;
801 0 : *sse = args.this_sse;
802 0 : *skippable = args.skippable;
803 : }
804 0 : }
805 :
806 0 : static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
807 : int64_t *distortion, int *skip, int64_t *sse,
808 : int64_t ref_best_rd, BLOCK_SIZE bs) {
809 0 : const TX_SIZE max_tx_size = max_txsize_lookup[bs];
810 0 : VP9_COMMON *const cm = &cpi->common;
811 0 : const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
812 0 : MACROBLOCKD *const xd = &x->e_mbd;
813 0 : MODE_INFO *const mi = xd->mi[0];
814 :
815 0 : mi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
816 :
817 0 : txfm_rd_in_plane(cpi, x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
818 0 : mi->tx_size, cpi->sf.use_fast_coef_costing);
819 0 : }
820 :
821 0 : static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
822 : int64_t *distortion, int *skip,
823 : int64_t *psse, int64_t ref_best_rd,
824 : BLOCK_SIZE bs) {
825 0 : const TX_SIZE max_tx_size = max_txsize_lookup[bs];
826 0 : VP9_COMMON *const cm = &cpi->common;
827 0 : MACROBLOCKD *const xd = &x->e_mbd;
828 0 : MODE_INFO *const mi = xd->mi[0];
829 0 : vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
830 : int r[TX_SIZES][2], s[TX_SIZES];
831 : int64_t d[TX_SIZES], sse[TX_SIZES];
832 0 : int64_t rd[TX_SIZES][2] = { { INT64_MAX, INT64_MAX },
833 : { INT64_MAX, INT64_MAX },
834 : { INT64_MAX, INT64_MAX },
835 : { INT64_MAX, INT64_MAX } };
836 : int n, m;
837 : int s0, s1;
838 0 : int64_t best_rd = INT64_MAX;
839 0 : TX_SIZE best_tx = max_tx_size;
840 : int start_tx, end_tx;
841 :
842 0 : const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
843 0 : assert(skip_prob > 0);
844 0 : s0 = vp9_cost_bit(skip_prob, 0);
845 0 : s1 = vp9_cost_bit(skip_prob, 1);
846 :
847 0 : if (cm->tx_mode == TX_MODE_SELECT) {
848 0 : start_tx = max_tx_size;
849 0 : end_tx = 0;
850 : } else {
851 0 : TX_SIZE chosen_tx_size =
852 0 : VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
853 0 : start_tx = chosen_tx_size;
854 0 : end_tx = chosen_tx_size;
855 : }
856 :
857 0 : for (n = start_tx; n >= end_tx; n--) {
858 0 : int r_tx_size = 0;
859 0 : for (m = 0; m <= n - (n == (int)max_tx_size); m++) {
860 0 : if (m == n)
861 0 : r_tx_size += vp9_cost_zero(tx_probs[m]);
862 : else
863 0 : r_tx_size += vp9_cost_one(tx_probs[m]);
864 : }
865 0 : txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0,
866 : bs, n, cpi->sf.use_fast_coef_costing);
867 0 : r[n][1] = r[n][0];
868 0 : if (r[n][0] < INT_MAX) {
869 0 : r[n][1] += r_tx_size;
870 : }
871 0 : if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
872 0 : rd[n][0] = rd[n][1] = INT64_MAX;
873 0 : } else if (s[n]) {
874 0 : if (is_inter_block(mi)) {
875 0 : rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
876 0 : r[n][1] -= r_tx_size;
877 : } else {
878 0 : rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
879 0 : rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
880 : }
881 : } else {
882 0 : rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
883 0 : rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
884 : }
885 :
886 0 : if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
887 0 : rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
888 0 : rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
889 : }
890 :
891 : // Early termination in transform size search.
892 0 : if (cpi->sf.tx_size_search_breakout &&
893 0 : (rd[n][1] == INT64_MAX ||
894 0 : (n < (int)max_tx_size && rd[n][1] > rd[n + 1][1]) || s[n] == 1))
895 : break;
896 :
897 0 : if (rd[n][1] < best_rd) {
898 0 : best_tx = n;
899 0 : best_rd = rd[n][1];
900 : }
901 : }
902 0 : mi->tx_size = best_tx;
903 :
904 0 : *distortion = d[mi->tx_size];
905 0 : *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT];
906 0 : *skip = s[mi->tx_size];
907 0 : *psse = sse[mi->tx_size];
908 0 : }
909 :
910 0 : static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
911 : int64_t *distortion, int *skip, int64_t *psse,
912 : BLOCK_SIZE bs, int64_t ref_best_rd) {
913 0 : MACROBLOCKD *xd = &x->e_mbd;
914 : int64_t sse;
915 0 : int64_t *ret_sse = psse ? psse : &sse;
916 :
917 0 : assert(bs == xd->mi[0]->sb_type);
918 :
919 0 : if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
920 0 : choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
921 : bs);
922 : } else {
923 0 : choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
924 : bs);
925 : }
926 0 : }
927 :
928 0 : static int conditional_skipintra(PREDICTION_MODE mode,
929 : PREDICTION_MODE best_intra_mode) {
930 0 : if (mode == D117_PRED && best_intra_mode != V_PRED &&
931 : best_intra_mode != D135_PRED)
932 0 : return 1;
933 0 : if (mode == D63_PRED && best_intra_mode != V_PRED &&
934 : best_intra_mode != D45_PRED)
935 0 : return 1;
936 0 : if (mode == D207_PRED && best_intra_mode != H_PRED &&
937 : best_intra_mode != D45_PRED)
938 0 : return 1;
939 0 : if (mode == D153_PRED && best_intra_mode != H_PRED &&
940 : best_intra_mode != D135_PRED)
941 0 : return 1;
942 0 : return 0;
943 : }
944 :
945 0 : static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row,
946 : int col, PREDICTION_MODE *best_mode,
947 : const int *bmode_costs, ENTROPY_CONTEXT *a,
948 : ENTROPY_CONTEXT *l, int *bestrate,
949 : int *bestratey, int64_t *bestdistortion,
950 : BLOCK_SIZE bsize, int64_t rd_thresh) {
951 : PREDICTION_MODE mode;
952 0 : MACROBLOCKD *const xd = &x->e_mbd;
953 0 : int64_t best_rd = rd_thresh;
954 0 : struct macroblock_plane *p = &x->plane[0];
955 0 : struct macroblockd_plane *pd = &xd->plane[0];
956 0 : const int src_stride = p->src.stride;
957 0 : const int dst_stride = pd->dst.stride;
958 0 : const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
959 0 : uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4];
960 : ENTROPY_CONTEXT ta[2], tempa[2];
961 : ENTROPY_CONTEXT tl[2], templ[2];
962 0 : const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
963 0 : const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
964 : int idx, idy;
965 : uint8_t best_dst[8 * 8];
966 : #if CONFIG_VP9_HIGHBITDEPTH
967 : uint16_t best_dst16[8 * 8];
968 : #endif
969 0 : memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
970 0 : memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
971 :
972 0 : xd->mi[0]->tx_size = TX_4X4;
973 :
974 : #if CONFIG_VP9_HIGHBITDEPTH
975 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
976 : for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
977 : int64_t this_rd;
978 : int ratey = 0;
979 : int64_t distortion = 0;
980 : int rate = bmode_costs[mode];
981 :
982 : if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
983 :
984 : // Only do the oblique modes if the best so far is
985 : // one of the neighboring directional modes
986 : if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
987 : if (conditional_skipintra(mode, *best_mode)) continue;
988 : }
989 :
990 : memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
991 : memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
992 :
993 : for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
994 : for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
995 : const int block = (row + idy) * 2 + (col + idx);
996 : const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
997 : uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
998 : int16_t *const src_diff =
999 : vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1000 : tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1001 : xd->mi[0]->bmi[block].as_mode = mode;
1002 : vp9_predict_intra_block(xd, 1, TX_4X4, mode,
1003 : x->skip_encode ? src : dst,
1004 : x->skip_encode ? src_stride : dst_stride, dst,
1005 : dst_stride, col + idx, row + idy, 0);
1006 : vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
1007 : dst_stride, xd->bd);
1008 : if (xd->lossless) {
1009 : const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1010 : const int coeff_ctx =
1011 : combine_entropy_contexts(tempa[idx], templ[idy]);
1012 : vp9_highbd_fwht4x4(src_diff, coeff, 8);
1013 : vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1014 : ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
1015 : so->neighbors, cpi->sf.use_fast_coef_costing);
1016 : tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
1017 : if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1018 : goto next_highbd;
1019 : vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst,
1020 : dst_stride, p->eobs[block], xd->bd);
1021 : } else {
1022 : int64_t unused;
1023 : const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
1024 : const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
1025 : const int coeff_ctx =
1026 : combine_entropy_contexts(tempa[idx], templ[idy]);
1027 : if (tx_type == DCT_DCT)
1028 : vpx_highbd_fdct4x4(src_diff, coeff, 8);
1029 : else
1030 : vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
1031 : vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1032 : ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
1033 : so->neighbors, cpi->sf.use_fast_coef_costing);
1034 : distortion += vp9_highbd_block_error_dispatch(
1035 : coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16,
1036 : &unused, xd->bd) >>
1037 : 2;
1038 : tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
1039 : if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1040 : goto next_highbd;
1041 : vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
1042 : dst, dst_stride, p->eobs[block], xd->bd);
1043 : }
1044 : }
1045 : }
1046 :
1047 : rate += ratey;
1048 : this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1049 :
1050 : if (this_rd < best_rd) {
1051 : *bestrate = rate;
1052 : *bestratey = ratey;
1053 : *bestdistortion = distortion;
1054 : best_rd = this_rd;
1055 : *best_mode = mode;
1056 : memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
1057 : memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
1058 : for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1059 : memcpy(best_dst16 + idy * 8,
1060 : CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1061 : num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1062 : }
1063 : }
1064 : next_highbd : {}
1065 : }
1066 : if (best_rd >= rd_thresh || x->skip_encode) return best_rd;
1067 :
1068 : for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
1069 : memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
1070 : best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
1071 : }
1072 :
1073 : return best_rd;
1074 : }
1075 : #endif // CONFIG_VP9_HIGHBITDEPTH
1076 :
1077 0 : for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1078 : int64_t this_rd;
1079 0 : int ratey = 0;
1080 0 : int64_t distortion = 0;
1081 0 : int rate = bmode_costs[mode];
1082 :
1083 0 : if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;
1084 :
1085 : // Only do the oblique modes if the best so far is
1086 : // one of the neighboring directional modes
1087 0 : if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1088 0 : if (conditional_skipintra(mode, *best_mode)) continue;
1089 : }
1090 :
1091 0 : memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
1092 0 : memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
1093 :
1094 0 : for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1095 0 : for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1096 0 : const int block = (row + idy) * 2 + (col + idx);
1097 0 : const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1098 0 : uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1099 0 : int16_t *const src_diff =
1100 0 : vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1101 0 : tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1102 0 : xd->mi[0]->bmi[block].as_mode = mode;
1103 0 : vp9_predict_intra_block(xd, 1, TX_4X4, mode, x->skip_encode ? src : dst,
1104 0 : x->skip_encode ? src_stride : dst_stride, dst,
1105 : dst_stride, col + idx, row + idy, 0);
1106 0 : vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
1107 :
1108 0 : if (xd->lossless) {
1109 0 : const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1110 0 : const int coeff_ctx =
1111 0 : combine_entropy_contexts(tempa[idx], templ[idy]);
1112 0 : vp9_fwht4x4(src_diff, coeff, 8);
1113 0 : vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1114 0 : ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
1115 : so->neighbors, cpi->sf.use_fast_coef_costing);
1116 0 : tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
1117 0 : if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1118 0 : goto next;
1119 0 : vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
1120 0 : p->eobs[block]);
1121 : } else {
1122 : int64_t unused;
1123 0 : const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
1124 0 : const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
1125 0 : const int coeff_ctx =
1126 0 : combine_entropy_contexts(tempa[idx], templ[idy]);
1127 0 : vp9_fht4x4(src_diff, coeff, 8, tx_type);
1128 0 : vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1129 0 : ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
1130 : so->neighbors, cpi->sf.use_fast_coef_costing);
1131 0 : tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
1132 : #if CONFIG_VP9_HIGHBITDEPTH
1133 : distortion +=
1134 : vp9_highbd_block_error_8bit(
1135 : coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >>
1136 : 2;
1137 : #else
1138 0 : distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1139 0 : 16, &unused) >>
1140 : 2;
1141 : #endif
1142 0 : if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1143 0 : goto next;
1144 0 : vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), dst,
1145 0 : dst_stride, p->eobs[block]);
1146 : }
1147 : }
1148 : }
1149 :
1150 0 : rate += ratey;
1151 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1152 :
1153 0 : if (this_rd < best_rd) {
1154 0 : *bestrate = rate;
1155 0 : *bestratey = ratey;
1156 0 : *bestdistortion = distortion;
1157 0 : best_rd = this_rd;
1158 0 : *best_mode = mode;
1159 0 : memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
1160 0 : memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
1161 0 : for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1162 0 : memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1163 0 : num_4x4_blocks_wide * 4);
1164 : }
1165 : next : {}
1166 : }
1167 :
1168 0 : if (best_rd >= rd_thresh || x->skip_encode) return best_rd;
1169 :
1170 0 : for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1171 0 : memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1172 0 : num_4x4_blocks_wide * 4);
1173 :
1174 0 : return best_rd;
1175 : }
1176 :
1177 0 : static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
1178 : int *rate, int *rate_y,
1179 : int64_t *distortion,
1180 : int64_t best_rd) {
1181 : int i, j;
1182 0 : const MACROBLOCKD *const xd = &mb->e_mbd;
1183 0 : MODE_INFO *const mic = xd->mi[0];
1184 0 : const MODE_INFO *above_mi = xd->above_mi;
1185 0 : const MODE_INFO *left_mi = xd->left_mi;
1186 0 : const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
1187 0 : const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1188 0 : const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1189 : int idx, idy;
1190 0 : int cost = 0;
1191 0 : int64_t total_distortion = 0;
1192 0 : int tot_rate_y = 0;
1193 0 : int64_t total_rd = 0;
1194 0 : const int *bmode_costs = cpi->mbmode_cost;
1195 :
1196 : // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1197 0 : for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1198 0 : for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1199 0 : PREDICTION_MODE best_mode = DC_PRED;
1200 0 : int r = INT_MAX, ry = INT_MAX;
1201 0 : int64_t d = INT64_MAX, this_rd = INT64_MAX;
1202 0 : i = idy * 2 + idx;
1203 0 : if (cpi->common.frame_type == KEY_FRAME) {
1204 0 : const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
1205 0 : const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
1206 :
1207 0 : bmode_costs = cpi->y_mode_costs[A][L];
1208 : }
1209 :
1210 0 : this_rd = rd_pick_intra4x4block(
1211 : cpi, mb, idy, idx, &best_mode, bmode_costs,
1212 0 : xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
1213 : &ry, &d, bsize, best_rd - total_rd);
1214 :
1215 0 : if (this_rd >= best_rd - total_rd) return INT64_MAX;
1216 :
1217 0 : total_rd += this_rd;
1218 0 : cost += r;
1219 0 : total_distortion += d;
1220 0 : tot_rate_y += ry;
1221 :
1222 0 : mic->bmi[i].as_mode = best_mode;
1223 0 : for (j = 1; j < num_4x4_blocks_high; ++j)
1224 0 : mic->bmi[i + j * 2].as_mode = best_mode;
1225 0 : for (j = 1; j < num_4x4_blocks_wide; ++j)
1226 0 : mic->bmi[i + j].as_mode = best_mode;
1227 :
1228 0 : if (total_rd >= best_rd) return INT64_MAX;
1229 : }
1230 : }
1231 :
1232 0 : *rate = cost;
1233 0 : *rate_y = tot_rate_y;
1234 0 : *distortion = total_distortion;
1235 0 : mic->mode = mic->bmi[3].as_mode;
1236 :
1237 0 : return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1238 : }
1239 :
1240 : // This function is used only for intra_only frames
1241 0 : static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
1242 : int *rate_tokenonly, int64_t *distortion,
1243 : int *skippable, BLOCK_SIZE bsize,
1244 : int64_t best_rd) {
1245 : PREDICTION_MODE mode;
1246 0 : PREDICTION_MODE mode_selected = DC_PRED;
1247 0 : MACROBLOCKD *const xd = &x->e_mbd;
1248 0 : MODE_INFO *const mic = xd->mi[0];
1249 : int this_rate, this_rate_tokenonly, s;
1250 : int64_t this_distortion, this_rd;
1251 0 : TX_SIZE best_tx = TX_4X4;
1252 : int *bmode_costs;
1253 0 : const MODE_INFO *above_mi = xd->above_mi;
1254 0 : const MODE_INFO *left_mi = xd->left_mi;
1255 0 : const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
1256 0 : const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
1257 0 : bmode_costs = cpi->y_mode_costs[A][L];
1258 :
1259 0 : memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
1260 : /* Y Search for intra prediction mode */
1261 0 : for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1262 0 : if (cpi->sf.use_nonrd_pick_mode) {
1263 : // These speed features are turned on in hybrid non-RD and RD mode
1264 : // for key frame coding in the context of real-time setting.
1265 0 : if (conditional_skipintra(mode, mode_selected)) continue;
1266 0 : if (*skippable) break;
1267 : }
1268 :
1269 0 : mic->mode = mode;
1270 :
1271 0 : super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
1272 : bsize, best_rd);
1273 :
1274 0 : if (this_rate_tokenonly == INT_MAX) continue;
1275 :
1276 0 : this_rate = this_rate_tokenonly + bmode_costs[mode];
1277 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1278 :
1279 0 : if (this_rd < best_rd) {
1280 0 : mode_selected = mode;
1281 0 : best_rd = this_rd;
1282 0 : best_tx = mic->tx_size;
1283 0 : *rate = this_rate;
1284 0 : *rate_tokenonly = this_rate_tokenonly;
1285 0 : *distortion = this_distortion;
1286 0 : *skippable = s;
1287 : }
1288 : }
1289 :
1290 0 : mic->mode = mode_selected;
1291 0 : mic->tx_size = best_tx;
1292 :
1293 0 : return best_rd;
1294 : }
1295 :
1296 : // Return value 0: early termination triggered, no valid rd cost available;
1297 : // 1: rd cost values are valid.
1298 0 : static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
1299 : int64_t *distortion, int *skippable, int64_t *sse,
1300 : BLOCK_SIZE bsize, int64_t ref_best_rd) {
1301 0 : MACROBLOCKD *const xd = &x->e_mbd;
1302 0 : MODE_INFO *const mi = xd->mi[0];
1303 0 : const TX_SIZE uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
1304 : int plane;
1305 0 : int pnrate = 0, pnskip = 1;
1306 0 : int64_t pndist = 0, pnsse = 0;
1307 0 : int is_cost_valid = 1;
1308 :
1309 0 : if (ref_best_rd < 0) is_cost_valid = 0;
1310 :
1311 0 : if (is_inter_block(mi) && is_cost_valid) {
1312 : int plane;
1313 0 : for (plane = 1; plane < MAX_MB_PLANE; ++plane)
1314 0 : vp9_subtract_plane(x, bsize, plane);
1315 : }
1316 :
1317 0 : *rate = 0;
1318 0 : *distortion = 0;
1319 0 : *sse = 0;
1320 0 : *skippable = 1;
1321 :
1322 0 : for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1323 0 : txfm_rd_in_plane(cpi, x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
1324 : plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
1325 0 : if (pnrate == INT_MAX) {
1326 0 : is_cost_valid = 0;
1327 0 : break;
1328 : }
1329 0 : *rate += pnrate;
1330 0 : *distortion += pndist;
1331 0 : *sse += pnsse;
1332 0 : *skippable &= pnskip;
1333 : }
1334 :
1335 0 : if (!is_cost_valid) {
1336 : // reset cost value
1337 0 : *rate = INT_MAX;
1338 0 : *distortion = INT64_MAX;
1339 0 : *sse = INT64_MAX;
1340 0 : *skippable = 0;
1341 : }
1342 :
1343 0 : return is_cost_valid;
1344 : }
1345 :
1346 0 : static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1347 : PICK_MODE_CONTEXT *ctx, int *rate,
1348 : int *rate_tokenonly, int64_t *distortion,
1349 : int *skippable, BLOCK_SIZE bsize,
1350 : TX_SIZE max_tx_size) {
1351 0 : MACROBLOCKD *xd = &x->e_mbd;
1352 : PREDICTION_MODE mode;
1353 0 : PREDICTION_MODE mode_selected = DC_PRED;
1354 0 : int64_t best_rd = INT64_MAX, this_rd;
1355 : int this_rate_tokenonly, this_rate, s;
1356 : int64_t this_distortion, this_sse;
1357 :
1358 0 : memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
1359 0 : for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1360 0 : if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue;
1361 : #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
1362 : if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
1363 : (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode])
1364 : continue;
1365 : #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
1366 :
1367 0 : xd->mi[0]->uv_mode = mode;
1368 :
1369 0 : if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
1370 : &this_sse, bsize, best_rd))
1371 0 : continue;
1372 0 : this_rate =
1373 0 : this_rate_tokenonly +
1374 0 : cpi->intra_uv_mode_cost[cpi->common.frame_type][xd->mi[0]->mode][mode];
1375 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1376 :
1377 0 : if (this_rd < best_rd) {
1378 0 : mode_selected = mode;
1379 0 : best_rd = this_rd;
1380 0 : *rate = this_rate;
1381 0 : *rate_tokenonly = this_rate_tokenonly;
1382 0 : *distortion = this_distortion;
1383 0 : *skippable = s;
1384 0 : if (!x->select_tx_size) swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
1385 : }
1386 : }
1387 :
1388 0 : xd->mi[0]->uv_mode = mode_selected;
1389 0 : return best_rd;
1390 : }
1391 :
1392 0 : static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
1393 : int *rate_tokenonly, int64_t *distortion,
1394 : int *skippable, BLOCK_SIZE bsize) {
1395 0 : const VP9_COMMON *cm = &cpi->common;
1396 : int64_t unused;
1397 :
1398 0 : x->e_mbd.mi[0]->uv_mode = DC_PRED;
1399 0 : memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
1400 0 : super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused,
1401 : bsize, INT64_MAX);
1402 0 : *rate =
1403 0 : *rate_tokenonly +
1404 0 : cpi->intra_uv_mode_cost[cm->frame_type][x->e_mbd.mi[0]->mode][DC_PRED];
1405 0 : return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1406 : }
1407 :
1408 0 : static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x,
1409 : PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
1410 : TX_SIZE max_tx_size, int *rate_uv,
1411 : int *rate_uv_tokenonly, int64_t *dist_uv,
1412 : int *skip_uv, PREDICTION_MODE *mode_uv) {
1413 : // Use an estimated rd for uv_intra based on DC_PRED if the
1414 : // appropriate speed flag is set.
1415 0 : if (cpi->sf.use_uv_intra_rd_estimate) {
1416 0 : rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1417 : bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1418 : // Else do a proper rd search for each possible transform size that may
1419 : // be considered in the main rd loop.
1420 : } else {
1421 0 : rd_pick_intra_sbuv_mode(cpi, x, ctx, rate_uv, rate_uv_tokenonly, dist_uv,
1422 : skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
1423 : max_tx_size);
1424 : }
1425 0 : *mode_uv = x->e_mbd.mi[0]->uv_mode;
1426 0 : }
1427 :
1428 0 : static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
1429 : int mode_context) {
1430 0 : assert(is_inter_mode(mode));
1431 0 : return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1432 : }
1433 :
1434 0 : static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
1435 : int i, PREDICTION_MODE mode, int_mv this_mv[2],
1436 : int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1437 : int_mv seg_mvs[MAX_REF_FRAMES],
1438 : int_mv *best_ref_mv[2], const int *mvjcost,
1439 : int *mvcost[2]) {
1440 0 : MODE_INFO *const mi = xd->mi[0];
1441 0 : const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
1442 0 : int thismvcost = 0;
1443 : int idx, idy;
1444 0 : const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type];
1445 0 : const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type];
1446 0 : const int is_compound = has_second_ref(mi);
1447 :
1448 0 : switch (mode) {
1449 : case NEWMV:
1450 0 : this_mv[0].as_int = seg_mvs[mi->ref_frame[0]].as_int;
1451 0 : thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
1452 : mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1453 0 : if (is_compound) {
1454 0 : this_mv[1].as_int = seg_mvs[mi->ref_frame[1]].as_int;
1455 0 : thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
1456 : mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1457 : }
1458 0 : break;
1459 : case NEARMV:
1460 : case NEARESTMV:
1461 0 : this_mv[0].as_int = frame_mv[mode][mi->ref_frame[0]].as_int;
1462 0 : if (is_compound)
1463 0 : this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int;
1464 0 : break;
1465 : case ZEROMV:
1466 0 : this_mv[0].as_int = 0;
1467 0 : if (is_compound) this_mv[1].as_int = 0;
1468 0 : break;
1469 0 : default: break;
1470 : }
1471 :
1472 0 : mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
1473 0 : if (is_compound) mi->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
1474 :
1475 0 : mi->bmi[i].as_mode = mode;
1476 :
1477 0 : for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1478 0 : for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1479 0 : memmove(&mi->bmi[i + idy * 2 + idx], &mi->bmi[i], sizeof(mi->bmi[i]));
1480 :
1481 0 : return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mi->ref_frame[0]]) +
1482 : thismvcost;
1483 : }
1484 :
1485 0 : static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCK *x,
1486 : int64_t best_yrd, int i, int *labelyrate,
1487 : int64_t *distortion, int64_t *sse,
1488 : ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
1489 : int mi_row, int mi_col) {
1490 : int k;
1491 0 : MACROBLOCKD *xd = &x->e_mbd;
1492 0 : struct macroblockd_plane *const pd = &xd->plane[0];
1493 0 : struct macroblock_plane *const p = &x->plane[0];
1494 0 : MODE_INFO *const mi = xd->mi[0];
1495 0 : const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->sb_type, pd);
1496 0 : const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1497 0 : const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1498 : int idx, idy;
1499 :
1500 0 : const uint8_t *const src =
1501 0 : &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1502 0 : uint8_t *const dst =
1503 0 : &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
1504 0 : int64_t thisdistortion = 0, thissse = 0;
1505 0 : int thisrate = 0, ref;
1506 0 : const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1507 0 : const int is_compound = has_second_ref(mi);
1508 0 : const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
1509 :
1510 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
1511 0 : const int bw = b_width_log2_lookup[BLOCK_8X8];
1512 0 : const int h = 4 * (i >> bw);
1513 0 : const int w = 4 * (i & ((1 << bw) - 1));
1514 0 : const struct scale_factors *sf = &xd->block_refs[ref]->sf;
1515 0 : int y_stride = pd->pre[ref].stride;
1516 0 : uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);
1517 :
1518 0 : if (vp9_is_scaled(sf)) {
1519 0 : const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
1520 0 : const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
1521 :
1522 0 : y_stride = xd->block_refs[ref]->buf->y_stride;
1523 0 : pre = xd->block_refs[ref]->buf->y_buffer;
1524 0 : pre += scaled_buffer_offset(x_start + w, y_start + h, y_stride, sf);
1525 : }
1526 : #if CONFIG_VP9_HIGHBITDEPTH
1527 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1528 : vp9_highbd_build_inter_predictor(
1529 : pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
1530 : &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
1531 : mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2),
1532 : xd->bd);
1533 : } else {
1534 : vp9_build_inter_predictor(
1535 : pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
1536 : &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
1537 : mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
1538 : }
1539 : #else
1540 0 : vp9_build_inter_predictor(
1541 0 : pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv,
1542 0 : &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3,
1543 0 : mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
1544 : #endif // CONFIG_VP9_HIGHBITDEPTH
1545 : }
1546 :
1547 : #if CONFIG_VP9_HIGHBITDEPTH
1548 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1549 : vpx_highbd_subtract_block(
1550 : height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
1551 : 8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
1552 : } else {
1553 : vpx_subtract_block(height, width,
1554 : vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
1555 : 8, src, p->src.stride, dst, pd->dst.stride);
1556 : }
1557 : #else
1558 0 : vpx_subtract_block(height, width,
1559 0 : vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
1560 0 : 8, src, p->src.stride, dst, pd->dst.stride);
1561 : #endif // CONFIG_VP9_HIGHBITDEPTH
1562 :
1563 0 : k = i;
1564 0 : for (idy = 0; idy < height / 4; ++idy) {
1565 0 : for (idx = 0; idx < width / 4; ++idx) {
1566 : #if CONFIG_VP9_HIGHBITDEPTH
1567 : const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
1568 : #endif
1569 : int64_t ssz, rd, rd1, rd2;
1570 : tran_low_t *coeff;
1571 : int coeff_ctx;
1572 0 : k += (idy * 2 + idx);
1573 0 : coeff_ctx = combine_entropy_contexts(ta[k & 1], tl[k >> 1]);
1574 0 : coeff = BLOCK_OFFSET(p->coeff, k);
1575 0 : x->fwd_txm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1576 : coeff, 8);
1577 0 : vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1578 : #if CONFIG_VP9_HIGHBITDEPTH
1579 : thisdistortion += vp9_highbd_block_error_dispatch(
1580 : coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd);
1581 : #else
1582 0 : thisdistortion +=
1583 0 : vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz);
1584 : #endif // CONFIG_VP9_HIGHBITDEPTH
1585 0 : thissse += ssz;
1586 0 : thisrate += cost_coeffs(x, 0, k, TX_4X4, coeff_ctx, so->scan,
1587 : so->neighbors, cpi->sf.use_fast_coef_costing);
1588 0 : ta[k & 1] = tl[k >> 1] = (x->plane[0].eobs[k] > 0) ? 1 : 0;
1589 0 : rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1590 0 : rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1591 0 : rd = VPXMIN(rd1, rd2);
1592 0 : if (rd >= best_yrd) return INT64_MAX;
1593 : }
1594 : }
1595 :
1596 0 : *distortion = thisdistortion >> 2;
1597 0 : *labelyrate = thisrate;
1598 0 : *sse = thissse >> 2;
1599 :
1600 0 : return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1601 : }
1602 :
1603 : typedef struct {
1604 : int eobs;
1605 : int brate;
1606 : int byrate;
1607 : int64_t bdist;
1608 : int64_t bsse;
1609 : int64_t brdcost;
1610 : int_mv mvs[2];
1611 : ENTROPY_CONTEXT ta[2];
1612 : ENTROPY_CONTEXT tl[2];
1613 : } SEG_RDSTAT;
1614 :
1615 : typedef struct {
1616 : int_mv *ref_mv[2];
1617 : int_mv mvp;
1618 :
1619 : int64_t segment_rd;
1620 : int r;
1621 : int64_t d;
1622 : int64_t sse;
1623 : int segment_yrate;
1624 : PREDICTION_MODE modes[4];
1625 : SEG_RDSTAT rdstat[4][INTER_MODES];
1626 : int mvthresh;
1627 : } BEST_SEG_INFO;
1628 :
1629 0 : static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
1630 0 : return (mv->row >> 3) < mv_limits->row_min ||
1631 0 : (mv->row >> 3) > mv_limits->row_max ||
1632 0 : (mv->col >> 3) < mv_limits->col_min ||
1633 0 : (mv->col >> 3) > mv_limits->col_max;
1634 : }
1635 :
1636 0 : static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1637 0 : MODE_INFO *const mi = x->e_mbd.mi[0];
1638 0 : struct macroblock_plane *const p = &x->plane[0];
1639 0 : struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1640 :
1641 0 : p->src.buf =
1642 0 : &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1643 0 : assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1644 0 : pd->pre[0].buf =
1645 0 : &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
1646 0 : if (has_second_ref(mi))
1647 0 : pd->pre[1].buf =
1648 0 : &pd->pre[1]
1649 0 : .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)];
1650 0 : }
1651 :
1652 0 : static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1653 : struct buf_2d orig_pre[2]) {
1654 0 : MODE_INFO *mi = x->e_mbd.mi[0];
1655 0 : x->plane[0].src = orig_src;
1656 0 : x->e_mbd.plane[0].pre[0] = orig_pre[0];
1657 0 : if (has_second_ref(mi)) x->e_mbd.plane[0].pre[1] = orig_pre[1];
1658 0 : }
1659 :
1660 0 : static INLINE int mv_has_subpel(const MV *mv) {
1661 0 : return (mv->row & 0x0F) || (mv->col & 0x0F);
1662 : }
1663 :
1664 : // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
1665 : // TODO(aconverse): Find out if this is still productive then clean up or remove
1666 0 : static int check_best_zero_mv(const VP9_COMP *cpi,
1667 : const uint8_t mode_context[MAX_REF_FRAMES],
1668 : int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1669 : int this_mode,
1670 : const MV_REFERENCE_FRAME ref_frames[2]) {
1671 0 : if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
1672 0 : frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
1673 0 : (ref_frames[1] == NONE ||
1674 0 : frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
1675 0 : int rfc = mode_context[ref_frames[0]];
1676 0 : int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1677 0 : int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1678 0 : int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1679 :
1680 0 : if (this_mode == NEARMV) {
1681 0 : if (c1 > c3) return 0;
1682 0 : } else if (this_mode == NEARESTMV) {
1683 0 : if (c2 > c3) return 0;
1684 : } else {
1685 0 : assert(this_mode == ZEROMV);
1686 0 : if (ref_frames[1] == NONE) {
1687 0 : if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
1688 0 : (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
1689 0 : return 0;
1690 : } else {
1691 0 : if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
1692 0 : frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
1693 0 : (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
1694 0 : frame_mv[NEARMV][ref_frames[1]].as_int == 0))
1695 0 : return 0;
1696 : }
1697 : }
1698 : }
1699 0 : return 1;
1700 : }
1701 :
1702 0 : static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
1703 : int_mv *frame_mv, int mi_row, int mi_col,
1704 : int_mv single_newmv[MAX_REF_FRAMES],
1705 : int *rate_mv) {
1706 0 : const VP9_COMMON *const cm = &cpi->common;
1707 0 : const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
1708 0 : const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
1709 0 : MACROBLOCKD *xd = &x->e_mbd;
1710 0 : MODE_INFO *mi = xd->mi[0];
1711 0 : const int refs[2] = { mi->ref_frame[0],
1712 0 : mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] };
1713 : int_mv ref_mv[2];
1714 : int ite, ref;
1715 0 : const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
1716 : struct scale_factors sf;
1717 :
1718 : // Do joint motion search in compound mode to get more accurate mv.
1719 : struct buf_2d backup_yv12[2][MAX_MB_PLANE];
1720 0 : uint32_t last_besterr[2] = { UINT_MAX, UINT_MAX };
1721 0 : const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
1722 0 : vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]),
1723 0 : vp9_get_scaled_ref_frame(cpi, mi->ref_frame[1])
1724 : };
1725 :
1726 : // Prediction buffer from second frame.
1727 : #if CONFIG_VP9_HIGHBITDEPTH
1728 : DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
1729 : uint8_t *second_pred;
1730 : #else
1731 : DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
1732 : #endif // CONFIG_VP9_HIGHBITDEPTH
1733 :
1734 0 : for (ref = 0; ref < 2; ++ref) {
1735 0 : ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
1736 :
1737 0 : if (scaled_ref_frame[ref]) {
1738 : int i;
1739 : // Swap out the reference frame for a version that's been scaled to
1740 : // match the resolution of the current frame, allowing the existing
1741 : // motion search code to be used without additional modifications.
1742 0 : for (i = 0; i < MAX_MB_PLANE; i++)
1743 0 : backup_yv12[ref][i] = xd->plane[i].pre[ref];
1744 0 : vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
1745 : NULL);
1746 : }
1747 :
1748 0 : frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
1749 : }
1750 :
1751 : // Since we have scaled the reference frames to match the size of the current
1752 : // frame we must use a unit scaling factor during mode selection.
1753 : #if CONFIG_VP9_HIGHBITDEPTH
1754 : vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
1755 : cm->height, cm->use_highbitdepth);
1756 : #else
1757 0 : vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
1758 : cm->height);
1759 : #endif // CONFIG_VP9_HIGHBITDEPTH
1760 :
1761 : // Allow joint search multiple times iteratively for each reference frame
1762 : // and break out of the search loop if it couldn't find a better mv.
1763 0 : for (ite = 0; ite < 4; ite++) {
1764 : struct buf_2d ref_yv12[2];
1765 0 : uint32_t bestsme = UINT_MAX;
1766 0 : int sadpb = x->sadperbit16;
1767 : MV tmp_mv;
1768 0 : int search_range = 3;
1769 :
1770 0 : const MvLimits tmp_mv_limits = x->mv_limits;
1771 0 : int id = ite % 2; // Even iterations search in the first reference frame,
1772 : // odd iterations search in the second. The predictor
1773 : // found for the 'other' reference frame is factored in.
1774 :
1775 : // Initialized here because of compiler problem in Visual Studio.
1776 0 : ref_yv12[0] = xd->plane[0].pre[0];
1777 0 : ref_yv12[1] = xd->plane[0].pre[1];
1778 :
1779 : // Get the prediction block from the 'other' reference frame.
1780 : #if CONFIG_VP9_HIGHBITDEPTH
1781 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1782 : second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
1783 : vp9_highbd_build_inter_predictor(
1784 : ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
1785 : &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, kernel, MV_PRECISION_Q3,
1786 : mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd);
1787 : } else {
1788 : second_pred = (uint8_t *)second_pred_alloc_16;
1789 : vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
1790 : second_pred, pw, &frame_mv[refs[!id]].as_mv,
1791 : &sf, pw, ph, 0, kernel, MV_PRECISION_Q3,
1792 : mi_col * MI_SIZE, mi_row * MI_SIZE);
1793 : }
1794 : #else
1795 0 : vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
1796 0 : second_pred, pw, &frame_mv[refs[!id]].as_mv, &sf,
1797 : pw, ph, 0, kernel, MV_PRECISION_Q3,
1798 : mi_col * MI_SIZE, mi_row * MI_SIZE);
1799 : #endif // CONFIG_VP9_HIGHBITDEPTH
1800 :
1801 : // Do compound motion search on the current reference frame.
1802 0 : if (id) xd->plane[0].pre[0] = ref_yv12[id];
1803 0 : vp9_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
1804 :
1805 : // Use the mv result from the single mode as mv predictor.
1806 0 : tmp_mv = frame_mv[refs[id]].as_mv;
1807 :
1808 0 : tmp_mv.col >>= 3;
1809 0 : tmp_mv.row >>= 3;
1810 :
1811 : // Small-range full-pixel motion search.
1812 0 : bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, search_range,
1813 0 : &cpi->fn_ptr[bsize], &ref_mv[id].as_mv,
1814 : second_pred);
1815 0 : if (bestsme < UINT_MAX)
1816 0 : bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
1817 0 : second_pred, &cpi->fn_ptr[bsize], 1);
1818 :
1819 0 : x->mv_limits = tmp_mv_limits;
1820 :
1821 0 : if (bestsme < UINT_MAX) {
1822 : uint32_t dis; /* TODO: use dis in distortion calculation later. */
1823 : uint32_t sse;
1824 0 : bestsme = cpi->find_fractional_mv_step(
1825 0 : x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
1826 0 : x->errorperbit, &cpi->fn_ptr[bsize], 0,
1827 0 : cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
1828 : &dis, &sse, second_pred, pw, ph);
1829 : }
1830 :
1831 : // Restore the pointer to the first (possibly scaled) prediction buffer.
1832 0 : if (id) xd->plane[0].pre[0] = ref_yv12[0];
1833 :
1834 0 : if (bestsme < last_besterr[id]) {
1835 0 : frame_mv[refs[id]].as_mv = tmp_mv;
1836 0 : last_besterr[id] = bestsme;
1837 : } else {
1838 0 : break;
1839 : }
1840 : }
1841 :
1842 0 : *rate_mv = 0;
1843 :
1844 0 : for (ref = 0; ref < 2; ++ref) {
1845 0 : if (scaled_ref_frame[ref]) {
1846 : // Restore the prediction frame pointers to their unscaled versions.
1847 : int i;
1848 0 : for (i = 0; i < MAX_MB_PLANE; i++)
1849 0 : xd->plane[i].pre[ref] = backup_yv12[ref][i];
1850 : }
1851 :
1852 0 : *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
1853 0 : &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
1854 0 : x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
1855 : }
1856 0 : }
1857 :
1858 0 : static int64_t rd_pick_best_sub8x8_mode(
1859 : VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv,
1860 : int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate,
1861 : int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse,
1862 : int mvthresh, int_mv seg_mvs[4][MAX_REF_FRAMES], BEST_SEG_INFO *bsi_buf,
1863 : int filter_idx, int mi_row, int mi_col) {
1864 : int i;
1865 0 : BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1866 0 : MACROBLOCKD *xd = &x->e_mbd;
1867 0 : MODE_INFO *mi = xd->mi[0];
1868 : int mode_idx;
1869 0 : int k, br = 0, idx, idy;
1870 0 : int64_t bd = 0, block_sse = 0;
1871 : PREDICTION_MODE this_mode;
1872 0 : VP9_COMMON *cm = &cpi->common;
1873 0 : struct macroblock_plane *const p = &x->plane[0];
1874 0 : struct macroblockd_plane *const pd = &xd->plane[0];
1875 0 : const int label_count = 4;
1876 0 : int64_t this_segment_rd = 0;
1877 : int label_mv_thresh;
1878 0 : int segmentyrate = 0;
1879 0 : const BLOCK_SIZE bsize = mi->sb_type;
1880 0 : const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1881 0 : const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1882 : ENTROPY_CONTEXT t_above[2], t_left[2];
1883 0 : int subpelmv = 1, have_ref = 0;
1884 0 : SPEED_FEATURES *const sf = &cpi->sf;
1885 0 : const int has_second_rf = has_second_ref(mi);
1886 0 : const int inter_mode_mask = sf->inter_mode_mask[bsize];
1887 0 : MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
1888 :
1889 0 : vp9_zero(*bsi);
1890 :
1891 0 : bsi->segment_rd = best_rd;
1892 0 : bsi->ref_mv[0] = best_ref_mv;
1893 0 : bsi->ref_mv[1] = second_best_ref_mv;
1894 0 : bsi->mvp.as_int = best_ref_mv->as_int;
1895 0 : bsi->mvthresh = mvthresh;
1896 :
1897 0 : for (i = 0; i < 4; i++) bsi->modes[i] = ZEROMV;
1898 :
1899 0 : memcpy(t_above, pd->above_context, sizeof(t_above));
1900 0 : memcpy(t_left, pd->left_context, sizeof(t_left));
1901 :
1902 : // 64 makes this threshold really big effectively
1903 : // making it so that we very rarely check mvs on
1904 : // segments. setting this to 1 would make mv thresh
1905 : // roughly equal to what it is for macroblocks
1906 0 : label_mv_thresh = 1 * bsi->mvthresh / label_count;
1907 :
1908 : // Segmentation method overheads
1909 0 : for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1910 0 : for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1911 : // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1912 : // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1913 : int_mv mode_mv[MB_MODE_COUNT][2];
1914 : int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1915 0 : PREDICTION_MODE mode_selected = ZEROMV;
1916 0 : int64_t best_rd = INT64_MAX;
1917 0 : const int i = idy * 2 + idx;
1918 : int ref;
1919 :
1920 0 : for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1921 0 : const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
1922 0 : frame_mv[ZEROMV][frame].as_int = 0;
1923 0 : vp9_append_sub8x8_mvs_for_idx(
1924 0 : cm, xd, i, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame],
1925 0 : &frame_mv[NEARMV][frame], mbmi_ext->mode_context);
1926 : }
1927 :
1928 : // search for the best motion vector on this segment
1929 0 : for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1930 0 : const struct buf_2d orig_src = x->plane[0].src;
1931 : struct buf_2d orig_pre[2];
1932 :
1933 0 : mode_idx = INTER_OFFSET(this_mode);
1934 0 : bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1935 0 : if (!(inter_mode_mask & (1 << this_mode))) continue;
1936 :
1937 0 : if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
1938 0 : this_mode, mi->ref_frame))
1939 0 : continue;
1940 :
1941 0 : memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1942 0 : memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1943 : sizeof(bsi->rdstat[i][mode_idx].ta));
1944 0 : memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1945 : sizeof(bsi->rdstat[i][mode_idx].tl));
1946 :
1947 : // motion search for newmv (single predictor case only)
1948 0 : if (!has_second_rf && this_mode == NEWMV &&
1949 0 : seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) {
1950 0 : MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1951 0 : int step_param = 0;
1952 0 : uint32_t bestsme = UINT_MAX;
1953 0 : int sadpb = x->sadperbit4;
1954 : MV mvp_full;
1955 : int max_mv;
1956 : int cost_list[5];
1957 0 : const MvLimits tmp_mv_limits = x->mv_limits;
1958 :
1959 : /* Is the best so far sufficiently good that we cant justify doing
1960 : * and new motion search. */
1961 0 : if (best_rd < label_mv_thresh) break;
1962 :
1963 0 : if (cpi->oxcf.mode != BEST) {
1964 : // use previous block's result as next block's MV predictor.
1965 0 : if (i > 0) {
1966 0 : bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1967 0 : if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1968 : }
1969 : }
1970 0 : if (i == 0)
1971 0 : max_mv = x->max_mv_context[mi->ref_frame[0]];
1972 : else
1973 0 : max_mv =
1974 0 : VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1975 :
1976 0 : if (sf->mv.auto_mv_step_size && cm->show_frame) {
1977 : // Take wtd average of the step_params based on the last frame's
1978 : // max mv magnitude and the best ref mvs of the current block for
1979 : // the given reference.
1980 0 : step_param =
1981 0 : (vp9_init_search_range(max_mv) + cpi->mv_step_param) / 2;
1982 : } else {
1983 0 : step_param = cpi->mv_step_param;
1984 : }
1985 :
1986 0 : mvp_full.row = bsi->mvp.as_mv.row >> 3;
1987 0 : mvp_full.col = bsi->mvp.as_mv.col >> 3;
1988 :
1989 0 : if (sf->adaptive_motion_search) {
1990 0 : mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3;
1991 0 : mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3;
1992 0 : step_param = VPXMAX(step_param, 8);
1993 : }
1994 :
1995 : // adjust src pointer for this block
1996 0 : mi_buf_shift(x, i);
1997 :
1998 0 : vp9_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv);
1999 :
2000 0 : bestsme = vp9_full_pixel_search(
2001 : cpi, x, bsize, &mvp_full, step_param, sadpb,
2002 0 : sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
2003 0 : &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1);
2004 :
2005 0 : x->mv_limits = tmp_mv_limits;
2006 :
2007 0 : if (bestsme < UINT_MAX) {
2008 : uint32_t distortion;
2009 0 : cpi->find_fractional_mv_step(
2010 0 : x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
2011 0 : x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop,
2012 : sf->mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
2013 0 : x->nmvjointcost, x->mvcost, &distortion,
2014 0 : &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0);
2015 :
2016 : // save motion search result for use in compound prediction
2017 0 : seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv;
2018 : }
2019 :
2020 0 : if (sf->adaptive_motion_search)
2021 0 : x->pred_mv[mi->ref_frame[0]] = *new_mv;
2022 :
2023 : // restore src pointers
2024 0 : mi_buf_restore(x, orig_src, orig_pre);
2025 : }
2026 :
2027 0 : if (has_second_rf) {
2028 0 : if (seg_mvs[i][mi->ref_frame[1]].as_int == INVALID_MV ||
2029 0 : seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV)
2030 0 : continue;
2031 : }
2032 :
2033 0 : if (has_second_rf && this_mode == NEWMV &&
2034 0 : mi->interp_filter == EIGHTTAP) {
2035 : // adjust src pointers
2036 0 : mi_buf_shift(x, i);
2037 0 : if (sf->comp_inter_joint_search_thresh <= bsize) {
2038 : int rate_mv;
2039 0 : joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
2040 0 : mi_col, seg_mvs[i], &rate_mv);
2041 0 : seg_mvs[i][mi->ref_frame[0]].as_int =
2042 0 : frame_mv[this_mode][mi->ref_frame[0]].as_int;
2043 0 : seg_mvs[i][mi->ref_frame[1]].as_int =
2044 0 : frame_mv[this_mode][mi->ref_frame[1]].as_int;
2045 : }
2046 : // restore src pointers
2047 0 : mi_buf_restore(x, orig_src, orig_pre);
2048 : }
2049 :
2050 0 : bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs(
2051 0 : cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv, seg_mvs[i],
2052 0 : bsi->ref_mv, x->nmvjointcost, x->mvcost);
2053 :
2054 0 : for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2055 0 : bsi->rdstat[i][mode_idx].mvs[ref].as_int =
2056 0 : mode_mv[this_mode][ref].as_int;
2057 0 : if (num_4x4_blocks_wide > 1)
2058 0 : bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
2059 0 : mode_mv[this_mode][ref].as_int;
2060 0 : if (num_4x4_blocks_high > 1)
2061 0 : bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
2062 0 : mode_mv[this_mode][ref].as_int;
2063 : }
2064 :
2065 : // Trap vectors that reach beyond the UMV borders
2066 0 : if (mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][0].as_mv) ||
2067 0 : (has_second_rf &&
2068 0 : mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][1].as_mv)))
2069 0 : continue;
2070 :
2071 0 : if (filter_idx > 0) {
2072 0 : BEST_SEG_INFO *ref_bsi = bsi_buf;
2073 0 : subpelmv = 0;
2074 0 : have_ref = 1;
2075 :
2076 0 : for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2077 0 : subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
2078 0 : have_ref &= mode_mv[this_mode][ref].as_int ==
2079 0 : ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2080 : }
2081 :
2082 0 : if (filter_idx > 1 && !subpelmv && !have_ref) {
2083 0 : ref_bsi = bsi_buf + 1;
2084 0 : have_ref = 1;
2085 0 : for (ref = 0; ref < 1 + has_second_rf; ++ref)
2086 0 : have_ref &= mode_mv[this_mode][ref].as_int ==
2087 0 : ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2088 : }
2089 :
2090 0 : if (!subpelmv && have_ref &&
2091 0 : ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2092 0 : memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
2093 : sizeof(SEG_RDSTAT));
2094 0 : if (num_4x4_blocks_wide > 1)
2095 0 : bsi->rdstat[i + 1][mode_idx].eobs =
2096 0 : ref_bsi->rdstat[i + 1][mode_idx].eobs;
2097 0 : if (num_4x4_blocks_high > 1)
2098 0 : bsi->rdstat[i + 2][mode_idx].eobs =
2099 0 : ref_bsi->rdstat[i + 2][mode_idx].eobs;
2100 :
2101 0 : if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2102 0 : mode_selected = this_mode;
2103 0 : best_rd = bsi->rdstat[i][mode_idx].brdcost;
2104 : }
2105 0 : continue;
2106 : }
2107 : }
2108 :
2109 0 : bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment(
2110 0 : cpi, x, bsi->segment_rd - this_segment_rd, i,
2111 : &bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist,
2112 0 : &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta,
2113 0 : bsi->rdstat[i][mode_idx].tl, mi_row, mi_col);
2114 0 : if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2115 0 : bsi->rdstat[i][mode_idx].brdcost +=
2116 0 : RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0);
2117 0 : bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2118 0 : bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
2119 0 : if (num_4x4_blocks_wide > 1)
2120 0 : bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
2121 0 : if (num_4x4_blocks_high > 1)
2122 0 : bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
2123 : }
2124 :
2125 0 : if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2126 0 : mode_selected = this_mode;
2127 0 : best_rd = bsi->rdstat[i][mode_idx].brdcost;
2128 : }
2129 : } /*for each 4x4 mode*/
2130 :
2131 0 : if (best_rd == INT64_MAX) {
2132 : int iy, midx;
2133 0 : for (iy = i + 1; iy < 4; ++iy)
2134 0 : for (midx = 0; midx < INTER_MODES; ++midx)
2135 0 : bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2136 0 : bsi->segment_rd = INT64_MAX;
2137 0 : return INT64_MAX;
2138 : }
2139 :
2140 0 : mode_idx = INTER_OFFSET(mode_selected);
2141 0 : memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2142 0 : memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2143 :
2144 0 : set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
2145 0 : frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
2146 : x->mvcost);
2147 :
2148 0 : br += bsi->rdstat[i][mode_idx].brate;
2149 0 : bd += bsi->rdstat[i][mode_idx].bdist;
2150 0 : block_sse += bsi->rdstat[i][mode_idx].bsse;
2151 0 : segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2152 0 : this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2153 :
2154 0 : if (this_segment_rd > bsi->segment_rd) {
2155 : int iy, midx;
2156 0 : for (iy = i + 1; iy < 4; ++iy)
2157 0 : for (midx = 0; midx < INTER_MODES; ++midx)
2158 0 : bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2159 0 : bsi->segment_rd = INT64_MAX;
2160 0 : return INT64_MAX;
2161 : }
2162 : }
2163 : } /* for each label */
2164 :
2165 0 : bsi->r = br;
2166 0 : bsi->d = bd;
2167 0 : bsi->segment_yrate = segmentyrate;
2168 0 : bsi->segment_rd = this_segment_rd;
2169 0 : bsi->sse = block_sse;
2170 :
2171 : // update the coding decisions
2172 0 : for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode;
2173 :
2174 0 : if (bsi->segment_rd > best_rd) return INT64_MAX;
2175 : /* set it to the best */
2176 0 : for (i = 0; i < 4; i++) {
2177 0 : mode_idx = INTER_OFFSET(bsi->modes[i]);
2178 0 : mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2179 0 : if (has_second_ref(mi))
2180 0 : mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2181 0 : x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2182 0 : mi->bmi[i].as_mode = bsi->modes[i];
2183 : }
2184 :
2185 : /*
2186 : * used to set mbmi->mv.as_int
2187 : */
2188 0 : *returntotrate = bsi->r;
2189 0 : *returndistortion = bsi->d;
2190 0 : *returnyrate = bsi->segment_yrate;
2191 0 : *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
2192 0 : *psse = bsi->sse;
2193 0 : mi->mode = bsi->modes[3];
2194 :
2195 0 : return bsi->segment_rd;
2196 : }
2197 :
2198 0 : static void estimate_ref_frame_costs(const VP9_COMMON *cm,
2199 : const MACROBLOCKD *xd, int segment_id,
2200 : unsigned int *ref_costs_single,
2201 : unsigned int *ref_costs_comp,
2202 : vpx_prob *comp_mode_p) {
2203 0 : int seg_ref_active =
2204 0 : segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
2205 0 : if (seg_ref_active) {
2206 0 : memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2207 0 : memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2208 0 : *comp_mode_p = 128;
2209 : } else {
2210 0 : vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
2211 0 : vpx_prob comp_inter_p = 128;
2212 :
2213 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2214 0 : comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
2215 0 : *comp_mode_p = comp_inter_p;
2216 : } else {
2217 0 : *comp_mode_p = 128;
2218 : }
2219 :
2220 0 : ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2221 :
2222 0 : if (cm->reference_mode != COMPOUND_REFERENCE) {
2223 0 : vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2224 0 : vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2225 0 : unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2226 :
2227 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT)
2228 0 : base_cost += vp9_cost_bit(comp_inter_p, 0);
2229 :
2230 0 : ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2231 0 : ref_costs_single[ALTREF_FRAME] = base_cost;
2232 0 : ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2233 0 : ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2234 0 : ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2235 0 : ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2236 0 : ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2237 : } else {
2238 0 : ref_costs_single[LAST_FRAME] = 512;
2239 0 : ref_costs_single[GOLDEN_FRAME] = 512;
2240 0 : ref_costs_single[ALTREF_FRAME] = 512;
2241 : }
2242 0 : if (cm->reference_mode != SINGLE_REFERENCE) {
2243 0 : vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2244 0 : unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2245 :
2246 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT)
2247 0 : base_cost += vp9_cost_bit(comp_inter_p, 1);
2248 :
2249 0 : ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2250 0 : ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2251 : } else {
2252 0 : ref_costs_comp[LAST_FRAME] = 512;
2253 0 : ref_costs_comp[GOLDEN_FRAME] = 512;
2254 : }
2255 : }
2256 0 : }
2257 :
2258 0 : static void store_coding_context(
2259 : MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
2260 : int64_t comp_pred_diff[REFERENCE_MODES],
2261 : int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS], int skippable) {
2262 0 : MACROBLOCKD *const xd = &x->e_mbd;
2263 :
2264 : // Take a snapshot of the coding context so it can be
2265 : // restored if we decide to encode this way
2266 0 : ctx->skip = x->skip;
2267 0 : ctx->skippable = skippable;
2268 0 : ctx->best_mode_index = mode_index;
2269 0 : ctx->mic = *xd->mi[0];
2270 0 : ctx->mbmi_ext = *x->mbmi_ext;
2271 0 : ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2272 0 : ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
2273 0 : ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
2274 :
2275 0 : memcpy(ctx->best_filter_diff, best_filter_diff,
2276 : sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2277 0 : }
2278 :
2279 0 : static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2280 : MV_REFERENCE_FRAME ref_frame,
2281 : BLOCK_SIZE block_size, int mi_row, int mi_col,
2282 : int_mv frame_nearest_mv[MAX_REF_FRAMES],
2283 : int_mv frame_near_mv[MAX_REF_FRAMES],
2284 : struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2285 0 : const VP9_COMMON *cm = &cpi->common;
2286 0 : const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
2287 0 : MACROBLOCKD *const xd = &x->e_mbd;
2288 0 : MODE_INFO *const mi = xd->mi[0];
2289 0 : int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
2290 0 : const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
2291 0 : MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2292 :
2293 0 : assert(yv12 != NULL);
2294 :
2295 : // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2296 : // use the UV scaling factors.
2297 0 : vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
2298 :
2299 : // Gets an initial list of candidate vectors from neighbours and orders them
2300 0 : vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col,
2301 0 : mbmi_ext->mode_context);
2302 :
2303 : // Candidate refinement carried out at encoder and decoder
2304 0 : vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
2305 0 : &frame_nearest_mv[ref_frame],
2306 0 : &frame_near_mv[ref_frame]);
2307 :
2308 : // Further refinement that is encode side only to test the top few candidates
2309 : // in full and choose the best as the centre point for subsequent searches.
2310 : // The current implementation doesn't support scaling.
2311 0 : if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
2312 0 : vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
2313 : block_size);
2314 0 : }
2315 :
2316 0 : static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
2317 : int mi_row, int mi_col, int_mv *tmp_mv,
2318 : int *rate_mv) {
2319 0 : MACROBLOCKD *xd = &x->e_mbd;
2320 0 : const VP9_COMMON *cm = &cpi->common;
2321 0 : MODE_INFO *mi = xd->mi[0];
2322 0 : struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
2323 0 : int bestsme = INT_MAX;
2324 : int step_param;
2325 0 : int sadpb = x->sadperbit16;
2326 : MV mvp_full;
2327 0 : int ref = mi->ref_frame[0];
2328 0 : MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
2329 0 : const MvLimits tmp_mv_limits = x->mv_limits;
2330 : int cost_list[5];
2331 :
2332 0 : const YV12_BUFFER_CONFIG *scaled_ref_frame =
2333 : vp9_get_scaled_ref_frame(cpi, ref);
2334 :
2335 : MV pred_mv[3];
2336 0 : pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
2337 0 : pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
2338 0 : pred_mv[2] = x->pred_mv[ref];
2339 :
2340 0 : if (scaled_ref_frame) {
2341 : int i;
2342 : // Swap out the reference frame for a version that's been scaled to
2343 : // match the resolution of the current frame, allowing the existing
2344 : // motion search code to be used without additional modifications.
2345 0 : for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
2346 :
2347 0 : vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2348 : }
2349 :
2350 : // Work out the size of the first step in the mv step search.
2351 : // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
2352 0 : if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
2353 : // Take wtd average of the step_params based on the last frame's
2354 : // max mv magnitude and that based on the best ref mvs of the current
2355 : // block for the given reference.
2356 0 : step_param =
2357 0 : (vp9_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
2358 : 2;
2359 : } else {
2360 0 : step_param = cpi->mv_step_param;
2361 : }
2362 :
2363 0 : if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
2364 0 : int boffset =
2365 0 : 2 * (b_width_log2_lookup[BLOCK_64X64] -
2366 0 : VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
2367 0 : step_param = VPXMAX(step_param, boffset);
2368 : }
2369 :
2370 0 : if (cpi->sf.adaptive_motion_search) {
2371 0 : int bwl = b_width_log2_lookup[bsize];
2372 0 : int bhl = b_height_log2_lookup[bsize];
2373 0 : int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
2374 :
2375 0 : if (tlevel < 5) step_param += 2;
2376 :
2377 : // prev_mv_sad is not setup for dynamically scaled frames.
2378 0 : if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
2379 : int i;
2380 0 : for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
2381 0 : if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
2382 0 : x->pred_mv[ref].row = 0;
2383 0 : x->pred_mv[ref].col = 0;
2384 0 : tmp_mv->as_int = INVALID_MV;
2385 :
2386 0 : if (scaled_ref_frame) {
2387 : int i;
2388 0 : for (i = 0; i < MAX_MB_PLANE; ++i)
2389 0 : xd->plane[i].pre[0] = backup_yv12[i];
2390 : }
2391 0 : return;
2392 : }
2393 : }
2394 : }
2395 : }
2396 :
2397 : // Note: MV limits are modified here. Always restore the original values
2398 : // after full-pixel motion search.
2399 0 : vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
2400 :
2401 0 : mvp_full = pred_mv[x->mv_best_ref_index[ref]];
2402 :
2403 0 : mvp_full.col >>= 3;
2404 0 : mvp_full.row >>= 3;
2405 :
2406 0 : bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
2407 : cond_cost_list(cpi, cost_list), &ref_mv,
2408 : &tmp_mv->as_mv, INT_MAX, 1);
2409 :
2410 0 : x->mv_limits = tmp_mv_limits;
2411 :
2412 0 : if (bestsme < INT_MAX) {
2413 : uint32_t dis; /* TODO: use dis in distortion calculation later. */
2414 0 : cpi->find_fractional_mv_step(
2415 : x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
2416 0 : &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
2417 : cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
2418 0 : x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
2419 : }
2420 0 : *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
2421 : x->mvcost, MV_COST_WEIGHT);
2422 :
2423 0 : if (cpi->sf.adaptive_motion_search) x->pred_mv[ref] = tmp_mv->as_mv;
2424 :
2425 0 : if (scaled_ref_frame) {
2426 : int i;
2427 0 : for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
2428 : }
2429 : }
2430 :
2431 0 : static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2432 : uint8_t *orig_dst[MAX_MB_PLANE],
2433 : int orig_dst_stride[MAX_MB_PLANE]) {
2434 : int i;
2435 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
2436 0 : xd->plane[i].dst.buf = orig_dst[i];
2437 0 : xd->plane[i].dst.stride = orig_dst_stride[i];
2438 : }
2439 0 : }
2440 :
2441 : // In some situations we want to discount the apparent cost of a new motion
2442 : // vector. Where there is a subtle motion field and especially where there is
2443 : // low spatial complexity then it can be hard to cover the cost of a new motion
2444 : // vector in a single block, even if that motion vector reduces distortion.
2445 : // However, once established that vector may be usable through the nearest and
2446 : // near mv modes to reduce distortion in subsequent blocks and also improve
2447 : // visual quality.
//
// Returns non-zero only when all of the following hold:
//  - the current frame is not a source alt-ref frame
//    (cpi->rc.is_src_frame_alt_ref is zero);
//  - this_mode is NEWMV and this_mv is non-zero;
//  - the NEARESTMV candidate for ref_frame is zero or INVALID_MV;
//  - the NEARMV candidate for ref_frame is zero or INVALID_MV.
// mode_mv is indexed as mode_mv[mode][ref_frame].
2448 0 : static int discount_newmv_test(const VP9_COMP *cpi, int this_mode,
2449 : int_mv this_mv,
2450 : int_mv (*mode_mv)[MAX_REF_FRAMES],
2451 : int ref_frame) {
2452 0 : return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
2453 0 : (this_mv.as_int != 0) &&
2454 0 : ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
2455 0 : (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
2456 0 : ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
2457 0 : (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
2458 : }
2459 :
// Evaluates the inter mode currently stored in xd->mi[0] (prediction mode and
// reference frame(s)) for a block of size bsize.
//
// Steps, in order:
//  - for NEWMV, obtains the motion vector(s): single_motion_search() for
//    single prediction, or the single_newmv[] results (optionally refined by
//    joint_motion_search()) for compound prediction, and adds their rate to
//    *rate2;
//  - clamps (non-NEWMV only) and bounds-checks the vectors against
//    x->mv_limits, then adds the mode signalling cost to *rate2;
//  - unless the frame-level filter is BILINEAR, selects an interpolation
//    filter from model_rd_for_sb() estimates, writing filter_cache[] and
//    raising *mask_filter;
//  - if skip_txfm_sb is zero, computes luma and chroma rate/distortion with
//    super_block_yrd() / super_block_uvrd(); otherwise costs the block as
//    skipped.
//
// *rate2 is accumulated throughout. *distortion is accumulated on the
// non-skip path and overwritten with skip_sse_sb on the skip path. *rate_y,
// *rate_uv, *skippable and *psse are written on the non-skip path;
// *disable_skip and x->skip are set on the skip path. For single-reference
// prediction single_newmv[] (NEWMV only), single_filter[][] and
// single_skippable[][] are updated for the current mode/reference.
//
// Returns INT64_MAX when the mode is rejected: invalid or out-of-range motion
// vector, a breakout against ref_best_rd, or failure of the luma/chroma RD
// search (in the last two cases *rate2 is set to INT_MAX and *distortion to
// INT64_MAX). Returns 0 otherwise; the caller recomputes the actual RD cost.
2460 0 : static int64_t handle_inter_mode(
2461 : VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
2462 : int64_t *distortion, int *skippable, int *rate_y, int *rate_uv,
2463 : int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row,
2464 : int mi_col, int_mv single_newmv[MAX_REF_FRAMES],
2465 : INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
2466 : int (*single_skippable)[MAX_REF_FRAMES], int64_t *psse,
2467 : const int64_t ref_best_rd, int64_t *mask_filter, int64_t filter_cache[]) {
2468 0 : VP9_COMMON *cm = &cpi->common;
2469 0 : MACROBLOCKD *xd = &x->e_mbd;
2470 0 : MODE_INFO *mi = xd->mi[0];
2471 0 : MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
2472 0 : const int is_comp_pred = has_second_ref(mi);
2473 0 : const int this_mode = mi->mode;
2474 0 : int_mv *frame_mv = mode_mv[this_mode];
2475 : int i;
// refs[1] falls back to 0 when there is no second reference (negative value),
// so it is always a valid array index.
2476 0 : int refs[2] = { mi->ref_frame[0],
2477 0 : (mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]) };
2478 : int_mv cur_mv[2];
2479 : #if CONFIG_VP9_HIGHBITDEPTH
2480 : DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
2481 : uint8_t *tmp_buf;
2482 : #else
2483 : DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
2484 : #endif // CONFIG_VP9_HIGHBITDEPTH
2485 0 : int pred_exists = 0;
2486 : int intpel_mv;
2487 0 : int64_t rd, tmp_rd, best_rd = INT64_MAX;
2488 0 : int best_needs_copy = 0;
2489 : uint8_t *orig_dst[MAX_MB_PLANE];
2490 : int orig_dst_stride[MAX_MB_PLANE];
2491 0 : int rs = 0;
2492 0 : INTERP_FILTER best_filter = SWITCHABLE;
2493 0 : uint8_t skip_txfm[MAX_MB_PLANE << 2] = { 0 };
2494 0 : int64_t bsse[MAX_MB_PLANE << 2] = { 0 };
2495 :
// pred_filter_search is 1 only when sf.cb_pred_filter_search is enabled and
// the block position (scaled by block width) plus get_chessboard_index() for
// the current frame is odd.
2496 0 : int bsl = mi_width_log2_lookup[bsize];
2497 0 : int pred_filter_search =
2498 0 : cpi->sf.cb_pred_filter_search
2499 0 : ? (((mi_row + mi_col) >> bsl) +
2500 0 : get_chessboard_index(cm->current_video_frame)) &
2501 : 0x1
2502 0 : : 0;
2503 :
2504 0 : int skip_txfm_sb = 0;
2505 0 : int64_t skip_sse_sb = INT64_MAX;
2506 0 : int64_t distortion_y = 0, distortion_uv = 0;
2507 :
2508 : #if CONFIG_VP9_HIGHBITDEPTH
2509 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2510 : tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
2511 : } else {
2512 : tmp_buf = (uint8_t *)tmp_buf16;
2513 : }
2514 : #endif // CONFIG_VP9_HIGHBITDEPTH
2515 :
// Seed best_filter from the above neighbour's filter when the mode is not
// NEWMV or when the above and left neighbours agree. af/lf stay SWITCHABLE
// for neighbours that are missing or not inter coded.
2516 0 : if (pred_filter_search) {
2517 0 : INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
2518 0 : if (xd->above_mi && is_inter_block(xd->above_mi))
2519 0 : af = xd->above_mi->interp_filter;
2520 0 : if (xd->left_mi && is_inter_block(xd->left_mi))
2521 0 : lf = xd->left_mi->interp_filter;
2522 :
2523 0 : if ((this_mode != NEWMV) || (af == lf)) best_filter = af;
2524 : }
2525 :
2526 0 : if (is_comp_pred) {
2527 0 : if (frame_mv[refs[0]].as_int == INVALID_MV ||
2528 0 : frame_mv[refs[1]].as_int == INVALID_MV)
2529 0 : return INT64_MAX;
2530 :
// Reuse the filter chosen by the two single-reference searches of this mode
// when they picked the same one.
2531 0 : if (cpi->sf.adaptive_mode_search) {
2532 0 : if (single_filter[this_mode][refs[0]] ==
2533 0 : single_filter[this_mode][refs[1]])
2534 0 : best_filter = single_filter[this_mode][refs[0]];
2535 : }
2536 : }
2537 :
2538 0 : if (this_mode == NEWMV) {
2539 : int rate_mv;
2540 0 : if (is_comp_pred) {
2541 : // Initialize mv using single prediction mode result.
2542 0 : frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2543 0 : frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2544 :
2545 0 : if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2546 0 : joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col,
2547 : single_newmv, &rate_mv);
2548 : } else {
2549 0 : rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2550 0 : &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
2551 0 : x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2552 0 : rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2553 0 : &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
2554 0 : x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2555 : }
2556 0 : *rate2 += rate_mv;
2557 : } else {
2558 : int_mv tmp_mv;
2559 0 : single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv);
2560 0 : if (tmp_mv.as_int == INVALID_MV) return INT64_MAX;
2561 :
2562 0 : frame_mv[refs[0]].as_int = xd->mi[0]->bmi[0].as_mv[0].as_int =
2563 0 : tmp_mv.as_int;
2564 0 : single_newmv[refs[0]].as_int = tmp_mv.as_int;
2565 :
2566 : // Estimate the rate implications of a new mv but discount this
2567 : // under certain circumstances where we want to help initiate a weak
2568 : // motion field, where the distortion gain for a single block may not
2569 : // be enough to overcome the cost of a new mv.
2570 0 : if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
2571 0 : *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
2572 : } else {
2573 0 : *rate2 += rate_mv;
2574 : }
2575 : }
2576 : }
2577 :
2578 0 : for (i = 0; i < is_comp_pred + 1; ++i) {
2579 0 : cur_mv[i] = frame_mv[refs[i]];
2580 : // Clip "next_nearest" so that it does not extend too far out of image
2581 0 : if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);
2582 :
2583 0 : if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
2584 0 : mi->mv[i].as_int = cur_mv[i].as_int;
2585 : }
2586 :
2587 : // do first prediction into the destination buffer. Do the next
2588 : // prediction into a temporary buffer. Then keep track of which one
2589 : // of these currently holds the best predictor, and use the other
2590 : // one for future predictions. In the end, copy from tmp_buf to
2591 : // dst if necessary.
2592 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
2593 0 : orig_dst[i] = xd->plane[i].dst.buf;
2594 0 : orig_dst_stride[i] = xd->plane[i].dst.stride;
2595 : }
2596 :
2597 : // We don't include the cost of the second reference here, because there
2598 : // are only two options: Last/ARF or Golden/ARF; The second one is always
2599 : // known, which is ARF.
2600 : //
2601 : // Under some circumstances we discount the cost of new mv mode to encourage
2602 : // initiation of a motion field.
2603 0 : if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
2604 : refs[0])) {
2605 0 : *rate2 +=
2606 0 : VPXMIN(cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]),
2607 : cost_mv_ref(cpi, NEARESTMV, mbmi_ext->mode_context[refs[0]]));
2608 : } else {
2609 0 : *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
2610 : }
2611 :
// Give up early when the rate alone already costs more than the best RD so
// far; NEARESTMV is exempt from this check.
2612 0 : if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
2613 0 : mi->mode != NEARESTMV)
2614 0 : return INT64_MAX;
2615 :
2616 0 : pred_exists = 0;
2617 : // Are all MVs integer pel for Y and UV
2618 0 : intpel_mv = !mv_has_subpel(&mi->mv[0].as_mv);
2619 0 : if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mi->mv[1].as_mv);
2620 :
2621 : // Search for best switchable filter by checking the variance of
2622 : // pred error irrespective of whether the filter will be used
2623 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;
2624 :
2625 0 : if (cm->interp_filter != BILINEAR) {
2626 0 : if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
2627 0 : best_filter = EIGHTTAP;
2628 0 : } else if (best_filter == SWITCHABLE) {
2629 : int newbest;
2630 0 : int tmp_rate_sum = 0;
2631 0 : int64_t tmp_dist_sum = 0;
2632 :
2633 0 : for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2634 : int j;
2635 : int64_t rs_rd;
2636 0 : int tmp_skip_sb = 0;
2637 0 : int64_t tmp_skip_sse = INT64_MAX;
2638 :
2639 0 : mi->interp_filter = i;
2640 0 : rs = vp9_get_switchable_rate(cpi, xd);
2641 0 : rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2642 :
// With integer-pel vectors, filters after the first reuse the rate and
// distortion modelled for filter 0 instead of rebuilding the predictor.
2643 0 : if (i > 0 && intpel_mv) {
2644 0 : rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2645 0 : filter_cache[i] = rd;
2646 0 : filter_cache[SWITCHABLE_FILTERS] =
2647 0 : VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2648 0 : if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
2649 0 : *mask_filter = VPXMAX(*mask_filter, rd);
2650 : } else {
2651 0 : int rate_sum = 0;
2652 0 : int64_t dist_sum = 0;
// Filter i is excluded by the speed-feature search mask.
// NOTE(review): the two stores before `continue` have no effect; rate_sum and
// dist_sum are locals of this block and are not read again on this path.
2653 0 : if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
2654 0 : (cpi->sf.interp_filter_search_mask & (1 << i))) {
2655 0 : rate_sum = INT_MAX;
2656 0 : dist_sum = INT64_MAX;
2657 0 : continue;
2658 : }
2659 :
// Choose where this filter's prediction is built: the original destination
// buffers, or the local tmp_buf (64-wide, one 64x64 region per plane).
2660 0 : if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) ||
2661 0 : (cm->interp_filter != SWITCHABLE &&
2662 0 : (cm->interp_filter == mi->interp_filter ||
2663 0 : (i == 0 && intpel_mv)))) {
2664 0 : restore_dst_buf(xd, orig_dst, orig_dst_stride);
2665 : } else {
2666 0 : for (j = 0; j < MAX_MB_PLANE; j++) {
2667 0 : xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2668 0 : xd->plane[j].dst.stride = 64;
2669 : }
2670 : }
2671 0 : vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2672 0 : model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &tmp_skip_sb,
2673 : &tmp_skip_sse);
2674 :
2675 0 : rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2676 0 : filter_cache[i] = rd;
2677 0 : filter_cache[SWITCHABLE_FILTERS] =
2678 0 : VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2679 0 : if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
2680 0 : *mask_filter = VPXMAX(*mask_filter, rd);
2681 :
2682 0 : if (i == 0 && intpel_mv) {
2683 0 : tmp_rate_sum = rate_sum;
2684 0 : tmp_dist_sum = dist_sum;
2685 : }
2686 : }
2687 :
// After the first filter only: bail out when half of the modelled RD already
// exceeds ref_best_rd.
2688 0 : if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2689 0 : if (rd / 2 > ref_best_rd) {
2690 0 : restore_dst_buf(xd, orig_dst, orig_dst_stride);
2691 0 : return INT64_MAX;
2692 : }
2693 : }
2694 0 : newbest = i == 0 || rd < best_rd;
2695 :
2696 0 : if (newbest) {
2697 0 : best_rd = rd;
2698 0 : best_filter = mi->interp_filter;
2699 0 : if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2700 0 : best_needs_copy = !best_needs_copy;
2701 : }
2702 :
2703 0 : if ((cm->interp_filter == SWITCHABLE && newbest) ||
2704 0 : (cm->interp_filter != SWITCHABLE &&
2705 0 : cm->interp_filter == mi->interp_filter)) {
2706 0 : pred_exists = 1;
2707 0 : tmp_rd = best_rd;
2708 :
2709 0 : skip_txfm_sb = tmp_skip_sb;
2710 0 : skip_sse_sb = tmp_skip_sse;
2711 0 : memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2712 0 : memcpy(bsse, x->bsse, sizeof(bsse));
2713 : }
2714 : }
2715 0 : restore_dst_buf(xd, orig_dst, orig_dst_stride);
2716 : }
2717 : }
2718 : // Set the appropriate filter
2719 0 : mi->interp_filter =
2720 0 : cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter;
2721 0 : rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0;
2722 :
2723 0 : if (pred_exists) {
2724 0 : if (best_needs_copy) {
2725 : // again temporarily set the buffers to local memory to prevent a memcpy
2726 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
2727 0 : xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2728 0 : xd->plane[i].dst.stride = 64;
2729 : }
2730 : }
2731 0 : rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
2732 : } else {
2733 : int tmp_rate;
2734 : int64_t tmp_dist;
2735 : // Handles the special case when a filter that is not in the
2736 : // switchable list (ex. bilinear) is indicated at the frame level, or
2737 : // skip condition holds.
2738 0 : vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2739 0 : model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist, &skip_txfm_sb,
2740 : &skip_sse_sb);
2741 0 : rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2742 0 : memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2743 0 : memcpy(bsse, x->bsse, sizeof(bsse));
2744 : }
2745 :
2746 0 : if (!is_comp_pred) single_filter[this_mode][refs[0]] = mi->interp_filter;
2747 :
// For compound prediction, mark every transform block as SKIP_TXFM_AC_DC when
// both single-reference searches of this mode were recorded as skippable.
2748 0 : if (cpi->sf.adaptive_mode_search)
2749 0 : if (is_comp_pred)
2750 0 : if (single_skippable[this_mode][refs[0]] &&
2751 0 : single_skippable[this_mode][refs[1]])
2752 0 : memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));
2753 :
2754 0 : if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2755 : // if current pred_error modeled rd is substantially more than the best
2756 : // so far, do not bother doing full rd
2757 0 : if (rd / 2 > ref_best_rd) {
2758 0 : restore_dst_buf(xd, orig_dst, orig_dst_stride);
2759 0 : return INT64_MAX;
2760 : }
2761 : }
2762 :
2763 0 : if (cm->interp_filter == SWITCHABLE) *rate2 += rs;
2764 :
2765 0 : memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
2766 0 : memcpy(x->bsse, bsse, sizeof(bsse));
2767 :
2768 0 : if (!skip_txfm_sb) {
2769 : int skippable_y, skippable_uv;
2770 0 : int64_t sseuv = INT64_MAX;
2771 0 : int64_t rdcosty = INT64_MAX;
2772 :
2773 : // Y cost and distortion
2774 0 : vp9_subtract_plane(x, bsize, 0);
2775 0 : super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize,
2776 : ref_best_rd);
2777 :
2778 0 : if (*rate_y == INT_MAX) {
2779 0 : *rate2 = INT_MAX;
2780 0 : *distortion = INT64_MAX;
2781 0 : restore_dst_buf(xd, orig_dst, orig_dst_stride);
2782 0 : return INT64_MAX;
2783 : }
2784 :
2785 0 : *rate2 += *rate_y;
2786 0 : *distortion += distortion_y;
2787 :
// The chroma search is given only the RD budget left after luma, where the
// luma cost is the cheaper of coding it and of spending *psse with zero rate.
2788 0 : rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2789 0 : rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2790 :
2791 0 : if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
2792 : &sseuv, bsize, ref_best_rd - rdcosty)) {
2793 0 : *rate2 = INT_MAX;
2794 0 : *distortion = INT64_MAX;
2795 0 : restore_dst_buf(xd, orig_dst, orig_dst_stride);
2796 0 : return INT64_MAX;
2797 : }
2798 :
2799 0 : *psse += sseuv;
2800 0 : *rate2 += *rate_uv;
2801 0 : *distortion += distortion_uv;
2802 0 : *skippable = skippable_y && skippable_uv;
2803 : } else {
2804 0 : x->skip = 1;
2805 0 : *disable_skip = 1;
2806 :
2807 : // The cost of skip bit needs to be added.
2808 0 : *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2809 :
2810 0 : *distortion = skip_sse_sb;
2811 : }
2812 :
// NOTE(review): on the skip path above *skippable is not written, so this
// records whatever value the caller passed in.
2813 0 : if (!is_comp_pred) single_skippable[this_mode][refs[0]] = *skippable;
2814 :
2815 0 : restore_dst_buf(xd, orig_dst, orig_dst_stride);
2816 0 : return 0; // The rate-distortion cost will be re-calculated by caller.
2817 : }
2818 :
// Picks intra prediction modes for the current block and reports the result
// in rd_cost.
//
// The block is first marked as intra (ref_frame = { INTRA_FRAME, NONE }).
// Luma is searched with rd_pick_intra_sby_mode() for bsize >= BLOCK_8X8 and
// with rd_pick_intra_sub_8x8_y_mode() otherwise; chroma is then searched with
// rd_pick_intra_sbuv_mode().
//
// If the luma search returns a value >= best_rd, rd_cost->rate is set to
// INT_MAX and the function returns immediately; rd_cost->dist,
// rd_cost->rdcost, ctx->mic and ctx->mbmi_ext are not written in that case.
// Otherwise rd_cost->rate / dist / rdcost are filled in and the chosen mode
// info is copied into ctx->mic and ctx->mbmi_ext.
2819 0 : void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
2820 : BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
2821 : int64_t best_rd) {
2822 0 : VP9_COMMON *const cm = &cpi->common;
2823 0 : MACROBLOCKD *const xd = &x->e_mbd;
2824 0 : struct macroblockd_plane *const pd = xd->plane;
2825 0 : int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2826 0 : int y_skip = 0, uv_skip = 0;
2827 0 : int64_t dist_y = 0, dist_uv = 0;
2828 : TX_SIZE max_uv_tx_size;
2829 0 : x->skip_encode = 0;
2830 0 : ctx->skip = 0;
2831 0 : xd->mi[0]->ref_frame[0] = INTRA_FRAME;
2832 0 : xd->mi[0]->ref_frame[1] = NONE;
2833 : // Initialize interp_filter here so we do not have to check for inter block
2834 : // modes in get_pred_context_switchable_interp()
2835 0 : xd->mi[0]->interp_filter = SWITCHABLE_FILTERS;
2836 :
2837 0 : if (bsize >= BLOCK_8X8) {
2838 0 : if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
2839 : &y_skip, bsize, best_rd) >= best_rd) {
2840 0 : rd_cost->rate = INT_MAX;
2841 0 : return;
2842 : }
2843 : } else {
2844 0 : y_skip = 0;
2845 0 : if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2846 : &dist_y, best_rd) >= best_rd) {
2847 0 : rd_cost->rate = INT_MAX;
2848 0 : return;
2849 : }
2850 : }
// The chroma transform size follows from the block size, the luma transform
// size just chosen, and plane 1's subsampling. The chroma search is run on at
// least an 8x8 block.
2851 0 : max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->tx_size]
2852 0 : [pd[1].subsampling_x][pd[1].subsampling_y];
2853 0 : rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv,
2854 : &uv_skip, VPXMAX(BLOCK_8X8, bsize), max_uv_tx_size);
2855 :
// When both luma and chroma are skippable, the token-only rates are removed
// and the skip flag is costed as 1; otherwise the flag is costed as 0.
2856 0 : if (y_skip && uv_skip) {
2857 0 : rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2858 0 : vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2859 0 : rd_cost->dist = dist_y + dist_uv;
2860 : } else {
2861 0 : rd_cost->rate =
2862 0 : rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2863 0 : rd_cost->dist = dist_y + dist_uv;
2864 : }
2865 :
2866 0 : ctx->mic = *xd->mi[0];
2867 0 : ctx->mbmi_ext = *x->mbmi_ext;
2868 0 : rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
2869 : }
2870 :
2871 : // This function is designed to apply a bias or adjustment to an rd value based
2872 : // on the relative variance of the source and reconstruction.
//
// *this_rd is increased in place by var_factor percent and is never reduced.
// Nothing happens when *this_rd is INT64_MAX. source_variance is supplied by
// the caller; the reconstruction variance is measured here on
// xd->plane[0].dst. ref_frame is only used to test for INTRA_FRAME.
//
// LOW_VAR_THRESH : variance threshold used by both adjustment cases.
// VLOW_ADJ_MAX   : cap (in percent) for the intra / low-recon-variance case.
// VHIGH_ADJ_MAX  : cap (in percent) for the low-source-variance case.
2873 : #define LOW_VAR_THRESH 16
2874 : #define VLOW_ADJ_MAX 25
2875 : #define VHIGH_ADJ_MAX 8
2876 0 : static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x,
2877 : BLOCK_SIZE bsize, int64_t *this_rd,
2878 : MV_REFERENCE_FRAME ref_frame,
2879 : unsigned int source_variance) {
2880 0 : MACROBLOCKD *const xd = &x->e_mbd;
2881 : unsigned int recon_variance;
2882 0 : unsigned int absvar_diff = 0;
2883 0 : int64_t var_error = 0;
2884 0 : int64_t var_factor = 0;
2885 :
2886 0 : if (*this_rd == INT64_MAX) return;
2887 :
2888 : #if CONFIG_VP9_HIGHBITDEPTH
2889 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2890 : recon_variance = vp9_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst,
2891 : bsize, xd->bd);
2892 : } else {
2893 : recon_variance =
2894 : vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
2895 : }
2896 : #else
2897 0 : recon_variance = vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
2898 : #endif // CONFIG_VP9_HIGHBITDEPTH
2899 :
// Only measure the mismatch when the combined variance is above the
// threshold; otherwise absvar_diff and var_error stay 0, which forces
// var_factor to 0 below (no adjustment).
//
// var_error = 100 - 200*s*r / (s*s + r*r): 0 when the two variances are
// equal, growing towards 100 as they diverge (integer arithmetic).
2900 0 : if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
2901 0 : absvar_diff = (source_variance > recon_variance)
2902 : ? (source_variance - recon_variance)
2903 0 : : (recon_variance - source_variance);
2904 :
2905 0 : var_error = ((int64_t)200 * source_variance * recon_variance) /
2906 0 : (((int64_t)source_variance * source_variance) +
2907 0 : ((int64_t)recon_variance * recon_variance));
2908 0 : var_error = 100 - var_error;
2909 : }
2910 :
2911 : // Source variance above a threshold and ref frame is intra.
2912 : // This case is targeted mainly at discouraging intra modes that give rise
2913 : // to a predictor with a low spatial complexity compared to the source.
2914 0 : if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
2915 : (source_variance > recon_variance)) {
2916 0 : var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
2917 : // A second possible case of interest is where the source variance
2918 : // is very low and we wish to discourage false texture or motion trails.
2919 0 : } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
2920 : (recon_variance > source_variance)) {
2921 0 : var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
2922 : }
// Penalise the RD value by var_factor percent (0 when neither case applied).
2923 0 : *this_rd += (*this_rd * var_factor) / 100;
2924 : }
2925 :
2926 : // Do we have an internal image edge (e.g. formatting bars).
// Returns non-zero only when encoding pass 2 (cpi->oxcf.pass == 2) and
// cpi->twopass.this_frame_stats reports a positive number of inactive zone
// rows or columns.
2927 0 : int vp9_internal_image_edge(VP9_COMP *cpi) {
2928 0 : return (cpi->oxcf.pass == 2) &&
2929 0 : ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
2930 0 : (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
2931 : }
2932 :
2933 : // Checks to see if a super block is on a horizontal image edge.
2934 : // In most cases this is the "real" edge unless there are formatting
2935 : // bars embedded in the stream.
//
// Returns 1 when the top or the bottom active edge lies in the half-open row
// range [mi_row, mi_row + mi_step), and 0 otherwise. Without two-pass data
// the edges are row 0 and cpi->common.mi_rows.
2936 0 : int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) {
2937 0 : int top_edge = 0;
2938 0 : int bottom_edge = cpi->common.mi_rows;
2939 0 : int is_active_h_edge = 0;
2940 :
2941 : // For two pass account for any formatting bars detected.
2942 0 : if (cpi->oxcf.pass == 2) {
2943 0 : TWO_PASS *twopass = &cpi->twopass;
2944 :
2945 : // The inactive region is specified in MBs not mi units.
2946 : // The image edge is in the following MB row.
// Hence the factor of 2 when converting inactive_zone_rows to mi units.
2947 0 : top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
2948 :
2949 0 : bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
// Keep the bottom edge from crossing above the top edge.
2950 0 : bottom_edge = VPXMAX(top_edge, bottom_edge);
2951 : }
2952 :
2953 0 : if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
2954 0 : ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
2955 0 : is_active_h_edge = 1;
2956 : }
2957 0 : return is_active_h_edge;
2958 : }
2959 :
2960 : // Checks to see if a super block is on a vertical image edge.
2961 : // In most cases this is the "real" edge unless there are formatting
2962 : // bars embedded in the stream.
//
// Returns 1 when the left or the right active edge lies in the half-open
// column range [mi_col, mi_col + mi_step), and 0 otherwise. Without two-pass
// data the edges are column 0 and cpi->common.mi_cols.
2963 0 : int vp9_active_v_edge(VP9_COMP *cpi, int mi_col, int mi_step) {
2964 0 : int left_edge = 0;
2965 0 : int right_edge = cpi->common.mi_cols;
2966 0 : int is_active_v_edge = 0;
2967 :
2968 : // For two pass account for any formatting bars detected.
2969 0 : if (cpi->oxcf.pass == 2) {
2970 0 : TWO_PASS *twopass = &cpi->twopass;
2971 :
2972 : // The inactive region is specified in MBs not mi units.
2973 : // The image edge is in the following MB column.
// Hence the factor of 2 when converting inactive_zone_cols to mi units.
2974 0 : left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
2975 :
2976 0 : right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
// Keep the right edge from crossing to the left of the left edge.
2977 0 : right_edge = VPXMAX(left_edge, right_edge);
2978 : }
2979 :
2980 0 : if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
2981 0 : ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
2982 0 : is_active_v_edge = 1;
2983 : }
2984 0 : return is_active_v_edge;
2985 : }
2986 :
2987 : // Checks to see if a super block is at the edge of the active image.
2988 : // In most cases this is the "real" edge unless there are formatting
2989 : // bars embedded in the stream.
//
// Returns non-zero when either vp9_active_h_edge() or vp9_active_v_edge()
// reports an edge within MI_BLOCK_SIZE units of (mi_row, mi_col). The vertical
// check is not evaluated when the horizontal one already succeeded.
2990 0 : int vp9_active_edge_sb(VP9_COMP *cpi, int mi_row, int mi_col) {
2991 0 : return vp9_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
2992 0 : vp9_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
2993 : }
2994 :
2995 0 : void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
2996 : MACROBLOCK *x, int mi_row, int mi_col,
2997 : RD_COST *rd_cost, BLOCK_SIZE bsize,
2998 : PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
2999 0 : VP9_COMMON *const cm = &cpi->common;
3000 0 : TileInfo *const tile_info = &tile_data->tile_info;
3001 0 : RD_OPT *const rd_opt = &cpi->rd;
3002 0 : SPEED_FEATURES *const sf = &cpi->sf;
3003 0 : MACROBLOCKD *const xd = &x->e_mbd;
3004 0 : MODE_INFO *const mi = xd->mi[0];
3005 0 : MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3006 0 : const struct segmentation *const seg = &cm->seg;
3007 : PREDICTION_MODE this_mode;
3008 : MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3009 0 : unsigned char segment_id = mi->segment_id;
3010 : int comp_pred, i, k;
3011 : int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3012 : struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3013 0 : int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3014 : INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
3015 : int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
3016 : static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3017 : VP9_ALT_FLAG };
3018 0 : int64_t best_rd = best_rd_so_far;
3019 : int64_t best_pred_diff[REFERENCE_MODES];
3020 : int64_t best_pred_rd[REFERENCE_MODES];
3021 : int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3022 : int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3023 : MODE_INFO best_mbmode;
3024 0 : int best_mode_skippable = 0;
3025 0 : int midx, best_mode_index = -1;
3026 : unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3027 : vpx_prob comp_mode_p;
3028 0 : int64_t best_intra_rd = INT64_MAX;
3029 0 : unsigned int best_pred_sse = UINT_MAX;
3030 0 : PREDICTION_MODE best_intra_mode = DC_PRED;
3031 : int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3032 : int64_t dist_uv[TX_SIZES];
3033 : int skip_uv[TX_SIZES];
3034 : PREDICTION_MODE mode_uv[TX_SIZES];
3035 0 : const int intra_cost_penalty = vp9_get_intra_cost_penalty(
3036 : cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3037 0 : int best_skip2 = 0;
3038 0 : uint8_t ref_frame_skip_mask[2] = { 0 };
3039 0 : uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
3040 0 : int mode_skip_start = sf->mode_skip_start + 1;
3041 0 : const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
3042 0 : const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
3043 : int64_t mode_threshold[MAX_MODES];
3044 0 : int *mode_map = tile_data->mode_map[bsize];
3045 0 : const int mode_search_skip_flags = sf->mode_search_skip_flags;
3046 0 : int64_t mask_filter = 0;
3047 : int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
3048 :
3049 0 : vp9_zero(best_mbmode);
3050 :
3051 0 : x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3052 :
3053 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;
3054 :
3055 0 : estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3056 : &comp_mode_p);
3057 :
3058 0 : for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
3059 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3060 0 : best_filter_rd[i] = INT64_MAX;
3061 0 : for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX;
3062 0 : for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
3063 0 : for (i = 0; i < MB_MODE_COUNT; ++i) {
3064 0 : for (k = 0; k < MAX_REF_FRAMES; ++k) {
3065 0 : single_inter_filter[i][k] = SWITCHABLE;
3066 0 : single_skippable[i][k] = 0;
3067 : }
3068 : }
3069 :
3070 0 : rd_cost->rate = INT_MAX;
3071 :
3072 0 : for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3073 0 : x->pred_mv_sad[ref_frame] = INT_MAX;
3074 0 : if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3075 0 : assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
3076 0 : setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
3077 : frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3078 : }
3079 0 : frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3080 0 : frame_mv[ZEROMV][ref_frame].as_int = 0;
3081 : }
3082 :
3083 0 : for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3084 0 : if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3085 : // Skip checking missing references in both single and compound reference
3086 : // modes. Note that a mode will be skipped if both reference frames
3087 : // are masked out.
3088 0 : ref_frame_skip_mask[0] |= (1 << ref_frame);
3089 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3090 0 : } else if (sf->reference_masking) {
3091 0 : for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3092 : // Skip fixed mv modes for poor references
3093 0 : if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3094 0 : mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3095 0 : break;
3096 : }
3097 : }
3098 : }
3099 : // If the segment reference frame feature is enabled....
3100 : // then do nothing if the current ref frame is not allowed..
3101 0 : if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3102 0 : get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3103 0 : ref_frame_skip_mask[0] |= (1 << ref_frame);
3104 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3105 : }
3106 : }
3107 :
3108 : // Disable this drop out case if the ref frame
3109 : // segment level feature is enabled for this segment. This is to
3110 : // prevent the possibility that we end up unable to pick any mode.
3111 0 : if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3112 : // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3113 : // unless ARNR filtering is enabled in which case we want
3114 : // an unfiltered alternative. We allow near/nearest as well
3115 : // because they may result in zero-zero MVs but be cheaper.
3116 0 : if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3117 0 : ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
3118 0 : ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
3119 0 : mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3120 0 : if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
3121 0 : mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
3122 0 : if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
3123 0 : mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
3124 : }
3125 : }
3126 :
3127 0 : if (cpi->rc.is_src_frame_alt_ref) {
3128 0 : if (sf->alt_ref_search_fp) {
3129 0 : mode_skip_mask[ALTREF_FRAME] = 0;
3130 0 : ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
3131 0 : ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
3132 : }
3133 : }
3134 :
3135 0 : if (sf->alt_ref_search_fp)
3136 0 : if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
3137 0 : if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
3138 0 : mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
3139 :
3140 0 : if (sf->adaptive_mode_search) {
3141 0 : if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
3142 0 : cpi->rc.frames_since_golden >= 3)
3143 0 : if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
3144 0 : mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
3145 : }
3146 :
3147 0 : if (bsize > sf->max_intra_bsize) {
3148 0 : ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
3149 0 : ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
3150 : }
3151 :
3152 0 : mode_skip_mask[INTRA_FRAME] |=
3153 0 : ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
3154 :
3155 0 : for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
3156 0 : for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
3157 0 : mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
3158 :
3159 0 : midx = sf->schedule_mode_search ? mode_skip_start : 0;
3160 0 : while (midx > 4) {
3161 0 : uint8_t end_pos = 0;
3162 0 : for (i = 5; i < midx; ++i) {
3163 0 : if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
3164 0 : uint8_t tmp = mode_map[i];
3165 0 : mode_map[i] = mode_map[i - 1];
3166 0 : mode_map[i - 1] = tmp;
3167 0 : end_pos = i;
3168 : }
3169 : }
3170 0 : midx = end_pos;
3171 : }
3172 :
3173 0 : for (midx = 0; midx < MAX_MODES; ++midx) {
3174 0 : int mode_index = mode_map[midx];
3175 0 : int mode_excluded = 0;
3176 0 : int64_t this_rd = INT64_MAX;
3177 0 : int disable_skip = 0;
3178 0 : int compmode_cost = 0;
3179 0 : int rate2 = 0, rate_y = 0, rate_uv = 0;
3180 0 : int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3181 0 : int skippable = 0;
3182 0 : int this_skip2 = 0;
3183 0 : int64_t total_sse = INT64_MAX;
3184 0 : int early_term = 0;
3185 :
3186 0 : this_mode = vp9_mode_order[mode_index].mode;
3187 0 : ref_frame = vp9_mode_order[mode_index].ref_frame[0];
3188 0 : second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
3189 :
3190 : // Look at the reference frame of the best mode so far and set the
3191 : // skip mask to look at a subset of the remaining modes.
3192 0 : if (midx == mode_skip_start && best_mode_index >= 0) {
3193 0 : switch (best_mbmode.ref_frame[0]) {
3194 0 : case INTRA_FRAME: break;
3195 : case LAST_FRAME:
3196 0 : ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
3197 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3198 0 : break;
3199 : case GOLDEN_FRAME:
3200 0 : ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
3201 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3202 0 : break;
3203 0 : case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; break;
3204 : case NONE:
3205 0 : case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break;
3206 : }
3207 : }
3208 :
3209 0 : if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
3210 0 : (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
3211 0 : continue;
3212 :
3213 0 : if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
3214 :
3215 : // Test best rd so far against threshold for trying this mode.
3216 0 : if (best_mode_skippable && sf->schedule_mode_search)
3217 0 : mode_threshold[mode_index] <<= 1;
3218 :
3219 0 : if (best_rd < mode_threshold[mode_index]) continue;
3220 :
3221 0 : if (sf->motion_field_mode_search) {
3222 0 : const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize],
3223 : tile_info->mi_col_end - mi_col);
3224 0 : const int mi_height = VPXMIN(num_8x8_blocks_high_lookup[bsize],
3225 : tile_info->mi_row_end - mi_row);
3226 0 : const int bsl = mi_width_log2_lookup[bsize];
3227 0 : int cb_partition_search_ctrl =
3228 0 : (((mi_row + mi_col) >> bsl) +
3229 0 : get_chessboard_index(cm->current_video_frame)) &
3230 : 0x1;
3231 : MODE_INFO *ref_mi;
3232 0 : int const_motion = 1;
3233 0 : int skip_ref_frame = !cb_partition_search_ctrl;
3234 0 : MV_REFERENCE_FRAME rf = NONE;
3235 : int_mv ref_mv;
3236 0 : ref_mv.as_int = INVALID_MV;
3237 :
3238 0 : if ((mi_row - 1) >= tile_info->mi_row_start) {
3239 0 : ref_mv = xd->mi[-xd->mi_stride]->mv[0];
3240 0 : rf = xd->mi[-xd->mi_stride]->ref_frame[0];
3241 0 : for (i = 0; i < mi_width; ++i) {
3242 0 : ref_mi = xd->mi[-xd->mi_stride + i];
3243 0 : const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
3244 0 : (ref_frame == ref_mi->ref_frame[0]);
3245 0 : skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
3246 : }
3247 : }
3248 :
3249 0 : if ((mi_col - 1) >= tile_info->mi_col_start) {
3250 0 : if (ref_mv.as_int == INVALID_MV) ref_mv = xd->mi[-1]->mv[0];
3251 0 : if (rf == NONE) rf = xd->mi[-1]->ref_frame[0];
3252 0 : for (i = 0; i < mi_height; ++i) {
3253 0 : ref_mi = xd->mi[i * xd->mi_stride - 1];
3254 0 : const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
3255 0 : (ref_frame == ref_mi->ref_frame[0]);
3256 0 : skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
3257 : }
3258 : }
3259 :
3260 0 : if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV)
3261 0 : if (rf > INTRA_FRAME)
3262 0 : if (ref_frame != rf) continue;
3263 :
3264 0 : if (const_motion)
3265 0 : if (this_mode == NEARMV || this_mode == ZEROMV) continue;
3266 : }
3267 :
3268 0 : comp_pred = second_ref_frame > INTRA_FRAME;
3269 0 : if (comp_pred) {
3270 0 : if (!cpi->allow_comp_inter_inter) continue;
3271 :
3272 : // Skip compound inter modes if ARF is not available.
3273 0 : if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
3274 :
3275 : // Do not allow compound prediction if the segment level reference frame
3276 : // feature is in use as in this case there can only be one reference.
3277 0 : if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
3278 :
3279 0 : if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3280 0 : best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
3281 0 : continue;
3282 :
3283 0 : mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3284 : } else {
3285 0 : if (ref_frame != INTRA_FRAME)
3286 0 : mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3287 : }
3288 :
3289 0 : if (ref_frame == INTRA_FRAME) {
3290 0 : if (sf->adaptive_mode_search)
3291 0 : if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
3292 0 : continue;
3293 :
3294 0 : if (this_mode != DC_PRED) {
3295 : // Disable intra modes other than DC_PRED for blocks with low variance
3296 : // Threshold for intra skipping based on source variance
3297 : // TODO(debargha): Specialize the threshold for super block sizes
3298 0 : const unsigned int skip_intra_var_thresh = 64;
3299 0 : if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3300 0 : x->source_variance < skip_intra_var_thresh)
3301 0 : continue;
3302 : // Only search the oblique modes if the best so far is
3303 : // one of the neighboring directional modes
3304 0 : if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3305 0 : (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3306 0 : if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
3307 0 : continue;
3308 : }
3309 0 : if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3310 0 : if (conditional_skipintra(this_mode, best_intra_mode)) continue;
3311 : }
3312 : }
3313 : } else {
3314 0 : const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
3315 0 : if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, this_mode,
3316 : ref_frames))
3317 0 : continue;
3318 : }
3319 :
3320 0 : mi->mode = this_mode;
3321 0 : mi->uv_mode = DC_PRED;
3322 0 : mi->ref_frame[0] = ref_frame;
3323 0 : mi->ref_frame[1] = second_ref_frame;
3324 : // Evaluate all sub-pel filters irrespective of whether we can use
3325 : // them for this frame.
3326 0 : mi->interp_filter =
3327 0 : cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
3328 0 : mi->mv[0].as_int = mi->mv[1].as_int = 0;
3329 :
3330 0 : x->skip = 0;
3331 0 : set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3332 :
3333 : // Select prediction reference frames.
3334 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
3335 0 : xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3336 0 : if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3337 : }
3338 :
3339 0 : if (ref_frame == INTRA_FRAME) {
3340 : TX_SIZE uv_tx;
3341 0 : struct macroblockd_plane *const pd = &xd->plane[1];
3342 0 : memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
3343 0 : super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize,
3344 : best_rd);
3345 0 : if (rate_y == INT_MAX) continue;
3346 :
3347 0 : uv_tx = uv_txsize_lookup[bsize][mi->tx_size][pd->subsampling_x]
3348 0 : [pd->subsampling_y];
3349 0 : if (rate_uv_intra[uv_tx] == INT_MAX) {
3350 0 : choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
3351 0 : &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
3352 0 : &skip_uv[uv_tx], &mode_uv[uv_tx]);
3353 : }
3354 :
3355 0 : rate_uv = rate_uv_tokenonly[uv_tx];
3356 0 : distortion_uv = dist_uv[uv_tx];
3357 0 : skippable = skippable && skip_uv[uv_tx];
3358 0 : mi->uv_mode = mode_uv[uv_tx];
3359 :
3360 0 : rate2 = rate_y + cpi->mbmode_cost[mi->mode] + rate_uv_intra[uv_tx];
3361 0 : if (this_mode != DC_PRED && this_mode != TM_PRED)
3362 0 : rate2 += intra_cost_penalty;
3363 0 : distortion2 = distortion_y + distortion_uv;
3364 : } else {
3365 0 : this_rd = handle_inter_mode(
3366 : cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv,
3367 : &disable_skip, frame_mv, mi_row, mi_col, single_newmv,
3368 : single_inter_filter, single_skippable, &total_sse, best_rd,
3369 : &mask_filter, filter_cache);
3370 0 : if (this_rd == INT64_MAX) continue;
3371 :
3372 0 : compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3373 :
3374 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
3375 : }
3376 :
3377 : // Estimate the reference frame signaling cost and add it
3378 : // to the rolling cost variable.
3379 0 : if (comp_pred) {
3380 0 : rate2 += ref_costs_comp[ref_frame];
3381 : } else {
3382 0 : rate2 += ref_costs_single[ref_frame];
3383 : }
3384 :
3385 0 : if (!disable_skip) {
3386 0 : const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
3387 0 : const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
3388 0 : const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
3389 :
3390 0 : if (skippable) {
3391 : // Back out the coefficient coding costs
3392 0 : rate2 -= (rate_y + rate_uv);
3393 :
3394 : // Cost the skip mb case
3395 0 : rate2 += skip_cost1;
3396 0 : } else if (ref_frame != INTRA_FRAME && !xd->lossless) {
3397 0 : if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0,
3398 : distortion2) <
3399 0 : RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
3400 : // Add in the cost of the no skip flag.
3401 0 : rate2 += skip_cost0;
3402 : } else {
3403 : // FIXME(rbultje) make this work for splitmv also
3404 0 : assert(total_sse >= 0);
3405 :
3406 0 : rate2 += skip_cost1;
3407 0 : distortion2 = total_sse;
3408 0 : rate2 -= (rate_y + rate_uv);
3409 0 : this_skip2 = 1;
3410 : }
3411 : } else {
3412 : // Add in the cost of the no skip flag.
3413 0 : rate2 += skip_cost0;
3414 : }
3415 :
3416 : // Calculate the final RD estimate for this mode.
3417 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3418 : }
3419 :
3420 : // Apply an adjustment to the rd value based on the similarity of the
3421 : // source variance and reconstructed variance.
3422 0 : rd_variance_adjustment(cpi, x, bsize, &this_rd, ref_frame,
3423 : x->source_variance);
3424 :
3425 0 : if (ref_frame == INTRA_FRAME) {
3426 : // Keep record of best intra rd
3427 0 : if (this_rd < best_intra_rd) {
3428 0 : best_intra_rd = this_rd;
3429 0 : best_intra_mode = mi->mode;
3430 : }
3431 : }
3432 :
3433 0 : if (!disable_skip && ref_frame == INTRA_FRAME) {
3434 0 : for (i = 0; i < REFERENCE_MODES; ++i)
3435 0 : best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
3436 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3437 0 : best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
3438 : }
3439 :
3440 : // Did this mode help.. i.e. is it the new best mode
3441 0 : if (this_rd < best_rd || x->skip) {
3442 0 : int max_plane = MAX_MB_PLANE;
3443 0 : if (!mode_excluded) {
3444 : // Note index of best mode so far
3445 0 : best_mode_index = mode_index;
3446 :
3447 0 : if (ref_frame == INTRA_FRAME) {
3448 : /* required for left and above block mv */
3449 0 : mi->mv[0].as_int = 0;
3450 0 : max_plane = 1;
3451 : // Initialize interp_filter here so we do not have to check for
3452 : // inter block modes in get_pred_context_switchable_interp()
3453 0 : mi->interp_filter = SWITCHABLE_FILTERS;
3454 : } else {
3455 0 : best_pred_sse = x->pred_sse[ref_frame];
3456 : }
3457 :
3458 0 : rd_cost->rate = rate2;
3459 0 : rd_cost->dist = distortion2;
3460 0 : rd_cost->rdcost = this_rd;
3461 0 : best_rd = this_rd;
3462 0 : best_mbmode = *mi;
3463 0 : best_skip2 = this_skip2;
3464 0 : best_mode_skippable = skippable;
3465 :
3466 0 : if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3467 0 : memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size],
3468 0 : sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
3469 :
3470 : // TODO(debargha): enhance this test with a better distortion prediction
3471 : // based on qp, activity mask and history
3472 0 : if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3473 : (mode_index > MIN_EARLY_TERM_INDEX)) {
3474 0 : int qstep = xd->plane[0].dequant[1];
3475 : // TODO(debargha): Enhance this by specializing for each mode_index
3476 0 : int scale = 4;
3477 : #if CONFIG_VP9_HIGHBITDEPTH
3478 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3479 : qstep >>= (xd->bd - 8);
3480 : }
3481 : #endif // CONFIG_VP9_HIGHBITDEPTH
3482 0 : if (x->source_variance < UINT_MAX) {
3483 0 : const int var_adjust = (x->source_variance < 16);
3484 0 : scale -= var_adjust;
3485 : }
3486 0 : if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
3487 0 : early_term = 1;
3488 : }
3489 : }
3490 : }
3491 : }
3492 :
3493 : /* keep record of best compound/single-only prediction */
3494 0 : if (!disable_skip && ref_frame != INTRA_FRAME) {
3495 : int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3496 :
3497 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3498 0 : single_rate = rate2 - compmode_cost;
3499 0 : hybrid_rate = rate2;
3500 : } else {
3501 0 : single_rate = rate2;
3502 0 : hybrid_rate = rate2 + compmode_cost;
3503 : }
3504 :
3505 0 : single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3506 0 : hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3507 :
3508 0 : if (!comp_pred) {
3509 0 : if (single_rd < best_pred_rd[SINGLE_REFERENCE])
3510 0 : best_pred_rd[SINGLE_REFERENCE] = single_rd;
3511 : } else {
3512 0 : if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
3513 0 : best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3514 : }
3515 0 : if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3516 0 : best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3517 :
3518 : /* keep record of best filter type */
3519 0 : if (!mode_excluded && cm->interp_filter != BILINEAR) {
3520 0 : int64_t ref =
3521 0 : filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS
3522 0 : : cm->interp_filter];
3523 :
3524 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3525 : int64_t adj_rd;
3526 0 : if (ref == INT64_MAX)
3527 0 : adj_rd = 0;
3528 0 : else if (filter_cache[i] == INT64_MAX)
3529 : // when early termination is triggered, the encoder does not have
3530 : // access to the rate-distortion cost. it only knows that the cost
3531 : // should be above the maximum valid value. hence it takes the known
3532 : // maximum plus an arbitrary constant as the rate-distortion cost.
3533 0 : adj_rd = mask_filter - ref + 10;
3534 : else
3535 0 : adj_rd = filter_cache[i] - ref;
3536 :
3537 0 : adj_rd += this_rd;
3538 0 : best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
3539 : }
3540 : }
3541 : }
3542 :
3543 0 : if (early_term) break;
3544 :
3545 0 : if (x->skip && !comp_pred) break;
3546 : }
3547 :
3548 : // The inter modes' rate costs are not calculated precisely in some cases.
3549 : // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
3550 : // ZEROMV. Here, checks are added for those cases, and the mode decisions
3551 : // are corrected.
3552 0 : if (best_mbmode.mode == NEWMV) {
3553 0 : const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
3554 0 : best_mbmode.ref_frame[1] };
3555 0 : int comp_pred_mode = refs[1] > INTRA_FRAME;
3556 :
3557 0 : if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3558 0 : ((comp_pred_mode &&
3559 0 : frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
3560 : !comp_pred_mode))
3561 0 : best_mbmode.mode = NEARESTMV;
3562 0 : else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3563 0 : ((comp_pred_mode &&
3564 0 : frame_mv[NEARMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
3565 : !comp_pred_mode))
3566 0 : best_mbmode.mode = NEARMV;
3567 0 : else if (best_mbmode.mv[0].as_int == 0 &&
3568 0 : ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) ||
3569 : !comp_pred_mode))
3570 0 : best_mbmode.mode = ZEROMV;
3571 : }
3572 :
3573 0 : if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
3574 0 : rd_cost->rate = INT_MAX;
3575 0 : rd_cost->rdcost = INT64_MAX;
3576 0 : return;
3577 : }
3578 :
3579 : // If we used an estimate for the uv intra rd in the loop above...
3580 0 : if (sf->use_uv_intra_rd_estimate) {
3581 : // Do Intra UV best rd mode selection if best mode choice above was intra.
3582 0 : if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3583 : TX_SIZE uv_tx_size;
3584 0 : *mi = best_mbmode;
3585 0 : uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
3586 0 : rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3587 0 : &rate_uv_tokenonly[uv_tx_size],
3588 0 : &dist_uv[uv_tx_size], &skip_uv[uv_tx_size],
3589 : bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3590 : uv_tx_size);
3591 : }
3592 : }
3593 :
3594 0 : assert((cm->interp_filter == SWITCHABLE) ||
3595 : (cm->interp_filter == best_mbmode.interp_filter) ||
3596 : !is_inter_block(&best_mbmode));
3597 :
3598 0 : if (!cpi->rc.is_src_frame_alt_ref)
3599 0 : vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
3600 : sf->adaptive_rd_thresh, bsize, best_mode_index);
3601 :
3602 : // macroblock modes
3603 0 : *mi = best_mbmode;
3604 0 : x->skip |= best_skip2;
3605 :
3606 0 : for (i = 0; i < REFERENCE_MODES; ++i) {
3607 0 : if (best_pred_rd[i] == INT64_MAX)
3608 0 : best_pred_diff[i] = INT_MIN;
3609 : else
3610 0 : best_pred_diff[i] = best_rd - best_pred_rd[i];
3611 : }
3612 :
3613 0 : if (!x->skip) {
3614 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3615 0 : if (best_filter_rd[i] == INT64_MAX)
3616 0 : best_filter_diff[i] = 0;
3617 : else
3618 0 : best_filter_diff[i] = best_rd - best_filter_rd[i];
3619 : }
3620 0 : if (cm->interp_filter == SWITCHABLE)
3621 0 : assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3622 : } else {
3623 0 : vp9_zero(best_filter_diff);
3624 : }
3625 :
3626 : // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
3627 : // updating code causes PSNR loss. Need to figure out the confliction.
3628 0 : x->skip |= best_mode_skippable;
3629 :
3630 0 : if (!x->skip && !x->select_tx_size) {
3631 0 : int has_high_freq_coeff = 0;
3632 : int plane;
3633 0 : int max_plane = is_inter_block(xd->mi[0]) ? MAX_MB_PLANE : 1;
3634 0 : for (plane = 0; plane < max_plane; ++plane) {
3635 0 : x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
3636 0 : has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
3637 : }
3638 :
3639 0 : for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
3640 0 : x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
3641 0 : has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
3642 : }
3643 :
3644 0 : best_mode_skippable |= !has_high_freq_coeff;
3645 : }
3646 :
3647 0 : assert(best_mode_index >= 0);
3648 :
3649 0 : store_coding_context(x, ctx, best_mode_index, best_pred_diff,
3650 : best_filter_diff, best_mode_skippable);
3651 : }
3652 :
3653 0 : void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data,
3654 : MACROBLOCK *x, RD_COST *rd_cost,
3655 : BLOCK_SIZE bsize,
3656 : PICK_MODE_CONTEXT *ctx,
3657 : int64_t best_rd_so_far) {
3658 0 : VP9_COMMON *const cm = &cpi->common;
3659 0 : MACROBLOCKD *const xd = &x->e_mbd;
3660 0 : MODE_INFO *const mi = xd->mi[0];
3661 0 : unsigned char segment_id = mi->segment_id;
3662 0 : const int comp_pred = 0;
3663 : int i;
3664 : int64_t best_pred_diff[REFERENCE_MODES];
3665 : int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3666 : unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3667 : vpx_prob comp_mode_p;
3668 0 : INTERP_FILTER best_filter = SWITCHABLE;
3669 0 : int64_t this_rd = INT64_MAX;
3670 0 : int rate2 = 0;
3671 0 : const int64_t distortion2 = 0;
3672 :
3673 0 : x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3674 :
3675 0 : estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3676 : &comp_mode_p);
3677 :
3678 0 : for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
3679 0 : for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;
3680 :
3681 0 : rd_cost->rate = INT_MAX;
3682 :
3683 0 : assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
3684 :
3685 0 : mi->mode = ZEROMV;
3686 0 : mi->uv_mode = DC_PRED;
3687 0 : mi->ref_frame[0] = LAST_FRAME;
3688 0 : mi->ref_frame[1] = NONE;
3689 0 : mi->mv[0].as_int = 0;
3690 0 : x->skip = 1;
3691 :
3692 0 : if (cm->interp_filter != BILINEAR) {
3693 0 : best_filter = EIGHTTAP;
3694 0 : if (cm->interp_filter == SWITCHABLE &&
3695 0 : x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
3696 : int rs;
3697 0 : int best_rs = INT_MAX;
3698 0 : for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
3699 0 : mi->interp_filter = i;
3700 0 : rs = vp9_get_switchable_rate(cpi, xd);
3701 0 : if (rs < best_rs) {
3702 0 : best_rs = rs;
3703 0 : best_filter = mi->interp_filter;
3704 : }
3705 : }
3706 : }
3707 : }
3708 : // Set the appropriate filter
3709 0 : if (cm->interp_filter == SWITCHABLE) {
3710 0 : mi->interp_filter = best_filter;
3711 0 : rate2 += vp9_get_switchable_rate(cpi, xd);
3712 : } else {
3713 0 : mi->interp_filter = cm->interp_filter;
3714 : }
3715 :
3716 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT)
3717 0 : rate2 += vp9_cost_bit(comp_mode_p, comp_pred);
3718 :
3719 : // Estimate the reference frame signaling cost and add it
3720 : // to the rolling cost variable.
3721 0 : rate2 += ref_costs_single[LAST_FRAME];
3722 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3723 :
3724 0 : rd_cost->rate = rate2;
3725 0 : rd_cost->dist = distortion2;
3726 0 : rd_cost->rdcost = this_rd;
3727 :
3728 0 : if (this_rd >= best_rd_so_far) {
3729 0 : rd_cost->rate = INT_MAX;
3730 0 : rd_cost->rdcost = INT64_MAX;
3731 0 : return;
3732 : }
3733 :
3734 0 : assert((cm->interp_filter == SWITCHABLE) ||
3735 : (cm->interp_filter == mi->interp_filter));
3736 :
3737 0 : vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
3738 : cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
3739 :
3740 0 : vp9_zero(best_pred_diff);
3741 0 : vp9_zero(best_filter_diff);
3742 :
3743 0 : if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
3744 0 : store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, best_filter_diff, 0);
3745 : }
3746 :
3747 0 : void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
3748 : MACROBLOCK *x, int mi_row, int mi_col,
3749 : RD_COST *rd_cost, BLOCK_SIZE bsize,
3750 : PICK_MODE_CONTEXT *ctx,
3751 : int64_t best_rd_so_far) {
3752 0 : VP9_COMMON *const cm = &cpi->common;
3753 0 : RD_OPT *const rd_opt = &cpi->rd;
3754 0 : SPEED_FEATURES *const sf = &cpi->sf;
3755 0 : MACROBLOCKD *const xd = &x->e_mbd;
3756 0 : MODE_INFO *const mi = xd->mi[0];
3757 0 : const struct segmentation *const seg = &cm->seg;
3758 : MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3759 0 : unsigned char segment_id = mi->segment_id;
3760 : int comp_pred, i;
3761 : int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3762 : struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3763 : static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3764 : VP9_ALT_FLAG };
3765 0 : int64_t best_rd = best_rd_so_far;
3766 0 : int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3767 : int64_t best_pred_diff[REFERENCE_MODES];
3768 : int64_t best_pred_rd[REFERENCE_MODES];
3769 : int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3770 : int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3771 : MODE_INFO best_mbmode;
3772 0 : int ref_index, best_ref_index = 0;
3773 : unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3774 : vpx_prob comp_mode_p;
3775 0 : INTERP_FILTER tmp_best_filter = SWITCHABLE;
3776 : int rate_uv_intra, rate_uv_tokenonly;
3777 : int64_t dist_uv;
3778 : int skip_uv;
3779 0 : PREDICTION_MODE mode_uv = DC_PRED;
3780 0 : const int intra_cost_penalty = vp9_get_intra_cost_penalty(
3781 : cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3782 : int_mv seg_mvs[4][MAX_REF_FRAMES];
3783 : b_mode_info best_bmodes[4];
3784 0 : int best_skip2 = 0;
3785 0 : int ref_frame_skip_mask[2] = { 0 };
3786 0 : int64_t mask_filter = 0;
3787 : int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
3788 0 : int internal_active_edge =
3789 0 : vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi);
3790 :
3791 0 : x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3792 0 : memset(x->zcoeff_blk[TX_4X4], 0, 4);
3793 0 : vp9_zero(best_mbmode);
3794 :
3795 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;
3796 :
3797 0 : for (i = 0; i < 4; i++) {
3798 : int j;
3799 0 : for (j = 0; j < MAX_REF_FRAMES; j++) seg_mvs[i][j].as_int = INVALID_MV;
3800 : }
3801 :
3802 0 : estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3803 : &comp_mode_p);
3804 :
3805 0 : for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
3806 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3807 0 : best_filter_rd[i] = INT64_MAX;
3808 0 : rate_uv_intra = INT_MAX;
3809 :
3810 0 : rd_cost->rate = INT_MAX;
3811 :
3812 0 : for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3813 0 : if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3814 0 : setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
3815 : frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3816 : } else {
3817 0 : ref_frame_skip_mask[0] |= (1 << ref_frame);
3818 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3819 : }
3820 0 : frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3821 0 : frame_mv[ZEROMV][ref_frame].as_int = 0;
3822 : }
3823 :
3824 0 : for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
3825 0 : int mode_excluded = 0;
3826 0 : int64_t this_rd = INT64_MAX;
3827 0 : int disable_skip = 0;
3828 0 : int compmode_cost = 0;
3829 0 : int rate2 = 0, rate_y = 0, rate_uv = 0;
3830 0 : int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3831 0 : int skippable = 0;
3832 : int i;
3833 0 : int this_skip2 = 0;
3834 0 : int64_t total_sse = INT_MAX;
3835 0 : int early_term = 0;
3836 : struct buf_2d backup_yv12[2][MAX_MB_PLANE];
3837 :
3838 0 : ref_frame = vp9_ref_order[ref_index].ref_frame[0];
3839 0 : second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
3840 :
3841 : #if CONFIG_BETTER_HW_COMPATIBILITY
3842 : // forbid 8X4 and 4X8 partitions if any reference frame is scaled.
3843 : if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) {
3844 : int ref_scaled = vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf);
3845 : if (second_ref_frame > INTRA_FRAME)
3846 : ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf);
3847 : if (ref_scaled) continue;
3848 : }
3849 : #endif
3850 : // Look at the reference frame of the best mode so far and set the
3851 : // skip mask to look at a subset of the remaining modes.
3852 0 : if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
3853 0 : if (ref_index == 3) {
3854 0 : switch (best_mbmode.ref_frame[0]) {
3855 0 : case INTRA_FRAME: break;
3856 : case LAST_FRAME:
3857 0 : ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
3858 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3859 0 : break;
3860 : case GOLDEN_FRAME:
3861 0 : ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
3862 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3863 0 : break;
3864 : case ALTREF_FRAME:
3865 0 : ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
3866 0 : break;
3867 : case NONE:
3868 0 : case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break;
3869 : }
3870 : }
3871 : }
3872 :
3873 0 : if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
3874 0 : (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
3875 0 : continue;
3876 :
3877 : // Test best rd so far against threshold for trying this mode.
3878 0 : if (!internal_active_edge &&
3879 0 : rd_less_than_thresh(best_rd,
3880 : rd_opt->threshes[segment_id][bsize][ref_index],
3881 : tile_data->thresh_freq_fact[bsize][ref_index]))
3882 0 : continue;
3883 :
3884 0 : comp_pred = second_ref_frame > INTRA_FRAME;
3885 0 : if (comp_pred) {
3886 0 : if (!cpi->allow_comp_inter_inter) continue;
3887 0 : if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
3888 : // Do not allow compound prediction if the segment level reference frame
3889 : // feature is in use as in this case there can only be one reference.
3890 0 : if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
3891 :
3892 0 : if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3893 0 : best_mbmode.ref_frame[0] == INTRA_FRAME)
3894 0 : continue;
3895 : }
3896 :
3897 0 : if (comp_pred)
3898 0 : mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3899 0 : else if (ref_frame != INTRA_FRAME)
3900 0 : mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3901 :
3902 : // If the segment reference frame feature is enabled....
3903 : // then do nothing if the current ref frame is not allowed..
3904 0 : if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3905 0 : get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3906 0 : continue;
3907 : // Disable this drop out case if the ref frame
3908 : // segment level feature is enabled for this segment. This is to
3909 : // prevent the possibility that we end up unable to pick any mode.
3910 0 : } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3911 : // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3912 : // unless ARNR filtering is enabled in which case we want
3913 : // an unfiltered alternative. We allow near/nearest as well
3914 : // because they may result in zero-zero MVs but be cheaper.
3915 0 : if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3916 0 : continue;
3917 : }
3918 :
3919 0 : mi->tx_size = TX_4X4;
3920 0 : mi->uv_mode = DC_PRED;
3921 0 : mi->ref_frame[0] = ref_frame;
3922 0 : mi->ref_frame[1] = second_ref_frame;
3923 : // Evaluate all sub-pel filters irrespective of whether we can use
3924 : // them for this frame.
3925 0 : mi->interp_filter =
3926 0 : cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
3927 0 : x->skip = 0;
3928 0 : set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3929 :
3930 : // Select prediction reference frames.
3931 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
3932 0 : xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3933 0 : if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3934 : }
3935 :
3936 0 : if (ref_frame == INTRA_FRAME) {
3937 : int rate;
3938 0 : if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y,
3939 : best_rd) >= best_rd)
3940 0 : continue;
3941 0 : rate2 += rate;
3942 0 : rate2 += intra_cost_penalty;
3943 0 : distortion2 += distortion_y;
3944 :
3945 0 : if (rate_uv_intra == INT_MAX) {
3946 0 : choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra,
3947 : &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv);
3948 : }
3949 0 : rate2 += rate_uv_intra;
3950 0 : rate_uv = rate_uv_tokenonly;
3951 0 : distortion2 += dist_uv;
3952 0 : distortion_uv = dist_uv;
3953 0 : mi->uv_mode = mode_uv;
3954 : } else {
3955 : int rate;
3956 : int64_t distortion;
3957 : int64_t this_rd_thresh;
3958 0 : int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3959 0 : int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3960 0 : int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3961 0 : int tmp_best_skippable = 0;
3962 : int switchable_filter_index;
3963 0 : int_mv *second_ref =
3964 0 : comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
3965 : b_mode_info tmp_best_bmodes[16];
3966 : MODE_INFO tmp_best_mbmode;
3967 : BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3968 0 : int pred_exists = 0;
3969 : int uv_skippable;
3970 :
3971 0 : YV12_BUFFER_CONFIG *scaled_ref_frame[2] = { NULL, NULL };
3972 : int ref;
3973 :
3974 0 : for (ref = 0; ref < 2; ++ref) {
3975 0 : scaled_ref_frame[ref] =
3976 0 : mi->ref_frame[ref] > INTRA_FRAME
3977 0 : ? vp9_get_scaled_ref_frame(cpi, mi->ref_frame[ref])
3978 0 : : NULL;
3979 :
3980 0 : if (scaled_ref_frame[ref]) {
3981 : int i;
3982 : // Swap out the reference frame for a version that's been scaled to
3983 : // match the resolution of the current frame, allowing the existing
3984 : // motion search code to be used without additional modifications.
3985 0 : for (i = 0; i < MAX_MB_PLANE; i++)
3986 0 : backup_yv12[ref][i] = xd->plane[i].pre[ref];
3987 0 : vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
3988 : NULL);
3989 : }
3990 : }
3991 :
3992 0 : this_rd_thresh = (ref_frame == LAST_FRAME)
3993 0 : ? rd_opt->threshes[segment_id][bsize][THR_LAST]
3994 0 : : rd_opt->threshes[segment_id][bsize][THR_ALTR];
3995 0 : this_rd_thresh = (ref_frame == GOLDEN_FRAME)
3996 0 : ? rd_opt->threshes[segment_id][bsize][THR_GOLD]
3997 0 : : this_rd_thresh;
3998 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3999 0 : filter_cache[i] = INT64_MAX;
4000 :
4001 0 : if (cm->interp_filter != BILINEAR) {
4002 0 : tmp_best_filter = EIGHTTAP;
4003 0 : if (x->source_variance < sf->disable_filter_search_var_thresh) {
4004 0 : tmp_best_filter = EIGHTTAP;
4005 0 : } else if (sf->adaptive_pred_interp_filter == 1 &&
4006 0 : ctx->pred_interp_filter < SWITCHABLE) {
4007 0 : tmp_best_filter = ctx->pred_interp_filter;
4008 0 : } else if (sf->adaptive_pred_interp_filter == 2) {
4009 0 : tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE
4010 : ? ctx->pred_interp_filter
4011 : : 0;
4012 : } else {
4013 0 : for (switchable_filter_index = 0;
4014 : switchable_filter_index < SWITCHABLE_FILTERS;
4015 0 : ++switchable_filter_index) {
4016 : int newbest, rs;
4017 : int64_t rs_rd;
4018 0 : MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
4019 0 : mi->interp_filter = switchable_filter_index;
4020 0 : tmp_rd = rd_pick_best_sub8x8_mode(
4021 0 : cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
4022 : &rate, &rate_y, &distortion, &skippable, &total_sse,
4023 : (int)this_rd_thresh, seg_mvs, bsi, switchable_filter_index,
4024 : mi_row, mi_col);
4025 :
4026 0 : if (tmp_rd == INT64_MAX) continue;
4027 0 : rs = vp9_get_switchable_rate(cpi, xd);
4028 0 : rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4029 0 : filter_cache[switchable_filter_index] = tmp_rd;
4030 0 : filter_cache[SWITCHABLE_FILTERS] =
4031 0 : VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
4032 0 : if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd;
4033 :
4034 0 : mask_filter = VPXMAX(mask_filter, tmp_rd);
4035 :
4036 0 : newbest = (tmp_rd < tmp_best_rd);
4037 0 : if (newbest) {
4038 0 : tmp_best_filter = mi->interp_filter;
4039 0 : tmp_best_rd = tmp_rd;
4040 : }
4041 0 : if ((newbest && cm->interp_filter == SWITCHABLE) ||
4042 0 : (mi->interp_filter == cm->interp_filter &&
4043 0 : cm->interp_filter != SWITCHABLE)) {
4044 0 : tmp_best_rdu = tmp_rd;
4045 0 : tmp_best_rate = rate;
4046 0 : tmp_best_ratey = rate_y;
4047 0 : tmp_best_distortion = distortion;
4048 0 : tmp_best_sse = total_sse;
4049 0 : tmp_best_skippable = skippable;
4050 0 : tmp_best_mbmode = *mi;
4051 0 : for (i = 0; i < 4; i++) {
4052 0 : tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
4053 0 : x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4054 : }
4055 0 : pred_exists = 1;
4056 0 : if (switchable_filter_index == 0 && sf->use_rd_breakout &&
4057 : best_rd < INT64_MAX) {
4058 0 : if (tmp_best_rdu / 2 > best_rd) {
4059 : // skip searching the other filters if the first is
4060 : // already substantially larger than the best so far
4061 0 : tmp_best_filter = mi->interp_filter;
4062 0 : tmp_best_rdu = INT64_MAX;
4063 0 : break;
4064 : }
4065 : }
4066 : }
4067 : } // switchable_filter_index loop
4068 : }
4069 : }
4070 :
4071 0 : if (tmp_best_rdu == INT64_MAX && pred_exists) continue;
4072 :
4073 0 : mi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter
4074 : : cm->interp_filter);
4075 0 : if (!pred_exists) {
4076 : // Handles the special case when a filter that is not in the
4077 : // switchable list (bilinear, 6-tap) is indicated at the frame level
4078 0 : tmp_rd = rd_pick_best_sub8x8_mode(
4079 0 : cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
4080 : &rate, &rate_y, &distortion, &skippable, &total_sse,
4081 : (int)this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col);
4082 0 : if (tmp_rd == INT64_MAX) continue;
4083 : } else {
4084 0 : total_sse = tmp_best_sse;
4085 0 : rate = tmp_best_rate;
4086 0 : rate_y = tmp_best_ratey;
4087 0 : distortion = tmp_best_distortion;
4088 0 : skippable = tmp_best_skippable;
4089 0 : *mi = tmp_best_mbmode;
4090 0 : for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
4091 : }
4092 :
4093 0 : rate2 += rate;
4094 0 : distortion2 += distortion;
4095 :
4096 0 : if (cm->interp_filter == SWITCHABLE)
4097 0 : rate2 += vp9_get_switchable_rate(cpi, xd);
4098 :
4099 0 : if (!mode_excluded)
4100 0 : mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4101 0 : : cm->reference_mode == COMPOUND_REFERENCE;
4102 :
4103 0 : compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4104 :
4105 0 : tmp_best_rdu =
4106 0 : best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4107 : RDCOST(x->rdmult, x->rddiv, 0, total_sse));
4108 :
4109 0 : if (tmp_best_rdu > 0) {
4110 : // If even the 'Y' rd value of split is higher than best so far
4111 : // then don't bother looking at UV
4112 0 : vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8);
4113 0 : memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
4114 0 : if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4115 : &uv_sse, BLOCK_8X8, tmp_best_rdu)) {
4116 0 : for (ref = 0; ref < 2; ++ref) {
4117 0 : if (scaled_ref_frame[ref]) {
4118 : int i;
4119 0 : for (i = 0; i < MAX_MB_PLANE; ++i)
4120 0 : xd->plane[i].pre[ref] = backup_yv12[ref][i];
4121 : }
4122 : }
4123 0 : continue;
4124 : }
4125 :
4126 0 : rate2 += rate_uv;
4127 0 : distortion2 += distortion_uv;
4128 0 : skippable = skippable && uv_skippable;
4129 0 : total_sse += uv_sse;
4130 : }
4131 :
4132 0 : for (ref = 0; ref < 2; ++ref) {
4133 0 : if (scaled_ref_frame[ref]) {
4134 : // Restore the prediction frame pointers to their unscaled versions.
4135 : int i;
4136 0 : for (i = 0; i < MAX_MB_PLANE; ++i)
4137 0 : xd->plane[i].pre[ref] = backup_yv12[ref][i];
4138 : }
4139 : }
4140 : }
4141 :
4142 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
4143 :
4144 : // Estimate the reference frame signaling cost and add it
4145 : // to the rolling cost variable.
4146 0 : if (second_ref_frame > INTRA_FRAME) {
4147 0 : rate2 += ref_costs_comp[ref_frame];
4148 : } else {
4149 0 : rate2 += ref_costs_single[ref_frame];
4150 : }
4151 :
4152 0 : if (!disable_skip) {
4153 0 : const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
4154 0 : const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
4155 0 : const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
4156 :
4157 : // Skip is never coded at the segment level for sub8x8 blocks and instead
4158 : // always coded in the bitstream at the mode info level.
4159 0 : if (ref_frame != INTRA_FRAME && !xd->lossless) {
4160 0 : if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0,
4161 : distortion2) <
4162 0 : RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
4163 : // Add in the cost of the no skip flag.
4164 0 : rate2 += skip_cost0;
4165 : } else {
4166 : // FIXME(rbultje) make this work for splitmv also
4167 0 : rate2 += skip_cost1;
4168 0 : distortion2 = total_sse;
4169 0 : assert(total_sse >= 0);
4170 0 : rate2 -= (rate_y + rate_uv);
4171 0 : rate_y = 0;
4172 0 : rate_uv = 0;
4173 0 : this_skip2 = 1;
4174 : }
4175 : } else {
4176 : // Add in the cost of the no skip flag.
4177 0 : rate2 += skip_cost0;
4178 : }
4179 :
4180 : // Calculate the final RD estimate for this mode.
4181 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4182 : }
4183 :
4184 0 : if (!disable_skip && ref_frame == INTRA_FRAME) {
4185 0 : for (i = 0; i < REFERENCE_MODES; ++i)
4186 0 : best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
4187 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4188 0 : best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
4189 : }
4190 :
4191 : // Did this mode help, i.e. is it the new best mode?
4192 0 : if (this_rd < best_rd || x->skip) {
4193 0 : if (!mode_excluded) {
4194 0 : int max_plane = MAX_MB_PLANE;
4195 : // Note index of best mode so far
4196 0 : best_ref_index = ref_index;
4197 :
4198 0 : if (ref_frame == INTRA_FRAME) {
4199 : /* required for left and above block mv */
4200 0 : mi->mv[0].as_int = 0;
4201 0 : max_plane = 1;
4202 : // Initialize interp_filter here so we do not have to check for
4203 : // inter block modes in get_pred_context_switchable_interp()
4204 0 : mi->interp_filter = SWITCHABLE_FILTERS;
4205 : }
4206 :
4207 0 : rd_cost->rate = rate2;
4208 0 : rd_cost->dist = distortion2;
4209 0 : rd_cost->rdcost = this_rd;
4210 0 : best_rd = this_rd;
4211 0 : best_yrd =
4212 0 : best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
4213 0 : best_mbmode = *mi;
4214 0 : best_skip2 = this_skip2;
4215 0 : if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
4216 0 : memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
4217 0 : sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
4218 :
4219 0 : for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i];
4220 :
4221 : // TODO(debargha): enhance this test with a better distortion prediction
4222 : // based on qp, activity mask and history
4223 0 : if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4224 : (ref_index > MIN_EARLY_TERM_INDEX)) {
4225 0 : int qstep = xd->plane[0].dequant[1];
4226 : // TODO(debargha): Enhance this by specializing for each mode_index
4227 0 : int scale = 4;
4228 : #if CONFIG_VP9_HIGHBITDEPTH
4229 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4230 : qstep >>= (xd->bd - 8);
4231 : }
4232 : #endif // CONFIG_VP9_HIGHBITDEPTH
4233 0 : if (x->source_variance < UINT_MAX) {
4234 0 : const int var_adjust = (x->source_variance < 16);
4235 0 : scale -= var_adjust;
4236 : }
4237 0 : if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
4238 0 : early_term = 1;
4239 : }
4240 : }
4241 : }
4242 : }
4243 :
4244 : /* keep record of best compound/single-only prediction */
4245 0 : if (!disable_skip && ref_frame != INTRA_FRAME) {
4246 : int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4247 :
4248 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4249 0 : single_rate = rate2 - compmode_cost;
4250 0 : hybrid_rate = rate2;
4251 : } else {
4252 0 : single_rate = rate2;
4253 0 : hybrid_rate = rate2 + compmode_cost;
4254 : }
4255 :
4256 0 : single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4257 0 : hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4258 :
4259 0 : if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
4260 0 : best_pred_rd[SINGLE_REFERENCE] = single_rd;
4261 0 : else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
4262 0 : best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4263 :
4264 0 : if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4265 0 : best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4266 : }
4267 :
4268 : /* keep record of best filter type */
4269 0 : if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4270 0 : cm->interp_filter != BILINEAR) {
4271 0 : int64_t ref =
4272 0 : filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS
4273 0 : : cm->interp_filter];
4274 : int64_t adj_rd;
4275 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4276 0 : if (ref == INT64_MAX)
4277 0 : adj_rd = 0;
4278 0 : else if (filter_cache[i] == INT64_MAX)
4279 : // When early termination is triggered, the encoder does not have
4280 : // access to the rate-distortion cost. It only knows that the cost
4281 : // should be above the maximum valid value. Hence it takes the known
4282 : // maximum plus an arbitrary constant as the rate-distortion cost.
4283 0 : adj_rd = mask_filter - ref + 10;
4284 : else
4285 0 : adj_rd = filter_cache[i] - ref;
4286 :
4287 0 : adj_rd += this_rd;
4288 0 : best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
4289 : }
4290 : }
4291 :
4292 0 : if (early_term) break;
4293 :
4294 0 : if (x->skip && !comp_pred) break;
4295 : }
4296 :
4297 0 : if (best_rd >= best_rd_so_far) {
4298 0 : rd_cost->rate = INT_MAX;
4299 0 : rd_cost->rdcost = INT64_MAX;
4300 0 : return;
4301 : }
4302 :
4303 : // If we used an estimate for the uv intra rd in the loop above...
4304 0 : if (sf->use_uv_intra_rd_estimate) {
4305 : // Do Intra UV best rd mode selection if best mode choice above was intra.
4306 0 : if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
4307 0 : *mi = best_mbmode;
4308 0 : rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, &rate_uv_tokenonly,
4309 : &dist_uv, &skip_uv, BLOCK_8X8, TX_4X4);
4310 : }
4311 : }
4312 :
4313 0 : if (best_rd == INT64_MAX) {
4314 0 : rd_cost->rate = INT_MAX;
4315 0 : rd_cost->dist = INT64_MAX;
4316 0 : rd_cost->rdcost = INT64_MAX;
4317 0 : return;
4318 : }
4319 :
4320 0 : assert((cm->interp_filter == SWITCHABLE) ||
4321 : (cm->interp_filter == best_mbmode.interp_filter) ||
4322 : !is_inter_block(&best_mbmode));
4323 :
4324 0 : vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh,
4325 : bsize, best_ref_index);
4326 :
4327 : // macroblock modes
4328 0 : *mi = best_mbmode;
4329 0 : x->skip |= best_skip2;
4330 0 : if (!is_inter_block(&best_mbmode)) {
4331 0 : for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4332 : } else {
4333 0 : for (i = 0; i < 4; ++i)
4334 0 : memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
4335 :
4336 0 : mi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
4337 0 : mi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
4338 : }
4339 :
4340 0 : for (i = 0; i < REFERENCE_MODES; ++i) {
4341 0 : if (best_pred_rd[i] == INT64_MAX)
4342 0 : best_pred_diff[i] = INT_MIN;
4343 : else
4344 0 : best_pred_diff[i] = best_rd - best_pred_rd[i];
4345 : }
4346 :
4347 0 : if (!x->skip) {
4348 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4349 0 : if (best_filter_rd[i] == INT64_MAX)
4350 0 : best_filter_diff[i] = 0;
4351 : else
4352 0 : best_filter_diff[i] = best_rd - best_filter_rd[i];
4353 : }
4354 0 : if (cm->interp_filter == SWITCHABLE)
4355 0 : assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4356 : } else {
4357 0 : vp9_zero(best_filter_diff);
4358 : }
4359 :
4360 0 : store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_filter_diff,
4361 : 0);
4362 : }
|