Line data Source code
1 : /*
2 : * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include <limits.h>
12 : #include <math.h>
13 : #include <stdio.h>
14 :
15 : #include "./vp9_rtcd.h"
16 : #include "./vpx_dsp_rtcd.h"
17 : #include "./vpx_config.h"
18 :
19 : #include "vpx_dsp/vpx_dsp_common.h"
20 : #include "vpx_ports/mem.h"
21 : #include "vpx_ports/vpx_timer.h"
22 : #include "vpx_ports/system_state.h"
23 :
24 : #include "vp9/common/vp9_common.h"
25 : #include "vp9/common/vp9_entropy.h"
26 : #include "vp9/common/vp9_entropymode.h"
27 : #include "vp9/common/vp9_idct.h"
28 : #include "vp9/common/vp9_mvref_common.h"
29 : #include "vp9/common/vp9_pred_common.h"
30 : #include "vp9/common/vp9_quant_common.h"
31 : #include "vp9/common/vp9_reconintra.h"
32 : #include "vp9/common/vp9_reconinter.h"
33 : #include "vp9/common/vp9_seg_common.h"
34 : #include "vp9/common/vp9_tile_common.h"
35 :
36 : #include "vp9/encoder/vp9_aq_360.h"
37 : #include "vp9/encoder/vp9_aq_complexity.h"
38 : #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
39 : #include "vp9/encoder/vp9_aq_variance.h"
40 : #include "vp9/encoder/vp9_encodeframe.h"
41 : #include "vp9/encoder/vp9_encodemb.h"
42 : #include "vp9/encoder/vp9_encodemv.h"
43 : #include "vp9/encoder/vp9_ethread.h"
44 : #include "vp9/encoder/vp9_extend.h"
45 : #include "vp9/encoder/vp9_pickmode.h"
46 : #include "vp9/encoder/vp9_rd.h"
47 : #include "vp9/encoder/vp9_rdopt.h"
48 : #include "vp9/encoder/vp9_segmentation.h"
49 : #include "vp9/encoder/vp9_tokenize.h"
50 :
51 : static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
52 : int output_enabled, int mi_row, int mi_col,
53 : BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
54 :
// Constant row of 128s used as the "reference" operand when a variance
// function is asked for the variance of the source alone (it is passed with a
// stride of 0), for the purpose of activity masking.
// A dedicated no-reference variance routine would be faster and should
// eventually replace this table.
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
66 :
67 : #if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth counterparts of the constant reference row: 64 samples of
// 128 scaled to the sample range of the given bit depth.

// 8-bit samples stored in 16-bit containers: 128.
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};

// 10-bit samples: 128 * 4 = 512.
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512
};

// 12-bit samples: 128 * 16 = 2048.
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048
};
99 : #endif // CONFIG_VP9_HIGHBITDEPTH
100 :
101 0 : unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
102 : const struct buf_2d *ref,
103 : BLOCK_SIZE bs) {
104 : unsigned int sse;
105 0 : const unsigned int var =
106 0 : cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse);
107 0 : return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
108 : }
109 :
#if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth variant of vp9_get_sby_perpixel_variance(): the constant
// reference row is chosen according to `bd` (10, 12, anything else is treated
// as 8) and the result is normalised with a 64-bit rounding shift.
unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  const uint16_t *flat_ref;
  unsigned int sse;  // Output slot required by the variance call; not used.
  unsigned int block_var;

  switch (bd) {
    case 10: flat_ref = VP9_HIGH_VAR_OFFS_10; break;
    case 12: flat_ref = VP9_HIGH_VAR_OFFS_12; break;
    case 8:
    default: flat_ref = VP9_HIGH_VAR_OFFS_8; break;
  }

  block_var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                                 CONVERT_TO_BYTEPTR(flat_ref), 0, &sse);
  return ROUND64_POWER_OF_TWO((int64_t)block_var, num_pels_log2_lookup[bs]);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
136 :
137 0 : static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
138 : const struct buf_2d *ref,
139 : int mi_row, int mi_col,
140 : BLOCK_SIZE bs) {
141 : unsigned int sse, var;
142 : uint8_t *last_y;
143 0 : const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
144 :
145 0 : assert(last != NULL);
146 0 : last_y =
147 0 : &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
148 0 : var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
149 0 : return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
150 : }
151 :
152 0 : static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
153 : int mi_row, int mi_col) {
154 0 : unsigned int var = get_sby_perpixel_diff_variance(
155 0 : cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
156 0 : if (var < 8)
157 0 : return BLOCK_64X64;
158 0 : else if (var < 128)
159 0 : return BLOCK_32X32;
160 0 : else if (var < 2048)
161 0 : return BLOCK_16X16;
162 : else
163 0 : return BLOCK_8X8;
164 : }
165 :
166 : // Lighter version of set_offsets that only sets the mode info
167 : // pointers.
168 0 : static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
169 : MACROBLOCK *const x,
170 : MACROBLOCKD *const xd, int mi_row,
171 : int mi_col) {
172 0 : const int idx_str = xd->mi_stride * mi_row + mi_col;
173 0 : xd->mi = cm->mi_grid_visible + idx_str;
174 0 : xd->mi[0] = cm->mi + idx_str;
175 0 : x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
176 0 : }
177 :
178 0 : static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
179 : MACROBLOCK *const x, int mi_row, int mi_col,
180 : BLOCK_SIZE bsize) {
181 0 : VP9_COMMON *const cm = &cpi->common;
182 0 : MACROBLOCKD *const xd = &x->e_mbd;
183 : MODE_INFO *mi;
184 0 : const int mi_width = num_8x8_blocks_wide_lookup[bsize];
185 0 : const int mi_height = num_8x8_blocks_high_lookup[bsize];
186 0 : const struct segmentation *const seg = &cm->seg;
187 0 : MvLimits *const mv_limits = &x->mv_limits;
188 :
189 0 : set_skip_context(xd, mi_row, mi_col);
190 :
191 0 : set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
192 :
193 0 : mi = xd->mi[0];
194 :
195 : // Set up destination pointers.
196 0 : vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
197 :
198 : // Set up limit values for MV components.
199 : // Mv beyond the range do not produce new/different prediction block.
200 0 : mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
201 0 : mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
202 0 : mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
203 0 : mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
204 :
205 : // Set up distance of MB to edge of frame in 1/8th pel units.
206 0 : assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
207 0 : set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
208 : cm->mi_cols);
209 :
210 : // Set up source buffers.
211 0 : vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
212 :
213 : // R/D setup.
214 0 : x->rddiv = cpi->rd.RDDIV;
215 0 : x->rdmult = cpi->rd.RDMULT;
216 :
217 : // Setup segment ID.
218 0 : if (seg->enabled) {
219 0 : if (cpi->oxcf.aq_mode != VARIANCE_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ &&
220 0 : cpi->oxcf.aq_mode != EQUATOR360_AQ) {
221 0 : const uint8_t *const map =
222 0 : seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
223 0 : mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
224 : }
225 0 : vp9_init_plane_quantizers(cpi, x);
226 :
227 0 : x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id];
228 : } else {
229 0 : mi->segment_id = 0;
230 0 : x->encode_breakout = cpi->encode_breakout;
231 : }
232 :
233 : // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
234 0 : xd->tile = *tile;
235 0 : }
236 :
237 0 : static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
238 : int mi_row, int mi_col,
239 : BLOCK_SIZE bsize) {
240 0 : const int block_width =
241 0 : VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col);
242 0 : const int block_height =
243 0 : VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row);
244 0 : const int mi_stride = xd->mi_stride;
245 0 : MODE_INFO *const src_mi = xd->mi[0];
246 : int i, j;
247 :
248 0 : for (j = 0; j < block_height; ++j)
249 0 : for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi;
250 0 : }
251 :
252 0 : static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x,
253 : MACROBLOCKD *const xd, int mi_row, int mi_col,
254 : BLOCK_SIZE bsize) {
255 0 : if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
256 0 : set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col);
257 0 : xd->mi[0]->sb_type = bsize;
258 : }
259 0 : }
260 :
// Accumulated statistics for one region of the variance tree.
typedef struct {
  int64_t sum_square_error;  // Sum of squared errors over the region.
  int64_t sum_error;         // Sum of errors over the region.
  int log2_count;            // log2 of the number of accumulated samples.
  int variance;              // Filled in by get_variance().
} var;

// Statistics of a block as a whole and of its two-way splits.
typedef struct {
  var none;     // The unsplit block.
  var horz[2];  // Sums of children (0,1) and (2,3).
  var vert[2];  // Sums of children (0,2) and (1,3).
} partition_variance;

// Tree levels, from the leaves upwards. Each level stores its own
// partition_variance plus its four children.
typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

// Level-independent view of one tree node, filled in by tree_to_node().
typedef struct {
  partition_variance *part_variances;  // The node's own statistics.
  var *split[4];                       // The `none` statistics of each child.
} variance_node;

// Names for tree levels.
typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;
309 :
310 0 : static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
311 : int i;
312 0 : node->part_variances = NULL;
313 0 : switch (bsize) {
314 : case BLOCK_64X64: {
315 0 : v64x64 *vt = (v64x64 *)data;
316 0 : node->part_variances = &vt->part_variances;
317 0 : for (i = 0; i < 4; i++)
318 0 : node->split[i] = &vt->split[i].part_variances.none;
319 0 : break;
320 : }
321 : case BLOCK_32X32: {
322 0 : v32x32 *vt = (v32x32 *)data;
323 0 : node->part_variances = &vt->part_variances;
324 0 : for (i = 0; i < 4; i++)
325 0 : node->split[i] = &vt->split[i].part_variances.none;
326 0 : break;
327 : }
328 : case BLOCK_16X16: {
329 0 : v16x16 *vt = (v16x16 *)data;
330 0 : node->part_variances = &vt->part_variances;
331 0 : for (i = 0; i < 4; i++)
332 0 : node->split[i] = &vt->split[i].part_variances.none;
333 0 : break;
334 : }
335 : case BLOCK_8X8: {
336 0 : v8x8 *vt = (v8x8 *)data;
337 0 : node->part_variances = &vt->part_variances;
338 0 : for (i = 0; i < 4; i++)
339 0 : node->split[i] = &vt->split[i].part_variances.none;
340 0 : break;
341 : }
342 : case BLOCK_4X4: {
343 0 : v4x4 *vt = (v4x4 *)data;
344 0 : node->part_variances = &vt->part_variances;
345 0 : for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
346 0 : break;
347 : }
348 : default: {
349 0 : assert(0);
350 : break;
351 : }
352 : }
353 0 : }
354 :
355 : // Set variance values given sum square error, sum error, count.
356 0 : static void fill_variance(int64_t s2, int64_t s, int c, var *v) {
357 0 : v->sum_square_error = s2;
358 0 : v->sum_error = s;
359 0 : v->log2_count = c;
360 0 : }
361 :
362 0 : static void get_variance(var *v) {
363 0 : v->variance =
364 0 : (int)(256 * (v->sum_square_error -
365 0 : ((v->sum_error * v->sum_error) >> v->log2_count)) >>
366 0 : v->log2_count);
367 0 : }
368 :
369 0 : static void sum_2_variances(const var *a, const var *b, var *r) {
370 0 : assert(a->log2_count == b->log2_count);
371 0 : fill_variance(a->sum_square_error + b->sum_square_error,
372 0 : a->sum_error + b->sum_error, a->log2_count + 1, r);
373 0 : }
374 :
375 0 : static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
376 : variance_node node;
377 0 : memset(&node, 0, sizeof(node));
378 0 : tree_to_node(data, bsize, &node);
379 0 : sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
380 0 : sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
381 0 : sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
382 0 : sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
383 0 : sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
384 0 : &node.part_variances->none);
385 0 : }
386 :
387 0 : static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
388 : MACROBLOCKD *const xd, void *data,
389 : BLOCK_SIZE bsize, int mi_row, int mi_col,
390 : int64_t threshold, BLOCK_SIZE bsize_min,
391 : int force_split) {
392 0 : VP9_COMMON *const cm = &cpi->common;
393 : variance_node vt;
394 0 : const int block_width = num_8x8_blocks_wide_lookup[bsize];
395 0 : const int block_height = num_8x8_blocks_high_lookup[bsize];
396 :
397 0 : assert(block_height == block_width);
398 0 : tree_to_node(data, bsize, &vt);
399 :
400 0 : if (force_split == 1) return 0;
401 :
402 : // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
403 : // variance is below threshold, otherwise split will be selected.
404 : // No check for vert/horiz split as too few samples for variance.
405 0 : if (bsize == bsize_min) {
406 : // Variance already computed to set the force_split.
407 0 : if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
408 0 : if (mi_col + block_width / 2 < cm->mi_cols &&
409 0 : mi_row + block_height / 2 < cm->mi_rows &&
410 0 : vt.part_variances->none.variance < threshold) {
411 0 : set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
412 0 : return 1;
413 : }
414 0 : return 0;
415 0 : } else if (bsize > bsize_min) {
416 : // Variance already computed to set the force_split.
417 0 : if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
418 : // For key frame: take split for bsize above 32X32 or very high variance.
419 0 : if (cm->frame_type == KEY_FRAME &&
420 0 : (bsize > BLOCK_32X32 ||
421 0 : vt.part_variances->none.variance > (threshold << 4))) {
422 0 : return 0;
423 : }
424 : // If variance is low, take the bsize (no split).
425 0 : if (mi_col + block_width / 2 < cm->mi_cols &&
426 0 : mi_row + block_height / 2 < cm->mi_rows &&
427 0 : vt.part_variances->none.variance < threshold) {
428 0 : set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
429 0 : return 1;
430 : }
431 :
432 : // Check vertical split.
433 0 : if (mi_row + block_height / 2 < cm->mi_rows) {
434 0 : BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
435 0 : get_variance(&vt.part_variances->vert[0]);
436 0 : get_variance(&vt.part_variances->vert[1]);
437 0 : if (vt.part_variances->vert[0].variance < threshold &&
438 0 : vt.part_variances->vert[1].variance < threshold &&
439 0 : get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
440 0 : set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
441 0 : set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
442 0 : return 1;
443 : }
444 : }
445 : // Check horizontal split.
446 0 : if (mi_col + block_width / 2 < cm->mi_cols) {
447 0 : BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
448 0 : get_variance(&vt.part_variances->horz[0]);
449 0 : get_variance(&vt.part_variances->horz[1]);
450 0 : if (vt.part_variances->horz[0].variance < threshold &&
451 0 : vt.part_variances->horz[1].variance < threshold &&
452 0 : get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
453 0 : set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
454 0 : set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
455 0 : return 1;
456 : }
457 : }
458 :
459 0 : return 0;
460 : }
461 0 : return 0;
462 : }
463 :
464 : // Set the variance split thresholds for following the block sizes:
465 : // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
466 : // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
467 : // currently only used on key frame.
468 0 : static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
469 0 : VP9_COMMON *const cm = &cpi->common;
470 0 : const int is_key_frame = (cm->frame_type == KEY_FRAME);
471 0 : const int threshold_multiplier = is_key_frame ? 20 : 1;
472 0 : int64_t threshold_base =
473 0 : (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
474 0 : if (is_key_frame) {
475 0 : thresholds[0] = threshold_base;
476 0 : thresholds[1] = threshold_base >> 2;
477 0 : thresholds[2] = threshold_base >> 2;
478 0 : thresholds[3] = threshold_base << 2;
479 : } else {
480 : // Increase base variance threshold based on estimated noise level.
481 0 : if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) {
482 0 : NOISE_LEVEL noise_level =
483 0 : vp9_noise_estimate_extract_level(&cpi->noise_estimate);
484 0 : if (noise_level == kHigh)
485 0 : threshold_base = 3 * threshold_base;
486 0 : else if (noise_level == kMedium)
487 0 : threshold_base = threshold_base << 1;
488 0 : else if (noise_level < kLow)
489 0 : threshold_base = (7 * threshold_base) >> 3;
490 : }
491 0 : thresholds[0] = threshold_base;
492 0 : thresholds[2] = threshold_base << cpi->oxcf.speed;
493 0 : if (cm->width <= 352 && cm->height <= 288) {
494 0 : thresholds[0] = threshold_base >> 3;
495 0 : thresholds[1] = threshold_base >> 1;
496 0 : thresholds[2] = threshold_base << 3;
497 0 : } else if (cm->width < 1280 && cm->height < 720) {
498 0 : thresholds[1] = (5 * threshold_base) >> 2;
499 0 : } else if (cm->width < 1920 && cm->height < 1080) {
500 0 : thresholds[1] = threshold_base << 1;
501 : } else {
502 0 : thresholds[1] = (5 * threshold_base) >> 1;
503 : }
504 : }
505 0 : }
506 :
507 0 : void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) {
508 0 : VP9_COMMON *const cm = &cpi->common;
509 0 : SPEED_FEATURES *const sf = &cpi->sf;
510 0 : const int is_key_frame = (cm->frame_type == KEY_FRAME);
511 0 : if (sf->partition_search_type != VAR_BASED_PARTITION &&
512 0 : sf->partition_search_type != REFERENCE_PARTITION) {
513 0 : return;
514 : } else {
515 0 : set_vbp_thresholds(cpi, cpi->vbp_thresholds, q);
516 : // The thresholds below are not changed locally.
517 0 : if (is_key_frame) {
518 0 : cpi->vbp_threshold_sad = 0;
519 0 : cpi->vbp_bsize_min = BLOCK_8X8;
520 : } else {
521 0 : if (cm->width <= 352 && cm->height <= 288)
522 0 : cpi->vbp_threshold_sad = 10;
523 : else
524 0 : cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000
525 0 : ? (cpi->y_dequant[q][1] << 1)
526 0 : : 1000;
527 0 : cpi->vbp_bsize_min = BLOCK_16X16;
528 : }
529 0 : cpi->vbp_threshold_copy = cpi->vbp_thresholds[0] << 16;
530 0 : cpi->vbp_threshold_minmax = 15 + (q >> 3);
531 : }
532 : }
533 :
// For the 16x16 block at (x16_idx, y16_idx), queries the min and max reported
// by the 8x8 minmax routine for each of its four 8x8 sub-blocks and returns
// the spread of their (max - min) ranges: largest range minus smallest range.
// Sub-blocks that start outside pixels_wide x pixels_high are skipped.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide, int pixels_high) {
  int largest_range = 0;
  int smallest_range = 255;
  int blk;

  for (blk = 0; blk < 4; ++blk) {
    // Raster order: bit 0 selects the column, bit 1 the row.
    const int x8 = x16_idx + ((blk & 1) << 3);
    const int y8 = y16_idx + ((blk >> 1) << 3);
    int lo = 0;
    int hi = 0;
    int range;

    if (x8 >= pixels_wide || y8 >= pixels_high) continue;

#if CONFIG_VP9_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      vpx_highbd_minmax_8x8(s + y8 * sp + x8, sp, d + y8 * dp + x8, dp, &lo,
                            &hi);
    } else {
      vpx_minmax_8x8(s + y8 * sp + x8, sp, d + y8 * dp + x8, dp, &lo, &hi);
    }
#else
    vpx_minmax_8x8(s + y8 * sp + x8, sp, d + y8 * dp + x8, dp, &lo, &hi);
#endif

    range = hi - lo;
    if (range > largest_range) largest_range = range;
    if (range < smallest_range) smallest_range = range;
  }

  return largest_range - smallest_range;
}
569 :
570 0 : static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
571 : int dp, int x8_idx, int y8_idx, v8x8 *vst,
572 : #if CONFIG_VP9_HIGHBITDEPTH
573 : int highbd_flag,
574 : #endif
575 : int pixels_wide, int pixels_high,
576 : int is_key_frame) {
577 : int k;
578 0 : for (k = 0; k < 4; k++) {
579 0 : int x4_idx = x8_idx + ((k & 1) << 2);
580 0 : int y4_idx = y8_idx + ((k >> 1) << 2);
581 0 : unsigned int sse = 0;
582 0 : int sum = 0;
583 0 : if (x4_idx < pixels_wide && y4_idx < pixels_high) {
584 : int s_avg;
585 0 : int d_avg = 128;
586 : #if CONFIG_VP9_HIGHBITDEPTH
587 : if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
588 : s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
589 : if (!is_key_frame)
590 : d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
591 : } else {
592 : s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
593 : if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
594 : }
595 : #else
596 0 : s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp);
597 0 : if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp);
598 : #endif
599 0 : sum = s_avg - d_avg;
600 0 : sse = sum * sum;
601 : }
602 0 : fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
603 : }
604 0 : }
605 :
606 0 : static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
607 : int dp, int x16_idx, int y16_idx, v16x16 *vst,
608 : #if CONFIG_VP9_HIGHBITDEPTH
609 : int highbd_flag,
610 : #endif
611 : int pixels_wide, int pixels_high,
612 : int is_key_frame) {
613 : int k;
614 0 : for (k = 0; k < 4; k++) {
615 0 : int x8_idx = x16_idx + ((k & 1) << 3);
616 0 : int y8_idx = y16_idx + ((k >> 1) << 3);
617 0 : unsigned int sse = 0;
618 0 : int sum = 0;
619 0 : if (x8_idx < pixels_wide && y8_idx < pixels_high) {
620 : int s_avg;
621 0 : int d_avg = 128;
622 : #if CONFIG_VP9_HIGHBITDEPTH
623 : if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
624 : s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
625 : if (!is_key_frame)
626 : d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
627 : } else {
628 : s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
629 : if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
630 : }
631 : #else
632 0 : s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp);
633 0 : if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp);
634 : #endif
635 0 : sum = s_avg - d_avg;
636 0 : sse = sum * sum;
637 : }
638 0 : fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
639 : }
640 0 : }
641 :
642 : #if !CONFIG_VP9_HIGHBITDEPTH
643 : // Check if most of the superblock is skin content, and if so, force split to
644 : // 32x32, and set x->sb_is_skin for use in mode selection.
645 0 : static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
646 : int mi_row, int mi_col, int *force_split) {
647 0 : VP9_COMMON *const cm = &cpi->common;
648 : // Avoid checking superblocks on/near boundary and avoid low resolutions.
649 : // Note superblock may still pick 64X64 if y_sad is very small
650 : // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
651 0 : if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 &&
652 0 : mi_row + 8 < cm->mi_rows)) {
653 0 : int num_16x16_skin = 0;
654 0 : int num_16x16_nonskin = 0;
655 0 : uint8_t *ysignal = x->plane[0].src.buf;
656 0 : uint8_t *usignal = x->plane[1].src.buf;
657 0 : uint8_t *vsignal = x->plane[2].src.buf;
658 0 : int sp = x->plane[0].src.stride;
659 0 : int spuv = x->plane[1].src.stride;
660 0 : const int block_index = mi_row * cm->mi_cols + mi_col;
661 0 : const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
662 0 : const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
663 0 : const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
664 0 : const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
665 : // Loop through the 16x16 sub-blocks.
666 : int i, j;
667 0 : for (i = 0; i < ymis; i += 2) {
668 0 : for (j = 0; j < xmis; j += 2) {
669 0 : int bl_index = block_index + i * cm->mi_cols + j;
670 0 : int bl_index1 = bl_index + 1;
671 0 : int bl_index2 = bl_index + cm->mi_cols;
672 0 : int bl_index3 = bl_index2 + 1;
673 0 : int consec_zeromv =
674 0 : VPXMIN(cpi->consec_zero_mv[bl_index],
675 : VPXMIN(cpi->consec_zero_mv[bl_index1],
676 : VPXMIN(cpi->consec_zero_mv[bl_index2],
677 : cpi->consec_zero_mv[bl_index3])));
678 0 : int is_skin = vp9_compute_skin_block(
679 : ysignal, usignal, vsignal, sp, spuv, BLOCK_16X16, consec_zeromv, 0);
680 0 : num_16x16_skin += is_skin;
681 0 : num_16x16_nonskin += (1 - is_skin);
682 0 : if (num_16x16_nonskin > 3) {
683 : // Exit loop if at least 4 of the 16x16 blocks are not skin.
684 0 : i = ymis;
685 0 : break;
686 : }
687 0 : ysignal += 16;
688 0 : usignal += 8;
689 0 : vsignal += 8;
690 : }
691 0 : ysignal += (sp << 4) - 64;
692 0 : usignal += (spuv << 3) - 32;
693 0 : vsignal += (spuv << 3) - 32;
694 : }
695 0 : if (num_16x16_skin > 12) {
696 0 : *force_split = 1;
697 0 : return 1;
698 : }
699 : }
700 0 : return 0;
701 : }
702 : #endif
703 :
704 0 : static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
705 : v64x64 *vt, int64_t thresholds[],
706 : MV_REFERENCE_FRAME ref_frame_partition,
707 : int mi_col, int mi_row) {
708 : int i, j;
709 0 : VP9_COMMON *const cm = &cpi->common;
710 0 : const int mv_thr = cm->width > 640 ? 8 : 4;
711 : // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and
712 : // int_pro mv is small. If the temporal variance is small set the flag
713 : // variance_low for the block. The variance threshold can be adjusted, the
714 : // higher the more aggressive.
715 0 : if (ref_frame_partition == LAST_FRAME &&
716 0 : (cpi->sf.short_circuit_low_temp_var == 1 ||
717 0 : (xd->mi[0]->mv[0].as_mv.col < mv_thr &&
718 0 : xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
719 0 : xd->mi[0]->mv[0].as_mv.row < mv_thr &&
720 0 : xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
721 0 : if (xd->mi[0]->sb_type == BLOCK_64X64) {
722 0 : if ((vt->part_variances).none.variance < (thresholds[0] >> 1))
723 0 : x->variance_low[0] = 1;
724 0 : } else if (xd->mi[0]->sb_type == BLOCK_64X32) {
725 0 : for (i = 0; i < 2; i++) {
726 0 : if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2))
727 0 : x->variance_low[i + 1] = 1;
728 : }
729 0 : } else if (xd->mi[0]->sb_type == BLOCK_32X64) {
730 0 : for (i = 0; i < 2; i++) {
731 0 : if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2))
732 0 : x->variance_low[i + 3] = 1;
733 : }
734 : } else {
735 0 : for (i = 0; i < 4; i++) {
736 0 : const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } };
737 0 : const int idx_str =
738 0 : cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1];
739 0 : MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
740 :
741 0 : if (cm->mi_cols <= mi_col + idx[i][1] ||
742 0 : cm->mi_rows <= mi_row + idx[i][0])
743 0 : continue;
744 :
745 0 : if ((*this_mi)->sb_type == BLOCK_32X32) {
746 0 : int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 ||
747 0 : cpi->sf.short_circuit_low_temp_var == 3)
748 0 : ? ((5 * thresholds[1]) >> 3)
749 0 : : (thresholds[1] >> 1);
750 0 : if (vt->split[i].part_variances.none.variance < threshold_32x32)
751 0 : x->variance_low[i + 5] = 1;
752 0 : } else if (cpi->sf.short_circuit_low_temp_var >= 2) {
753 : // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
754 : // inside.
755 0 : if ((*this_mi)->sb_type == BLOCK_16X16 ||
756 0 : (*this_mi)->sb_type == BLOCK_32X16 ||
757 0 : (*this_mi)->sb_type == BLOCK_16X32) {
758 0 : for (j = 0; j < 4; j++) {
759 0 : if (vt->split[i].split[j].part_variances.none.variance <
760 0 : (thresholds[2] >> 8))
761 0 : x->variance_low[(i << 2) + j + 9] = 1;
762 : }
763 : }
764 : }
765 : }
766 : }
767 : }
768 0 : }
769 :
770 0 : static void copy_prev_partition(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
771 : int mi_col) {
772 0 : VP9_COMMON *const cm = &cpi->common;
773 0 : BLOCK_SIZE *prev_part = cpi->prev_partition;
774 0 : int start_pos = mi_row * cm->mi_stride + mi_col;
775 :
776 0 : const int bsl = b_width_log2_lookup[bsize];
777 0 : const int bs = (1 << bsl) / 4;
778 : BLOCK_SIZE subsize;
779 : PARTITION_TYPE partition;
780 0 : MODE_INFO *mi = NULL;
781 :
782 0 : if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
783 :
784 0 : partition = partition_lookup[bsl][prev_part[start_pos]];
785 0 : subsize = get_subsize(bsize, partition);
786 0 : mi = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
787 :
788 0 : if (subsize < BLOCK_8X8) {
789 0 : mi->sb_type = bsize;
790 : } else {
791 0 : switch (partition) {
792 0 : case PARTITION_NONE: mi->sb_type = bsize; break;
793 : case PARTITION_HORZ:
794 0 : mi->sb_type = subsize;
795 0 : if (mi_row + bs < cm->mi_rows)
796 0 : cm->mi_grid_visible[(mi_row + bs) * cm->mi_stride + mi_col]->sb_type =
797 : subsize;
798 0 : break;
799 : case PARTITION_VERT:
800 0 : mi->sb_type = subsize;
801 0 : if (mi_col + bs < cm->mi_cols)
802 0 : cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col + bs]->sb_type =
803 : subsize;
804 0 : break;
805 : case PARTITION_SPLIT:
806 0 : copy_prev_partition(cpi, subsize, mi_row, mi_col);
807 0 : copy_prev_partition(cpi, subsize, mi_row + bs, mi_col);
808 0 : copy_prev_partition(cpi, subsize, mi_row, mi_col + bs);
809 0 : copy_prev_partition(cpi, subsize, mi_row + bs, mi_col + bs);
810 0 : break;
811 0 : default: assert(0);
812 : }
813 : }
814 : }
815 :
816 0 : static void update_prev_partition(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
817 : int mi_col) {
818 0 : VP9_COMMON *const cm = &cpi->common;
819 0 : BLOCK_SIZE *prev_part = cpi->prev_partition;
820 0 : int start_pos = mi_row * cm->mi_stride + mi_col;
821 0 : const int bsl = b_width_log2_lookup[bsize];
822 0 : const int bs = (1 << bsl) / 4;
823 : BLOCK_SIZE subsize;
824 : PARTITION_TYPE partition;
825 0 : const MODE_INFO *mi = NULL;
826 :
827 0 : if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
828 :
829 0 : mi = cm->mi_grid_visible[start_pos];
830 0 : partition = partition_lookup[bsl][mi->sb_type];
831 0 : subsize = get_subsize(bsize, partition);
832 0 : if (subsize < BLOCK_8X8) {
833 0 : prev_part[start_pos] = bsize;
834 : } else {
835 0 : switch (partition) {
836 0 : case PARTITION_NONE: prev_part[start_pos] = bsize; break;
837 : case PARTITION_HORZ:
838 0 : prev_part[start_pos] = subsize;
839 0 : if (mi_row + bs < cm->mi_rows)
840 0 : prev_part[start_pos + bs * cm->mi_stride] = subsize;
841 0 : break;
842 : case PARTITION_VERT:
843 0 : prev_part[start_pos] = subsize;
844 0 : if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
845 0 : break;
846 : case PARTITION_SPLIT:
847 0 : update_prev_partition(cpi, subsize, mi_row, mi_col);
848 0 : update_prev_partition(cpi, subsize, mi_row + bs, mi_col);
849 0 : update_prev_partition(cpi, subsize, mi_row, mi_col + bs);
850 0 : update_prev_partition(cpi, subsize, mi_row + bs, mi_col + bs);
851 0 : break;
852 0 : default: assert(0);
853 : }
854 : }
855 : }
856 :
857 0 : static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize,
858 : unsigned int y_sad, int is_key_frame) {
859 : int i;
860 0 : MACROBLOCKD *xd = &x->e_mbd;
861 0 : if (is_key_frame) return;
862 :
863 0 : for (i = 1; i <= 2; ++i) {
864 0 : unsigned int uv_sad = UINT_MAX;
865 0 : struct macroblock_plane *p = &x->plane[i];
866 0 : struct macroblockd_plane *pd = &xd->plane[i];
867 0 : const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
868 :
869 0 : if (bs != BLOCK_INVALID)
870 0 : uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
871 : pd->dst.stride);
872 :
873 : // TODO(marpan): Investigate if we should lower this threshold if
874 : // superblock is detected as skin.
875 0 : x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
876 : }
877 : }
878 :
// This function chooses partitioning based on the variance between source and
// reconstructed last, where variance is computed for down-sampled inputs.
//
// Outline of the steps performed for the 64x64 superblock at (mi_row, mi_col):
//  1. Non-key frames: pick the reference used for partitioning (LAST_FRAME,
//     ALTREF_FRAME when the source is an alt-ref in lagged VBR, or GOLDEN_FRAME
//     when its SAD is lower), build the 64x64 inter predictor, and take one of
//     two early exits (plain 64x64, or copy of the previous frame's partition)
//     when the SAD is small enough.
//     Key frames: variance is measured against the constant VP9_VAR_OFFS
//     buffer with stride 0.
//  2. Fill a variance tree (8x8 averages, or 4x4 averages in some cases) and
//     record forced splits in force_split[].
//  3. Walk the tree from 64x64 downwards with set_vt_partitioning(), falling
//     back to set_block_size() at the smallest level.
//  4. Optionally save the partition for the next frame, set the low temporal
//     variance flags, and run the chroma check.
//
// Returns 0 on every path.
static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
                               MACROBLOCK *x, int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  int i, j, k, m;
  v64x64 vt;
  // Secondary 16x16 trees, used on non-key frames for blocks whose variance
  // is recomputed from 4x4 averages.
  v16x16 vt2[16];
  int force_split[21];
  int avg_32x32;
  int max_var_32x32 = 0;
  int min_var_32x32 = INT_MAX;
  int var_32x32;
  int avg_16x16[4];
  int64_t threshold_4x4avg;
  NOISE_LEVEL noise_level = kLow;
  uint8_t *s;
  const uint8_t *d;
  int sp;
  int dp;
  unsigned int y_sad = UINT_MAX;
  BLOCK_SIZE bsize = BLOCK_64X64;
  // Ref frame used in partitioning.
  MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME;
  int pixels_wide = 64, pixels_high = 64;
  // Local copy so the thresholds can be overridden for boosted segments
  // without touching cpi->vbp_thresholds.
  int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
                            cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] };

  // For the variance computation under SVC mode, we treat the frame as key if
  // the reference (base layer frame) is key frame (i.e., is_key_frame == 1).
  const int is_key_frame =
      (cm->frame_type == KEY_FRAME ||
       (is_one_pass_cbr_svc(cpi) &&
        cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
  // Always use 4x4 partition for key frame.
  const int use_4x4_partition = cm->frame_type == KEY_FRAME;
  const int low_res = (cm->width <= 352 && cm->height <= 288);
  // Per-16x16 flag: 1 when that block's variance comes from 4x4 averages.
  int variance4x4downsample[16];
  int segment_id;
  // Index of this superblock's top-left mode-info unit; used to address
  // cpi->prev_segment_id.
  int offset = cm->mi_stride * mi_row + mi_col;

  set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
  segment_id = xd->mi[0]->segment_id;
  // With cyclic refresh, boosted segments get thresholds derived from the
  // segment's own qindex.
  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
    if (cyclic_refresh_segment_id_boosted(segment_id)) {
      int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
      set_vbp_thresholds(cpi, thresholds, q);
    }
  }

  // For non keyframes, disable 4x4 average for low resolution when speed = 8
  threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX;

  memset(x->variance_low, 0, sizeof(x->variance_low));

  // Shrink the measured area when the superblock extends past the frame edge
  // (the mb_to_*_edge values are negative in that case).
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);

  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
  // 5-20 for the 16x16 blocks.
  force_split[0] = 0;

  if (!is_key_frame) {
    // In the case of spatial/temporal scalable coding, the assumption here is
    // that the temporal reference frame will always be of type LAST_FRAME.
    // TODO(marpan): If that assumption is broken, we need to revisit this code.
    MODE_INFO *mi = xd->mi[0];
    YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);

    const YV12_BUFFER_CONFIG *yv12_g = NULL;
    unsigned int y_sad_g, y_sad_thr, y_sad_last;
    // Block size used for the SAD computations: 64x64 in the frame interior,
    // reduced when fewer than 4 mode-info units remain to the right/bottom.
    // NOTE(review): relies on the BLOCK_SIZE enum ordering after BLOCK_32X32
    // (presumably 32X64, 64X32, 64X64) - confirm against the enum definition.
    bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
            (mi_row + 4 < cm->mi_rows);

    assert(yv12 != NULL);

    if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
      // For now, GOLDEN will not be used for non-zero spatial layers, since
      // it may not be a temporal reference.
      yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
    }

    // Zero-motion SAD against GOLDEN, when GOLDEN is available, distinct from
    // LAST and enabled in ref_frame_flags; otherwise UINT_MAX so it never wins.
    if (yv12_g && yv12_g != yv12 && (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      y_sad_g = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
          xd->plane[0].pre[0].stride);
    } else {
      y_sad_g = UINT_MAX;
    }

    // For an alt-ref source frame in lagged VBR, predict from ALTREF and
    // disable the GOLDEN candidate; otherwise predict from LAST.
    if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
        cpi->rc.is_src_frame_alt_ref) {
      yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[ALTREF_FRAME - 1].sf);
      mi->ref_frame[0] = ALTREF_FRAME;
      y_sad_g = UINT_MAX;
    } else {
      vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                           &cm->frame_refs[LAST_FRAME - 1].sf);
      mi->ref_frame[0] = LAST_FRAME;
    }
    mi->ref_frame[1] = NONE;
    mi->sb_type = BLOCK_64X64;
    mi->mv[0].as_int = 0;
    mi->interp_filter = BILINEAR;

    y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
    // Keep the SAD of the first candidate; y_sad may be replaced by the
    // GOLDEN SAD below, but the copy-partition test uses this value.
    y_sad_last = y_sad;
    // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
    // are close if short_circuit_low_temp_var is on.
    y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
    if (y_sad_g < y_sad_thr) {
      vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
                           &cm->frame_refs[GOLDEN_FRAME - 1].sf);
      mi->ref_frame[0] = GOLDEN_FRAME;
      mi->mv[0].as_int = 0;
      y_sad = y_sad_g;
      ref_frame_partition = GOLDEN_FRAME;
    } else {
      x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
      ref_frame_partition = LAST_FRAME;
    }

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);

    x->sb_is_skin = 0;
#if !CONFIG_VP9_HIGHBITDEPTH
    // NOTE(review): only force_split[0] has been initialised at this point;
    // presumably skin_sb_split() writes no other entry that is read before
    // the loops below reset entries 1-20 - confirm against its definition.
    if (cpi->use_skin_detection)
      x->sb_is_skin =
          skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);
#endif

    // The inter predictor built above is the reference for the variances.
    d = xd->plane[0].dst.buf;
    dp = xd->plane[0].dst.stride;

    // If the y_sad is very small, take 64x64 as partition and exit.
    // Don't check on boosted segment for now, as 64x64 is suppressed there.
    if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) {
      const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
      const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
      if (mi_col + block_width / 2 < cm->mi_cols &&
          mi_row + block_height / 2 < cm->mi_rows) {
        set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
        chroma_check(cpi, x, bsize, y_sad, is_key_frame);
        return 0;
      }
    }

    // If the y_sad is small enough, copy the partition of the superblock in the
    // last frame to current frame only if the last frame is not a keyframe.
    // TODO(jianj) : tune the threshold.
    // NOTE(review): cpi->prev_segment_id is dereferenced before the
    // prev_partition NULL check; presumably both buffers are allocated
    // together whenever copy_partition_flag is set - confirm.
    if (cpi->sf.copy_partition_flag && cpi->rc.frames_since_key > 1 &&
        segment_id == CR_SEGMENT_ID_BASE &&
        cpi->prev_segment_id[offset] == CR_SEGMENT_ID_BASE &&
        y_sad_last < cpi->vbp_threshold_copy) {
      if (cpi->prev_partition != NULL) {
        copy_prev_partition(cpi, BLOCK_64X64, mi_row, mi_col);
        chroma_check(cpi, x, bsize, y_sad, is_key_frame);
        return 0;
      }
    }
  } else {
    // Key frame: compare the source against a constant buffer (stride 0).
    d = VP9_VAR_OFFS;
    dp = 0;
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      switch (xd->bd) {
        case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break;
        case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break;
        case 8:
        default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break;
      }
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
  // for splits.
  for (i = 0; i < 4; i++) {
    // Pixel offsets of 32x32 block i inside the superblock.
    const int x32_idx = ((i & 1) << 5);
    const int y32_idx = ((i >> 1) << 5);
    const int i2 = i << 2;
    force_split[i + 1] = 0;
    avg_16x16[i] = 0;
    for (j = 0; j < 4; j++) {
      // Pixel offsets of 16x16 block j inside the superblock.
      const int x16_idx = x32_idx + ((j & 1) << 4);
      const int y16_idx = y32_idx + ((j >> 1) << 4);
      const int split_index = 5 + i2 + j;
      v16x16 *vst = &vt.split[i].split[j];
      force_split[split_index] = 0;
      variance4x4downsample[i2 + j] = 0;
      if (!is_key_frame) {
        fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
#if CONFIG_VP9_HIGHBITDEPTH
                             xd->cur_buf->flags,
#endif
                             pixels_wide, pixels_high, is_key_frame);
        fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
        get_variance(&vt.split[i].split[j].part_variances.none);
        // Accumulated (not divided) 16x16 variances of this 32x32 block.
        avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance;
        if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) {
          // 16X16 variance is above threshold for split, so force split to 8x8
          // for this 16x16 block (this also forces splits for upper levels).
          force_split[split_index] = 1;
          force_split[i + 1] = 1;
          force_split[0] = 1;
        } else if (cpi->oxcf.speed < 8 &&
                   vt.split[i].split[j].part_variances.none.variance >
                       thresholds[1] &&
                   !cyclic_refresh_segment_id_boosted(segment_id)) {
          // We have some nominal amount of 16x16 variance (based on average),
          // compute the minmax over the 8x8 sub-blocks, and if above threshold,
          // force split to 8x8 block for this 16x16 block.
          int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                                          xd->cur_buf->flags,
#endif
                                          pixels_wide, pixels_high);
          if (minmax > cpi->vbp_threshold_minmax) {
            force_split[split_index] = 1;
            force_split[i + 1] = 1;
            force_split[0] = 1;
          }
        }
      }
      // The variance read below is only evaluated on non-key frames, where it
      // was computed above (is_key_frame short-circuits the condition).
      if (is_key_frame || (low_res &&
                           vt.split[i].split[j].part_variances.none.variance >
                               threshold_4x4avg)) {
        // The split decision for this block is redone in the next loop from
        // the 4x4-based variance.
        force_split[split_index] = 0;
        // Go down to 4x4 down-sampling for variance.
        variance4x4downsample[i2 + j] = 1;
        for (k = 0; k < 4; k++) {
          int x8_idx = x16_idx + ((k & 1) << 3);
          int y8_idx = y16_idx + ((k >> 1) << 3);
          // Key frames fill the main tree; other frames fill vt2 so the
          // 8x8-average data in vt is kept.
          v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
          fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
#if CONFIG_VP9_HIGHBITDEPTH
                               xd->cur_buf->flags,
#endif
                               pixels_wide, pixels_high, is_key_frame);
        }
      }
    }
  }
  // Fill the rest of the variance tree by summing split partition values.
  avg_32x32 = 0;
  for (i = 0; i < 4; i++) {
    const int i2 = i << 2;
    for (j = 0; j < 4; j++) {
      if (variance4x4downsample[i2 + j] == 1) {
        v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j];
        for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
        fill_variance_tree(vtemp, BLOCK_16X16);
        // If variance of this 16x16 block is above the threshold, force block
        // to split. This also forces a split on the upper levels.
        get_variance(&vtemp->part_variances.none);
        if (vtemp->part_variances.none.variance > thresholds[2]) {
          force_split[5 + i2 + j] = 1;
          force_split[i + 1] = 1;
          force_split[0] = 1;
        }
      }
    }
    fill_variance_tree(&vt.split[i], BLOCK_32X32);
    // If variance of this 32x32 block is above the threshold, or if its above
    // (some threshold of) the average variance over the sub-16x16 blocks, then
    // force this block to split. This also forces a split on the upper
    // (64x64) level.
    if (!force_split[i + 1]) {
      get_variance(&vt.split[i].part_variances.none);
      var_32x32 = vt.split[i].part_variances.none.variance;
      max_var_32x32 = VPXMAX(var_32x32, max_var_32x32);
      min_var_32x32 = VPXMIN(var_32x32, min_var_32x32);
      if (vt.split[i].part_variances.none.variance > thresholds[1] ||
          (!is_key_frame &&
           vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
           vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) {
        force_split[i + 1] = 1;
        force_split[0] = 1;
      }
      // Only 32x32 blocks that were not already forced to split contribute.
      avg_32x32 += var_32x32;
    }
  }
  if (!force_split[0]) {
    fill_variance_tree(&vt, BLOCK_64X64);
    get_variance(&vt.part_variances.none);
    if (cpi->noise_estimate.enabled)
      noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
    // If variance of this 64x64 block is above (some threshold of) the average
    // variance over the sub-32x32 blocks, then force this block to split.
    // Only checking this for noise level >= medium for now.
    if (!is_key_frame && noise_level >= kMedium &&
        vt.part_variances.none.variance > (5 * avg_32x32) >> 4)
      force_split[0] = 1;
    // Else if the maximum 32x32 variance minus the minimum 32x32 variance in
    // a 64x64 block is greater than threshold and the maximum 32x32 variance is
    // above a minimum threshold, then force the split of a 64x64 block
    // Only check this for low noise.
    else if (!is_key_frame && noise_level < kMedium &&
             (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) &&
             max_var_32x32 > thresholds[0] >> 1)
      force_split[0] = 1;
  }

  // Now go through the entire structure, splitting every block size until
  // we get to one that's got a variance lower than our threshold.
  // A superblock that is not fully inside the frame skips the 64x64 attempt.
  if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
      !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (i = 0; i < 4; ++i) {
      // From here on the *_idx offsets are added to mi_row / mi_col, unlike
      // the pixel offsets used while filling the tree.
      const int x32_idx = ((i & 1) << 2);
      const int y32_idx = ((i >> 1) << 2);
      const int i2 = i << 2;
      if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32,
                               (mi_row + y32_idx), (mi_col + x32_idx),
                               thresholds[1], BLOCK_16X16,
                               force_split[i + 1])) {
        for (j = 0; j < 4; ++j) {
          const int x16_idx = ((j & 1) << 1);
          const int y16_idx = ((j >> 1) << 1);
          // For inter frames: if variance4x4downsample[] == 1 for this 16x16
          // block, then the variance is based on 4x4 down-sampling, so use vt2
          // in set_vt_partitioning(), otherwise use vt.
          v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1)
                              ? &vt2[i2 + j]
                              : &vt.split[i].split[j];
          if (!set_vt_partitioning(
                  cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx,
                  mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min,
                  force_split[5 + i2 + j])) {
            for (k = 0; k < 4; ++k) {
              const int x8_idx = (k & 1);
              const int y8_idx = (k >> 1);
              if (use_4x4_partition) {
                // Key frames may go one level further, down to 4x4.
                if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k],
                                         BLOCK_8X8,
                                         mi_row + y32_idx + y16_idx + y8_idx,
                                         mi_col + x32_idx + x16_idx + x8_idx,
                                         thresholds[3], BLOCK_8X8, 0)) {
                  set_block_size(
                      cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
                      (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4);
                }
              } else {
                set_block_size(
                    cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx),
                    (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8);
              }
            }
          }
        }
      }
    }
  }

  // Remember this superblock's partition and segment id so a later frame can
  // copy them (see the copy_partition_flag early exit above).
  if (cm->frame_type != KEY_FRAME && cpi->sf.copy_partition_flag) {
    update_prev_partition(cpi, BLOCK_64X64, mi_row, mi_col);
    cpi->prev_segment_id[offset] = segment_id;
  }

  if (cpi->sf.short_circuit_low_temp_var) {
    // NOTE(review): arguments are passed as (mi_col, mi_row) here, the reverse
    // of the order used elsewhere in this function - verify against the
    // callee's parameter order.
    set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition,
                          mi_col, mi_row);
  }

  chroma_check(cpi, x, bsize, y_sad, is_key_frame);
  return 0;
}
1255 :
1256 0 : static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx,
1257 : int mi_row, int mi_col, BLOCK_SIZE bsize,
1258 : int output_enabled) {
1259 : int i, x_idx, y;
1260 0 : VP9_COMMON *const cm = &cpi->common;
1261 0 : RD_COUNTS *const rdc = &td->rd_counts;
1262 0 : MACROBLOCK *const x = &td->mb;
1263 0 : MACROBLOCKD *const xd = &x->e_mbd;
1264 0 : struct macroblock_plane *const p = x->plane;
1265 0 : struct macroblockd_plane *const pd = xd->plane;
1266 0 : MODE_INFO *mi = &ctx->mic;
1267 0 : MODE_INFO *const xdmi = xd->mi[0];
1268 0 : MODE_INFO *mi_addr = xd->mi[0];
1269 0 : const struct segmentation *const seg = &cm->seg;
1270 0 : const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
1271 0 : const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
1272 0 : const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
1273 0 : const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
1274 0 : MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
1275 : int w, h;
1276 :
1277 0 : const int mis = cm->mi_stride;
1278 0 : const int mi_width = num_8x8_blocks_wide_lookup[bsize];
1279 0 : const int mi_height = num_8x8_blocks_high_lookup[bsize];
1280 : int max_plane;
1281 :
1282 0 : assert(mi->sb_type == bsize);
1283 :
1284 0 : *mi_addr = *mi;
1285 0 : *x->mbmi_ext = ctx->mbmi_ext;
1286 :
1287 : // If segmentation in use
1288 0 : if (seg->enabled) {
1289 : // For in frame complexity AQ copy the segment id from the segment map.
1290 0 : if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
1291 0 : const uint8_t *const map =
1292 0 : seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
1293 0 : mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
1294 : }
1295 : // Else for cyclic refresh mode update the segment map, set the segment id
1296 : // and then update the quantizer.
1297 0 : if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
1298 0 : vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize,
1299 0 : ctx->rate, ctx->dist, x->skip, p);
1300 : }
1301 : }
1302 :
1303 0 : max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1;
1304 0 : for (i = 0; i < max_plane; ++i) {
1305 0 : p[i].coeff = ctx->coeff_pbuf[i][1];
1306 0 : p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
1307 0 : pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
1308 0 : p[i].eobs = ctx->eobs_pbuf[i][1];
1309 : }
1310 :
1311 0 : for (i = max_plane; i < MAX_MB_PLANE; ++i) {
1312 0 : p[i].coeff = ctx->coeff_pbuf[i][2];
1313 0 : p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
1314 0 : pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
1315 0 : p[i].eobs = ctx->eobs_pbuf[i][2];
1316 : }
1317 :
1318 : // Restore the coding context of the MB to that that was in place
1319 : // when the mode was picked for it
1320 0 : for (y = 0; y < mi_height; y++)
1321 0 : for (x_idx = 0; x_idx < mi_width; x_idx++)
1322 0 : if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
1323 0 : (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
1324 0 : xd->mi[x_idx + y * mis] = mi_addr;
1325 : }
1326 :
1327 0 : if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x);
1328 :
1329 0 : if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) {
1330 0 : xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
1331 0 : xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
1332 : }
1333 :
1334 0 : x->skip = ctx->skip;
1335 0 : memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk,
1336 0 : sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
1337 :
1338 0 : if (!output_enabled) return;
1339 :
1340 : #if CONFIG_INTERNAL_STATS
1341 : if (frame_is_intra_only(cm)) {
1342 : static const int kf_mode_index[] = {
1343 : THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/,
1344 : THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/,
1345 : THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/,
1346 : THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/,
1347 : THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/,
1348 : };
1349 : ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]];
1350 : } else {
1351 : // Note how often each mode chosen as best
1352 : ++cpi->mode_chosen_counts[ctx->best_mode_index];
1353 : }
1354 : #endif
1355 0 : if (!frame_is_intra_only(cm)) {
1356 0 : if (is_inter_block(xdmi)) {
1357 0 : vp9_update_mv_count(td);
1358 :
1359 0 : if (cm->interp_filter == SWITCHABLE) {
1360 0 : const int ctx = get_pred_context_switchable_interp(xd);
1361 0 : ++td->counts->switchable_interp[ctx][xdmi->interp_filter];
1362 : }
1363 : }
1364 :
1365 0 : rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
1366 0 : rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
1367 0 : rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
1368 :
1369 0 : for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
1370 0 : rdc->filter_diff[i] += ctx->best_filter_diff[i];
1371 : }
1372 :
1373 0 : for (h = 0; h < y_mis; ++h) {
1374 0 : MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
1375 0 : for (w = 0; w < x_mis; ++w) {
1376 0 : MV_REF *const mv = frame_mv + w;
1377 0 : mv->ref_frame[0] = mi->ref_frame[0];
1378 0 : mv->ref_frame[1] = mi->ref_frame[1];
1379 0 : mv->mv[0].as_int = mi->mv[0].as_int;
1380 0 : mv->mv[1].as_int = mi->mv[1].as_int;
1381 : }
1382 : }
1383 : }
1384 :
1385 0 : void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
1386 : int mi_row, int mi_col) {
1387 0 : uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer };
1388 0 : const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
1389 : int i;
1390 :
1391 : // Set current frame pointer.
1392 0 : x->e_mbd.cur_buf = src;
1393 :
1394 0 : for (i = 0; i < MAX_MB_PLANE; i++)
1395 0 : setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col,
1396 : NULL, x->e_mbd.plane[i].subsampling_x,
1397 : x->e_mbd.plane[i].subsampling_y);
1398 0 : }
1399 :
1400 0 : static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
1401 : RD_COST *rd_cost, BLOCK_SIZE bsize) {
1402 0 : MACROBLOCKD *const xd = &x->e_mbd;
1403 0 : MODE_INFO *const mi = xd->mi[0];
1404 : INTERP_FILTER filter_ref;
1405 :
1406 0 : filter_ref = get_pred_context_switchable_interp(xd);
1407 0 : if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP;
1408 :
1409 0 : mi->sb_type = bsize;
1410 0 : mi->mode = ZEROMV;
1411 0 : mi->tx_size =
1412 0 : VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);
1413 0 : mi->skip = 1;
1414 0 : mi->uv_mode = DC_PRED;
1415 0 : mi->ref_frame[0] = LAST_FRAME;
1416 0 : mi->ref_frame[1] = NONE;
1417 0 : mi->mv[0].as_int = 0;
1418 0 : mi->interp_filter = filter_ref;
1419 :
1420 0 : xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
1421 0 : x->skip = 1;
1422 :
1423 0 : vp9_rd_cost_init(rd_cost);
1424 0 : }
1425 :
1426 0 : static int set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
1427 : int8_t segment_id) {
1428 : int segment_qindex;
1429 0 : VP9_COMMON *const cm = &cpi->common;
1430 0 : vp9_init_plane_quantizers(cpi, x);
1431 0 : vpx_clear_system_state();
1432 0 : segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
1433 0 : return vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
1434 : }
1435 :
1436 0 : static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
1437 : MACROBLOCK *const x, int mi_row, int mi_col,
1438 : RD_COST *rd_cost, BLOCK_SIZE bsize,
1439 : PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
1440 0 : VP9_COMMON *const cm = &cpi->common;
1441 0 : TileInfo *const tile_info = &tile_data->tile_info;
1442 0 : MACROBLOCKD *const xd = &x->e_mbd;
1443 : MODE_INFO *mi;
1444 0 : struct macroblock_plane *const p = x->plane;
1445 0 : struct macroblockd_plane *const pd = xd->plane;
1446 0 : const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
1447 : int i, orig_rdmult;
1448 :
1449 0 : vpx_clear_system_state();
1450 :
1451 : // Use the lower precision, but faster, 32x32 fdct for mode selection.
1452 0 : x->use_lp32x32fdct = 1;
1453 :
1454 0 : set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
1455 0 : mi = xd->mi[0];
1456 0 : mi->sb_type = bsize;
1457 :
1458 0 : for (i = 0; i < MAX_MB_PLANE; ++i) {
1459 0 : p[i].coeff = ctx->coeff_pbuf[i][0];
1460 0 : p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
1461 0 : pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
1462 0 : p[i].eobs = ctx->eobs_pbuf[i][0];
1463 : }
1464 0 : ctx->is_coded = 0;
1465 0 : ctx->skippable = 0;
1466 0 : ctx->pred_pixel_ready = 0;
1467 0 : x->skip_recode = 0;
1468 :
1469 : // Set to zero to make sure we do not use the previous encoded frame stats
1470 0 : mi->skip = 0;
1471 :
1472 : #if CONFIG_VP9_HIGHBITDEPTH
1473 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1474 : x->source_variance = vp9_high_get_sby_perpixel_variance(
1475 : cpi, &x->plane[0].src, bsize, xd->bd);
1476 : } else {
1477 : x->source_variance =
1478 : vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
1479 : }
1480 : #else
1481 0 : x->source_variance =
1482 0 : vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
1483 : #endif // CONFIG_VP9_HIGHBITDEPTH
1484 :
1485 : // Save rdmult before it might be changed, so it can be restored later.
1486 0 : orig_rdmult = x->rdmult;
1487 :
1488 0 : if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
1489 0 : double logvar = vp9_log_block_var(cpi, x, bsize);
1490 : // Check block complexity as part of descision on using pixel or transform
1491 : // domain distortion in rd tests.
1492 0 : x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
1493 0 : (logvar >= cpi->sf.tx_domain_thresh);
1494 :
1495 : // Check block complexity as part of descision on using quantized
1496 : // coefficient optimisation inside the rd loop.
1497 0 : x->block_qcoeff_opt =
1498 0 : cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
1499 : } else {
1500 0 : x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
1501 0 : x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
1502 : }
1503 :
1504 0 : if (aq_mode == VARIANCE_AQ) {
1505 0 : const int energy =
1506 0 : bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize);
1507 :
1508 0 : if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
1509 0 : cpi->force_update_segmentation ||
1510 0 : (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
1511 0 : mi->segment_id = vp9_vaq_segment_id(energy);
1512 : } else {
1513 0 : const uint8_t *const map =
1514 0 : cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
1515 0 : mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
1516 : }
1517 0 : x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
1518 0 : } else if (aq_mode == LOOKAHEAD_AQ) {
1519 0 : const uint8_t *const map = cpi->segmentation_map;
1520 :
1521 : // I do not change rdmult here consciously.
1522 0 : mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
1523 0 : } else if (aq_mode == EQUATOR360_AQ) {
1524 0 : if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) {
1525 0 : mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
1526 : } else {
1527 0 : const uint8_t *const map =
1528 0 : cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
1529 0 : mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
1530 : }
1531 0 : x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
1532 0 : } else if (aq_mode == COMPLEXITY_AQ) {
1533 0 : x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
1534 0 : } else if (aq_mode == CYCLIC_REFRESH_AQ) {
1535 0 : const uint8_t *const map =
1536 0 : cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
1537 : // If segment is boosted, use rdmult for that segment.
1538 0 : if (cyclic_refresh_segment_id_boosted(
1539 : get_segment_id(cm, map, bsize, mi_row, mi_col)))
1540 0 : x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
1541 : }
1542 :
1543 : // Find best coding mode & reconstruct the MB so it is available
1544 : // as a predictor for MBs that follow in the SB
1545 0 : if (frame_is_intra_only(cm)) {
1546 0 : vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
1547 : } else {
1548 0 : if (bsize >= BLOCK_8X8) {
1549 0 : if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
1550 0 : vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
1551 : ctx, best_rd);
1552 : else
1553 0 : vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
1554 : bsize, ctx, best_rd);
1555 : } else {
1556 0 : vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost,
1557 : bsize, ctx, best_rd);
1558 : }
1559 : }
1560 :
1561 : // Examine the resulting rate and for AQ mode 2 make a segment choice.
1562 0 : if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
1563 0 : (bsize >= BLOCK_16X16) &&
1564 0 : (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
1565 0 : (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
1566 0 : vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
1567 : }
1568 :
1569 0 : x->rdmult = orig_rdmult;
1570 :
1571 : // TODO(jingning) The rate-distortion optimization flow needs to be
1572 : // refactored to provide proper exit/return handle.
1573 0 : if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;
1574 :
1575 0 : ctx->rate = rd_cost->rate;
1576 0 : ctx->dist = rd_cost->dist;
1577 0 : }
1578 :
1579 0 : static void update_stats(VP9_COMMON *cm, ThreadData *td) {
1580 0 : const MACROBLOCK *x = &td->mb;
1581 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1582 0 : const MODE_INFO *const mi = xd->mi[0];
1583 0 : const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
1584 0 : const BLOCK_SIZE bsize = mi->sb_type;
1585 :
1586 0 : if (!frame_is_intra_only(cm)) {
1587 0 : FRAME_COUNTS *const counts = td->counts;
1588 0 : const int inter_block = is_inter_block(mi);
1589 0 : const int seg_ref_active =
1590 0 : segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME);
1591 0 : if (!seg_ref_active) {
1592 0 : counts->intra_inter[get_intra_inter_context(xd)][inter_block]++;
1593 : // If the segment reference feature is enabled we have only a single
1594 : // reference frame allowed for the segment so exclude it from
1595 : // the reference frame counts used to work out probabilities.
1596 0 : if (inter_block) {
1597 0 : const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0];
1598 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT)
1599 0 : counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
1600 0 : [has_second_ref(mi)]++;
1601 :
1602 0 : if (has_second_ref(mi)) {
1603 0 : counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
1604 0 : [ref0 == GOLDEN_FRAME]++;
1605 : } else {
1606 0 : counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
1607 0 : [ref0 != LAST_FRAME]++;
1608 0 : if (ref0 != LAST_FRAME)
1609 0 : counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
1610 0 : [ref0 != GOLDEN_FRAME]++;
1611 : }
1612 : }
1613 : }
1614 0 : if (inter_block &&
1615 0 : !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
1616 0 : const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]];
1617 0 : if (bsize >= BLOCK_8X8) {
1618 0 : const PREDICTION_MODE mode = mi->mode;
1619 0 : ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
1620 : } else {
1621 0 : const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
1622 0 : const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
1623 : int idx, idy;
1624 0 : for (idy = 0; idy < 2; idy += num_4x4_h) {
1625 0 : for (idx = 0; idx < 2; idx += num_4x4_w) {
1626 0 : const int j = idy * 2 + idx;
1627 0 : const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
1628 0 : ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
1629 : }
1630 : }
1631 : }
1632 : }
1633 : }
1634 0 : }
1635 :
1636 0 : static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
1637 : ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
1638 : ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
1639 : PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
1640 : BLOCK_SIZE bsize) {
1641 0 : MACROBLOCKD *const xd = &x->e_mbd;
1642 : int p;
1643 0 : const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1644 0 : const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1645 0 : int mi_width = num_8x8_blocks_wide_lookup[bsize];
1646 0 : int mi_height = num_8x8_blocks_high_lookup[bsize];
1647 0 : for (p = 0; p < MAX_MB_PLANE; p++) {
1648 0 : memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x),
1649 0 : a + num_4x4_blocks_wide * p,
1650 0 : (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
1651 0 : xd->plane[p].subsampling_x);
1652 0 : memcpy(xd->left_context[p] +
1653 0 : ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
1654 0 : l + num_4x4_blocks_high * p,
1655 0 : (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
1656 0 : xd->plane[p].subsampling_y);
1657 : }
1658 0 : memcpy(xd->above_seg_context + mi_col, sa,
1659 : sizeof(*xd->above_seg_context) * mi_width);
1660 0 : memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
1661 : sizeof(xd->left_seg_context[0]) * mi_height);
1662 0 : }
1663 :
1664 0 : static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
1665 : ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
1666 : ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
1667 : PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
1668 : BLOCK_SIZE bsize) {
1669 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1670 : int p;
1671 0 : const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1672 0 : const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1673 0 : int mi_width = num_8x8_blocks_wide_lookup[bsize];
1674 0 : int mi_height = num_8x8_blocks_high_lookup[bsize];
1675 :
1676 : // buffer the above/left context information of the block in search.
1677 0 : for (p = 0; p < MAX_MB_PLANE; ++p) {
1678 0 : memcpy(a + num_4x4_blocks_wide * p,
1679 0 : xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x),
1680 0 : (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >>
1681 0 : xd->plane[p].subsampling_x);
1682 0 : memcpy(l + num_4x4_blocks_high * p,
1683 0 : xd->left_context[p] +
1684 0 : ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y),
1685 0 : (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >>
1686 0 : xd->plane[p].subsampling_y);
1687 : }
1688 0 : memcpy(sa, xd->above_seg_context + mi_col,
1689 : sizeof(*xd->above_seg_context) * mi_width);
1690 0 : memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK),
1691 : sizeof(xd->left_seg_context[0]) * mi_height);
1692 0 : }
1693 :
1694 0 : static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
1695 : TOKENEXTRA **tp, int mi_row, int mi_col,
1696 : int output_enabled, BLOCK_SIZE bsize,
1697 : PICK_MODE_CONTEXT *ctx) {
1698 0 : MACROBLOCK *const x = &td->mb;
1699 0 : set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
1700 0 : update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
1701 0 : encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
1702 :
1703 0 : if (output_enabled) {
1704 0 : update_stats(&cpi->common, td);
1705 :
1706 0 : (*tp)->token = EOSB_TOKEN;
1707 0 : (*tp)++;
1708 : }
1709 0 : }
1710 :
1711 0 : static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
1712 : TOKENEXTRA **tp, int mi_row, int mi_col,
1713 : int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
1714 0 : VP9_COMMON *const cm = &cpi->common;
1715 0 : MACROBLOCK *const x = &td->mb;
1716 0 : MACROBLOCKD *const xd = &x->e_mbd;
1717 :
1718 0 : const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
1719 : int ctx;
1720 : PARTITION_TYPE partition;
1721 0 : BLOCK_SIZE subsize = bsize;
1722 :
1723 0 : if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
1724 :
1725 0 : if (bsize >= BLOCK_8X8) {
1726 0 : ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
1727 0 : subsize = get_subsize(bsize, pc_tree->partitioning);
1728 : } else {
1729 0 : ctx = 0;
1730 0 : subsize = BLOCK_4X4;
1731 : }
1732 :
1733 0 : partition = partition_lookup[bsl][subsize];
1734 0 : if (output_enabled && bsize != BLOCK_4X4)
1735 0 : td->counts->partition[ctx][partition]++;
1736 :
1737 0 : switch (partition) {
1738 : case PARTITION_NONE:
1739 0 : encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
1740 : &pc_tree->none);
1741 0 : break;
1742 : case PARTITION_VERT:
1743 0 : encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
1744 : &pc_tree->vertical[0]);
1745 0 : if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
1746 0 : encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
1747 : subsize, &pc_tree->vertical[1]);
1748 : }
1749 0 : break;
1750 : case PARTITION_HORZ:
1751 0 : encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
1752 : &pc_tree->horizontal[0]);
1753 0 : if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
1754 0 : encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
1755 : subsize, &pc_tree->horizontal[1]);
1756 : }
1757 0 : break;
1758 : case PARTITION_SPLIT:
1759 0 : if (bsize == BLOCK_8X8) {
1760 0 : encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
1761 : pc_tree->leaf_split[0]);
1762 : } else {
1763 0 : encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
1764 0 : pc_tree->split[0]);
1765 0 : encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
1766 0 : subsize, pc_tree->split[1]);
1767 0 : encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
1768 0 : subsize, pc_tree->split[2]);
1769 0 : encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled,
1770 0 : subsize, pc_tree->split[3]);
1771 : }
1772 0 : break;
1773 0 : default: assert(0 && "Invalid partition type."); break;
1774 : }
1775 :
1776 0 : if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
1777 0 : update_partition_context(xd, mi_row, mi_col, subsize, bsize);
1778 : }
1779 :
1780 : // Check to see if the given partition size is allowed for a specified number
1781 : // of 8x8 block rows and columns remaining in the image.
1782 : // If not then return the largest allowed partition size
1783 0 : static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left,
1784 : int cols_left, int *bh, int *bw) {
1785 0 : if (rows_left <= 0 || cols_left <= 0) {
1786 0 : return VPXMIN(bsize, BLOCK_8X8);
1787 : } else {
1788 0 : for (; bsize > 0; bsize -= 3) {
1789 0 : *bh = num_8x8_blocks_high_lookup[bsize];
1790 0 : *bw = num_8x8_blocks_wide_lookup[bsize];
1791 0 : if ((*bh <= rows_left) && (*bw <= cols_left)) {
1792 0 : break;
1793 : }
1794 : }
1795 : }
1796 0 : return bsize;
1797 : }
1798 :
1799 0 : static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in,
1800 : int bw_in, int row8x8_remaining,
1801 : int col8x8_remaining, BLOCK_SIZE bsize,
1802 : MODE_INFO **mi_8x8) {
1803 0 : int bh = bh_in;
1804 : int r, c;
1805 0 : for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
1806 0 : int bw = bw_in;
1807 0 : for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
1808 0 : const int index = r * mis + c;
1809 0 : mi_8x8[index] = mi + index;
1810 0 : mi_8x8[index]->sb_type = find_partition_size(
1811 : bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
1812 : }
1813 : }
1814 0 : }
1815 :
1816 : // This function attempts to set all mode info entries in a given SB64
1817 : // to the same block partition size.
1818 : // However, at the bottom and right borders of the image the requested size
1819 : // may not be allowed in which case this code attempts to choose the largest
1820 : // allowable partition.
1821 0 : static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
1822 : MODE_INFO **mi_8x8, int mi_row, int mi_col,
1823 : BLOCK_SIZE bsize) {
1824 0 : VP9_COMMON *const cm = &cpi->common;
1825 0 : const int mis = cm->mi_stride;
1826 0 : const int row8x8_remaining = tile->mi_row_end - mi_row;
1827 0 : const int col8x8_remaining = tile->mi_col_end - mi_col;
1828 : int block_row, block_col;
1829 0 : MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
1830 0 : int bh = num_8x8_blocks_high_lookup[bsize];
1831 0 : int bw = num_8x8_blocks_wide_lookup[bsize];
1832 :
1833 0 : assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
1834 :
1835 : // Apply the requested partition size to the SB64 if it is all "in image"
1836 0 : if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
1837 : (row8x8_remaining >= MI_BLOCK_SIZE)) {
1838 0 : for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
1839 0 : for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
1840 0 : int index = block_row * mis + block_col;
1841 0 : mi_8x8[index] = mi_upper_left + index;
1842 0 : mi_8x8[index]->sb_type = bsize;
1843 : }
1844 : }
1845 : } else {
1846 : // Else this is a partial SB64.
1847 0 : set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
1848 : col8x8_remaining, bsize, mi_8x8);
1849 : }
1850 0 : }
1851 :
// Row/column offsets (in 8x8 mode-info units) of the sixteen 16x16 blocks
// inside a 64x64 superblock. Entries are grouped four at a time, one group
// per 32x32 quadrant, so entry [q * 4] is the top-left corner of quadrant q.
static const struct {
  int row;
  int col;
} coord_lookup[16] = {
  { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 },  // 32x32 index = 0
  { 0, 4 }, { 0, 6 }, { 2, 4 }, { 2, 6 },  // 32x32 index = 1
  { 4, 0 }, { 4, 2 }, { 6, 0 }, { 6, 2 },  // 32x32 index = 2
  { 4, 4 }, { 4, 6 }, { 6, 4 }, { 6, 6 },  // 32x32 index = 3
};
1877 :
1878 0 : static void set_source_var_based_partition(VP9_COMP *cpi,
1879 : const TileInfo *const tile,
1880 : MACROBLOCK *const x,
1881 : MODE_INFO **mi_8x8, int mi_row,
1882 : int mi_col) {
1883 0 : VP9_COMMON *const cm = &cpi->common;
1884 0 : const int mis = cm->mi_stride;
1885 0 : const int row8x8_remaining = tile->mi_row_end - mi_row;
1886 0 : const int col8x8_remaining = tile->mi_col_end - mi_col;
1887 0 : MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
1888 :
1889 0 : vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
1890 :
1891 0 : assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
1892 :
1893 : // In-image SB64
1894 0 : if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
1895 0 : (row8x8_remaining >= MI_BLOCK_SIZE)) {
1896 : int i, j;
1897 : int index;
1898 : diff d32[4];
1899 0 : const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
1900 0 : int is_larger_better = 0;
1901 0 : int use32x32 = 0;
1902 0 : unsigned int thr = cpi->source_var_thresh;
1903 :
1904 0 : memset(d32, 0, 4 * sizeof(diff));
1905 :
1906 0 : for (i = 0; i < 4; i++) {
1907 : diff *d16[4];
1908 :
1909 0 : for (j = 0; j < 4; j++) {
1910 0 : int b_mi_row = coord_lookup[i * 4 + j].row;
1911 0 : int b_mi_col = coord_lookup[i * 4 + j].col;
1912 0 : int boffset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2;
1913 :
1914 0 : d16[j] = cpi->source_diff_var + offset + boffset;
1915 :
1916 0 : index = b_mi_row * mis + b_mi_col;
1917 0 : mi_8x8[index] = mi_upper_left + index;
1918 0 : mi_8x8[index]->sb_type = BLOCK_16X16;
1919 :
1920 : // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
1921 : // size to further improve quality.
1922 : }
1923 :
1924 0 : is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) &&
1925 0 : (d16[2]->var < thr) && (d16[3]->var < thr);
1926 :
1927 : // Use 32x32 partition
1928 0 : if (is_larger_better) {
1929 0 : use32x32 += 1;
1930 :
1931 0 : for (j = 0; j < 4; j++) {
1932 0 : d32[i].sse += d16[j]->sse;
1933 0 : d32[i].sum += d16[j]->sum;
1934 : }
1935 :
1936 0 : d32[i].var =
1937 0 : (unsigned int)(d32[i].sse -
1938 0 : (unsigned int)(((int64_t)d32[i].sum * d32[i].sum) >>
1939 : 10));
1940 :
1941 0 : index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col;
1942 0 : mi_8x8[index] = mi_upper_left + index;
1943 0 : mi_8x8[index]->sb_type = BLOCK_32X32;
1944 : }
1945 : }
1946 :
1947 0 : if (use32x32 == 4) {
1948 0 : thr <<= 1;
1949 0 : is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) &&
1950 0 : (d32[2].var < thr) && (d32[3].var < thr);
1951 :
1952 : // Use 64x64 partition
1953 0 : if (is_larger_better) {
1954 0 : mi_8x8[0] = mi_upper_left;
1955 0 : mi_8x8[0]->sb_type = BLOCK_64X64;
1956 : }
1957 : }
1958 : } else { // partial in-image SB64
1959 0 : int bh = num_8x8_blocks_high_lookup[BLOCK_16X16];
1960 0 : int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16];
1961 0 : set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
1962 : col8x8_remaining, BLOCK_16X16, mi_8x8);
1963 : }
1964 0 : }
1965 :
1966 0 : static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
1967 : PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
1968 : int bsize) {
1969 0 : VP9_COMMON *const cm = &cpi->common;
1970 0 : MACROBLOCK *const x = &td->mb;
1971 0 : MACROBLOCKD *const xd = &x->e_mbd;
1972 0 : MODE_INFO *const mi = xd->mi[0];
1973 0 : struct macroblock_plane *const p = x->plane;
1974 0 : const struct segmentation *const seg = &cm->seg;
1975 0 : const int bw = num_8x8_blocks_wide_lookup[mi->sb_type];
1976 0 : const int bh = num_8x8_blocks_high_lookup[mi->sb_type];
1977 0 : const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
1978 0 : const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
1979 :
1980 0 : *(xd->mi[0]) = ctx->mic;
1981 0 : *(x->mbmi_ext) = ctx->mbmi_ext;
1982 :
1983 0 : if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) {
1984 : // For in frame complexity AQ or variance AQ, copy segment_id from
1985 : // segmentation_map.
1986 0 : if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) {
1987 0 : const uint8_t *const map =
1988 0 : seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
1989 0 : mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
1990 : } else {
1991 : // Setting segmentation map for cyclic_refresh.
1992 0 : vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,
1993 0 : ctx->rate, ctx->dist, x->skip, p);
1994 : }
1995 0 : vp9_init_plane_quantizers(cpi, x);
1996 : }
1997 :
1998 0 : if (is_inter_block(mi)) {
1999 0 : vp9_update_mv_count(td);
2000 0 : if (cm->interp_filter == SWITCHABLE) {
2001 0 : const int pred_ctx = get_pred_context_switchable_interp(xd);
2002 0 : ++td->counts->switchable_interp[pred_ctx][mi->interp_filter];
2003 : }
2004 :
2005 0 : if (mi->sb_type < BLOCK_8X8) {
2006 0 : mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
2007 0 : mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
2008 : }
2009 : }
2010 :
2011 0 : if (cm->use_prev_frame_mvs || !cm->error_resilient_mode ||
2012 0 : (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 &&
2013 0 : cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
2014 0 : MV_REF *const frame_mvs =
2015 0 : cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
2016 : int w, h;
2017 :
2018 0 : for (h = 0; h < y_mis; ++h) {
2019 0 : MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
2020 0 : for (w = 0; w < x_mis; ++w) {
2021 0 : MV_REF *const mv = frame_mv + w;
2022 0 : mv->ref_frame[0] = mi->ref_frame[0];
2023 0 : mv->ref_frame[1] = mi->ref_frame[1];
2024 0 : mv->mv[0].as_int = mi->mv[0].as_int;
2025 0 : mv->mv[1].as_int = mi->mv[1].as_int;
2026 : }
2027 : }
2028 : }
2029 :
2030 0 : x->skip = ctx->skip;
2031 0 : x->skip_txfm[0] = mi->segment_id ? 0 : ctx->skip_txfm[0];
2032 0 : }
2033 :
2034 0 : static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
2035 : const TileInfo *const tile, TOKENEXTRA **tp, int mi_row,
2036 : int mi_col, int output_enabled, BLOCK_SIZE bsize,
2037 : PICK_MODE_CONTEXT *ctx) {
2038 0 : MACROBLOCK *const x = &td->mb;
2039 0 : set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
2040 0 : update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);
2041 :
2042 0 : encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
2043 0 : update_stats(&cpi->common, td);
2044 :
2045 0 : (*tp)->token = EOSB_TOKEN;
2046 0 : (*tp)++;
2047 0 : }
2048 :
2049 0 : static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
2050 : const TileInfo *const tile, TOKENEXTRA **tp,
2051 : int mi_row, int mi_col, int output_enabled,
2052 : BLOCK_SIZE bsize, PC_TREE *pc_tree) {
2053 0 : VP9_COMMON *const cm = &cpi->common;
2054 0 : MACROBLOCK *const x = &td->mb;
2055 0 : MACROBLOCKD *const xd = &x->e_mbd;
2056 :
2057 0 : const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
2058 : int ctx;
2059 : PARTITION_TYPE partition;
2060 : BLOCK_SIZE subsize;
2061 :
2062 0 : if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
2063 :
2064 0 : if (bsize >= BLOCK_8X8) {
2065 0 : const int idx_str = xd->mi_stride * mi_row + mi_col;
2066 0 : MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
2067 0 : ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
2068 0 : subsize = mi_8x8[0]->sb_type;
2069 : } else {
2070 0 : ctx = 0;
2071 0 : subsize = BLOCK_4X4;
2072 : }
2073 :
2074 0 : partition = partition_lookup[bsl][subsize];
2075 0 : if (output_enabled && bsize != BLOCK_4X4)
2076 0 : td->counts->partition[ctx][partition]++;
2077 :
2078 0 : switch (partition) {
2079 : case PARTITION_NONE:
2080 0 : encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
2081 : &pc_tree->none);
2082 0 : break;
2083 : case PARTITION_VERT:
2084 0 : encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
2085 : &pc_tree->vertical[0]);
2086 0 : if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
2087 0 : encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
2088 : subsize, &pc_tree->vertical[1]);
2089 : }
2090 0 : break;
2091 : case PARTITION_HORZ:
2092 0 : encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
2093 : &pc_tree->horizontal[0]);
2094 0 : if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
2095 0 : encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
2096 : subsize, &pc_tree->horizontal[1]);
2097 : }
2098 0 : break;
2099 : case PARTITION_SPLIT:
2100 0 : subsize = get_subsize(bsize, PARTITION_SPLIT);
2101 0 : encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
2102 0 : pc_tree->split[0]);
2103 0 : encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
2104 0 : subsize, pc_tree->split[1]);
2105 0 : encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
2106 0 : subsize, pc_tree->split[2]);
2107 0 : encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs,
2108 0 : output_enabled, subsize, pc_tree->split[3]);
2109 0 : break;
2110 0 : default: assert(0 && "Invalid partition type."); break;
2111 : }
2112 :
2113 0 : if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
2114 0 : update_partition_context(xd, mi_row, mi_col, subsize, bsize);
2115 : }
2116 :
2117 0 : static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
2118 : TileDataEnc *tile_data, MODE_INFO **mi_8x8,
2119 : TOKENEXTRA **tp, int mi_row, int mi_col,
2120 : BLOCK_SIZE bsize, int *rate, int64_t *dist,
2121 : int do_recon, PC_TREE *pc_tree) {
2122 0 : VP9_COMMON *const cm = &cpi->common;
2123 0 : TileInfo *const tile_info = &tile_data->tile_info;
2124 0 : MACROBLOCK *const x = &td->mb;
2125 0 : MACROBLOCKD *const xd = &x->e_mbd;
2126 0 : const int mis = cm->mi_stride;
2127 0 : const int bsl = b_width_log2_lookup[bsize];
2128 0 : const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
2129 0 : const int bss = (1 << bsl) / 4;
2130 : int i, pl;
2131 0 : PARTITION_TYPE partition = PARTITION_NONE;
2132 : BLOCK_SIZE subsize;
2133 : ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
2134 : PARTITION_CONTEXT sl[8], sa[8];
2135 : RD_COST last_part_rdc, none_rdc, chosen_rdc;
2136 0 : BLOCK_SIZE sub_subsize = BLOCK_4X4;
2137 0 : int splits_below = 0;
2138 0 : BLOCK_SIZE bs_type = mi_8x8[0]->sb_type;
2139 0 : int do_partition_search = 1;
2140 0 : PICK_MODE_CONTEXT *ctx = &pc_tree->none;
2141 :
2142 0 : if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
2143 :
2144 0 : assert(num_4x4_blocks_wide_lookup[bsize] ==
2145 : num_4x4_blocks_high_lookup[bsize]);
2146 :
2147 0 : vp9_rd_cost_reset(&last_part_rdc);
2148 0 : vp9_rd_cost_reset(&none_rdc);
2149 0 : vp9_rd_cost_reset(&chosen_rdc);
2150 :
2151 0 : partition = partition_lookup[bsl][bs_type];
2152 0 : subsize = get_subsize(bsize, partition);
2153 :
2154 0 : pc_tree->partitioning = partition;
2155 0 : save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2156 :
2157 0 : if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) {
2158 0 : set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2159 0 : x->mb_energy = vp9_block_energy(cpi, x, bsize);
2160 : }
2161 :
2162 0 : if (do_partition_search &&
2163 0 : cpi->sf.partition_search_type == SEARCH_PARTITION &&
2164 0 : cpi->sf.adjust_partitioning_from_last_frame) {
2165 : // Check if any of the sub blocks are further split.
2166 0 : if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
2167 0 : sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
2168 0 : splits_below = 1;
2169 0 : for (i = 0; i < 4; i++) {
2170 0 : int jj = i >> 1, ii = i & 0x01;
2171 0 : MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss];
2172 0 : if (this_mi && this_mi->sb_type >= sub_subsize) {
2173 0 : splits_below = 0;
2174 : }
2175 : }
2176 : }
2177 :
2178 : // If partition is not none try none unless each of the 4 splits are split
2179 : // even further..
2180 0 : if (partition != PARTITION_NONE && !splits_below &&
2181 0 : mi_row + (mi_step >> 1) < cm->mi_rows &&
2182 0 : mi_col + (mi_step >> 1) < cm->mi_cols) {
2183 0 : pc_tree->partitioning = PARTITION_NONE;
2184 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx,
2185 : INT64_MAX);
2186 :
2187 0 : pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2188 :
2189 0 : if (none_rdc.rate < INT_MAX) {
2190 0 : none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
2191 0 : none_rdc.rdcost =
2192 0 : RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist);
2193 : }
2194 :
2195 0 : restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2196 0 : mi_8x8[0]->sb_type = bs_type;
2197 0 : pc_tree->partitioning = partition;
2198 : }
2199 : }
2200 :
2201 0 : switch (partition) {
2202 : case PARTITION_NONE:
2203 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize,
2204 : ctx, INT64_MAX);
2205 0 : break;
2206 : case PARTITION_HORZ:
2207 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2208 : subsize, &pc_tree->horizontal[0], INT64_MAX);
2209 0 : if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
2210 0 : mi_row + (mi_step >> 1) < cm->mi_rows) {
2211 : RD_COST tmp_rdc;
2212 0 : PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
2213 0 : vp9_rd_cost_init(&tmp_rdc);
2214 0 : update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
2215 0 : encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
2216 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
2217 : &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX);
2218 0 : if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2219 0 : vp9_rd_cost_reset(&last_part_rdc);
2220 0 : break;
2221 : }
2222 0 : last_part_rdc.rate += tmp_rdc.rate;
2223 0 : last_part_rdc.dist += tmp_rdc.dist;
2224 0 : last_part_rdc.rdcost += tmp_rdc.rdcost;
2225 : }
2226 0 : break;
2227 : case PARTITION_VERT:
2228 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2229 : subsize, &pc_tree->vertical[0], INT64_MAX);
2230 0 : if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
2231 0 : mi_col + (mi_step >> 1) < cm->mi_cols) {
2232 : RD_COST tmp_rdc;
2233 0 : PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0];
2234 0 : vp9_rd_cost_init(&tmp_rdc);
2235 0 : update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
2236 0 : encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
2237 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1),
2238 : &tmp_rdc, subsize,
2239 0 : &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
2240 0 : if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2241 0 : vp9_rd_cost_reset(&last_part_rdc);
2242 0 : break;
2243 : }
2244 0 : last_part_rdc.rate += tmp_rdc.rate;
2245 0 : last_part_rdc.dist += tmp_rdc.dist;
2246 0 : last_part_rdc.rdcost += tmp_rdc.rdcost;
2247 : }
2248 0 : break;
2249 : case PARTITION_SPLIT:
2250 0 : if (bsize == BLOCK_8X8) {
2251 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
2252 : subsize, pc_tree->leaf_split[0], INT64_MAX);
2253 0 : break;
2254 : }
2255 0 : last_part_rdc.rate = 0;
2256 0 : last_part_rdc.dist = 0;
2257 0 : last_part_rdc.rdcost = 0;
2258 0 : for (i = 0; i < 4; i++) {
2259 0 : int x_idx = (i & 1) * (mi_step >> 1);
2260 0 : int y_idx = (i >> 1) * (mi_step >> 1);
2261 0 : int jj = i >> 1, ii = i & 0x01;
2262 : RD_COST tmp_rdc;
2263 0 : if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
2264 0 : continue;
2265 :
2266 0 : vp9_rd_cost_init(&tmp_rdc);
2267 0 : rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss,
2268 : tp, mi_row + y_idx, mi_col + x_idx, subsize,
2269 : &tmp_rdc.rate, &tmp_rdc.dist, i != 3,
2270 0 : pc_tree->split[i]);
2271 0 : if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2272 0 : vp9_rd_cost_reset(&last_part_rdc);
2273 0 : break;
2274 : }
2275 0 : last_part_rdc.rate += tmp_rdc.rate;
2276 0 : last_part_rdc.dist += tmp_rdc.dist;
2277 : }
2278 0 : break;
2279 0 : default: assert(0); break;
2280 : }
2281 :
2282 0 : pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2283 0 : if (last_part_rdc.rate < INT_MAX) {
2284 0 : last_part_rdc.rate += cpi->partition_cost[pl][partition];
2285 0 : last_part_rdc.rdcost =
2286 0 : RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist);
2287 : }
2288 :
2289 0 : if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
2290 0 : cpi->sf.partition_search_type == SEARCH_PARTITION &&
2291 0 : partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
2292 0 : (mi_row + mi_step < cm->mi_rows ||
2293 0 : mi_row + (mi_step >> 1) == cm->mi_rows) &&
2294 0 : (mi_col + mi_step < cm->mi_cols ||
2295 0 : mi_col + (mi_step >> 1) == cm->mi_cols)) {
2296 0 : BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
2297 0 : chosen_rdc.rate = 0;
2298 0 : chosen_rdc.dist = 0;
2299 0 : restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2300 0 : pc_tree->partitioning = PARTITION_SPLIT;
2301 :
2302 : // Split partition.
2303 0 : for (i = 0; i < 4; i++) {
2304 0 : int x_idx = (i & 1) * (mi_step >> 1);
2305 0 : int y_idx = (i >> 1) * (mi_step >> 1);
2306 : RD_COST tmp_rdc;
2307 : ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
2308 : PARTITION_CONTEXT sl[8], sa[8];
2309 :
2310 0 : if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
2311 0 : continue;
2312 :
2313 0 : save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2314 0 : pc_tree->split[i]->partitioning = PARTITION_NONE;
2315 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx,
2316 0 : &tmp_rdc, split_subsize, &pc_tree->split[i]->none,
2317 : INT64_MAX);
2318 :
2319 0 : restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2320 :
2321 0 : if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
2322 0 : vp9_rd_cost_reset(&chosen_rdc);
2323 0 : break;
2324 : }
2325 :
2326 0 : chosen_rdc.rate += tmp_rdc.rate;
2327 0 : chosen_rdc.dist += tmp_rdc.dist;
2328 :
2329 0 : if (i != 3)
2330 0 : encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0,
2331 0 : split_subsize, pc_tree->split[i]);
2332 :
2333 0 : pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
2334 : split_subsize);
2335 0 : chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
2336 : }
2337 0 : pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2338 0 : if (chosen_rdc.rate < INT_MAX) {
2339 0 : chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
2340 0 : chosen_rdc.rdcost =
2341 0 : RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist);
2342 : }
2343 : }
2344 :
2345 : // If last_part is better set the partitioning to that.
2346 0 : if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
2347 0 : mi_8x8[0]->sb_type = bsize;
2348 0 : if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
2349 0 : chosen_rdc = last_part_rdc;
2350 : }
2351 : // If none was better set the partitioning to that.
2352 0 : if (none_rdc.rdcost < chosen_rdc.rdcost) {
2353 0 : if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
2354 0 : chosen_rdc = none_rdc;
2355 : }
2356 :
2357 0 : restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2358 :
2359 : // We must have chosen a partitioning and encoding or we'll fail later on.
2360 : // No other opportunities for success.
2361 0 : if (bsize == BLOCK_64X64)
2362 0 : assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
2363 :
2364 0 : if (do_recon) {
2365 0 : int output_enabled = (bsize == BLOCK_64X64);
2366 0 : encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
2367 : pc_tree);
2368 : }
2369 :
2370 0 : *rate = chosen_rdc.rate;
2371 0 : *dist = chosen_rdc.dist;
2372 : }
2373 :
2374 : static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
2375 : BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
2376 : BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16,
2377 : BLOCK_16X16, BLOCK_16X16, BLOCK_16X16
2378 : };
2379 :
2380 : static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
2381 : BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
2382 : BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
2383 : BLOCK_64X64, BLOCK_64X64, BLOCK_64X64
2384 : };
2385 :
2386 : // Look at all the mode_info entries for blocks that are part of this
2387 : // partition and find the min and max values for sb_type.
2388 : // At the moment this is designed to work on a 64x64 SB but could be
2389 : // adjusted to use a size parameter.
2390 : //
2391 : // The min and max are assumed to have been initialized prior to calling this
2392 : // function so repeat calls can accumulate a min and max of more than one sb64.
2393 0 : static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
2394 : BLOCK_SIZE *min_block_size,
2395 : BLOCK_SIZE *max_block_size,
2396 : int bs_hist[BLOCK_SIZES]) {
2397 0 : int sb_width_in_blocks = MI_BLOCK_SIZE;
2398 0 : int sb_height_in_blocks = MI_BLOCK_SIZE;
2399 : int i, j;
2400 0 : int index = 0;
2401 :
2402 : // Check the sb_type for each block that belongs to this region.
2403 0 : for (i = 0; i < sb_height_in_blocks; ++i) {
2404 0 : for (j = 0; j < sb_width_in_blocks; ++j) {
2405 0 : MODE_INFO *mi = mi_8x8[index + j];
2406 0 : BLOCK_SIZE sb_type = mi ? mi->sb_type : 0;
2407 0 : bs_hist[sb_type]++;
2408 0 : *min_block_size = VPXMIN(*min_block_size, sb_type);
2409 0 : *max_block_size = VPXMAX(*max_block_size, sb_type);
2410 : }
2411 0 : index += xd->mi_stride;
2412 : }
2413 0 : }
2414 :
// Maps each block size to a square block size that is less than or equal to
// it. Indexed by BLOCK_SIZE.
static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
  BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8,
  BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32,
  BLOCK_32X32, BLOCK_32X32, BLOCK_64X64
};
2421 :
2422 : // Look at neighboring blocks and set a min and max partition size based on
2423 : // what they chose.
2424 0 : static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
2425 : MACROBLOCKD *const xd, int mi_row,
2426 : int mi_col, BLOCK_SIZE *min_block_size,
2427 : BLOCK_SIZE *max_block_size) {
2428 0 : VP9_COMMON *const cm = &cpi->common;
2429 0 : MODE_INFO **mi = xd->mi;
2430 0 : const int left_in_image = !!xd->left_mi;
2431 0 : const int above_in_image = !!xd->above_mi;
2432 0 : const int row8x8_remaining = tile->mi_row_end - mi_row;
2433 0 : const int col8x8_remaining = tile->mi_col_end - mi_col;
2434 : int bh, bw;
2435 0 : BLOCK_SIZE min_size = BLOCK_4X4;
2436 0 : BLOCK_SIZE max_size = BLOCK_64X64;
2437 0 : int bs_hist[BLOCK_SIZES] = { 0 };
2438 :
2439 : // Trap case where we do not have a prediction.
2440 0 : if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) {
2441 : // Default "min to max" and "max to min"
2442 0 : min_size = BLOCK_64X64;
2443 0 : max_size = BLOCK_4X4;
2444 :
2445 : // NOTE: each call to get_sb_partition_size_range() uses the previous
2446 : // passed in values for min and max as a starting point.
2447 : // Find the min and max partition used in previous frame at this location
2448 0 : if (cm->frame_type != KEY_FRAME) {
2449 0 : MODE_INFO **prev_mi =
2450 0 : &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
2451 0 : get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist);
2452 : }
2453 : // Find the min and max partition sizes used in the left SB64
2454 0 : if (left_in_image) {
2455 0 : MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
2456 0 : get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size,
2457 : bs_hist);
2458 : }
2459 : // Find the min and max partition sizes used in the above SB64.
2460 0 : if (above_in_image) {
2461 0 : MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
2462 0 : get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size,
2463 : bs_hist);
2464 : }
2465 :
2466 : // Adjust observed min and max for "relaxed" auto partition case.
2467 0 : if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
2468 0 : min_size = min_partition_size[min_size];
2469 0 : max_size = max_partition_size[max_size];
2470 : }
2471 : }
2472 :
2473 : // Check border cases where max and min from neighbors may not be legal.
2474 0 : max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
2475 : &bh, &bw);
2476 : // Test for blocks at the edge of the active image.
2477 : // This may be the actual edge of the image or where there are formatting
2478 : // bars.
2479 0 : if (vp9_active_edge_sb(cpi, mi_row, mi_col)) {
2480 0 : min_size = BLOCK_4X4;
2481 : } else {
2482 0 : min_size =
2483 0 : VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size));
2484 : }
2485 :
2486 : // When use_square_partition_only is true, make sure at least one square
2487 : // partition is allowed by selecting the next smaller square size as
2488 : // *min_block_size.
2489 0 : if (cpi->sf.use_square_partition_only &&
2490 0 : next_square_size[max_size] < min_size) {
2491 0 : min_size = next_square_size[max_size];
2492 : }
2493 :
2494 0 : *min_block_size = min_size;
2495 0 : *max_block_size = max_size;
2496 0 : }
2497 :
2498 : // TODO(jingning) refactor functions setting partition search range
2499 0 : static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row,
2500 : int mi_col, BLOCK_SIZE bsize,
2501 : BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
2502 0 : int mi_width = num_8x8_blocks_wide_lookup[bsize];
2503 0 : int mi_height = num_8x8_blocks_high_lookup[bsize];
2504 : int idx, idy;
2505 :
2506 : MODE_INFO *mi;
2507 0 : const int idx_str = cm->mi_stride * mi_row + mi_col;
2508 0 : MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
2509 : BLOCK_SIZE bs, min_size, max_size;
2510 :
2511 0 : min_size = BLOCK_64X64;
2512 0 : max_size = BLOCK_4X4;
2513 :
2514 0 : if (prev_mi) {
2515 0 : for (idy = 0; idy < mi_height; ++idy) {
2516 0 : for (idx = 0; idx < mi_width; ++idx) {
2517 0 : mi = prev_mi[idy * cm->mi_stride + idx];
2518 0 : bs = mi ? mi->sb_type : bsize;
2519 0 : min_size = VPXMIN(min_size, bs);
2520 0 : max_size = VPXMAX(max_size, bs);
2521 : }
2522 : }
2523 : }
2524 :
2525 0 : if (xd->left_mi) {
2526 0 : for (idy = 0; idy < mi_height; ++idy) {
2527 0 : mi = xd->mi[idy * cm->mi_stride - 1];
2528 0 : bs = mi ? mi->sb_type : bsize;
2529 0 : min_size = VPXMIN(min_size, bs);
2530 0 : max_size = VPXMAX(max_size, bs);
2531 : }
2532 : }
2533 :
2534 0 : if (xd->above_mi) {
2535 0 : for (idx = 0; idx < mi_width; ++idx) {
2536 0 : mi = xd->mi[idx - cm->mi_stride];
2537 0 : bs = mi ? mi->sb_type : bsize;
2538 0 : min_size = VPXMIN(min_size, bs);
2539 0 : max_size = VPXMAX(max_size, bs);
2540 : }
2541 : }
2542 :
2543 0 : if (min_size == max_size) {
2544 0 : min_size = min_partition_size[min_size];
2545 0 : max_size = max_partition_size[max_size];
2546 : }
2547 :
2548 0 : *min_bs = min_size;
2549 0 : *max_bs = max_size;
2550 0 : }
2551 :
// Saves the macroblock's current predicted motion vectors (x->pred_mv) into
// the pick-mode context so they can be reloaded later with load_pred_mv().
// The copy size is taken from x->pred_mv.
static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
}
2555 :
// Restores the macroblock's predicted motion vectors (x->pred_mv) from the
// copy held in the pick-mode context; the inverse of store_pred_mv().
// The copy size is taken from x->pred_mv.
static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
  memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
}
2559 :
2560 : #if CONFIG_FP_MB_STATS
// Lookup tables used by the first-pass macroblock statistics heuristics in
// rd_pick_partition(). All are indexed by BLOCK_SIZE.

// Width and height of a block in 16x16 units; used to bound the range of
// first-pass 16x16 statistics entries covered by a block.
const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
                                                        1, 2, 2, 2, 4, 4 };
const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
                                                        2, 1, 2, 4, 2, 4 };
// The "stop splitting" check only runs when cm->base_qindex is above this
// per-size threshold.
const int qindex_skip_threshold_lookup[BLOCK_SIZES] = {
  0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120
};
// The "skip PARTITION_NONE and split directly" check only runs when
// cm->base_qindex is below this per-size threshold.
const int qindex_split_threshold_lookup[BLOCK_SIZES] = {
  0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120
};
// PARTITION_NONE is disallowed when the accumulated motion inconsistency of
// the block exceeds this per-size threshold.
const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6
};
2574 :
// Coarse motion direction decoded from a first-pass statistics byte by
// get_motion_direction_fp(). The numeric values matter:
// get_motion_inconsistency() compares directions by the absolute difference
// of these values (LEFT/RIGHT and UP/DOWN each differ by 2).
// MV_INVALID is not produced by get_motion_direction_fp().
typedef enum {
  MV_ZERO = 0,
  MV_LEFT = 1,
  MV_UP = 2,
  MV_RIGHT = 3,
  MV_DOWN = 4,
  MV_INVALID
} MOTION_DIRECTION;
2583 :
2584 : static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
2585 : if (fp_byte & FPMB_MOTION_ZERO_MASK) {
2586 : return MV_ZERO;
2587 : } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
2588 : return MV_LEFT;
2589 : } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
2590 : return MV_RIGHT;
2591 : } else if (fp_byte & FPMB_MOTION_UP_MASK) {
2592 : return MV_UP;
2593 : } else {
2594 : return MV_DOWN;
2595 : }
2596 : }
2597 :
2598 : static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
2599 : MOTION_DIRECTION that_mv) {
2600 : if (this_mv == that_mv) {
2601 : return 0;
2602 : } else {
2603 : return abs(this_mv - that_mv) == 2 ? 2 : 1;
2604 : }
2605 : }
2606 : #endif
2607 :
2608 : // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
2609 : // unlikely to be selected depending on previous rate-distortion optimization
2610 : // results, for encoding speed-up.
2611 0 : static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
2612 : TileDataEnc *tile_data, TOKENEXTRA **tp,
2613 : int mi_row, int mi_col, BLOCK_SIZE bsize,
2614 : RD_COST *rd_cost, int64_t best_rd,
2615 : PC_TREE *pc_tree) {
2616 0 : VP9_COMMON *const cm = &cpi->common;
2617 0 : TileInfo *const tile_info = &tile_data->tile_info;
2618 0 : MACROBLOCK *const x = &td->mb;
2619 0 : MACROBLOCKD *const xd = &x->e_mbd;
2620 0 : const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
2621 : ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
2622 : PARTITION_CONTEXT sl[8], sa[8];
2623 0 : TOKENEXTRA *tp_orig = *tp;
2624 0 : PICK_MODE_CONTEXT *ctx = &pc_tree->none;
2625 : int i;
2626 0 : const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
2627 : BLOCK_SIZE subsize;
2628 : RD_COST this_rdc, sum_rdc, best_rdc;
2629 0 : int do_split = bsize >= BLOCK_8X8;
2630 0 : int do_rect = 1;
2631 : INTERP_FILTER pred_interp_filter;
2632 :
2633 : // Override skipping rectangular partition operations for edge blocks
2634 0 : const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
2635 0 : const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
2636 0 : const int xss = x->e_mbd.plane[1].subsampling_x;
2637 0 : const int yss = x->e_mbd.plane[1].subsampling_y;
2638 :
2639 0 : BLOCK_SIZE min_size = x->min_partition_size;
2640 0 : BLOCK_SIZE max_size = x->max_partition_size;
2641 :
2642 : #if CONFIG_FP_MB_STATS
2643 : unsigned int src_diff_var = UINT_MAX;
2644 : int none_complexity = 0;
2645 : #endif
2646 :
2647 0 : int partition_none_allowed = !force_horz_split && !force_vert_split;
2648 0 : int partition_horz_allowed =
2649 0 : !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
2650 0 : int partition_vert_allowed =
2651 0 : !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
2652 :
2653 0 : int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
2654 0 : int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
2655 :
2656 : (void)*tp_orig;
2657 :
2658 0 : assert(num_8x8_blocks_wide_lookup[bsize] ==
2659 : num_8x8_blocks_high_lookup[bsize]);
2660 :
2661 : // Adjust dist breakout threshold according to the partition size.
2662 0 : dist_breakout_thr >>=
2663 0 : 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
2664 0 : rate_breakout_thr *= num_pels_log2_lookup[bsize];
2665 :
2666 0 : vp9_rd_cost_init(&this_rdc);
2667 0 : vp9_rd_cost_init(&sum_rdc);
2668 0 : vp9_rd_cost_reset(&best_rdc);
2669 0 : best_rdc.rdcost = best_rd;
2670 :
2671 0 : set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2672 :
2673 0 : if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ &&
2674 0 : cpi->oxcf.aq_mode != LOOKAHEAD_AQ)
2675 0 : x->mb_energy = vp9_block_energy(cpi, x, bsize);
2676 :
2677 0 : if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
2678 0 : int cb_partition_search_ctrl =
2679 0 : ((pc_tree->index == 0 || pc_tree->index == 3) +
2680 0 : get_chessboard_index(cm->current_video_frame)) &
2681 : 0x1;
2682 :
2683 0 : if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
2684 0 : set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
2685 : }
2686 :
2687 : // Determine partition types in search according to the speed features.
2688 : // The threshold set here has to be of square block size.
2689 0 : if (cpi->sf.auto_min_max_partition_size) {
2690 0 : partition_none_allowed &= (bsize <= max_size && bsize >= min_size);
2691 0 : partition_horz_allowed &=
2692 0 : ((bsize <= max_size && bsize > min_size) || force_horz_split);
2693 0 : partition_vert_allowed &=
2694 0 : ((bsize <= max_size && bsize > min_size) || force_vert_split);
2695 0 : do_split &= bsize > min_size;
2696 : }
2697 :
2698 0 : if (cpi->sf.use_square_partition_only &&
2699 0 : bsize > cpi->sf.use_square_only_threshold) {
2700 0 : if (cpi->use_svc) {
2701 0 : if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
2702 0 : partition_horz_allowed &= force_horz_split;
2703 0 : if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
2704 0 : partition_vert_allowed &= force_vert_split;
2705 : } else {
2706 0 : partition_horz_allowed &= force_horz_split;
2707 0 : partition_vert_allowed &= force_vert_split;
2708 : }
2709 : }
2710 :
2711 0 : save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2712 :
2713 : #if CONFIG_FP_MB_STATS
2714 : if (cpi->use_fp_mb_stats) {
2715 : set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2716 : src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
2717 : mi_col, bsize);
2718 : }
2719 : #endif
2720 :
2721 : #if CONFIG_FP_MB_STATS
2722 : // Decide whether we shall split directly and skip searching NONE by using
2723 : // the first pass block statistics
2724 : if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
2725 : partition_none_allowed && src_diff_var > 4 &&
2726 : cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
2727 : int mb_row = mi_row >> 1;
2728 : int mb_col = mi_col >> 1;
2729 : int mb_row_end =
2730 : VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
2731 : int mb_col_end =
2732 : VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
2733 : int r, c;
2734 :
2735 : // compute a complexity measure, basically measure inconsistency of motion
2736 : // vectors obtained from the first pass in the current block
2737 : for (r = mb_row; r < mb_row_end; r++) {
2738 : for (c = mb_col; c < mb_col_end; c++) {
2739 : const int mb_index = r * cm->mb_cols + c;
2740 :
2741 : MOTION_DIRECTION this_mv;
2742 : MOTION_DIRECTION right_mv;
2743 : MOTION_DIRECTION bottom_mv;
2744 :
2745 : this_mv =
2746 : get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
2747 :
2748 : // to its right
2749 : if (c != mb_col_end - 1) {
2750 : right_mv = get_motion_direction_fp(
2751 : cpi->twopass.this_frame_mb_stats[mb_index + 1]);
2752 : none_complexity += get_motion_inconsistency(this_mv, right_mv);
2753 : }
2754 :
2755 : // to its bottom
2756 : if (r != mb_row_end - 1) {
2757 : bottom_mv = get_motion_direction_fp(
2758 : cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
2759 : none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
2760 : }
2761 :
2762 : // do not count its left and top neighbors to avoid double counting
2763 : }
2764 : }
2765 :
2766 : if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
2767 : partition_none_allowed = 0;
2768 : }
2769 : }
2770 : #endif
2771 :
2772 : // PARTITION_NONE
2773 0 : if (partition_none_allowed) {
2774 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
2775 : best_rdc.rdcost);
2776 0 : if (this_rdc.rate != INT_MAX) {
2777 0 : if (bsize >= BLOCK_8X8) {
2778 0 : this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
2779 0 : this_rdc.rdcost =
2780 0 : RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
2781 : }
2782 :
2783 0 : if (this_rdc.rdcost < best_rdc.rdcost) {
2784 0 : best_rdc = this_rdc;
2785 0 : if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
2786 :
2787 : // If all y, u, v transform blocks in this partition are skippable, and
2788 : // the dist & rate are within the thresholds, the partition search is
2789 : // terminated for current branch of the partition search tree.
2790 0 : if (!x->e_mbd.lossless && ctx->skippable &&
2791 0 : ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
2792 0 : (best_rdc.dist < dist_breakout_thr &&
2793 0 : best_rdc.rate < rate_breakout_thr))) {
2794 0 : do_split = 0;
2795 0 : do_rect = 0;
2796 : }
2797 :
2798 : #if CONFIG_FP_MB_STATS
2799 : // Check if every 16x16 first pass block statistics has zero
2800 : // motion and the corresponding first pass residue is small enough.
2801 : // If that is the case, check the difference variance between the
2802 : // current frame and the last frame. If the variance is small enough,
2803 : // stop further splitting in RD optimization
2804 : if (cpi->use_fp_mb_stats && do_split != 0 &&
2805 : cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
2806 : int mb_row = mi_row >> 1;
2807 : int mb_col = mi_col >> 1;
2808 : int mb_row_end =
2809 : VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
2810 : int mb_col_end =
2811 : VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
2812 : int r, c;
2813 :
2814 : int skip = 1;
2815 : for (r = mb_row; r < mb_row_end; r++) {
2816 : for (c = mb_col; c < mb_col_end; c++) {
2817 : const int mb_index = r * cm->mb_cols + c;
2818 : if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
2819 : FPMB_MOTION_ZERO_MASK) ||
2820 : !(cpi->twopass.this_frame_mb_stats[mb_index] &
2821 : FPMB_ERROR_SMALL_MASK)) {
2822 : skip = 0;
2823 : break;
2824 : }
2825 : }
2826 : if (skip == 0) {
2827 : break;
2828 : }
2829 : }
2830 :
2831 : if (skip) {
2832 : if (src_diff_var == UINT_MAX) {
2833 : set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
2834 : src_diff_var = get_sby_perpixel_diff_variance(
2835 : cpi, &x->plane[0].src, mi_row, mi_col, bsize);
2836 : }
2837 : if (src_diff_var < 8) {
2838 : do_split = 0;
2839 : do_rect = 0;
2840 : }
2841 : }
2842 : }
2843 : #endif
2844 : }
2845 : }
2846 0 : restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2847 : }
2848 :
2849 : // store estimated motion vector
2850 0 : if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx);
2851 :
2852 : // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an
2853 : // intra block and used for context purposes.
2854 0 : if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) {
2855 0 : pred_interp_filter = EIGHTTAP;
2856 : } else {
2857 0 : pred_interp_filter = ctx->mic.interp_filter;
2858 : }
2859 :
2860 : // PARTITION_SPLIT
2861 : // TODO(jingning): use the motion vectors given by the above search as
2862 : // the starting point of motion search in the following partition type check.
2863 0 : if (do_split) {
2864 0 : subsize = get_subsize(bsize, PARTITION_SPLIT);
2865 0 : if (bsize == BLOCK_8X8) {
2866 0 : i = 4;
2867 0 : if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
2868 0 : pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;
2869 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
2870 : pc_tree->leaf_split[0], best_rdc.rdcost);
2871 :
2872 0 : if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX;
2873 : } else {
2874 0 : for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
2875 0 : const int x_idx = (i & 1) * mi_step;
2876 0 : const int y_idx = (i >> 1) * mi_step;
2877 :
2878 0 : if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
2879 0 : continue;
2880 :
2881 0 : if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
2882 :
2883 0 : pc_tree->split[i]->index = i;
2884 0 : rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
2885 : mi_col + x_idx, subsize, &this_rdc,
2886 0 : best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
2887 :
2888 0 : if (this_rdc.rate == INT_MAX) {
2889 0 : sum_rdc.rdcost = INT64_MAX;
2890 0 : break;
2891 : } else {
2892 0 : sum_rdc.rate += this_rdc.rate;
2893 0 : sum_rdc.dist += this_rdc.dist;
2894 0 : sum_rdc.rdcost += this_rdc.rdcost;
2895 : }
2896 : }
2897 : }
2898 :
2899 0 : if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
2900 0 : sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
2901 0 : sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
2902 :
2903 0 : if (sum_rdc.rdcost < best_rdc.rdcost) {
2904 0 : best_rdc = sum_rdc;
2905 0 : pc_tree->partitioning = PARTITION_SPLIT;
2906 :
2907 : // Rate and distortion based partition search termination clause.
2908 0 : if (!x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
2909 0 : (best_rdc.dist < dist_breakout_thr &&
2910 0 : best_rdc.rate < rate_breakout_thr))) {
2911 0 : do_rect = 0;
2912 : }
2913 : }
2914 : } else {
2915 : // skip rectangular partition test when larger block size
2916 : // gives better rd cost
2917 0 : if ((cpi->sf.less_rectangular_check) &&
2918 0 : ((bsize > cpi->sf.use_square_only_threshold) ||
2919 0 : (best_rdc.dist < dist_breakout_thr)))
2920 0 : do_rect &= !partition_none_allowed;
2921 : }
2922 0 : restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2923 : }
2924 :
2925 : // PARTITION_HORZ
2926 0 : if (partition_horz_allowed &&
2927 0 : (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
2928 0 : subsize = get_subsize(bsize, PARTITION_HORZ);
2929 0 : if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
2930 0 : if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
2931 : partition_none_allowed)
2932 0 : pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter;
2933 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
2934 : &pc_tree->horizontal[0], best_rdc.rdcost);
2935 :
2936 0 : if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
2937 : bsize > BLOCK_8X8) {
2938 0 : PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
2939 0 : update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
2940 0 : encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
2941 :
2942 0 : if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
2943 0 : if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
2944 : partition_none_allowed)
2945 0 : pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter;
2946 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
2947 : subsize, &pc_tree->horizontal[1],
2948 0 : best_rdc.rdcost - sum_rdc.rdcost);
2949 0 : if (this_rdc.rate == INT_MAX) {
2950 0 : sum_rdc.rdcost = INT64_MAX;
2951 : } else {
2952 0 : sum_rdc.rate += this_rdc.rate;
2953 0 : sum_rdc.dist += this_rdc.dist;
2954 0 : sum_rdc.rdcost += this_rdc.rdcost;
2955 : }
2956 : }
2957 :
2958 0 : if (sum_rdc.rdcost < best_rdc.rdcost) {
2959 0 : sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
2960 0 : sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
2961 0 : if (sum_rdc.rdcost < best_rdc.rdcost) {
2962 0 : best_rdc = sum_rdc;
2963 0 : pc_tree->partitioning = PARTITION_HORZ;
2964 :
2965 0 : if ((cpi->sf.less_rectangular_check) &&
2966 0 : (bsize > cpi->sf.use_square_only_threshold))
2967 0 : do_rect = 0;
2968 : }
2969 : }
2970 0 : restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
2971 : }
2972 :
2973 : // PARTITION_VERT
2974 0 : if (partition_vert_allowed &&
2975 0 : (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
2976 0 : subsize = get_subsize(bsize, PARTITION_VERT);
2977 :
2978 0 : if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
2979 0 : if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
2980 : partition_none_allowed)
2981 0 : pc_tree->vertical[0].pred_interp_filter = pred_interp_filter;
2982 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
2983 : &pc_tree->vertical[0], best_rdc.rdcost);
2984 0 : if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
2985 : bsize > BLOCK_8X8) {
2986 0 : update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
2987 0 : encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
2988 : &pc_tree->vertical[0]);
2989 :
2990 0 : if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
2991 0 : if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
2992 : partition_none_allowed)
2993 0 : pc_tree->vertical[1].pred_interp_filter = pred_interp_filter;
2994 0 : rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
2995 : subsize, &pc_tree->vertical[1],
2996 0 : best_rdc.rdcost - sum_rdc.rdcost);
2997 0 : if (this_rdc.rate == INT_MAX) {
2998 0 : sum_rdc.rdcost = INT64_MAX;
2999 : } else {
3000 0 : sum_rdc.rate += this_rdc.rate;
3001 0 : sum_rdc.dist += this_rdc.dist;
3002 0 : sum_rdc.rdcost += this_rdc.rdcost;
3003 : }
3004 : }
3005 :
3006 0 : if (sum_rdc.rdcost < best_rdc.rdcost) {
3007 0 : sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
3008 0 : sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3009 0 : if (sum_rdc.rdcost < best_rdc.rdcost) {
3010 0 : best_rdc = sum_rdc;
3011 0 : pc_tree->partitioning = PARTITION_VERT;
3012 : }
3013 : }
3014 0 : restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
3015 : }
3016 :
3017 : // TODO(jbb): This code added so that we avoid static analysis
3018 : // warning related to the fact that best_rd isn't used after this
3019 : // point. This code should be refactored so that the duplicate
3020 : // checks occur in some sub function and thus are used...
3021 : (void)best_rd;
3022 0 : *rd_cost = best_rdc;
3023 :
3024 0 : if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
3025 0 : pc_tree->index != 3) {
3026 0 : int output_enabled = (bsize == BLOCK_64X64);
3027 0 : encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
3028 : pc_tree);
3029 : }
3030 :
3031 0 : if (bsize == BLOCK_64X64) {
3032 0 : assert(tp_orig < *tp);
3033 0 : assert(best_rdc.rate < INT_MAX);
3034 0 : assert(best_rdc.dist < INT64_MAX);
3035 : } else {
3036 0 : assert(tp_orig == *tp);
3037 : }
3038 0 : }
3039 :
3040 0 : static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
3041 : TileDataEnc *tile_data, int mi_row,
3042 : TOKENEXTRA **tp) {
3043 0 : VP9_COMMON *const cm = &cpi->common;
3044 0 : TileInfo *const tile_info = &tile_data->tile_info;
3045 0 : MACROBLOCK *const x = &td->mb;
3046 0 : MACROBLOCKD *const xd = &x->e_mbd;
3047 0 : SPEED_FEATURES *const sf = &cpi->sf;
3048 0 : const int mi_col_start = tile_info->mi_col_start;
3049 0 : const int mi_col_end = tile_info->mi_col_end;
3050 : int mi_col;
3051 :
3052 : // Initialize the left context for the new SB row
3053 0 : memset(&xd->left_context, 0, sizeof(xd->left_context));
3054 0 : memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
3055 :
3056 : // Code each SB in the row
3057 0 : for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
3058 0 : const struct segmentation *const seg = &cm->seg;
3059 : int dummy_rate;
3060 : int64_t dummy_dist;
3061 : RD_COST dummy_rdc;
3062 : int i;
3063 0 : int seg_skip = 0;
3064 :
3065 0 : const int idx_str = cm->mi_stride * mi_row + mi_col;
3066 0 : MODE_INFO **mi = cm->mi_grid_visible + idx_str;
3067 :
3068 0 : if (sf->adaptive_pred_interp_filter) {
3069 0 : for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
3070 :
3071 0 : for (i = 0; i < 64; ++i) {
3072 0 : td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
3073 0 : td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
3074 0 : td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
3075 0 : td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
3076 : }
3077 : }
3078 :
3079 0 : vp9_zero(x->pred_mv);
3080 0 : td->pc_root->index = 0;
3081 :
3082 0 : if (seg->enabled) {
3083 0 : const uint8_t *const map =
3084 0 : seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
3085 0 : int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
3086 0 : seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
3087 : }
3088 :
3089 0 : x->source_variance = UINT_MAX;
3090 0 : if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
3091 0 : const BLOCK_SIZE bsize =
3092 : seg_skip ? BLOCK_64X64 : sf->always_this_block_size;
3093 0 : set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
3094 0 : set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
3095 0 : rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
3096 : &dummy_rate, &dummy_dist, 1, td->pc_root);
3097 0 : } else if (cpi->partition_search_skippable_frame) {
3098 : BLOCK_SIZE bsize;
3099 0 : set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
3100 0 : bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
3101 0 : set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
3102 0 : rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
3103 : &dummy_rate, &dummy_dist, 1, td->pc_root);
3104 0 : } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
3105 0 : cm->frame_type != KEY_FRAME) {
3106 0 : choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
3107 0 : rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
3108 : &dummy_rate, &dummy_dist, 1, td->pc_root);
3109 : } else {
3110 : // If required set upper and lower partition size limits
3111 0 : if (sf->auto_min_max_partition_size) {
3112 0 : set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
3113 0 : rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
3114 : &x->min_partition_size, &x->max_partition_size);
3115 : }
3116 0 : rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
3117 : &dummy_rdc, INT64_MAX, td->pc_root);
3118 : }
3119 : }
3120 0 : }
3121 :
3122 0 : static void init_encode_frame_mb_context(VP9_COMP *cpi) {
3123 0 : MACROBLOCK *const x = &cpi->td.mb;
3124 0 : VP9_COMMON *const cm = &cpi->common;
3125 0 : MACROBLOCKD *const xd = &x->e_mbd;
3126 0 : const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
3127 :
3128 : // Copy data over into macro block data structures.
3129 0 : vp9_setup_src_planes(x, cpi->Source, 0, 0);
3130 :
3131 0 : vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
3132 :
3133 : // Note: this memset assumes above_context[0], [1] and [2]
3134 : // are allocated as part of the same buffer.
3135 0 : memset(xd->above_context[0], 0,
3136 : sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE);
3137 0 : memset(xd->above_seg_context, 0,
3138 : sizeof(*xd->above_seg_context) * aligned_mi_cols);
3139 0 : }
3140 :
3141 0 : static int check_dual_ref_flags(VP9_COMP *cpi) {
3142 0 : const int ref_flags = cpi->ref_frame_flags;
3143 :
3144 0 : if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
3145 0 : return 0;
3146 : } else {
3147 0 : return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) +
3148 0 : !!(ref_flags & VP9_ALT_FLAG)) >= 2;
3149 : }
3150 : }
3151 :
3152 0 : static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
3153 : int mi_row, mi_col;
3154 0 : const int mis = cm->mi_stride;
3155 0 : MODE_INFO **mi_ptr = cm->mi_grid_visible;
3156 :
3157 0 : for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
3158 0 : for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
3159 0 : if (mi_ptr[mi_col]->tx_size > max_tx_size)
3160 0 : mi_ptr[mi_col]->tx_size = max_tx_size;
3161 : }
3162 : }
3163 0 : }
3164 :
3165 0 : static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
3166 0 : if (frame_is_intra_only(&cpi->common))
3167 0 : return INTRA_FRAME;
3168 0 : else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
3169 0 : return ALTREF_FRAME;
3170 0 : else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
3171 0 : return GOLDEN_FRAME;
3172 : else
3173 0 : return LAST_FRAME;
3174 : }
3175 :
3176 0 : static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
3177 0 : if (xd->lossless) return ONLY_4X4;
3178 0 : if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode)
3179 0 : return ALLOW_16X16;
3180 0 : if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
3181 0 : return ALLOW_32X32;
3182 0 : else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
3183 0 : cpi->sf.tx_size_search_method == USE_TX_8X8)
3184 0 : return TX_MODE_SELECT;
3185 : else
3186 0 : return cpi->common.tx_mode;
3187 : }
3188 :
3189 0 : static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x,
3190 : RD_COST *rd_cost, BLOCK_SIZE bsize,
3191 : PICK_MODE_CONTEXT *ctx) {
3192 0 : if (bsize < BLOCK_16X16)
3193 0 : vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
3194 : else
3195 0 : vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
3196 0 : }
3197 :
3198 0 : static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
3199 : MACROBLOCK *const x, int mi_row, int mi_col,
3200 : RD_COST *rd_cost, BLOCK_SIZE bsize,
3201 : PICK_MODE_CONTEXT *ctx) {
3202 0 : VP9_COMMON *const cm = &cpi->common;
3203 0 : TileInfo *const tile_info = &tile_data->tile_info;
3204 0 : MACROBLOCKD *const xd = &x->e_mbd;
3205 : MODE_INFO *mi;
3206 : ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
3207 0 : BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size
3208 0 : const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
3209 0 : const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
3210 : int plane;
3211 :
3212 0 : set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
3213 0 : mi = xd->mi[0];
3214 0 : mi->sb_type = bsize;
3215 :
3216 0 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
3217 0 : struct macroblockd_plane *pd = &xd->plane[plane];
3218 0 : memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
3219 0 : (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
3220 0 : memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
3221 0 : (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
3222 : }
3223 :
3224 0 : if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
3225 0 : if (cyclic_refresh_segment_id_boosted(mi->segment_id))
3226 0 : x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
3227 :
3228 0 : if (cm->frame_type == KEY_FRAME)
3229 0 : hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
3230 0 : else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
3231 0 : set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
3232 0 : else if (bsize >= BLOCK_8X8)
3233 0 : vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
3234 : else
3235 0 : vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
3236 :
3237 0 : duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
3238 :
3239 0 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
3240 0 : struct macroblockd_plane *pd = &xd->plane[plane];
3241 0 : memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
3242 0 : (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
3243 0 : memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
3244 0 : (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
3245 : }
3246 :
3247 0 : if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost);
3248 :
3249 0 : ctx->rate = rd_cost->rate;
3250 0 : ctx->dist = rd_cost->dist;
3251 0 : }
3252 :
3253 0 : static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
3254 : int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) {
3255 0 : MACROBLOCKD *xd = &x->e_mbd;
3256 0 : int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
3257 0 : PARTITION_TYPE partition = pc_tree->partitioning;
3258 0 : BLOCK_SIZE subsize = get_subsize(bsize, partition);
3259 :
3260 0 : assert(bsize >= BLOCK_8X8);
3261 :
3262 0 : if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
3263 :
3264 0 : switch (partition) {
3265 : case PARTITION_NONE:
3266 0 : set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
3267 0 : *(xd->mi[0]) = pc_tree->none.mic;
3268 0 : *(x->mbmi_ext) = pc_tree->none.mbmi_ext;
3269 0 : duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
3270 0 : break;
3271 : case PARTITION_VERT:
3272 0 : set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
3273 0 : *(xd->mi[0]) = pc_tree->vertical[0].mic;
3274 0 : *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext;
3275 0 : duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
3276 :
3277 0 : if (mi_col + hbs < cm->mi_cols) {
3278 0 : set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs);
3279 0 : *(xd->mi[0]) = pc_tree->vertical[1].mic;
3280 0 : *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext;
3281 0 : duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize);
3282 : }
3283 0 : break;
3284 : case PARTITION_HORZ:
3285 0 : set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
3286 0 : *(xd->mi[0]) = pc_tree->horizontal[0].mic;
3287 0 : *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext;
3288 0 : duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
3289 0 : if (mi_row + hbs < cm->mi_rows) {
3290 0 : set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col);
3291 0 : *(xd->mi[0]) = pc_tree->horizontal[1].mic;
3292 0 : *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext;
3293 0 : duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize);
3294 : }
3295 0 : break;
3296 : case PARTITION_SPLIT: {
3297 0 : fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]);
3298 0 : fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
3299 0 : pc_tree->split[1]);
3300 0 : fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
3301 0 : pc_tree->split[2]);
3302 0 : fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
3303 0 : pc_tree->split[3]);
3304 0 : break;
3305 : }
3306 0 : default: break;
3307 : }
3308 : }
3309 :
3310 : // Reset the prediction pixel ready flag recursively.
3311 0 : static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
3312 0 : pc_tree->none.pred_pixel_ready = 0;
3313 0 : pc_tree->horizontal[0].pred_pixel_ready = 0;
3314 0 : pc_tree->horizontal[1].pred_pixel_ready = 0;
3315 0 : pc_tree->vertical[0].pred_pixel_ready = 0;
3316 0 : pc_tree->vertical[1].pred_pixel_ready = 0;
3317 :
3318 0 : if (bsize > BLOCK_8X8) {
3319 0 : BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
3320 : int i;
3321 0 : for (i = 0; i < 4; ++i) pred_pixel_ready_reset(pc_tree->split[i], subsize);
3322 : }
3323 0 : }
3324 :
// Searches the partitioning of the block at (mi_row, mi_col) of size bsize.
//
// Candidates are tried in this order: PARTITION_NONE, PARTITION_SPLIT
// (recursing into pc_tree->split[]), PARTITION_HORZ, PARTITION_VERT. Each is
// gated by the partition_*_allowed / do_split / do_rect flags computed below.
// A candidate replaces the current best only when its rdcost is lower than
// best_rdc.rdcost, which starts at best_rd. The winning type is stored in
// pc_tree->partitioning and its cost is returned through *rd_cost.
//
// If no candidate produced a valid rate, *rd_cost is reset and the function
// returns early. Otherwise the mode info array is filled from pc_tree and,
// when do_recon is non-zero, encode_sb_rt() is called with output enabled
// only for BLOCK_64X64.
3325 0 : static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
3326 : TileDataEnc *tile_data, TOKENEXTRA **tp,
3327 : int mi_row, int mi_col, BLOCK_SIZE bsize,
3328 : RD_COST *rd_cost, int do_recon,
3329 : int64_t best_rd, PC_TREE *pc_tree) {
3330 0 : const SPEED_FEATURES *const sf = &cpi->sf;
3331 0 : VP9_COMMON *const cm = &cpi->common;
3332 0 : TileInfo *const tile_info = &tile_data->tile_info;
3333 0 : MACROBLOCK *const x = &td->mb;
3334 0 : MACROBLOCKD *const xd = &x->e_mbd;
3335 0 : const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
3336 0 : TOKENEXTRA *tp_orig = *tp;
3337 0 : PICK_MODE_CONTEXT *ctx = &pc_tree->none;
3338 : int i;
3339 0 : BLOCK_SIZE subsize = bsize;
3340 : RD_COST this_rdc, sum_rdc, best_rdc;
3341 0 : int do_split = bsize >= BLOCK_8X8;
3342 0 : int do_rect = 1;
3343 : // Override skipping rectangular partition operations for edge blocks
3344 0 : const int force_horz_split = (mi_row + ms >= cm->mi_rows);
3345 0 : const int force_vert_split = (mi_col + ms >= cm->mi_cols);
3346 0 : const int xss = x->e_mbd.plane[1].subsampling_x;
3347 0 : const int yss = x->e_mbd.plane[1].subsampling_y;
3348 :
3349 0 : int partition_none_allowed = !force_horz_split && !force_vert_split;
3350 0 : int partition_horz_allowed =
3351 0 : !force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
3352 0 : int partition_vert_allowed =
3353 0 : !force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
// tp_orig is otherwise only read by the asserts at the end of this function;
// presumably this statement keeps it "used" when asserts are compiled out.
3354 : (void)*tp_orig;
3355 :
3356 0 : assert(num_8x8_blocks_wide_lookup[bsize] ==
3357 : num_8x8_blocks_high_lookup[bsize]);
3358 :
3359 0 : vp9_rd_cost_init(&sum_rdc);
3360 0 : vp9_rd_cost_reset(&best_rdc);
3361 0 : best_rdc.rdcost = best_rd;
3362 :
3363 : // Determine partition types in search according to the speed features.
3364 : // The threshold set here has to be of square block size.
3365 0 : if (sf->auto_min_max_partition_size) {
3366 0 : partition_none_allowed &=
3367 0 : (bsize <= x->max_partition_size && bsize >= x->min_partition_size);
3368 0 : partition_horz_allowed &=
3369 0 : ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
3370 : force_horz_split);
3371 0 : partition_vert_allowed &=
3372 0 : ((bsize <= x->max_partition_size && bsize > x->min_partition_size) ||
3373 : force_vert_split);
3374 0 : do_split &= bsize > x->min_partition_size;
3375 : }
3376 0 : if (sf->use_square_partition_only) {
3377 0 : partition_horz_allowed &= force_horz_split;
3378 0 : partition_vert_allowed &= force_vert_split;
3379 : }
3380 :
// pred_pixel_ready is set for the NONE context only when no other partition
// type will be tried for this block.
3381 0 : ctx->pred_pixel_ready =
3382 0 : !(partition_vert_allowed || partition_horz_allowed || do_split);
3383 :
3384 : // PARTITION_NONE
3385 0 : if (partition_none_allowed) {
3386 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize,
3387 : ctx);
3388 0 : ctx->mic = *xd->mi[0];
3389 0 : ctx->mbmi_ext = *x->mbmi_ext;
3390 0 : ctx->skip_txfm[0] = x->skip_txfm[0];
3391 0 : ctx->skip = x->skip;
3392 :
3393 0 : if (this_rdc.rate != INT_MAX) {
3394 0 : int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
3395 0 : this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
3396 0 : this_rdc.rdcost =
3397 0 : RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
3398 0 : if (this_rdc.rdcost < best_rdc.rdcost) {
3399 0 : int64_t dist_breakout_thr = sf->partition_search_breakout_dist_thr;
3400 0 : int64_t rate_breakout_thr = sf->partition_search_breakout_rate_thr;
3401 :
3402 0 : dist_breakout_thr >>=
3403 0 : 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
3404 :
3405 0 : rate_breakout_thr *= num_pels_log2_lookup[bsize];
3406 :
3407 0 : best_rdc = this_rdc;
3408 0 : if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
3409 :
// Early termination: when both rate and distortion are below the scaled
// breakout thresholds, the split and rectangular searches are skipped.
3410 0 : if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
3411 0 : this_rdc.dist < dist_breakout_thr) {
3412 0 : do_split = 0;
3413 0 : do_rect = 0;
3414 : }
3415 : }
3416 : }
3417 : }
3418 :
3419 : // store estimated motion vector
3420 0 : store_pred_mv(x, ctx);
3421 :
3422 : // PARTITION_SPLIT
3423 0 : if (do_split) {
3424 0 : int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
3425 0 : sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
3426 0 : sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3427 0 : subsize = get_subsize(bsize, PARTITION_SPLIT);
// The loop stops as soon as the accumulated cost reaches the current best.
3428 0 : for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
3429 0 : const int x_idx = (i & 1) * ms;
3430 0 : const int y_idx = (i >> 1) * ms;
3431 :
3432 0 : if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
3433 0 : continue;
3434 0 : load_pred_mv(x, ctx);
3435 0 : nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
3436 : mi_col + x_idx, subsize, &this_rdc, 0,
3437 0 : best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
3438 :
3439 0 : if (this_rdc.rate == INT_MAX) {
3440 0 : vp9_rd_cost_reset(&sum_rdc);
3441 : } else {
3442 0 : sum_rdc.rate += this_rdc.rate;
3443 0 : sum_rdc.dist += this_rdc.dist;
3444 0 : sum_rdc.rdcost += this_rdc.rdcost;
3445 : }
3446 : }
3447 :
3448 0 : if (sum_rdc.rdcost < best_rdc.rdcost) {
3449 0 : best_rdc = sum_rdc;
3450 0 : pc_tree->partitioning = PARTITION_SPLIT;
3451 : } else {
3452 : // skip rectangular partition test when larger block size
3453 : // gives better rd cost
3454 0 : if (sf->less_rectangular_check) do_rect &= !partition_none_allowed;
3455 : }
3456 : }
3457 :
3458 : // PARTITION_HORZ
3459 0 : if (partition_horz_allowed && do_rect) {
3460 0 : subsize = get_subsize(bsize, PARTITION_HORZ);
3461 0 : if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
3462 0 : pc_tree->horizontal[0].pred_pixel_ready = 1;
3463 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
3464 : &pc_tree->horizontal[0]);
3465 :
3466 0 : pc_tree->horizontal[0].mic = *xd->mi[0];
3467 0 : pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
3468 0 : pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
3469 0 : pc_tree->horizontal[0].skip = x->skip;
3470 :
// The bottom half is evaluated only if the top half alone is still cheaper
// than the best so far and the bottom half starts inside the frame.
3471 0 : if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
3472 0 : load_pred_mv(x, ctx);
3473 0 : pc_tree->horizontal[1].pred_pixel_ready = 1;
3474 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc,
3475 : subsize, &pc_tree->horizontal[1]);
3476 :
3477 0 : pc_tree->horizontal[1].mic = *xd->mi[0];
3478 0 : pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
3479 0 : pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
3480 0 : pc_tree->horizontal[1].skip = x->skip;
3481 :
3482 0 : if (this_rdc.rate == INT_MAX) {
3483 0 : vp9_rd_cost_reset(&sum_rdc);
3484 : } else {
3485 0 : int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
3486 0 : this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
3487 0 : sum_rdc.rate += this_rdc.rate;
3488 0 : sum_rdc.dist += this_rdc.dist;
3489 0 : sum_rdc.rdcost =
3490 0 : RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3491 : }
3492 : }
3493 :
3494 0 : if (sum_rdc.rdcost < best_rdc.rdcost) {
3495 0 : best_rdc = sum_rdc;
3496 0 : pc_tree->partitioning = PARTITION_HORZ;
3497 : } else {
3498 0 : pred_pixel_ready_reset(pc_tree, bsize);
3499 : }
3500 : }
3501 :
3502 : // PARTITION_VERT
3503 0 : if (partition_vert_allowed && do_rect) {
3504 0 : subsize = get_subsize(bsize, PARTITION_VERT);
3505 0 : if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
3506 0 : pc_tree->vertical[0].pred_pixel_ready = 1;
3507 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
3508 : &pc_tree->vertical[0]);
3509 0 : pc_tree->vertical[0].mic = *xd->mi[0];
3510 0 : pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
3511 0 : pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
3512 0 : pc_tree->vertical[0].skip = x->skip;
3513 :
// Same gating as the horizontal case, applied to the right half.
3514 0 : if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
3515 0 : load_pred_mv(x, ctx);
3516 0 : pc_tree->vertical[1].pred_pixel_ready = 1;
3517 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc,
3518 : subsize, &pc_tree->vertical[1]);
3519 0 : pc_tree->vertical[1].mic = *xd->mi[0];
3520 0 : pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
3521 0 : pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
3522 0 : pc_tree->vertical[1].skip = x->skip;
3523 :
3524 0 : if (this_rdc.rate == INT_MAX) {
3525 0 : vp9_rd_cost_reset(&sum_rdc);
3526 : } else {
3527 0 : int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
3528 0 : sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
3529 0 : sum_rdc.rate += this_rdc.rate;
3530 0 : sum_rdc.dist += this_rdc.dist;
3531 0 : sum_rdc.rdcost =
3532 0 : RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
3533 : }
3534 : }
3535 :
3536 0 : if (sum_rdc.rdcost < best_rdc.rdcost) {
3537 0 : best_rdc = sum_rdc;
3538 0 : pc_tree->partitioning = PARTITION_VERT;
3539 : } else {
3540 0 : pred_pixel_ready_reset(pc_tree, bsize);
3541 : }
3542 : }
3543 :
3544 0 : *rd_cost = best_rdc;
3545 :
// No candidate produced a valid rate: report an invalid cost and stop.
3546 0 : if (best_rdc.rate == INT_MAX) {
3547 0 : vp9_rd_cost_reset(rd_cost);
3548 0 : return;
3549 : }
3550 :
3551 : // update mode info array
3552 0 : fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree);
3553 :
3554 0 : if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
3555 0 : int output_enabled = (bsize == BLOCK_64X64);
3556 0 : encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
3557 : pc_tree);
3558 : }
3559 :
// Sanity checks: the token pointer is expected to advance only when a 64x64
// block was actually reconstructed.
3560 0 : if (bsize == BLOCK_64X64 && do_recon) {
3561 0 : assert(tp_orig < *tp);
3562 0 : assert(best_rdc.rate < INT_MAX);
3563 0 : assert(best_rdc.dist < INT64_MAX);
3564 : } else {
3565 0 : assert(tp_orig == *tp);
3566 : }
3567 : }
3568 :
3569 0 : static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
3570 : TileDataEnc *tile_data, MODE_INFO **mi,
3571 : TOKENEXTRA **tp, int mi_row, int mi_col,
3572 : BLOCK_SIZE bsize, int output_enabled,
3573 : RD_COST *rd_cost, PC_TREE *pc_tree) {
3574 0 : VP9_COMMON *const cm = &cpi->common;
3575 0 : TileInfo *const tile_info = &tile_data->tile_info;
3576 0 : MACROBLOCK *const x = &td->mb;
3577 0 : MACROBLOCKD *const xd = &x->e_mbd;
3578 0 : const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
3579 0 : const int mis = cm->mi_stride;
3580 : PARTITION_TYPE partition;
3581 : BLOCK_SIZE subsize;
3582 : RD_COST this_rdc;
3583 :
3584 0 : vp9_rd_cost_reset(&this_rdc);
3585 0 : if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
3586 :
3587 0 : subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
3588 0 : partition = partition_lookup[bsl][subsize];
3589 :
3590 0 : if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) {
3591 0 : x->max_partition_size = BLOCK_32X32;
3592 0 : x->min_partition_size = BLOCK_16X16;
3593 0 : nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
3594 : 0, INT64_MAX, pc_tree);
3595 0 : } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
3596 : subsize >= BLOCK_16X16) {
3597 0 : x->max_partition_size = BLOCK_32X32;
3598 0 : x->min_partition_size = BLOCK_8X8;
3599 0 : nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
3600 : 0, INT64_MAX, pc_tree);
3601 0 : } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
3602 0 : x->max_partition_size = BLOCK_16X16;
3603 0 : x->min_partition_size = BLOCK_8X8;
3604 0 : nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost,
3605 : 0, INT64_MAX, pc_tree);
3606 : } else {
3607 0 : switch (partition) {
3608 : case PARTITION_NONE:
3609 0 : pc_tree->none.pred_pixel_ready = 1;
3610 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
3611 : &pc_tree->none);
3612 0 : pc_tree->none.mic = *xd->mi[0];
3613 0 : pc_tree->none.mbmi_ext = *x->mbmi_ext;
3614 0 : pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
3615 0 : pc_tree->none.skip = x->skip;
3616 0 : break;
3617 : case PARTITION_VERT:
3618 0 : pc_tree->vertical[0].pred_pixel_ready = 1;
3619 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
3620 : &pc_tree->vertical[0]);
3621 0 : pc_tree->vertical[0].mic = *xd->mi[0];
3622 0 : pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
3623 0 : pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
3624 0 : pc_tree->vertical[0].skip = x->skip;
3625 0 : if (mi_col + hbs < cm->mi_cols) {
3626 0 : pc_tree->vertical[1].pred_pixel_ready = 1;
3627 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
3628 : &this_rdc, subsize, &pc_tree->vertical[1]);
3629 0 : pc_tree->vertical[1].mic = *xd->mi[0];
3630 0 : pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
3631 0 : pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
3632 0 : pc_tree->vertical[1].skip = x->skip;
3633 0 : if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3634 0 : rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3635 0 : rd_cost->rate += this_rdc.rate;
3636 0 : rd_cost->dist += this_rdc.dist;
3637 : }
3638 : }
3639 0 : break;
3640 : case PARTITION_HORZ:
3641 0 : pc_tree->horizontal[0].pred_pixel_ready = 1;
3642 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize,
3643 : &pc_tree->horizontal[0]);
3644 0 : pc_tree->horizontal[0].mic = *xd->mi[0];
3645 0 : pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
3646 0 : pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
3647 0 : pc_tree->horizontal[0].skip = x->skip;
3648 0 : if (mi_row + hbs < cm->mi_rows) {
3649 0 : pc_tree->horizontal[1].pred_pixel_ready = 1;
3650 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
3651 : &this_rdc, subsize, &pc_tree->horizontal[1]);
3652 0 : pc_tree->horizontal[1].mic = *xd->mi[0];
3653 0 : pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
3654 0 : pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
3655 0 : pc_tree->horizontal[1].skip = x->skip;
3656 0 : if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3657 0 : rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3658 0 : rd_cost->rate += this_rdc.rate;
3659 0 : rd_cost->dist += this_rdc.dist;
3660 : }
3661 : }
3662 0 : break;
3663 : case PARTITION_SPLIT:
3664 0 : subsize = get_subsize(bsize, PARTITION_SPLIT);
3665 0 : nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3666 : subsize, output_enabled, rd_cost,
3667 0 : pc_tree->split[0]);
3668 0 : nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
3669 : mi_col + hbs, subsize, output_enabled, &this_rdc,
3670 0 : pc_tree->split[1]);
3671 0 : if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3672 0 : rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3673 0 : rd_cost->rate += this_rdc.rate;
3674 0 : rd_cost->dist += this_rdc.dist;
3675 : }
3676 0 : nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp,
3677 : mi_row + hbs, mi_col, subsize, output_enabled,
3678 0 : &this_rdc, pc_tree->split[2]);
3679 0 : if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3680 0 : rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3681 0 : rd_cost->rate += this_rdc.rate;
3682 0 : rd_cost->dist += this_rdc.dist;
3683 : }
3684 0 : nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
3685 : mi_row + hbs, mi_col + hbs, subsize,
3686 0 : output_enabled, &this_rdc, pc_tree->split[3]);
3687 0 : if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
3688 0 : rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
3689 0 : rd_cost->rate += this_rdc.rate;
3690 0 : rd_cost->dist += this_rdc.dist;
3691 : }
3692 0 : break;
3693 0 : default: assert(0 && "Invalid partition type."); break;
3694 : }
3695 : }
3696 :
3697 0 : if (bsize == BLOCK_64X64 && output_enabled)
3698 0 : encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree);
3699 : }
3700 :
// Codes the block at (mi_row, mi_col) using the partitioning already stored
// in the mode info grid (mi[0]->sb_type; BLOCK_4X4 is assumed below 8x8).
//
// For each sub-block of the stored partition, modes are picked with
// nonrd_pick_sb_modes(), the result is snapshotted into the matching pc_tree
// context, and the sub-block is coded with encode_b_rt(). PARTITION_SPLIT
// recurses into the four quadrants, except for BLOCK_8X8 where a single call
// through pc_tree->leaf_split[0] is made. Blocks that start outside the frame
// return immediately.
//
// When output_enabled is non-zero the partition counter for this block's
// context is incremented. dummy_cost receives the cost of the last mode pick
// and is not accumulated here.
3701 0 : static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
3702 : TileDataEnc *tile_data, MODE_INFO **mi,
3703 : TOKENEXTRA **tp, int mi_row, int mi_col,
3704 : BLOCK_SIZE bsize, int output_enabled,
3705 : RD_COST *dummy_cost, PC_TREE *pc_tree) {
3706 0 : VP9_COMMON *const cm = &cpi->common;
3707 0 : TileInfo *tile_info = &tile_data->tile_info;
3708 0 : MACROBLOCK *const x = &td->mb;
3709 0 : MACROBLOCKD *const xd = &x->e_mbd;
3710 0 : const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
3711 0 : const int mis = cm->mi_stride;
3712 : PARTITION_TYPE partition;
3713 : BLOCK_SIZE subsize;
3714 :
3715 0 : if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
3716 :
3717 0 : subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4;
3718 0 : partition = partition_lookup[bsl][subsize];
3719 :
3720 0 : if (output_enabled && bsize != BLOCK_4X4) {
3721 0 : int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
3722 0 : td->counts->partition[ctx][partition]++;
3723 : }
3724 :
3725 0 : switch (partition) {
3726 : case PARTITION_NONE:
3727 0 : pc_tree->none.pred_pixel_ready = 1;
3728 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
3729 : subsize, &pc_tree->none);
3730 0 : pc_tree->none.mic = *xd->mi[0];
3731 0 : pc_tree->none.mbmi_ext = *x->mbmi_ext;
3732 0 : pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
3733 0 : pc_tree->none.skip = x->skip;
3734 0 : encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
3735 : subsize, &pc_tree->none);
3736 0 : break;
3737 : case PARTITION_VERT:
3738 0 : pc_tree->vertical[0].pred_pixel_ready = 1;
3739 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
3740 : subsize, &pc_tree->vertical[0]);
3741 0 : pc_tree->vertical[0].mic = *xd->mi[0];
3742 0 : pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
3743 0 : pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
3744 0 : pc_tree->vertical[0].skip = x->skip;
3745 0 : encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
3746 : subsize, &pc_tree->vertical[0]);
// The right half is coded only for blocks larger than 8x8 whose right half
// starts inside the frame.
3747 0 : if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
3748 0 : pc_tree->vertical[1].pred_pixel_ready = 1;
3749 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost,
3750 : subsize, &pc_tree->vertical[1]);
3751 0 : pc_tree->vertical[1].mic = *xd->mi[0];
3752 0 : pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
3753 0 : pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
3754 0 : pc_tree->vertical[1].skip = x->skip;
3755 0 : encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs,
3756 : output_enabled, subsize, &pc_tree->vertical[1]);
3757 : }
3758 0 : break;
3759 : case PARTITION_HORZ:
3760 0 : pc_tree->horizontal[0].pred_pixel_ready = 1;
3761 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
3762 : subsize, &pc_tree->horizontal[0]);
3763 0 : pc_tree->horizontal[0].mic = *xd->mi[0];
3764 0 : pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
3765 0 : pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
3766 0 : pc_tree->horizontal[0].skip = x->skip;
3767 0 : encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
3768 : subsize, &pc_tree->horizontal[0]);
3769 :
// The bottom half is coded only for blocks larger than 8x8 whose bottom half
// starts inside the frame.
3770 0 : if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
3771 0 : pc_tree->horizontal[1].pred_pixel_ready = 1;
3772 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost,
3773 : subsize, &pc_tree->horizontal[1]);
3774 0 : pc_tree->horizontal[1].mic = *xd->mi[0];
3775 0 : pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
3776 0 : pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
3777 0 : pc_tree->horizontal[1].skip = x->skip;
3778 0 : encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col,
3779 : output_enabled, subsize, &pc_tree->horizontal[1]);
3780 : }
3781 0 : break;
3782 : case PARTITION_SPLIT:
3783 0 : subsize = get_subsize(bsize, PARTITION_SPLIT);
// An 8x8 block split further is handled with one pick/encode call through
// leaf_split[0] instead of recursing.
3784 0 : if (bsize == BLOCK_8X8) {
3785 0 : nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
3786 : subsize, pc_tree->leaf_split[0]);
3787 0 : encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
3788 : subsize, pc_tree->leaf_split[0]);
3789 : } else {
3790 0 : nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize,
3791 0 : output_enabled, dummy_cost, pc_tree->split[0]);
3792 0 : nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row,
3793 : mi_col + hbs, subsize, output_enabled, dummy_cost,
3794 0 : pc_tree->split[1]);
3795 0 : nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp,
3796 : mi_row + hbs, mi_col, subsize, output_enabled,
3797 0 : dummy_cost, pc_tree->split[2]);
3798 0 : nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
3799 : mi_row + hbs, mi_col + hbs, subsize, output_enabled,
3800 0 : dummy_cost, pc_tree->split[3]);
3801 : }
3802 0 : break;
3803 0 : default: assert(0 && "Invalid partition type."); break;
3804 : }
3805 :
// For a split above 8x8 the recursive calls have already updated the
// partition context for their own blocks, so it is skipped here.
3806 0 : if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
3807 0 : update_partition_context(xd, mi_row, mi_col, subsize, bsize);
3808 : }
3809 :
// Encodes one superblock row of a tile on the non-RD path.
//
// The left entropy and segmentation contexts are cleared at the start of the
// row. Then, for every 64x64 superblock between the tile's mi_col_start and
// mi_col_end, per-superblock state on x is reset and the superblock is coded
// according to the partition search type taken from the speed features:
//   VAR_BASED_PARTITION / SOURCE_VAR_BASED_PARTITION / FIXED_PARTITION -
//     a partitioning is written to the mode info grid and then coded with
//     nonrd_use_partition();
//   REFERENCE_PARTITION - either a partition search with
//     nonrd_pick_partition(), or choose_partitioning() followed by
//     nonrd_use_partition() (key frames) / nonrd_select_partition().
// If segmentation is enabled and the superblock's segment has the skip
// feature active, FIXED_PARTITION with a 64x64 block is forced.
3810 0 : static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
3811 : TileDataEnc *tile_data, int mi_row,
3812 : TOKENEXTRA **tp) {
3813 0 : SPEED_FEATURES *const sf = &cpi->sf;
3814 0 : VP9_COMMON *const cm = &cpi->common;
3815 0 : TileInfo *const tile_info = &tile_data->tile_info;
3816 0 : MACROBLOCK *const x = &td->mb;
3817 0 : MACROBLOCKD *const xd = &x->e_mbd;
3818 0 : const int mi_col_start = tile_info->mi_col_start;
3819 0 : const int mi_col_end = tile_info->mi_col_end;
3820 : int mi_col;
3821 :
3822 : // Initialize the left context for the new SB row
3823 0 : memset(&xd->left_context, 0, sizeof(xd->left_context));
3824 0 : memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
3825 :
3826 : // Code each SB in the row
3827 0 : for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
3828 0 : const struct segmentation *const seg = &cm->seg;
3829 : RD_COST dummy_rdc;
3830 0 : const int idx_str = cm->mi_stride * mi_row + mi_col;
3831 0 : MODE_INFO **mi = cm->mi_grid_visible + idx_str;
3832 0 : PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
3833 0 : BLOCK_SIZE bsize = BLOCK_64X64;
3834 0 : int seg_skip = 0;
// Reset the per-superblock state kept on the macroblock.
3835 0 : x->source_variance = UINT_MAX;
3836 0 : vp9_zero(x->pred_mv);
3837 0 : vp9_rd_cost_init(&dummy_rdc);
3838 0 : x->color_sensitivity[0] = 0;
3839 0 : x->color_sensitivity[1] = 0;
3840 0 : x->sb_is_skin = 0;
3841 :
// A superblock whose segment has SEG_LVL_SKIP active is always coded with a
// fixed partition (bsize stays BLOCK_64X64 in that case).
3842 0 : if (seg->enabled) {
3843 0 : const uint8_t *const map =
3844 0 : seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
3845 0 : int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
3846 0 : seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
3847 0 : if (seg_skip) {
3848 0 : partition_search_type = FIXED_PARTITION;
3849 : }
3850 : }
3851 :
3852 : // Set the partition type of the 64X64 block
3853 0 : switch (partition_search_type) {
3854 : case VAR_BASED_PARTITION:
3855 : // TODO(jingning, marpan): The mode decision and encoding process
3856 : // support both intra and inter sub8x8 block coding for RTC mode.
3857 : // Tune the thresholds accordingly to use sub8x8 block coding for
3858 : // coding performance improvement.
3859 0 : choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
3860 0 : nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3861 : BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3862 0 : break;
3863 : case SOURCE_VAR_BASED_PARTITION:
3864 0 : set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
3865 0 : nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3866 : BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3867 0 : break;
3868 : case FIXED_PARTITION:
3869 0 : if (!seg_skip) bsize = sf->always_this_block_size;
3870 0 : set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
3871 0 : nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3872 : BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3873 0 : break;
3874 : case REFERENCE_PARTITION:
3875 0 : set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
3876 : // Use nonrd_pick_partition on scene-cut for VBR, or on qp-segment
3877 : // if cyclic_refresh is enabled.
3878 : // nonrd_pick_partition does not support 4x4 partition, so avoid it
3879 : // on key frame for now.
3880 0 : if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
3881 0 : cm->frame_type != KEY_FRAME) ||
3882 0 : (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
3883 0 : xd->mi[0]->segment_id)) {
3884 : // Use lower max_partition_size for low resolutions.
3885 0 : if (cm->width <= 352 && cm->height <= 288)
3886 0 : x->max_partition_size = BLOCK_32X32;
3887 : else
3888 0 : x->max_partition_size = BLOCK_64X64;
3889 0 : x->min_partition_size = BLOCK_8X8;
3890 0 : nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
3891 : BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
3892 : td->pc_root);
3893 : } else {
3894 0 : choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
3895 : // TODO(marpan): Seems like nonrd_select_partition does not support
3896 : // 4x4 partition. Since 4x4 is used on key frame, use this switch
3897 : // for now.
3898 0 : if (cm->frame_type == KEY_FRAME)
3899 0 : nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3900 : BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3901 : else
3902 0 : nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3903 : BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3904 : }
3905 :
3906 0 : break;
3907 0 : default: assert(0); break;
3908 : }
3909 : }
3910 0 : }
3911 : // end RTC play code
3912 :
3913 0 : static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
3914 0 : const SPEED_FEATURES *const sf = &cpi->sf;
3915 0 : const VP9_COMMON *const cm = &cpi->common;
3916 :
3917 0 : const uint8_t *src = cpi->Source->y_buffer;
3918 0 : const uint8_t *last_src = cpi->Last_Source->y_buffer;
3919 0 : const int src_stride = cpi->Source->y_stride;
3920 0 : const int last_stride = cpi->Last_Source->y_stride;
3921 :
3922 : // Pick cutoff threshold
3923 0 : const int cutoff = (VPXMIN(cm->width, cm->height) >= 720)
3924 0 : ? (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100)
3925 0 : : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100);
3926 : DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]);
3927 0 : diff *var16 = cpi->source_diff_var;
3928 :
3929 0 : int sum = 0;
3930 : int i, j;
3931 :
3932 0 : memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0]));
3933 :
3934 0 : for (i = 0; i < cm->mb_rows; i++) {
3935 0 : for (j = 0; j < cm->mb_cols; j++) {
3936 : #if CONFIG_VP9_HIGHBITDEPTH
3937 : if (cm->use_highbitdepth) {
3938 : switch (cm->bit_depth) {
3939 : case VPX_BITS_8:
3940 : vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride,
3941 : &var16->sse, &var16->sum);
3942 : break;
3943 : case VPX_BITS_10:
3944 : vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
3945 : &var16->sse, &var16->sum);
3946 : break;
3947 : case VPX_BITS_12:
3948 : vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
3949 : &var16->sse, &var16->sum);
3950 : break;
3951 : default:
3952 : assert(0 &&
3953 : "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
3954 : " or VPX_BITS_12");
3955 : return -1;
3956 : }
3957 : } else {
3958 : vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
3959 : &var16->sum);
3960 : }
3961 : #else
3962 0 : vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
3963 : &var16->sum);
3964 : #endif // CONFIG_VP9_HIGHBITDEPTH
3965 0 : var16->var = var16->sse - (((uint32_t)var16->sum * var16->sum) >> 8);
3966 :
3967 0 : if (var16->var >= VAR_HIST_MAX_BG_VAR)
3968 0 : hist[VAR_HIST_BINS - 1]++;
3969 : else
3970 0 : hist[var16->var / VAR_HIST_FACTOR]++;
3971 :
3972 0 : src += 16;
3973 0 : last_src += 16;
3974 0 : var16++;
3975 : }
3976 :
3977 0 : src = src - cm->mb_cols * 16 + 16 * src_stride;
3978 0 : last_src = last_src - cm->mb_cols * 16 + 16 * last_stride;
3979 : }
3980 :
3981 0 : cpi->source_var_thresh = 0;
3982 :
3983 0 : if (hist[VAR_HIST_BINS - 1] < cutoff) {
3984 0 : for (i = 0; i < VAR_HIST_BINS - 1; i++) {
3985 0 : sum += hist[i];
3986 :
3987 0 : if (sum > cutoff) {
3988 0 : cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR;
3989 0 : return 0;
3990 : }
3991 : }
3992 : }
3993 :
3994 0 : return sf->search_type_check_frequency;
3995 : }
3996 :
3997 0 : static void source_var_based_partition_search_method(VP9_COMP *cpi) {
3998 0 : VP9_COMMON *const cm = &cpi->common;
3999 0 : SPEED_FEATURES *const sf = &cpi->sf;
4000 :
4001 0 : if (cm->frame_type == KEY_FRAME) {
4002 : // For key frame, use SEARCH_PARTITION.
4003 0 : sf->partition_search_type = SEARCH_PARTITION;
4004 0 : } else if (cm->intra_only) {
4005 0 : sf->partition_search_type = FIXED_PARTITION;
4006 : } else {
4007 0 : if (cm->last_width != cm->width || cm->last_height != cm->height) {
4008 0 : if (cpi->source_diff_var) vpx_free(cpi->source_diff_var);
4009 :
4010 0 : CHECK_MEM_ERROR(cm, cpi->source_diff_var,
4011 : vpx_calloc(cm->MBs, sizeof(diff)));
4012 : }
4013 :
4014 0 : if (!cpi->frames_till_next_var_check)
4015 0 : cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi);
4016 :
4017 0 : if (cpi->frames_till_next_var_check > 0) {
4018 0 : sf->partition_search_type = FIXED_PARTITION;
4019 0 : cpi->frames_till_next_var_check--;
4020 : }
4021 : }
4022 0 : }
4023 :
4024 0 : static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) {
4025 0 : unsigned int intra_count = 0, inter_count = 0;
4026 : int j;
4027 :
4028 0 : for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
4029 0 : intra_count += td->counts->intra_inter[j][0];
4030 0 : inter_count += td->counts->intra_inter[j][1];
4031 : }
4032 :
4033 0 : return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME &&
4034 0 : cm->show_frame;
4035 : }
4036 :
4037 0 : void vp9_init_tile_data(VP9_COMP *cpi) {
4038 0 : VP9_COMMON *const cm = &cpi->common;
4039 0 : const int tile_cols = 1 << cm->log2_tile_cols;
4040 0 : const int tile_rows = 1 << cm->log2_tile_rows;
4041 : int tile_col, tile_row;
4042 0 : TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
4043 0 : int tile_tok = 0;
4044 :
4045 0 : if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
4046 0 : if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
4047 0 : CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows *
4048 : sizeof(*cpi->tile_data)));
4049 0 : cpi->allocated_tiles = tile_cols * tile_rows;
4050 :
4051 0 : for (tile_row = 0; tile_row < tile_rows; ++tile_row)
4052 0 : for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
4053 0 : TileDataEnc *tile_data =
4054 0 : &cpi->tile_data[tile_row * tile_cols + tile_col];
4055 : int i, j;
4056 0 : for (i = 0; i < BLOCK_SIZES; ++i) {
4057 0 : for (j = 0; j < MAX_MODES; ++j) {
4058 0 : tile_data->thresh_freq_fact[i][j] = 32;
4059 0 : tile_data->mode_map[i][j] = j;
4060 : }
4061 : }
4062 : }
4063 : }
4064 :
4065 0 : for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
4066 0 : for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
4067 0 : TileInfo *tile_info =
4068 0 : &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
4069 0 : vp9_tile_init(tile_info, cm, tile_row, tile_col);
4070 :
4071 0 : cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
4072 0 : pre_tok = cpi->tile_tok[tile_row][tile_col];
4073 0 : tile_tok = allocated_tokens(*tile_info);
4074 : }
4075 : }
4076 0 : }
4077 :
4078 0 : void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
4079 : int tile_col) {
4080 0 : VP9_COMMON *const cm = &cpi->common;
4081 0 : const int tile_cols = 1 << cm->log2_tile_cols;
4082 0 : TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
4083 0 : const TileInfo *const tile_info = &this_tile->tile_info;
4084 0 : TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
4085 0 : const int mi_row_start = tile_info->mi_row_start;
4086 0 : const int mi_row_end = tile_info->mi_row_end;
4087 : int mi_row;
4088 :
4089 : // Set up pointers to per thread motion search counters.
4090 0 : this_tile->m_search_count = 0; // Count of motion search hits.
4091 0 : this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
4092 0 : td->mb.m_search_count_ptr = &this_tile->m_search_count;
4093 0 : td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
4094 :
4095 0 : for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) {
4096 0 : if (cpi->sf.use_nonrd_pick_mode)
4097 0 : encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
4098 : else
4099 0 : encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
4100 : }
4101 0 : cpi->tok_count[tile_row][tile_col] =
4102 0 : (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
4103 0 : assert(tok - cpi->tile_tok[tile_row][tile_col] <=
4104 : allocated_tokens(*tile_info));
4105 0 : }
4106 :
4107 0 : static void encode_tiles(VP9_COMP *cpi) {
4108 0 : VP9_COMMON *const cm = &cpi->common;
4109 0 : const int tile_cols = 1 << cm->log2_tile_cols;
4110 0 : const int tile_rows = 1 << cm->log2_tile_rows;
4111 : int tile_col, tile_row;
4112 :
4113 0 : vp9_init_tile_data(cpi);
4114 :
4115 0 : for (tile_row = 0; tile_row < tile_rows; ++tile_row)
4116 0 : for (tile_col = 0; tile_col < tile_cols; ++tile_col)
4117 0 : vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col);
4118 0 : }
4119 :
#if CONFIG_FP_MB_STATS
// Locates the first-pass macroblock stats for the current frame.
//
// The stats are laid out as cm->MBs bytes per frame starting at
// firstpass_mb_stats->mb_stats_start, indexed by cm->current_video_frame.
// On success stores the frame's pointer in *this_frame_mb_stats and returns
// 1; returns EOF (leaving the output untouched) when the computed position
// lies past mb_stats_end.
static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
                            VP9_COMMON *cm, uint8_t **this_frame_mb_stats) {
  uint8_t *const frame_stats =
      &firstpass_mb_stats
           ->mb_stats_start[cm->current_video_frame * cm->MBs * sizeof(uint8_t)];

  if (frame_stats > firstpass_mb_stats->mb_stats_end) {
    return EOF;
  }

  *this_frame_mb_stats = frame_stats;
  return 1;
}
#endif
4133 :
// Performs one encode pass over the whole frame.
//
// Resets the frame-level counters, selects the 4x4 transform functions
// according to whether the frame is lossless, initialises the quantizer, RD
// and motion-estimation constants, decides whether the previous frame's
// motion vectors may be used, and then encodes all tiles (multi-threaded when
// more than one thread and more than one tile column are available). The time
// spent encoding tiles is accumulated in cpi->time_encode_sb_row, and
// sf->skip_encode_frame is updated afterwards.
static void encode_frame_internal(VP9_COMP *cpi) {
  SPEED_FEATURES *const sf = &cpi->sf;
  ThreadData *const td = &cpi->td;
  MACROBLOCK *const x = &td->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;

  // Point the macroblock descriptor at the start of the visible mode-info
  // grid.
  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;

  vp9_zero(*td->counts);
  vp9_zero(cpi->td.rd_counts);

  // The frame is treated as lossless only when the base quantizer index and
  // every delta are zero.
  xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 &&
                 cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;

  // Lossless frames use the *wht* 4x4 transform pair; all others use the
  // fdct/idct pair.
#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth)
    x->fwd_txm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4;
  else
    x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
  x->highbd_itxm_add =
      xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add;
#else
  x->fwd_txm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;

  // Coefficient optimization is turned off for lossless frames.
  if (xd->lossless) x->optimize = 0;

  cm->tx_mode = select_tx_mode(cpi, xd);

  vp9_frame_init_quantizer(cpi);

  vp9_initialize_rd_consts(cpi);
  vp9_initialize_me_consts(cpi, x, cm->base_qindex);
  init_encode_frame_mb_context(cpi);
  // Previous-frame MVs are usable only if error resilience is off, the frame
  // size is unchanged, this is not an intra-only frame, and the last frame
  // was shown.
  cm->use_prev_frame_mvs =
      !cm->error_resilient_mode && cm->width == cm->last_width &&
      cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame;
  // Special case: set prev_mi to NULL when the previous mode info
  // context cannot be used.
  cm->prev_mi =
      cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL;

  x->quant_fp = cpi->sf.use_quant_fp;
  vp9_zero(x->skip_txfm);
  if (sf->use_nonrd_pick_mode) {
    // Initialize internal buffer pointers for rtc coding, where non-RD
    // mode decision is used and hence no buffer pointer swap needed.
    int i;
    struct macroblock_plane *const p = x->plane;
    struct macroblockd_plane *const pd = xd->plane;
    PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none;

    for (i = 0; i < MAX_MB_PLANE; ++i) {
      p[i].coeff = ctx->coeff_pbuf[i][0];
      p[i].qcoeff = ctx->qcoeff_pbuf[i][0];
      pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0];
      p[i].eobs = ctx->eobs_pbuf[i][0];
    }
    vp9_zero(x->zcoeff_blk);

    // On a non-key frame with frames_since_golden == 0, clear the golden
    // reference flag, except for lagged VBR encoding or when SVC is in use.
    if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 &&
        !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) &&
        !cpi->use_svc)
      cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);

    if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
      source_var_based_partition_search_method(cpi);
  }

  {
    // Time the tile encode so it can be added to cpi->time_encode_sb_row.
    struct vpx_usec_timer emr_timer;
    vpx_usec_timer_start(&emr_timer);

#if CONFIG_FP_MB_STATS
    // The return value (1 or EOF) is not checked here.
    if (cpi->use_fp_mb_stats) {
      input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
                       &cpi->twopass.this_frame_mb_stats);
    }
#endif

    // If allowed, encoding tiles in parallel with one thread handling one tile.
    if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
      vp9_encode_tiles_mt(cpi);
    else
      encode_tiles(cpi);

    vpx_usec_timer_mark(&emr_timer);
    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
  }

  // Only evaluated when the skip_encode_sb speed feature is on; otherwise the
  // flag is cleared.
  sf->skip_encode_frame =
      sf->skip_encode_sb ? get_skip_encode_frame(cm, td) : 0;

#if 0
  // Keep record of the total distortion this time around for future use
  cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
4235 :
4236 0 : static INTERP_FILTER get_interp_filter(
4237 : const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
4238 0 : if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
4239 0 : threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
4240 0 : threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
4241 0 : return EIGHTTAP_SMOOTH;
4242 0 : } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] &&
4243 0 : threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) {
4244 0 : return EIGHTTAP_SHARP;
4245 0 : } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) {
4246 0 : return EIGHTTAP;
4247 : } else {
4248 0 : return SWITCHABLE;
4249 : }
4250 : }
4251 :
4252 0 : static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
4253 0 : VP9_COMMON *const cm = &cpi->common;
4254 0 : MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
4255 0 : struct segmentation *const seg = &cm->seg;
4256 :
4257 : int mi_row, mi_col;
4258 0 : int sum_delta = 0;
4259 0 : int map_index = 0;
4260 : int qdelta_index;
4261 : int segment_id;
4262 :
4263 0 : for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
4264 0 : MODE_INFO **mi_8x8 = mi_8x8_ptr;
4265 0 : for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) {
4266 0 : segment_id = mi_8x8[0]->segment_id;
4267 0 : qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
4268 0 : sum_delta += qdelta_index;
4269 0 : map_index++;
4270 : }
4271 0 : mi_8x8_ptr += cm->mi_stride;
4272 : }
4273 :
4274 0 : return sum_delta / (cm->mi_rows * cm->mi_cols);
4275 : }
4276 :
// Top-level per-frame encode entry point.
//
// Decides whether compound inter prediction is allowed (based on the
// reference sign biases), chooses the frame's reference mode and
// interpolation filter, runs encode_frame_internal(), and then uses the
// counts gathered during the encode to update the stored selection
// thresholds and to narrow cm->reference_mode / cm->tx_mode when only a
// subset of the options was actually used. When
// cpi->sf.frame_parameter_update is off, the frame is simply encoded with
// SINGLE_REFERENCE. Finally, the average AQ offset is computed when
// segmentation based AQ updated the map or data for this frame.
void vp9_encode_frame(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;

  // In the longer term the encoder should be generalized to match the
  // decoder such that we allow compound where one of the 3 buffers has a
  // different sign bias and that buffer is then the fixed ref. However, this
  // requires further work in the rd loop. For now the only supported encoder
  // side behavior is where the ALT ref buffer has opposite sign bias to
  // the other two.
  if (!frame_is_intra_only(cm)) {
    if ((cm->ref_frame_sign_bias[ALTREF_FRAME] ==
         cm->ref_frame_sign_bias[GOLDEN_FRAME]) ||
        (cm->ref_frame_sign_bias[ALTREF_FRAME] ==
         cm->ref_frame_sign_bias[LAST_FRAME])) {
      cpi->allow_comp_inter_inter = 0;
    } else {
      cpi->allow_comp_inter_inter = 1;
      cm->comp_fixed_ref = ALTREF_FRAME;
      cm->comp_var_ref[0] = LAST_FRAME;
      cm->comp_var_ref[1] = GOLDEN_FRAME;
    }
  }

  if (cpi->sf.frame_parameter_update) {
    int i;
    RD_OPT *const rd_opt = &cpi->rd;
    FRAME_COUNTS *counts = cpi->td.counts;
    RD_COUNTS *const rdc = &cpi->td.rd_counts;

    // This code does a single RD pass over the whole frame assuming
    // either compound, single or hybrid prediction as per whatever has
    // worked best for that type of frame in the past.
    // It also predicts whether another coding mode would have worked
    // better than this coding mode. If that is the case, it remembers
    // that for subsequent frames.
    // It also does the same analysis for transform size selection.
    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
    int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
    int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
    const int is_alt_ref = frame_type == ALTREF_FRAME;

    /* prediction (compound, single or hybrid) mode selection */
    if (is_alt_ref || !cpi->allow_comp_inter_inter)
      cm->reference_mode = SINGLE_REFERENCE;
    else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
             mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] &&
             check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100)
      cm->reference_mode = COMPOUND_REFERENCE;
    else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT])
      cm->reference_mode = SINGLE_REFERENCE;
    else
      cm->reference_mode = REFERENCE_MODE_SELECT;

    // A fixed filter is only substituted when the frame was set to
    // SWITCHABLE on entry.
    if (cm->interp_filter == SWITCHABLE)
      cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);

    encode_frame_internal(cpi);

    // Update each stored threshold to the mean of its old value and this
    // frame's per-macroblock difference.
    for (i = 0; i < REFERENCE_MODES; ++i)
      mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2;

    // If per-block reference mode selection ended up using only one of the
    // two choices, switch the frame to that fixed mode and clear the
    // comp_inter counts.
    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      int single_count_zero = 0;
      int comp_count_zero = 0;

      for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
        single_count_zero += counts->comp_inter[i][0];
        comp_count_zero += counts->comp_inter[i][1];
      }

      if (comp_count_zero == 0) {
        cm->reference_mode = SINGLE_REFERENCE;
        vp9_zero(counts->comp_inter);
      } else if (single_count_zero == 0) {
        cm->reference_mode = COMPOUND_REFERENCE;
        vp9_zero(counts->comp_inter);
      }
    }

    // Likewise for transform size: tally how often each size was chosen and
    // narrow cm->tx_mode when the tallies show only a restricted set in use.
    if (cm->tx_mode == TX_MODE_SELECT) {
      int count4x4 = 0;
      int count8x8_lp = 0, count8x8_8x8p = 0;
      int count16x16_16x16p = 0, count16x16_lp = 0;
      int count32x32 = 0;

      for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
        count4x4 += counts->tx.p32x32[i][TX_4X4];
        count4x4 += counts->tx.p16x16[i][TX_4X4];
        count4x4 += counts->tx.p8x8[i][TX_4X4];

        count8x8_lp += counts->tx.p32x32[i][TX_8X8];
        count8x8_lp += counts->tx.p16x16[i][TX_8X8];
        count8x8_8x8p += counts->tx.p8x8[i][TX_8X8];

        count16x16_16x16p += counts->tx.p16x16[i][TX_16X16];
        count16x16_lp += counts->tx.p32x32[i][TX_16X16];
        count32x32 += counts->tx.p32x32[i][TX_32X32];
      }
      if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
          count32x32 == 0) {
        cm->tx_mode = ALLOW_8X8;
        reset_skip_tx_size(cm, TX_8X8);
      } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
                 count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
        cm->tx_mode = ONLY_4X4;
        reset_skip_tx_size(cm, TX_4X4);
      } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_32X32;
      } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_16X16;
        reset_skip_tx_size(cm, TX_16X16);
      }
    }
  } else {
    cm->reference_mode = SINGLE_REFERENCE;
    encode_frame_internal(cpi);
  }

  // If segmented AQ is enabled compute the average AQ weighting.
  if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) &&
      (cm->seg.update_map || cm->seg.update_data)) {
    cm->seg.aq_av_offset = compute_frame_aq_offset(cpi);
  }
}
4404 :
4405 0 : static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
4406 0 : const PREDICTION_MODE y_mode = mi->mode;
4407 0 : const PREDICTION_MODE uv_mode = mi->uv_mode;
4408 0 : const BLOCK_SIZE bsize = mi->sb_type;
4409 :
4410 0 : if (bsize < BLOCK_8X8) {
4411 : int idx, idy;
4412 0 : const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
4413 0 : const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
4414 0 : for (idy = 0; idy < 2; idy += num_4x4_h)
4415 0 : for (idx = 0; idx < 2; idx += num_4x4_w)
4416 0 : ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode];
4417 : } else {
4418 0 : ++counts->y_mode[size_group_lookup[bsize]][y_mode];
4419 : }
4420 :
4421 0 : ++counts->uv_mode[y_mode][uv_mode];
4422 0 : }
4423 :
4424 0 : static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi,
4425 : int mi_row, int mi_col, BLOCK_SIZE bsize) {
4426 0 : const VP9_COMMON *const cm = &cpi->common;
4427 0 : MV mv = mi->mv[0].as_mv;
4428 0 : const int bw = num_8x8_blocks_wide_lookup[bsize];
4429 0 : const int bh = num_8x8_blocks_high_lookup[bsize];
4430 0 : const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
4431 0 : const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
4432 0 : const int block_index = mi_row * cm->mi_cols + mi_col;
4433 : int x, y;
4434 0 : for (y = 0; y < ymis; y++)
4435 0 : for (x = 0; x < xmis; x++) {
4436 0 : int map_offset = block_index + y * cm->mi_cols + x;
4437 0 : if (is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
4438 0 : if (abs(mv.row) < 8 && abs(mv.col) < 8) {
4439 0 : if (cpi->consec_zero_mv[map_offset] < 255)
4440 0 : cpi->consec_zero_mv[map_offset]++;
4441 : } else {
4442 0 : cpi->consec_zero_mv[map_offset] = 0;
4443 : }
4444 : }
4445 : }
4446 0 : }
4447 :
4448 0 : static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
4449 : int output_enabled, int mi_row, int mi_col,
4450 : BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
4451 0 : VP9_COMMON *const cm = &cpi->common;
4452 0 : MACROBLOCK *const x = &td->mb;
4453 0 : MACROBLOCKD *const xd = &x->e_mbd;
4454 0 : MODE_INFO *mi = xd->mi[0];
4455 0 : const int seg_skip =
4456 0 : segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP);
4457 0 : x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 &&
4458 0 : cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
4459 0 : cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
4460 0 : cpi->sf.allow_skip_recode;
4461 :
4462 0 : if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode)
4463 0 : memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
4464 :
4465 0 : x->skip_optimize = ctx->is_coded;
4466 0 : ctx->is_coded = 1;
4467 0 : x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
4468 0 : x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
4469 0 : x->q_index < QIDX_SKIP_THRESH);
4470 :
4471 0 : if (x->skip_encode) return;
4472 :
4473 0 : if (!is_inter_block(mi)) {
4474 : int plane;
4475 : #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
4476 : if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
4477 : (xd->above_mi == NULL || xd->left_mi == NULL) &&
4478 : need_top_left[mi->uv_mode])
4479 : assert(0);
4480 : #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
4481 0 : mi->skip = 1;
4482 0 : for (plane = 0; plane < MAX_MB_PLANE; ++plane)
4483 0 : vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1);
4484 0 : if (output_enabled) sum_intra_stats(td->counts, mi);
4485 0 : vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
4486 : VPXMAX(bsize, BLOCK_8X8));
4487 : } else {
4488 : int ref;
4489 0 : const int is_compound = has_second_ref(mi);
4490 0 : set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
4491 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
4492 0 : YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mi->ref_frame[ref]);
4493 0 : assert(cfg != NULL);
4494 0 : vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
4495 0 : &xd->block_refs[ref]->sf);
4496 : }
4497 0 : if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
4498 0 : vp9_build_inter_predictors_sby(xd, mi_row, mi_col,
4499 : VPXMAX(bsize, BLOCK_8X8));
4500 :
4501 0 : vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col,
4502 : VPXMAX(bsize, BLOCK_8X8));
4503 :
4504 0 : vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
4505 0 : vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip,
4506 : VPXMAX(bsize, BLOCK_8X8));
4507 : }
4508 :
4509 0 : if (seg_skip) {
4510 0 : assert(mi->skip);
4511 : }
4512 :
4513 0 : if (output_enabled) {
4514 0 : if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 &&
4515 0 : !(is_inter_block(mi) && mi->skip)) {
4516 0 : ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
4517 0 : &td->counts->tx)[mi->tx_size];
4518 : } else {
4519 : // The new intra coding scheme requires no change of transform size
4520 0 : if (is_inter_block(mi)) {
4521 0 : mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
4522 : max_txsize_lookup[bsize]);
4523 : } else {
4524 0 : mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4;
4525 : }
4526 : }
4527 :
4528 0 : ++td->counts->tx.tx_totals[mi->tx_size];
4529 0 : ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
4530 0 : if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
4531 0 : vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
4532 0 : if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0)
4533 0 : update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
4534 : }
4535 : }
|