Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <assert.h>
13 : #include <math.h>
14 :
15 : #include "./aom_dsp_rtcd.h"
16 : #include "./av1_rtcd.h"
17 :
18 : #include "aom_dsp/aom_dsp_common.h"
19 : #include "aom_dsp/blend.h"
20 : #include "aom_mem/aom_mem.h"
21 : #include "aom_ports/mem.h"
22 : #include "aom_ports/system_state.h"
23 :
24 : #include "av1/common/common.h"
25 : #include "av1/common/common_data.h"
26 : #include "av1/common/entropy.h"
27 : #include "av1/common/entropymode.h"
28 : #include "av1/common/idct.h"
29 : #include "av1/common/mvref_common.h"
30 : #include "av1/common/pred_common.h"
31 : #include "av1/common/quant_common.h"
32 : #include "av1/common/reconinter.h"
33 : #include "av1/common/reconintra.h"
34 : #include "av1/common/scan.h"
35 : #include "av1/common/seg_common.h"
36 : #if CONFIG_LV_MAP
37 : #include "av1/common/txb_common.h"
38 : #endif
39 : #if CONFIG_WARPED_MOTION
40 : #include "av1/common/warped_motion.h"
41 : #endif // CONFIG_WARPED_MOTION
42 :
43 : #include "av1/encoder/aq_variance.h"
44 : #include "av1/encoder/av1_quantize.h"
45 : #include "av1/encoder/cost.h"
46 : #include "av1/encoder/encodemb.h"
47 : #include "av1/encoder/encodemv.h"
48 : #include "av1/encoder/encoder.h"
49 : #if CONFIG_LV_MAP
50 : #include "av1/encoder/encodetxb.h"
51 : #endif
52 : #include "av1/encoder/hybrid_fwd_txfm.h"
53 : #include "av1/encoder/mcomp.h"
54 : #if CONFIG_PALETTE
55 : #include "av1/encoder/palette.h"
56 : #endif // CONFIG_PALETTE
57 : #include "av1/encoder/ratectrl.h"
58 : #include "av1/encoder/rd.h"
59 : #include "av1/encoder/rdopt.h"
60 : #include "av1/encoder/tokenize.h"
61 : #if CONFIG_PVQ
62 : #include "av1/encoder/pvq_encoder.h"
63 : #endif // CONFIG_PVQ
64 : #if CONFIG_PVQ || CONFIG_DAALA_DIST
65 : #include "av1/common/pvq.h"
66 : #endif // CONFIG_PVQ || CONFIG_DAALA_DIST
67 : #if CONFIG_DUAL_FILTER
68 : #define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
69 : #if USE_EXTRA_FILTER
70 : static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
71 : { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
72 : { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
73 : { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
74 : };
75 : #else // USE_EXTRA_FILTER
76 : static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
77 : { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
78 : { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
79 : };
80 : #endif // USE_EXTRA_FILTER
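/* Either table above is laid out row-major over the two 1-D filters, so
 * entry i decomposes as { i / SWITCHABLE_FILTERS, i % SWITCHABLE_FILTERS }:
 * a dual-filter search can recover both filters from a single loop index. */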
81 : #endif // CONFIG_DUAL_FILTER
82 :
83 : #if CONFIG_EXT_REFS
84 :
85 : #define LAST_FRAME_MODE_MASK \
86 : ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
87 : (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
88 : #define LAST2_FRAME_MODE_MASK \
89 : ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
90 : (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
91 : #define LAST3_FRAME_MODE_MASK \
92 : ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
93 : (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
94 : #define GOLDEN_FRAME_MODE_MASK \
95 : ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
96 : (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
97 : #define BWDREF_FRAME_MODE_MASK \
98 : ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
99 : (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
100 : #define ALTREF_FRAME_MODE_MASK \
101 : ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
102 : (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))
103 :
104 : #else
105 :
106 : #define LAST_FRAME_MODE_MASK \
107 : ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
108 : #define GOLDEN_FRAME_MODE_MASK \
109 : ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
110 : #define ALTREF_FRAME_MODE_MASK \
111 : ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
112 :
113 : #endif // CONFIG_EXT_REFS
114 :
115 : #if CONFIG_EXT_REFS
116 : #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
117 : #else
118 : #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
119 : #endif // CONFIG_EXT_REFS
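/* Sketch of how the masks above are consumed further down in this file:
 * once the best mode so far uses, say, LAST_FRAME, the search ORs
 * LAST_FRAME_MODE_MASK into its reference skip mask, after which pruning a
 * candidate reduces to a bit-test of roughly the form
 *   if (ref_frame_skip_mask[0] & (1 << ref_frame)) continue;
 */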
120 :
121 : #define MIN_EARLY_TERM_INDEX 3
122 : #define NEW_MV_DISCOUNT_FACTOR 8
123 :
124 : #if CONFIG_EXT_INTRA
125 : #define ANGLE_SKIP_THRESH 10
126 : #define FILTER_FAST_SEARCH 1
127 : #endif // CONFIG_EXT_INTRA
128 :
129 : const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671, // vert
130 : -7.7051, -3.2234, -3.6193, 3.4533 }; // horz
131 :
132 : typedef struct {
133 : PREDICTION_MODE mode;
134 : MV_REFERENCE_FRAME ref_frame[2];
135 : } MODE_DEFINITION;
136 :
137 : typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
138 :
139 : struct rdcost_block_args {
140 : const AV1_COMP *cpi;
141 : MACROBLOCK *x;
142 : ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
143 : ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
144 : RD_STATS rd_stats;
145 : int64_t this_rd;
146 : int64_t best_rd;
147 : int exit_early;
148 : int use_fast_coef_costing;
149 : };
150 :
151 : #define LAST_NEW_MV_INDEX 6
152 : static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
153 : { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
154 : #if CONFIG_EXT_REFS
155 : { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
156 : { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
157 : { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
158 : #endif // CONFIG_EXT_REFS
159 : { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
160 : { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
161 :
162 : { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
163 :
164 : { NEWMV, { LAST_FRAME, NONE_FRAME } },
165 : #if CONFIG_EXT_REFS
166 : { NEWMV, { LAST2_FRAME, NONE_FRAME } },
167 : { NEWMV, { LAST3_FRAME, NONE_FRAME } },
168 : { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
169 : #endif // CONFIG_EXT_REFS
170 : { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
171 : { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
172 :
173 : { NEARMV, { LAST_FRAME, NONE_FRAME } },
174 : #if CONFIG_EXT_REFS
175 : { NEARMV, { LAST2_FRAME, NONE_FRAME } },
176 : { NEARMV, { LAST3_FRAME, NONE_FRAME } },
177 : { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
178 : #endif // CONFIG_EXT_REFS
179 : { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
180 : { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
181 :
182 : { ZEROMV, { LAST_FRAME, NONE_FRAME } },
183 : #if CONFIG_EXT_REFS
184 : { ZEROMV, { LAST2_FRAME, NONE_FRAME } },
185 : { ZEROMV, { LAST3_FRAME, NONE_FRAME } },
186 : { ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
187 : #endif // CONFIG_EXT_REFS
188 : { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
189 : { ZEROMV, { ALTREF_FRAME, NONE_FRAME } },
190 :
191 :   // TODO(zoeliu): May need to reconsider the order in which the modes are checked
192 :
193 : #if CONFIG_EXT_INTER
194 : { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
195 : #if CONFIG_EXT_REFS
196 : { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
197 : { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
198 : #endif // CONFIG_EXT_REFS
199 : { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
200 : #if CONFIG_EXT_REFS
201 : { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
202 : { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
203 : { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
204 : { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
205 : #endif // CONFIG_EXT_REFS
206 :
207 : #else // CONFIG_EXT_INTER
208 :
209 : { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
210 : #if CONFIG_EXT_REFS
211 : { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
212 : { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
213 : #endif // CONFIG_EXT_REFS
214 : { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
215 : #if CONFIG_EXT_REFS
216 : { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
217 : { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
218 : { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
219 : { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
220 : #endif // CONFIG_EXT_REFS
221 : #endif // CONFIG_EXT_INTER
222 :
223 : { TM_PRED, { INTRA_FRAME, NONE_FRAME } },
224 :
225 : #if CONFIG_ALT_INTRA
226 : { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
227 : #if CONFIG_SMOOTH_HV
228 : { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
229 : { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
230 : #endif // CONFIG_SMOOTH_HV
231 : #endif // CONFIG_ALT_INTRA
232 :
233 : #if CONFIG_EXT_INTER
234 : { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
235 : { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
236 : { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
237 : { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
238 : { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
239 : { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
240 : { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
241 :
242 : #if CONFIG_EXT_REFS
243 : { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
244 : { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
245 : { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
246 : { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
247 : { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
248 : { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
249 : { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
250 :
251 : { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
252 : { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
253 : { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
254 : { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
255 : { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
256 : { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
257 : { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
258 : #endif // CONFIG_EXT_REFS
259 :
260 : { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
261 : { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
262 : { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
263 : { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
264 : { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
265 : { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
266 : { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
267 :
268 : #if CONFIG_EXT_REFS
269 : { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
270 : { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
271 : { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
272 : { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
273 : { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
274 : { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
275 : { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
276 :
277 : { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
278 : { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
279 : { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
280 : { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
281 : { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
282 : { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
283 : { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
284 :
285 : { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
286 : { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
287 : { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
288 : { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
289 : { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
290 : { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
291 : { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
292 :
293 : { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
294 : { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
295 : { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
296 : { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
297 : { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
298 : { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
299 : { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
300 : #endif // CONFIG_EXT_REFS
301 :
302 : #else // CONFIG_EXT_INTER
303 :
304 : { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
305 : { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
306 : #if CONFIG_EXT_REFS
307 : { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
308 : { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
309 : { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
310 : { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
311 : #endif // CONFIG_EXT_REFS
312 : { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
313 : { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
314 :
315 : #if CONFIG_EXT_REFS
316 : { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
317 : { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
318 : { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
319 : { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
320 : { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
321 : { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
322 : { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
323 : { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
324 : #endif // CONFIG_EXT_REFS
325 :
326 : { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
327 : #if CONFIG_EXT_REFS
328 : { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
329 : { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
330 : #endif // CONFIG_EXT_REFS
331 : { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
332 :
333 : #if CONFIG_EXT_REFS
334 : { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
335 : { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
336 : { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
337 : { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
338 : #endif // CONFIG_EXT_REFS
339 :
340 : #endif // CONFIG_EXT_INTER
341 :
342 : { H_PRED, { INTRA_FRAME, NONE_FRAME } },
343 : { V_PRED, { INTRA_FRAME, NONE_FRAME } },
344 : { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
345 : { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
346 : { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
347 : { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
348 : { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
349 : { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
350 :
351 : #if CONFIG_EXT_INTER
352 : { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
353 : { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
354 : { NEARMV, { LAST_FRAME, INTRA_FRAME } },
355 : { NEWMV, { LAST_FRAME, INTRA_FRAME } },
356 :
357 : #if CONFIG_EXT_REFS
358 : { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
359 : { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
360 : { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
361 : { NEWMV, { LAST2_FRAME, INTRA_FRAME } },
362 :
363 : { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
364 : { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
365 : { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
366 : { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
367 : #endif // CONFIG_EXT_REFS
368 :
369 : { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
370 : { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
371 : { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
372 : { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
373 :
374 : #if CONFIG_EXT_REFS
375 : { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
376 : { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
377 : { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
378 : { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
379 : #endif // CONFIG_EXT_REFS
380 :
381 : { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
382 : { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
383 : { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
384 : { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
385 : #endif // CONFIG_EXT_INTER
386 : };
387 :
388 : #if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
389 0 : static INLINE int write_uniform_cost(int n, int v) {
390 0 : const int l = get_unsigned_bits(n);
391 0 : const int m = (1 << l) - n;
392 0 : if (l == 0) return 0;
393 0 : if (v < m)
394 0 : return (l - 1) * av1_cost_bit(128, 0);
395 : else
396 0 : return l * av1_cost_bit(128, 0);
397 : }
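/* Worked example (illustrative): for n == 5 symbols, l == 3 and
 * m == 8 - 5 == 3, i.e. truncated binary coding in which values 0..2 spend
 * l - 1 == 2 raw bits and values 3..4 spend the full 3 bits;
 * av1_cost_bit(128, 0) is the rate of one equiprobable bit. */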
398 : #endif // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
399 :
400 : // constants for prune 1 and prune 2 decision boundaries
401 : #define FAST_EXT_TX_CORR_MID 0.0
402 : #define FAST_EXT_TX_EDST_MID 0.1
403 : #define FAST_EXT_TX_CORR_MARGIN 0.5
404 : #define FAST_EXT_TX_EDST_MARGIN 0.3
405 :
406 : #if CONFIG_DAALA_DIST
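/* od_compute_var_4x4() returns the integer population variance of a 4x4
 * block, (sum(x^2) - sum(x)^2 / 16) / 16, with both divisions implemented
 * as right-shifts. */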
407 : static int od_compute_var_4x4(od_coeff *x, int stride) {
408 : int sum;
409 : int s2;
410 : int i;
411 : sum = 0;
412 : s2 = 0;
413 : for (i = 0; i < 4; i++) {
414 : int j;
415 : for (j = 0; j < 4; j++) {
416 : int t;
417 :
418 : t = x[i * stride + j];
419 : sum += t;
420 : s2 += t * t;
421 : }
422 : }
423 :   // TODO(yushin): Check whether any changes are required for high bit depth.
424 : return (s2 - (sum * sum >> 4)) >> 4;
425 : }
426 :
427 : /* OD_DIST_LP_MID controls the frequency weighting filter used for computing
428 : the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
429 : is applied both horizontally and vertically. For X=5, the filter is
430 : a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
431 : #define OD_DIST_LP_MID (5)
432 : #define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
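/* With OD_DIST_LP_MID == 5 the unnormalized 1-D kernel is [1 5 1], whose DC
 * gain is OD_DIST_LP_NORM == 7 per axis. od_compute_dist() applies it
 * separably without dividing, so the twice-filtered error carries a DC gain
 * of 7^2; od_compute_dist_8x8() sums that error squared, which is why it
 * normalizes by 1 / OD_DIST_LP_NORM^4. */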
433 :
434 : static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
435 : od_coeff *y, od_coeff *e_lp, int stride) {
436 : double sum;
437 : int min_var;
438 : double mean_var;
439 : double var_stat;
440 : double activity;
441 : double calibration;
442 : int i;
443 : int j;
444 : double vardist;
445 :
446 : vardist = 0;
447 : OD_ASSERT(qm != OD_FLAT_QM);
448 : (void)qm;
449 : #if 1
450 : min_var = INT_MAX;
451 : mean_var = 0;
452 : for (i = 0; i < 3; i++) {
453 : for (j = 0; j < 3; j++) {
454 : int varx;
455 : int vary;
456 : varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
457 : vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
458 : min_var = OD_MINI(min_var, varx);
459 : mean_var += 1. / (1 + varx);
460 : /* The cast to (double) is to avoid an overflow before the sqrt.*/
461 : vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
462 : }
463 : }
464 : /* We use a different variance statistic depending on whether activity
465 :      masking is used, since the harmonic mean appeared slightly worse with
466 : masking off. The calibration constant just ensures that we preserve the
467 : rate compared to activity=1. */
468 : if (use_activity_masking) {
469 : calibration = 1.95;
470 : var_stat = 9. / mean_var;
471 : } else {
472 : calibration = 1.62;
473 : var_stat = min_var;
474 : }
475 : /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
476 : activity masking constant. */
477 : activity = calibration * pow(.25 + var_stat, -1. / 6);
478 : #else
479 : activity = 1;
480 : #endif // 1
481 : sum = 0;
482 : for (i = 0; i < 8; i++) {
483 : for (j = 0; j < 8; j++)
484 : sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
485 : }
486 : /* Normalize the filter to unit DC response. */
487 : sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
488 : OD_DIST_LP_NORM);
489 : return activity * activity * (sum + vardist);
490 : }
491 :
492 : // Note: Inputs x and y are in the pixel domain.
493 : static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
494 : od_coeff *y, int bsize_w, int bsize_h,
495 : int qindex) {
496 : int i;
497 : double sum;
498 : sum = 0;
499 :
500 : assert(bsize_w >= 8 && bsize_h >= 8);
501 :
502 : if (qm == OD_FLAT_QM) {
503 : for (i = 0; i < bsize_w * bsize_h; i++) {
504 : double tmp;
505 : tmp = x[i] - y[i];
506 : sum += tmp * tmp;
507 : }
508 : } else {
509 : int j;
510 : DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
511 : DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
512 : DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
513 : int mid = OD_DIST_LP_MID;
514 : for (i = 0; i < bsize_h; i++) {
515 : for (j = 0; j < bsize_w; j++) {
516 : e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
517 : }
518 : }
519 : for (i = 0; i < bsize_h; i++) {
520 : tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
521 : tmp[i * bsize_w + bsize_w - 1] =
522 : mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
523 : for (j = 1; j < bsize_w - 1; j++) {
524 : tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
525 : e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
526 : }
527 : }
528 : for (j = 0; j < bsize_w; j++) {
529 : e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
530 : e_lp[(bsize_h - 1) * bsize_w + j] =
531 : mid * tmp[(bsize_h - 1) * bsize_w + j] +
532 : 2 * tmp[(bsize_h - 2) * bsize_w + j];
533 : }
534 : for (i = 1; i < bsize_h - 1; i++) {
535 : for (j = 0; j < bsize_w; j++) {
536 : e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
537 : tmp[(i - 1) * bsize_w + j] +
538 : tmp[(i + 1) * bsize_w + j];
539 : }
540 : }
541 : for (i = 0; i < bsize_h; i += 8) {
542 : for (j = 0; j < bsize_w; j += 8) {
543 : sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
544 : &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
545 : bsize_w);
546 : }
547 : }
548 : /* Scale according to linear regression against SSE, for 8x8 blocks. */
549 : if (activity_masking) {
550 : sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
551 : (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
552 : } else {
553 : sum *= qindex >= 128
554 : ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
555 : : qindex <= 43
556 : ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
557 : : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
558 : }
559 : }
560 : return sum;
561 : }
562 :
563 : int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
564 : int dst_stride, int bsw, int bsh, int qm,
565 : int use_activity_masking, int qindex) {
566 : int i, j;
567 : int64_t d;
568 : DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
569 : DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);
570 :
571 : assert(qm == OD_HVS_QM);
572 :
573 : for (j = 0; j < bsh; j++)
574 : for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
575 :
576 : for (j = 0; j < bsh; j++)
577 : for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
578 :
579 : d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
580 : qindex);
581 : return d;
582 : }
583 : #endif // CONFIG_DAALA_DIST
584 :
585 0 : static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
586 : const uint8_t *src, int src_stride,
587 : const uint8_t *dst, int dst_stride,
588 : double *hordist, double *verdist) {
589 0 : const int bw = block_size_wide[bsize];
590 0 : const int bh = block_size_high[bsize];
591 0 : unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
592 :
593 0 : const int f_index = bsize - BLOCK_16X16;
594 0 : if (f_index < 0) {
595 0 : const int w_shift = bw == 8 ? 1 : 2;
596 0 : const int h_shift = bh == 8 ? 1 : 2;
597 : #if CONFIG_HIGHBITDEPTH
598 0 : if (cpi->common.use_highbitdepth) {
599 0 : const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
600 0 : const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
601 0 : for (int i = 0; i < bh; ++i)
602 0 : for (int j = 0; j < bw; ++j) {
603 0 : const int index = (j >> w_shift) + ((i >> h_shift) << 2);
604 0 : esq[index] +=
605 0 : (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
606 0 : (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
607 : }
608 : } else {
609 : #endif // CONFIG_HIGHBITDEPTH
610 :
611 0 : for (int i = 0; i < bh; ++i)
612 0 : for (int j = 0; j < bw; ++j) {
613 0 : const int index = (j >> w_shift) + ((i >> h_shift) << 2);
614 0 : esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
615 0 : (src[j + i * src_stride] - dst[j + i * dst_stride]);
616 : }
617 : #if CONFIG_HIGHBITDEPTH
618 : }
619 : #endif // CONFIG_HIGHBITDEPTH
620 : } else {
621 0 : cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
622 0 : cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
623 : &esq[1]);
624 0 : cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
625 : &esq[2]);
626 0 : cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
627 : dst_stride, &esq[3]);
628 0 : src += bh / 4 * src_stride;
629 0 : dst += bh / 4 * dst_stride;
630 :
631 0 : cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
632 0 : cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
633 : &esq[5]);
634 0 : cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
635 : &esq[6]);
636 0 : cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
637 : dst_stride, &esq[7]);
638 0 : src += bh / 4 * src_stride;
639 0 : dst += bh / 4 * dst_stride;
640 :
641 0 : cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
642 0 : cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
643 : &esq[9]);
644 0 : cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
645 : &esq[10]);
646 0 : cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
647 : dst_stride, &esq[11]);
648 0 : src += bh / 4 * src_stride;
649 0 : dst += bh / 4 * dst_stride;
650 :
651 0 : cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
652 0 : cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
653 : &esq[13]);
654 0 : cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
655 : &esq[14]);
656 0 : cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
657 : dst_stride, &esq[15]);
658 : }
659 :
660 0 : double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
661 0 : esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
662 0 : esq[12] + esq[13] + esq[14] + esq[15];
663 0 : if (total > 0) {
664 0 : const double e_recip = 1.0 / total;
665 0 : hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
666 0 : hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
667 0 : hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
668 0 : verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
669 0 : verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
670 0 : verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
671 : } else {
672 0 : hordist[0] = verdist[0] = 0.25;
673 0 : hordist[1] = verdist[1] = 0.25;
674 0 : hordist[2] = verdist[2] = 0.25;
675 : }
676 0 : }
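/* The 16 esq[] cells form a 4x4 grid of sub-block energies; hordist[] and
 * verdist[] are its normalized column and row marginals (the fourth entry
 * of each is implicit, since the three reported entries and it sum to 1).
 * adst_vs_flipadst() below projects these onto the ADST_FLIP_SVM weights. */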
677 :
678 0 : static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
679 : const uint8_t *src, int src_stride,
680 : const uint8_t *dst, int dst_stride) {
681 0 : int prune_bitmask = 0;
682 0 : double svm_proj_h = 0, svm_proj_v = 0;
683 0 : double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
684 0 : get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
685 : hdist, vdist);
686 :
687 0 : svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
688 0 : vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
689 0 : svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
690 0 : hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
691 0 : if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
692 0 : prune_bitmask |= 1 << FLIPADST_1D;
693 0 : else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
694 0 : prune_bitmask |= 1 << ADST_1D;
695 :
696 0 : if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
697 0 : prune_bitmask |= 1 << (FLIPADST_1D + 8);
698 0 : else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
699 0 : prune_bitmask |= 1 << (ADST_1D + 8);
700 :
701 0 : return prune_bitmask;
702 : }
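/* Prune bitmask layout (shared with dct_vs_idtx() below): bits 0-7 hold the
 * vertical 1-D transform decisions and bits 8-15 the horizontal ones, which
 * is the layout do_tx_type_search() tests via
 * (prune >> vtx_tab[tx_type]) & 1 and (prune >> (htx_tab[tx_type] + 8)) & 1. */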
703 :
704 : #if CONFIG_EXT_TX
705 0 : static void get_horver_correlation(const int16_t *diff, int stride, int w,
706 : int h, double *hcorr, double *vcorr) {
707 :   // Returns the (Pearson) correlation of each residual sample with its
  // left neighbor (hcorr) and its top neighbor (vcorr).
708 0 : const int num = (h - 1) * (w - 1);
709 : double num_r;
710 : int i, j;
711 0 : int64_t xy_sum = 0, xz_sum = 0;
712 0 : int64_t x_sum = 0, y_sum = 0, z_sum = 0;
713 0 : int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
714 : double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
715 0 : *hcorr = *vcorr = 1;
716 :
717 0 : assert(num > 0);
718 0 : num_r = 1.0 / num;
719 0 : for (i = 1; i < h; ++i) {
720 0 : for (j = 1; j < w; ++j) {
721 0 : const int16_t x = diff[i * stride + j];
722 0 : const int16_t y = diff[i * stride + j - 1];
723 0 : const int16_t z = diff[(i - 1) * stride + j];
724 0 : xy_sum += x * y;
725 0 : xz_sum += x * z;
726 0 : x_sum += x;
727 0 : y_sum += y;
728 0 : z_sum += z;
729 0 : x2_sum += x * x;
730 0 : y2_sum += y * y;
731 0 : z2_sum += z * z;
732 : }
733 : }
734 0 : x_var_n = x2_sum - (x_sum * x_sum) * num_r;
735 0 : y_var_n = y2_sum - (y_sum * y_sum) * num_r;
736 0 : z_var_n = z2_sum - (z_sum * z_sum) * num_r;
737 0 : xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
738 0 : xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
739 0 : if (x_var_n > 0 && y_var_n > 0) {
740 0 : *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
741 0 : *hcorr = *hcorr < 0 ? 0 : *hcorr;
742 : }
743 0 : if (x_var_n > 0 && z_var_n > 0) {
744 0 : *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
745 0 : *vcorr = *vcorr < 0 ? 0 : *vcorr;
746 : }
747 0 : }
748 :
749 0 : int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
750 : double hcorr, vcorr;
751 0 : int prune_bitmask = 0;
752 0 : get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
753 :
754 0 : if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
755 0 : prune_bitmask |= 1 << IDTX_1D;
756 0 : else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
757 0 : prune_bitmask |= 1 << DCT_1D;
758 :
759 0 : if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
760 0 : prune_bitmask |= 1 << (IDTX_1D + 8);
761 0 : else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
762 0 : prune_bitmask |= 1 << (DCT_1D + 8);
763 0 : return prune_bitmask;
764 : }
765 :
766 : // Performance drop: 0.5%, Speed improvement: 24%
767 0 : static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
768 : MACROBLOCK *x, const MACROBLOCKD *xd,
769 : int adst_flipadst, int dct_idtx) {
770 0 : int prune = 0;
771 :
772 0 : if (adst_flipadst) {
773 0 : const struct macroblock_plane *const p = &x->plane[0];
774 0 : const struct macroblockd_plane *const pd = &xd->plane[0];
775 0 : prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
776 0 : pd->dst.buf, pd->dst.stride);
777 : }
778 0 : if (dct_idtx) {
779 0 : av1_subtract_plane(x, bsize, 0);
780 0 : const struct macroblock_plane *const p = &x->plane[0];
781 0 : const int bw = 4 << (b_width_log2_lookup[bsize]);
782 0 : const int bh = 4 << (b_height_log2_lookup[bsize]);
783 0 : prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
784 : }
785 :
786 0 : return prune;
787 : }
788 : #endif // CONFIG_EXT_TX
789 :
790 : // Performance drop: 0.3%, Speed improvement: 5%
791 0 : static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
792 : const MACROBLOCK *x, const MACROBLOCKD *xd) {
793 0 : const struct macroblock_plane *const p = &x->plane[0];
794 0 : const struct macroblockd_plane *const pd = &xd->plane[0];
795 0 : return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
796 : pd->dst.stride);
797 : }
798 :
799 0 : static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
800 : const MACROBLOCKD *const xd, int tx_set) {
801 : #if CONFIG_EXT_TX
802 0 : const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL;
803 : #else
804 : const int tx_set_1D[TX_TYPES_1D] = { 0 };
805 : #endif // CONFIG_EXT_TX
806 :
807 0 : switch (cpi->sf.tx_type_search.prune_mode) {
808 0 : case NO_PRUNE: return 0; break;
809 : case PRUNE_ONE:
810 0 : if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
811 0 : return 0;
812 0 : return prune_one_for_sby(cpi, bsize, x, xd);
813 : break;
814 : #if CONFIG_EXT_TX
815 : case PRUNE_TWO:
816 0 : if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
817 0 : if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
818 0 : return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
819 : }
820 0 : if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
821 0 : return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
822 0 : return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
823 : break;
824 : #endif // CONFIG_EXT_TX
825 : }
826 0 : assert(0);
827 : return 0;
828 : }
829 :
830 0 : static int do_tx_type_search(TX_TYPE tx_type, int prune) {
831 :   // TODO(sarahparker) implement for the non-EXT_TX case
832 : #if CONFIG_EXT_TX
833 0 : return !(((prune >> vtx_tab[tx_type]) & 1) |
834 0 : ((prune >> (htx_tab[tx_type] + 8)) & 1));
835 : #else
836 : // temporary to avoid compiler warnings
837 : (void)vtx_tab;
838 : (void)htx_tab;
839 : (void)tx_type;
840 : (void)prune;
841 : return 1;
842 : #endif // CONFIG_EXT_TX
843 : }
844 :
845 0 : static void model_rd_from_sse(const AV1_COMP *const cpi,
846 : const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
847 : int plane, int64_t sse, int *rate,
848 : int64_t *dist) {
849 0 : const struct macroblockd_plane *const pd = &xd->plane[plane];
850 0 : const int dequant_shift =
851 : #if CONFIG_HIGHBITDEPTH
852 0 : (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
853 : #endif // CONFIG_HIGHBITDEPTH
854 : 3;
855 :
856 :   // Fast approximation of the modelling function.
857 0 : if (cpi->sf.simple_model_rd_from_var) {
858 0 : const int64_t square_error = sse;
859 0 : int quantizer = (pd->dequant[1] >> dequant_shift);
860 :
861 0 : if (quantizer < 120)
862 0 : *rate = (int)((square_error * (280 - quantizer)) >>
863 : (16 - AV1_PROB_COST_SHIFT));
864 : else
865 0 : *rate = 0;
866 0 : *dist = (square_error * quantizer) >> 8;
867 : } else {
868 0 : av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
869 0 : pd->dequant[1] >> dequant_shift, rate, dist);
870 : }
871 :
872 0 : *dist <<= 4;
873 0 : }
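/* Illustrative numbers for the fast path above, assuming
 * AV1_PROB_COST_SHIFT == 9: with sse == 1000 and quantizer == 40,
 * rate == (1000 * 240) >> 7 == 1875 cost units and
 * dist == ((1000 * 40) >> 8) << 4 == 2496. */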
874 :
875 0 : static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
876 : MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
877 : int plane_to, int *out_rate_sum,
878 : int64_t *out_dist_sum, int *skip_txfm_sb,
879 : int64_t *skip_sse_sb) {
880 :   // Note: our transform coefficients are 8 times an orthogonal transform.
881 :   // Hence the quantizer step is also scaled by 8. To get the effective
882 :   // quantizer we need to divide by 8 before sending to the modeling function.
883 : int plane;
884 0 : const int ref = xd->mi[0]->mbmi.ref_frame[0];
885 :
886 0 : int64_t rate_sum = 0;
887 0 : int64_t dist_sum = 0;
888 0 : int64_t total_sse = 0;
889 :
890 0 : x->pred_sse[ref] = 0;
891 :
892 0 : for (plane = plane_from; plane <= plane_to; ++plane) {
893 0 : struct macroblock_plane *const p = &x->plane[plane];
894 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
895 : #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
896 0 : const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
897 : #else
898 : const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
899 : #endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
900 :
901 : unsigned int sse;
902 : int rate;
903 : int64_t dist;
904 :
905 : #if CONFIG_CB4X4
906 0 : if (x->skip_chroma_rd && plane) continue;
907 : #endif // CONFIG_CB4X4
908 :
909 : // TODO(geza): Write direct sse functions that do not compute
910 : // variance as well.
911 0 : cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
912 : &sse);
913 :
914 0 : if (plane == 0) x->pred_sse[ref] = sse;
915 :
916 0 : total_sse += sse;
917 :
918 0 : model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
919 :
920 0 : rate_sum += rate;
921 0 : dist_sum += dist;
922 : }
923 :
924 0 : *skip_txfm_sb = total_sse == 0;
925 0 : *skip_sse_sb = total_sse << 4;
926 0 : *out_rate_sum = (int)rate_sum;
927 0 : *out_dist_sum = dist_sum;
928 0 : }
929 :
930 0 : int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
931 : intptr_t block_size, int64_t *ssz) {
932 : int i;
933 0 : int64_t error = 0, sqcoeff = 0;
934 :
935 0 : for (i = 0; i < block_size; i++) {
936 0 : const int diff = coeff[i] - dqcoeff[i];
937 0 : error += diff * diff;
938 0 : sqcoeff += coeff[i] * coeff[i];
939 : }
940 :
941 0 : *ssz = sqcoeff;
942 0 : return error;
943 : }
944 :
945 0 : int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
946 : int block_size) {
947 : int i;
948 0 : int64_t error = 0;
949 :
950 0 : for (i = 0; i < block_size; i++) {
951 0 : const int diff = coeff[i] - dqcoeff[i];
952 0 : error += diff * diff;
953 : }
954 :
955 0 : return error;
956 : }
957 :
958 : #if CONFIG_HIGHBITDEPTH
959 0 : int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
960 : const tran_low_t *dqcoeff, intptr_t block_size,
961 : int64_t *ssz, int bd) {
962 : int i;
963 0 : int64_t error = 0, sqcoeff = 0;
964 0 : int shift = 2 * (bd - 8);
965 0 : int rounding = shift > 0 ? 1 << (shift - 1) : 0;
966 :
967 0 : for (i = 0; i < block_size; i++) {
968 0 : const int64_t diff = coeff[i] - dqcoeff[i];
969 0 : error += diff * diff;
970 0 : sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
971 : }
972 0 : assert(error >= 0 && sqcoeff >= 0);
973 0 : error = (error + rounding) >> shift;
974 0 : sqcoeff = (sqcoeff + rounding) >> shift;
975 :
976 0 : *ssz = sqcoeff;
977 0 : return error;
978 : }
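/* The shift above rescales the squared errors from bd-bit to 8-bit
 * precision: e.g. bd == 10 gives shift == 4 and rounding == 8, dividing
 * both error and sqcoeff by 16 with rounding. */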
979 : #endif // CONFIG_HIGHBITDEPTH
980 :
981 : #if CONFIG_PVQ
982 : // Without PVQ, av1_block_error_c() returns two kinds of errors:
983 : // 1) the reconstruction (i.e. decoded) error and
984 : // 2) the squared sum of the transformed residue (i.e. 'coeff').
985 : // However, if PVQ is enabled, coeff does not hold the transformed residue
986 : // but instead the transformed original.
987 : // Hence a new parameter, the ref vector (i.e. the transformed predicted
988 : // signal), is required to derive the residue signal:
989 : // coeff - ref = residue (all transformed).
990 :
991 : #if CONFIG_HIGHBITDEPTH
992 : static int64_t av1_highbd_block_error2_c(const tran_low_t *coeff,
993 : const tran_low_t *dqcoeff,
994 : const tran_low_t *ref,
995 : intptr_t block_size, int64_t *ssz,
996 : int bd) {
997 : int64_t error;
998 : int64_t sqcoeff;
999 : int shift = 2 * (bd - 8);
1000 : int rounding = shift > 0 ? 1 << (shift - 1) : 0;
1001 : // Use the existing sse codes for calculating distortion of decoded signal:
1002 : // i.e. (orig - decoded)^2
1003 : // For high bit depth, throw away ssz until a 32-bit version of
1004 : // av1_block_error_fp is written.
1005 : int64_t ssz_trash;
1006 : error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
1007 : // prediction residue^2 = (orig - ref)^2
1008 : sqcoeff = av1_block_error(coeff, ref, block_size, &ssz_trash);
1009 : error = (error + rounding) >> shift;
1010 : sqcoeff = (sqcoeff + rounding) >> shift;
1011 : *ssz = sqcoeff;
1012 : return error;
1013 : }
1014 : #else
1015 : // TODO(yushin): Since the 4x4 case does not need ssz, it would be better to
1016 : // refactor this into a separate function that skips the ssz computations.
1017 : static int64_t av1_block_error2_c(const tran_low_t *coeff,
1018 : const tran_low_t *dqcoeff,
1019 : const tran_low_t *ref, intptr_t block_size,
1020 : int64_t *ssz) {
1021 : int64_t error;
1022 : // Use the existing sse codes for calculating distortion of decoded signal:
1023 : // i.e. (orig - decoded)^2
1024 : error = av1_block_error_fp(coeff, dqcoeff, block_size);
1025 : // prediction residue^2 = (orig - ref)^2
1026 : *ssz = av1_block_error_fp(coeff, ref, block_size);
1027 : return error;
1028 : }
1029 : #endif // CONFIG_HIGHBITDEPTH
1030 : #endif // CONFIG_PVQ
1031 :
1032 : #if !CONFIG_PVQ || CONFIG_VAR_TX
1033 : /* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
1034 : * decide whether to include cost of a trailing EOB node or not (i.e. we
1035 : * can skip this if the last coefficient in this transform block, e.g. the
1036 :  * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
1037 : * were non-zero). */
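/* The band walk this terminator protects is visible in cost_coeffs() below:
 *   int band_left = *band_count++;
 *   ...
 *   if (!--band_left) { band_left = *band_count++; ++token_costs; }
 * When every coefficient of the block is significant, the last decrement
 * still fetches one more count, and the trailing 0 keeps that fetch
 * well-defined. */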
1038 : #if !CONFIG_LV_MAP
1039 0 : static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
1040 : int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
1041 : const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
1042 : int use_fast_coef_costing) {
1043 0 : MACROBLOCKD *const xd = &x->e_mbd;
1044 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1045 0 : const struct macroblock_plane *p = &x->plane[plane];
1046 0 : const struct macroblockd_plane *pd = &xd->plane[plane];
1047 0 : const PLANE_TYPE type = pd->plane_type;
1048 0 : const uint16_t *band_count = &band_count_table[tx_size][1];
1049 0 : const int eob = p->eobs[block];
1050 0 : const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
1051 0 : const int tx_size_ctx = txsize_sqr_map[tx_size];
1052 0 : unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
1053 0 : x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
1054 : uint8_t token_cache[MAX_TX_SQUARE];
1055 0 : int pt = combine_entropy_contexts(*a, *l);
1056 : int c, cost;
1057 0 : const int16_t *scan = scan_order->scan;
1058 0 : const int16_t *nb = scan_order->neighbors;
1059 0 : const int ref = is_inter_block(mbmi);
1060 0 : aom_prob *blockz_probs =
1061 0 : cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];
1062 :
1063 : #if CONFIG_HIGHBITDEPTH
1064 0 : const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
1065 : #else
1066 : const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
1067 : #endif // CONFIG_HIGHBITDEPTH
1068 :
1069 : #if !CONFIG_VAR_TX && !CONFIG_SUPERTX
1070 : // Check for consistency of tx_size with mode info
1071 : assert(tx_size == get_tx_size(plane, xd));
1072 : #endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX
1073 : (void)cm;
1074 :
1075 0 : if (eob == 0) {
1076 : // single eob token
1077 0 : cost = av1_cost_bit(blockz_probs[pt], 0);
1078 : } else {
1079 0 : if (use_fast_coef_costing) {
1080 0 : int band_left = *band_count++;
1081 :
1082 : // dc token
1083 0 : int v = qcoeff[0];
1084 : int16_t prev_t;
1085 0 : cost = av1_get_token_cost(v, &prev_t, cat6_bits);
1086 0 : cost += (*token_costs)[!prev_t][pt][prev_t];
1087 :
1088 0 : token_cache[0] = av1_pt_energy_class[prev_t];
1089 0 : ++token_costs;
1090 :
1091 : // ac tokens
1092 0 : for (c = 1; c < eob; c++) {
1093 0 : const int rc = scan[c];
1094 : int16_t t;
1095 :
1096 0 : v = qcoeff[rc];
1097 0 : cost += av1_get_token_cost(v, &t, cat6_bits);
1098 0 : cost += (*token_costs)[!t][!prev_t][t];
1099 0 : prev_t = t;
1100 0 : if (!--band_left) {
1101 0 : band_left = *band_count++;
1102 0 : ++token_costs;
1103 : }
1104 : }
1105 :
1106 : // eob token
1107 0 : cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
1108 :
1109 : } else { // !use_fast_coef_costing
1110 0 : int band_left = *band_count++;
1111 :
1112 : // dc token
1113 0 : int v = qcoeff[0];
1114 : int16_t tok;
1115 0 : cost = av1_get_token_cost(v, &tok, cat6_bits);
1116 0 : cost += (*token_costs)[!tok][pt][tok];
1117 :
1118 0 : token_cache[0] = av1_pt_energy_class[tok];
1119 0 : ++token_costs;
1120 :
1121 : // ac tokens
1122 0 : for (c = 1; c < eob; c++) {
1123 0 : const int rc = scan[c];
1124 :
1125 0 : v = qcoeff[rc];
1126 0 : cost += av1_get_token_cost(v, &tok, cat6_bits);
1127 0 : pt = get_coef_context(nb, token_cache, c);
1128 0 : cost += (*token_costs)[!tok][pt][tok];
1129 0 : token_cache[rc] = av1_pt_energy_class[tok];
1130 0 : if (!--band_left) {
1131 0 : band_left = *band_count++;
1132 0 : ++token_costs;
1133 : }
1134 : }
1135 :
1136 : // eob token
1137 0 : pt = get_coef_context(nb, token_cache, c);
1138 0 : cost += (*token_costs)[0][pt][EOB_TOKEN];
1139 : }
1140 : }
1141 :
1142 0 : return cost;
1143 : }
1144 : #endif // !CONFIG_LV_MAP
1145 :
1146 0 : int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
1147 : int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
1148 : const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
1149 : int use_fast_coef_costing) {
1150 : #if !CONFIG_LV_MAP
1151 0 : const AV1_COMMON *const cm = &cpi->common;
1152 0 : return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
1153 : use_fast_coef_costing);
1154 : #else // !CONFIG_LV_MAP
1155 : (void)scan_order;
1156 : (void)use_fast_coef_costing;
1157 : const MACROBLOCKD *xd = &x->e_mbd;
1158 : const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1159 : const struct macroblockd_plane *pd = &xd->plane[plane];
1160 : const BLOCK_SIZE bsize = mbmi->sb_type;
1161 : #if CONFIG_CB4X4
1162 : #if CONFIG_CHROMA_2X2
1163 : const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
1164 : #else
1165 : const BLOCK_SIZE plane_bsize =
1166 : AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
1167 : #endif // CONFIG_CHROMA_2X2
1168 : #else // CONFIG_CB4X4
1169 : const BLOCK_SIZE plane_bsize =
1170 : get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
1171 : #endif // CONFIG_CB4X4
1172 :
1173 : TXB_CTX txb_ctx;
1174 : get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
1175 : return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx);
1176 : #endif // !CONFIG_LV_MAP
1177 : }
1178 : #endif // !CONFIG_PVQ || CONFIG_VAR_TX
1179 :
1180 : // Get the transform block's visible dimensions, cropped to the MI units.
1181 0 : static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
1182 : BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
1183 : BLOCK_SIZE tx_bsize, int *width, int *height,
1184 : int *visible_width, int *visible_height) {
1185 : #if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
1186 0 : assert(tx_bsize <= plane_bsize);
1187 : #endif // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
1188 0 : int txb_height = block_size_high[tx_bsize];
1189 0 : int txb_width = block_size_wide[tx_bsize];
1190 0 : const int block_height = block_size_high[plane_bsize];
1191 0 : const int block_width = block_size_wide[plane_bsize];
1192 0 : const struct macroblockd_plane *const pd = &xd->plane[plane];
1193 : // TODO(aconverse@google.com): Investigate using crop_width/height here rather
1194 : // than the MI size
1195 0 : const int block_rows =
1196 0 : (xd->mb_to_bottom_edge >= 0)
1197 : ? block_height
1198 0 : : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
1199 0 : const int block_cols =
1200 0 : (xd->mb_to_right_edge >= 0)
1201 : ? block_width
1202 0 : : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
1203 0 : const int tx_unit_size = tx_size_wide_log2[0];
1204 0 : if (width) *width = txb_width;
1205 0 : if (height) *height = txb_height;
1206 0 : *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
1207 0 : *visible_height =
1208 0 : clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
1209 0 : }
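/* Worked example of the crop above (mb_to_right_edge is kept in 1/8-pel
 * units, hence the ">> (3 + pd->subsampling_x)"): a luma block whose mi
 * grid hangs 4 pixels past the right frame edge has mb_to_right_edge ==
 * -32, so block_cols == block_width - 4 and the rightmost 4 columns are
 * excluded from the visible region. */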
1210 :
1211 : // Compute the pixel-domain sum of squared error over all visible 4x4s in the
1212 : // transform block.
1213 0 : static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
1214 : int plane, const uint8_t *src, const int src_stride,
1215 : const uint8_t *dst, const int dst_stride, int blk_row,
1216 : int blk_col, const BLOCK_SIZE plane_bsize,
1217 : const BLOCK_SIZE tx_bsize) {
1218 : int txb_rows, txb_cols, visible_rows, visible_cols;
1219 0 : get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
1220 : &txb_cols, &txb_rows, &visible_cols, &visible_rows);
1221 0 : assert(visible_rows > 0);
1222 0 : assert(visible_cols > 0);
1223 : #if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
1224 : if ((txb_rows == visible_rows && txb_cols == visible_cols) &&
1225 : tx_bsize < BLOCK_SIZES) {
1226 : #else
1227 0 : if (txb_rows == visible_rows && txb_cols == visible_cols) {
1228 : #endif
1229 : unsigned sse;
1230 0 : cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
1231 0 : return sse;
1232 : }
1233 : #if CONFIG_HIGHBITDEPTH
1234 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1235 0 : uint64_t sse = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
1236 : visible_cols, visible_rows);
1237 0 : return (unsigned int)ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
1238 : }
1239 : #endif // CONFIG_HIGHBITDEPTH
1240 0 : unsigned sse = aom_sse_odd_size(src, src_stride, dst, dst_stride,
1241 : visible_cols, visible_rows);
1242 0 : return sse;
1243 : }
1244 :
1245 : // Compute the sum of squares over all visible 4x4s in the transform block.
1246 0 : static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane,
1247 : const int16_t *diff, const int diff_stride,
1248 : int blk_row, int blk_col,
1249 : const BLOCK_SIZE plane_bsize,
1250 : const BLOCK_SIZE tx_bsize) {
1251 : int visible_rows, visible_cols;
1252 0 : get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
1253 : NULL, &visible_cols, &visible_rows);
1254 0 : return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
1255 : }
1256 :
1257 0 : void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
1258 : BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
1259 : TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
1260 : OUTPUT_STATUS output_status) {
1261 0 : MACROBLOCKD *const xd = &x->e_mbd;
1262 0 : const struct macroblock_plane *const p = &x->plane[plane];
1263 : #if CONFIG_DAALA_DIST
1264 : int qm = OD_HVS_QM;
1265 : int use_activity_masking = 0;
1266 : #if CONFIG_PVQ
1267 : use_activity_masking = x->daala_enc.use_activity_masking;
1268 : #endif // CONFIG_PVQ
1269 : struct macroblockd_plane *const pd = &xd->plane[plane];
1270 : #else // CONFIG_DAALA_DIST
1271 0 : const struct macroblockd_plane *const pd = &xd->plane[plane];
1272 : #endif // CONFIG_DAALA_DIST
1273 :
1274 0 : if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {
1275 : // Transform domain distortion computation is more efficient as it does
1276 : // not involve an inverse transform, but it is less accurate.
1277 0 : const int buffer_length = tx_size_2d[tx_size];
1278 : int64_t this_sse;
1279 0 : int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
1280 0 : tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
1281 0 : tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1282 : #if CONFIG_PVQ
1283 : tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
1284 :
1285 : #if CONFIG_HIGHBITDEPTH
1286 : const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
1287 : *out_dist = av1_highbd_block_error2_c(coeff, dqcoeff, ref_coeff,
1288 : buffer_length, &this_sse, bd) >>
1289 : shift;
1290 : #else
1291 : *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
1292 : &this_sse) >>
1293 : shift;
1294 : #endif // CONFIG_HIGHBITDEPTH
1295 : #elif CONFIG_HIGHBITDEPTH
1296 0 : const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
1297 0 : *out_dist =
1298 0 : av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
1299 : shift;
1300 : #else
1301 : *out_dist =
1302 : av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
1303 : #endif // CONFIG_PVQ
1304 0 : *out_sse = this_sse >> shift;
1305 : } else {
1306 0 : const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
1307 : #if !CONFIG_PVQ || CONFIG_DAALA_DIST
1308 0 : const int bsw = block_size_wide[tx_bsize];
1309 0 : const int bsh = block_size_high[tx_bsize];
1310 : #endif
1311 0 : const int src_stride = x->plane[plane].src.stride;
1312 0 : const int dst_stride = xd->plane[plane].dst.stride;
1313 :     // Scale the transform block index to pixel units.
1314 0 : const int src_idx = (blk_row * src_stride + blk_col)
1315 0 : << tx_size_wide_log2[0];
1316 0 : const int dst_idx = (blk_row * dst_stride + blk_col)
1317 0 : << tx_size_wide_log2[0];
1318 0 : const uint8_t *src = &x->plane[plane].src.buf[src_idx];
1319 0 : const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
1320 0 : const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1321 0 : const uint16_t eob = p->eobs[block];
1322 :
1323 0 : assert(cpi != NULL);
1324 0 : assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
1325 :
1326 : #if CONFIG_DAALA_DIST
1327 : if (plane == 0 && bsw >= 8 && bsh >= 8) {
1328 : if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
1329 : const int pred_stride = block_size_wide[plane_bsize];
1330 : const int pred_idx = (blk_row * pred_stride + blk_col)
1331 : << tx_size_wide_log2[0];
1332 : const int16_t *pred = &pd->pred[pred_idx];
1333 : int i, j;
1334 : DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
1335 :
1336 : for (j = 0; j < bsh; j++)
1337 : for (i = 0; i < bsw; i++)
1338 : pred8[j * bsw + i] = pred[j * pred_stride + i];
1339 : *out_sse = av1_daala_dist(src, src_stride, pred8, bsw, bsw, bsh, qm,
1340 : use_activity_masking, x->qindex);
1341 : } else {
1342 : *out_sse = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
1343 : qm, use_activity_masking, x->qindex);
1344 : }
1345 : } else
1346 : #endif // CONFIG_DAALA_DIST
1347 : {
1348 0 : const int diff_stride = block_size_wide[plane_bsize];
1349 0 : const int diff_idx = (blk_row * diff_stride + blk_col)
1350 0 : << tx_size_wide_log2[0];
1351 0 : const int16_t *diff = &p->src_diff[diff_idx];
1352 0 : *out_sse = sum_squares_visible(xd, plane, diff, diff_stride, blk_row,
1353 : blk_col, plane_bsize, tx_bsize);
1354 : #if CONFIG_HIGHBITDEPTH
1355 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1356 0 : *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
1357 : #endif // CONFIG_HIGHBITDEPTH
1358 : }
1359 0 : *out_sse *= 16;
1360 :
1361 0 : if (eob) {
1362 0 : if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
1363 : #if CONFIG_DAALA_DIST
1364 : if (plane == 0 && bsw >= 8 && bsh >= 8)
1365 : *out_dist = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
1366 : qm, use_activity_masking, x->qindex);
1367 : else
1368 : #endif // CONFIG_DAALA_DIST
1369 0 : *out_dist =
1370 0 : pixel_sse(cpi, xd, plane, src, src_stride, dst, dst_stride,
1371 : blk_row, blk_col, plane_bsize, tx_bsize);
1372 : } else {
1373 : #if CONFIG_HIGHBITDEPTH
1374 : uint8_t *recon;
1375 : DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
1376 :
1377 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1378 0 : recon = CONVERT_TO_BYTEPTR(recon16);
1379 : else
1380 0 : recon = (uint8_t *)recon16;
1381 : #else
1382 : DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
1383 : #endif // CONFIG_HIGHBITDEPTH
1384 :
1385 : #if !CONFIG_PVQ
1386 : #if CONFIG_HIGHBITDEPTH
1387 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1388 0 : aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
1389 : NULL, 0, bsw, bsh, xd->bd);
1390 : } else {
1391 : #endif // CONFIG_HIGHBITDEPTH
1392 0 : aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL,
1393 : 0, bsw, bsh);
1394 : #if CONFIG_HIGHBITDEPTH
1395 : }
1396 : #endif // CONFIG_HIGHBITDEPTH
1397 : #else
1398 : (void)dst;
1399 : #endif // !CONFIG_PVQ
1400 :
1401 0 : const PLANE_TYPE plane_type = get_plane_type(plane);
1402 0 : TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
1403 :
1404 0 : av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, recon,
1405 : MAX_TX_SIZE, eob);
1406 :
1407 : #if CONFIG_DAALA_DIST
1408 : if (plane == 0 && bsw >= 8 && bsh >= 8) {
1409 : *out_dist = av1_daala_dist(src, src_stride, recon, MAX_TX_SIZE, bsw,
1410 : bsh, qm, use_activity_masking, x->qindex);
1411 : } else {
1412 : if (plane == 0) {
1413 :           // Save the decoded pixels of an inter block in pd->pred so that
1414 :           // block_8x8_rd_txfm_daala_dist() does not need to produce them again
1415 :           // by calling av1_inverse_transform_block().
1416 : const int pred_stride = block_size_wide[plane_bsize];
1417 : const int pred_idx = (blk_row * pred_stride + blk_col)
1418 : << tx_size_wide_log2[0];
1419 : int16_t *pred = &pd->pred[pred_idx];
1420 : int i, j;
1421 :
1422 : for (j = 0; j < bsh; j++)
1423 : for (i = 0; i < bsw; i++)
1424 : pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
1425 : }
1426 : #endif // CONFIG_DAALA_DIST
1427 0 : *out_dist =
1428 0 : pixel_sse(cpi, xd, plane, src, src_stride, recon, MAX_TX_SIZE,
1429 : blk_row, blk_col, plane_bsize, tx_bsize);
1430 : #if CONFIG_DAALA_DIST
1431 : }
1432 : #endif // CONFIG_DAALA_DIST
1433 : }
1434 0 : *out_dist *= 16;
1435 : } else {
1436 0 : *out_dist = *out_sse;
1437 : }
1438 : }
1439 0 : }
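/* Scaling note: both branches above report distortion at 16 times the
 * pixel-domain SSE. The pixel path uses the explicit "*= 16"; the
 * transform-domain path gets there via the MAX_TX_SCALE shift, since the
 * coefficients are 8x an orthogonal transform (see the note in
 * model_rd_for_sb()). This matches the "<<= 4" in model_rd_from_sse(), so
 * all distortions feeding RDCOST() share the same fixed-point scale. */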
1440 :
1441 0 : static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
1442 : BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
1443 0 : struct rdcost_block_args *args = arg;
1444 0 : MACROBLOCK *const x = args->x;
1445 0 : MACROBLOCKD *const xd = &x->e_mbd;
1446 0 : const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1447 0 : const AV1_COMP *cpi = args->cpi;
1448 0 : ENTROPY_CONTEXT *a = args->t_above + blk_col;
1449 0 : ENTROPY_CONTEXT *l = args->t_left + blk_row;
1450 : #if !CONFIG_TXK_SEL
1451 0 : const AV1_COMMON *cm = &cpi->common;
1452 : #endif
1453 : int64_t rd1, rd2, rd;
1454 : RD_STATS this_rd_stats;
1455 :
1456 0 : assert(tx_size == get_tx_size(plane, xd));
1457 :
1458 0 : av1_init_rd_stats(&this_rd_stats);
1459 :
1460 0 : if (args->exit_early) return;
1461 :
1462 0 : if (!is_inter_block(mbmi)) {
1463 : #if CONFIG_CFL
1464 :
1465 : #if CONFIG_EC_ADAPT
1466 : FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
1467 : #else
1468 : FRAME_CONTEXT *const ec_ctx = cm->fc;
1469 : #endif // CONFIG_EC_ADAPT
1470 :
1471 : av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
1472 : blk_row, tx_size, plane_bsize);
1473 : #else
1474 0 : av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
1475 : #endif
1476 : #if CONFIG_DPCM_INTRA
1477 : const int block_raster_idx =
1478 : av1_block_index_to_raster_order(tx_size, block);
1479 : const PREDICTION_MODE mode =
1480 : (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
1481 : TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
1482 : xd, block, tx_size);
1483 : if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
1484 : int8_t skip;
1485 : av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
1486 : plane_bsize, tx_size, tx_type, a, l, &skip);
1487 : av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
1488 : tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
1489 : OUTPUT_HAS_DECODED_PIXELS);
1490 : goto CALCULATE_RD;
1491 : }
1492 : #endif // CONFIG_DPCM_INTRA
1493 0 : av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
1494 : }
1495 :
1496 : #if !CONFIG_TXK_SEL
1497 : // full forward transform and quantization
1498 0 : const int coeff_ctx = combine_entropy_contexts(*a, *l);
1499 0 : av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
1500 : coeff_ctx, AV1_XFORM_QUANT_FP);
1501 0 : av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
1502 :
1503 0 : if (!is_inter_block(mbmi)) {
1504 0 : struct macroblock_plane *const p = &x->plane[plane];
1505 0 : av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
1506 0 : p->eobs[block]);
1507 0 : av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
1508 : tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
1509 : OUTPUT_HAS_DECODED_PIXELS);
1510 : } else {
1511 0 : av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
1512 : tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
1513 : OUTPUT_HAS_PREDICTED_PIXELS);
1514 : }
1515 : #if CONFIG_CFL
1516 : if (plane == AOM_PLANE_Y && x->cfl_store_y) {
1517 : struct macroblockd_plane *const pd = &xd->plane[plane];
1518 : const int dst_stride = pd->dst.stride;
1519 : uint8_t *dst =
1520 : &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
1521 : cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
1522 : }
1523 : #endif
1524 : #if CONFIG_DPCM_INTRA
1525 : CALCULATE_RD : {}
1526 : #endif // CONFIG_DPCM_INTRA
1527 0 : rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
1528 0 : if (args->this_rd + rd > args->best_rd) {
1529 0 : args->exit_early = 1;
1530 0 : return;
1531 : }
1532 : #if !CONFIG_PVQ
1533 0 : const PLANE_TYPE plane_type = get_plane_type(plane);
1534 0 : const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
1535 0 : const SCAN_ORDER *scan_order =
1536 0 : get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
1537 0 : this_rd_stats.rate =
1538 0 : av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l,
1539 : args->use_fast_coef_costing);
1540 : #else // !CONFIG_PVQ
1541 : this_rd_stats.rate = x->rate;
1542 : #endif // !CONFIG_PVQ
1543 : #else // !CONFIG_TXK_SEL
1544 : av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
1545 : tx_size, a, l, args->use_fast_coef_costing,
1546 : &this_rd_stats);
1547 : #endif // !CONFIG_TXK_SEL
1548 :
1549 : #if !CONFIG_PVQ
1550 : #if CONFIG_RD_DEBUG
1551 : av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
1552 : this_rd_stats.rate);
1553 : #endif // CONFIG_RD_DEBUG
1554 0 : av1_set_txb_context(x, plane, block, tx_size, a, l);
1555 : #endif // !CONFIG_PVQ
1556 :
1557 0 : rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
1558 0 : rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
1559 :
1560 : // TODO(jingning): temporarily enabled only for luma component
1561 0 : rd = AOMMIN(rd1, rd2);
1562 :
1563 : #if CONFIG_DAALA_DIST
1564 : if (plane == 0 && plane_bsize >= BLOCK_8X8 &&
1565 : (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) {
1566 : this_rd_stats.dist = 0;
1567 : this_rd_stats.sse = 0;
1568 : rd = 0;
1569 : x->rate_4x4[block] = this_rd_stats.rate;
1570 : }
1571 : #endif // CONFIG_DAALA_DIST
1572 :
1573 : #if !CONFIG_PVQ
1574 0 : this_rd_stats.skip &= !x->plane[plane].eobs[block];
1575 : #else
1576 : this_rd_stats.skip &= x->pvq_skip[plane];
1577 : #endif // !CONFIG_PVQ
1578 0 : av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
1579 :
1580 0 : args->this_rd += rd;
1581 :
1582 0 : if (args->this_rd > args->best_rd) {
1583 0 : args->exit_early = 1;
1584 0 : return;
1585 : }
1586 : }
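// Note the CONFIG_DAALA_DIST special case above: for TX_4X4/TX_4X8/TX_8X4
// blocks inside an 8x8-or-larger luma area, the per-block distortion and SSE
// are zeroed and only the rate is stashed in x->rate_4x4[];
// block_8x8_rd_txfm_daala_dist() below then re-scores the whole 8x8 area
// with the perceptual metric and sums those four stored rates.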
1587 :
1588 : #if CONFIG_DAALA_DIST
1589 : static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
1590 : int blk_col, BLOCK_SIZE plane_bsize,
1591 : TX_SIZE tx_size, void *arg) {
1592 : struct rdcost_block_args *args = arg;
1593 : MACROBLOCK *const x = args->x;
1594 : MACROBLOCKD *const xd = &x->e_mbd;
1595 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1596 : int64_t rd, rd1, rd2;
1597 : RD_STATS this_rd_stats;
1598 : int qm = OD_HVS_QM;
1599 : int use_activity_masking = 0;
1600 :
1601 : (void)tx_size;
1602 :
1603 : assert(plane == 0);
1604 : assert(plane_bsize >= BLOCK_8X8);
1605 : #if CONFIG_PVQ
1606 : use_activity_masking = x->daala_enc.use_activity_masking;
1607 : #endif // CONFIG_PVQ
1608 : av1_init_rd_stats(&this_rd_stats);
1609 :
1610 : if (args->exit_early) return;
1611 :
1612 : {
1613 : const struct macroblock_plane *const p = &x->plane[plane];
1614 : struct macroblockd_plane *const pd = &xd->plane[plane];
1615 :
1616 : const int src_stride = p->src.stride;
1617 : const int dst_stride = pd->dst.stride;
1618 : const int diff_stride = block_size_wide[plane_bsize];
1619 :
1620 : const uint8_t *src =
1621 : &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
1622 : const uint8_t *dst =
1623 : &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
1624 :
1625 : unsigned int tmp1, tmp2;
1626 : int qindex = x->qindex;
1627 : const int pred_stride = block_size_wide[plane_bsize];
1628 : const int pred_idx = (blk_row * pred_stride + blk_col)
1629 : << tx_size_wide_log2[0];
1630 : int16_t *pred = &pd->pred[pred_idx];
1631 : int i, j;
1632 : const int tx_blk_size = 8;
1633 :
1634 : DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
1635 :
1636 : for (j = 0; j < tx_blk_size; j++)
1637 : for (i = 0; i < tx_blk_size; i++)
1638 : pred8[j * tx_blk_size + i] = pred[j * diff_stride + i];
1639 :
1640 : tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm,
1641 : use_activity_masking, qindex);
1642 : tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm,
1643 : use_activity_masking, qindex);
1644 :
1645 : if (!is_inter_block(mbmi)) {
1646 : this_rd_stats.sse = (int64_t)tmp1 * 16;
1647 : this_rd_stats.dist = (int64_t)tmp2 * 16;
1648 : } else {
1649 : // For inter mode, the decoded pixels are provided in pd->pred,
1650 : // while the predicted pixels are in dst.
1651 : this_rd_stats.sse = (int64_t)tmp2 * 16;
1652 : this_rd_stats.dist = (int64_t)tmp1 * 16;
1653 : }
1654 : }
1655 :
1656 : rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
1657 : if (args->this_rd + rd > args->best_rd) {
1658 : args->exit_early = 1;
1659 : return;
1660 : }
1661 :
1662 : {
1663 : const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
1664 : const uint8_t txw_unit = tx_size_wide_unit[tx_size];
1665 : const uint8_t txh_unit = tx_size_high_unit[tx_size];
1666 : const int step = txw_unit * txh_unit;
1667 : int offset_h = tx_size_high_unit[TX_4X4];
1668 : // The rate of the current 8x8 block is the sum of four 4x4 blocks in it.
1669 : this_rd_stats.rate =
1670 : x->rate_4x4[block - max_blocks_wide * offset_h - step] +
1671 : x->rate_4x4[block - max_blocks_wide * offset_h] +
1672 : x->rate_4x4[block - step] + x->rate_4x4[block];
1673 : }
1674 : rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
1675 : rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
1676 : rd = AOMMIN(rd1, rd2);
1677 :
1678 : args->rd_stats.dist += this_rd_stats.dist;
1679 : args->rd_stats.sse += this_rd_stats.sse;
1680 :
1681 : args->this_rd += rd;
1682 :
1683 : if (args->this_rd > args->best_rd) {
1684 : args->exit_early = 1;
1685 : return;
1686 : }
1687 : }
1688 : #endif // CONFIG_DAALA_DIST
1689 :
1690 0 : static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
1691 : RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
1692 : BLOCK_SIZE bsize, TX_SIZE tx_size,
1693 : int use_fast_coef_casting) {
1694 0 : MACROBLOCKD *const xd = &x->e_mbd;
1695 0 : const struct macroblockd_plane *const pd = &xd->plane[plane];
1696 : struct rdcost_block_args args;
1697 0 : av1_zero(args);
1698 0 : args.x = x;
1699 0 : args.cpi = cpi;
1700 0 : args.best_rd = ref_best_rd;
1701 0 : args.use_fast_coef_costing = use_fast_coef_casting;
1702 0 : av1_init_rd_stats(&args.rd_stats);
1703 :
1704 0 : if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
1705 :
1706 0 : av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
1707 :
1708 : #if CONFIG_DAALA_DIST
1709 : if (plane == 0 && bsize >= BLOCK_8X8 &&
1710 : (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
1711 : av1_foreach_8x8_transformed_block_in_yplane(
1712 : xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
1713 : else
1714 : #endif // CONFIG_DAALA_DIST
1715 0 : av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
1716 : &args);
1717 :
1718 0 : if (args.exit_early) {
1719 0 : av1_invalid_rd_stats(rd_stats);
1720 : } else {
1721 0 : *rd_stats = args.rd_stats;
1722 : }
1723 0 : }
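// txfm_rd_in_plane() wires the visitors above into a plane-wide scan and
// returns the accumulated stats, invalidating them if any visitor set
// exit_early.  A schematic of that traversal under hypothetical names (the
// real iterator is av1_foreach_transformed_block_in_plane()):
struct sketch_scan_args {
  int exit_early;
};

typedef void (*sketch_visitor)(int blk_row, int blk_col,
                               struct sketch_scan_args *args);

static void sketch_foreach_tx_block(int rows, int cols, int txh, int txw,
                                    sketch_visitor visit,
                                    struct sketch_scan_args *args) {
  // Raster-scan the plane in transform-block steps; a visitor flags
  // exit_early once the running RD cost exceeds the best known cost.
  for (int r = 0; r < rows && !args->exit_early; r += txh)
    for (int c = 0; c < cols && !args->exit_early; c += txw)
      visit(r, c, args);
}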
1724 :
1725 : #if CONFIG_SUPERTX
1726 : void av1_txfm_rd_in_plane_supertx(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
1727 : int64_t *distortion, int *skippable,
1728 : int64_t *sse, int64_t ref_best_rd, int plane,
1729 : BLOCK_SIZE bsize, TX_SIZE tx_size,
1730 : int use_fast_coef_casting) {
1731 : MACROBLOCKD *const xd = &x->e_mbd;
1732 : const struct macroblockd_plane *const pd = &xd->plane[plane];
1733 : struct rdcost_block_args args;
1734 : av1_zero(args);
1735 : args.cpi = cpi;
1736 : args.x = x;
1737 : args.best_rd = ref_best_rd;
1738 : args.use_fast_coef_costing = use_fast_coef_casting;
1739 :
1740 : #if CONFIG_EXT_TX
1741 : assert(tx_size < TX_SIZES);
1742 : #endif // CONFIG_EXT_TX
1743 :
1744 : if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
1745 :
1746 : av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
1747 :
1748 : block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), tx_size,
1749 : &args);
1750 :
1751 : if (args.exit_early) {
1752 : *rate = INT_MAX;
1753 : *distortion = INT64_MAX;
1754 : *sse = INT64_MAX;
1755 : *skippable = 0;
1756 : } else {
1757 : *distortion = args.rd_stats.dist;
1758 : *rate = args.rd_stats.rate;
1759 : *sse = args.rd_stats.sse;
1760 : *skippable = !x->plane[plane].eobs[0];
1761 : }
1762 : }
1763 : #endif // CONFIG_SUPERTX
1764 :
1765 0 : static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
1766 : BLOCK_SIZE bsize, TX_SIZE tx_size) {
1767 0 : const AV1_COMMON *const cm = &cpi->common;
1768 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1769 0 : const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1770 :
1771 0 : const int tx_select =
1772 0 : cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
1773 :
1774 0 : if (tx_select) {
1775 0 : const int is_inter = is_inter_block(mbmi);
1776 0 : const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
1777 0 : : intra_tx_size_cat_lookup[bsize];
1778 0 : const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
1779 0 : const int depth = tx_size_to_depth(coded_tx_size);
1780 0 : const int tx_size_ctx = get_tx_size_context(xd);
1781 0 : int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
1782 : #if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
1783 : if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
1784 : r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
1785 : tx_size == quarter_txsize_lookup[bsize]);
1786 : #endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
1787 0 : return r_tx_size;
1788 : } else {
1789 0 : return 0;
1790 : }
1791 : }
1792 :
1793 : // TODO(angiebird): use this function wherever possible
1794 0 : int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
1795 : BLOCK_SIZE bsize, int plane, TX_SIZE tx_size,
1796 : TX_TYPE tx_type) {
1797 0 : if (plane > 0) return 0;
1798 :
1799 0 : const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1800 0 : const int is_inter = is_inter_block(mbmi);
1801 : #if CONFIG_EXT_TX
1802 0 : const AV1_COMMON *cm = &cpi->common;
1803 0 : if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
1804 0 : !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
1805 0 : const int ext_tx_set =
1806 0 : get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
1807 0 : if (is_inter) {
1808 0 : if (ext_tx_set > 0)
1809 : return cpi
1810 0 : ->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]][tx_type];
1811 : } else {
1812 0 : if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
1813 0 : return cpi->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
1814 0 : [mbmi->mode][tx_type];
1815 : }
1816 : }
1817 : #else
1818 : (void)bsize;
1819 : if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
1820 : !FIXED_TX_TYPE) {
1821 : if (is_inter) {
1822 : return cpi->inter_tx_type_costs[tx_size][tx_type];
1823 : } else {
1824 : return cpi->intra_tx_type_costs[tx_size]
1825 : [intra_mode_to_tx_type_context[mbmi->mode]]
1826 : [tx_type];
1827 : }
1828 : }
1829 : #endif // CONFIG_EXT_TX
1830 0 : return 0;
1831 : }
1832 0 : static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
1833 : RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
1834 : TX_TYPE tx_type, int tx_size) {
1835 0 : const AV1_COMMON *const cm = &cpi->common;
1836 0 : MACROBLOCKD *const xd = &x->e_mbd;
1837 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1838 0 : int64_t rd = INT64_MAX;
1839 0 : aom_prob skip_prob = av1_get_skip_prob(cm, xd);
1840 : int s0, s1;
1841 0 : const int is_inter = is_inter_block(mbmi);
1842 0 : const int tx_select =
1843 0 : cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
1844 :
1845 0 : const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size);
1846 :
1847 0 : assert(skip_prob > 0);
1848 : #if CONFIG_EXT_TX && CONFIG_RECT_TX
1849 0 : assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
1850 : #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
1851 :
1852 0 : s0 = av1_cost_bit(skip_prob, 0);
1853 0 : s1 = av1_cost_bit(skip_prob, 1);
1854 :
1855 0 : mbmi->tx_type = tx_type;
1856 0 : mbmi->tx_size = tx_size;
1857 0 : txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, tx_size,
1858 : cpi->sf.use_fast_coef_costing);
1859 0 : if (rd_stats->rate == INT_MAX) return INT64_MAX;
1860 : #if !CONFIG_TXK_SEL
1861 0 : int plane = 0;
1862 0 : rd_stats->rate += av1_tx_type_cost(cpi, xd, bs, plane, tx_size, tx_type);
1863 : #endif
1864 :
1865 0 : if (rd_stats->skip) {
1866 0 : if (is_inter) {
1867 0 : rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
1868 : } else {
1869 0 : rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select,
1870 : rd_stats->sse);
1871 : }
1872 : } else {
1873 0 : rd = RDCOST(x->rdmult, x->rddiv,
1874 : rd_stats->rate + s0 + r_tx_size * tx_select, rd_stats->dist);
1875 : }
1876 :
1877 0 : if (tx_select) rd_stats->rate += r_tx_size;
1878 :
1879 0 : if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
1880 0 : !(rd_stats->skip))
1881 0 : rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
1882 :
1883 0 : return rd;
1884 : }
1885 :
1886 0 : static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
1887 : TX_TYPE tx_type, TX_SIZE tx_size) {
1888 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1889 0 : const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1890 0 : const TX_SIZE max_tx_size = max_txsize_lookup[bs];
1891 0 : const int is_inter = is_inter_block(mbmi);
1892 0 : int prune = 0;
1893 0 : if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
1894 : // Passing -1 as tx_type indicates that all 1D
1895 : // transforms should be considered for pruning.
1896 0 : prune = prune_tx_types(cpi, bs, x, xd, -1);
1897 :
1898 0 : if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
1899 : if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
1900 : return 1;
1901 0 : if (!is_inter && x->use_default_intra_tx_type &&
1902 0 : tx_type != get_default_tx_type(0, xd, 0, tx_size))
1903 0 : return 1;
1904 0 : if (is_inter && x->use_default_inter_tx_type &&
1905 0 : tx_type != get_default_tx_type(0, xd, 0, tx_size))
1906 0 : return 1;
1907 0 : if (max_tx_size >= TX_32X32 && tx_size == TX_4X4) return 1;
1908 : #if CONFIG_EXT_TX
1909 0 : const AV1_COMMON *const cm = &cpi->common;
1910 0 : int ext_tx_set =
1911 0 : get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
1912 0 : if (is_inter) {
1913 0 : if (!ext_tx_used_inter[ext_tx_set][tx_type]) return 1;
1914 0 : if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
1915 0 : if (!do_tx_type_search(tx_type, prune)) return 1;
1916 : }
1917 : } else {
1918 : if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
1919 : if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) return 1;
1920 : }
1921 0 : if (!ext_tx_used_intra[ext_tx_set][tx_type]) return 1;
1922 : }
1923 : #else // CONFIG_EXT_TX
1924 : if (tx_size >= TX_32X32 && tx_type != DCT_DCT) return 1;
1925 : if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
1926 : !do_tx_type_search(tx_type, prune))
1927 : return 1;
1928 : #endif // CONFIG_EXT_TX
1929 0 : return 0;
1930 : }
1931 :
1932 : #if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
1933 0 : static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
1934 : MACROBLOCK *x, int *r, int64_t *d, int *s,
1935 : int64_t *sse, int64_t ref_best_rd) {
1936 : RD_STATS rd_stats;
1937 0 : int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, DCT_DCT,
1938 0 : max_txsize_lookup[bs]);
1939 0 : *r = rd_stats.rate;
1940 0 : *d = rd_stats.dist;
1941 0 : *s = rd_stats.skip;
1942 0 : *sse = rd_stats.sse;
1943 0 : return rd;
1944 : }
1945 : #endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
1946 :
1947 0 : static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
1948 : RD_STATS *rd_stats, int64_t ref_best_rd,
1949 : BLOCK_SIZE bs) {
1950 0 : const AV1_COMMON *const cm = &cpi->common;
1951 0 : MACROBLOCKD *const xd = &x->e_mbd;
1952 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1953 0 : TX_TYPE tx_type, best_tx_type = DCT_DCT;
1954 0 : int64_t this_rd, best_rd = INT64_MAX;
1955 0 : aom_prob skip_prob = av1_get_skip_prob(cm, xd);
1956 0 : int s0 = av1_cost_bit(skip_prob, 0);
1957 0 : int s1 = av1_cost_bit(skip_prob, 1);
1958 0 : const int is_inter = is_inter_block(mbmi);
1959 0 : int prune = 0;
1960 0 : const int plane = 0;
1961 : #if CONFIG_EXT_TX
1962 : int ext_tx_set;
1963 : #endif // CONFIG_EXT_TX
1964 0 : av1_invalid_rd_stats(rd_stats);
1965 :
1966 0 : mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
1967 : #if CONFIG_VAR_TX
1968 0 : mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
1969 : #endif // CONFIG_VAR_TX
1970 : #if CONFIG_EXT_TX
1971 0 : ext_tx_set =
1972 0 : get_ext_tx_set(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);
1973 : #endif // CONFIG_EXT_TX
1974 :
1975 0 : if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
1976 : #if CONFIG_EXT_TX
1977 0 : prune = prune_tx_types(cpi, bs, x, xd, ext_tx_set);
1978 : #else
1979 : prune = prune_tx_types(cpi, bs, x, xd, 0);
1980 : #endif // CONFIG_EXT_TX
1981 : #if CONFIG_EXT_TX
1982 0 : if (get_ext_tx_types(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used) >
1983 0 : 1 &&
1984 0 : !xd->lossless[mbmi->segment_id]) {
1985 : #if CONFIG_PVQ
1986 : od_rollback_buffer pre_buf, post_buf;
1987 :
1988 : od_encode_checkpoint(&x->daala_enc, &pre_buf);
1989 : od_encode_checkpoint(&x->daala_enc, &post_buf);
1990 : #endif // CONFIG_PVQ
1991 :
1992 0 : for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
1993 : RD_STATS this_rd_stats;
1994 0 : if (is_inter) {
1995 0 : if (x->use_default_inter_tx_type &&
1996 0 : tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
1997 0 : continue;
1998 0 : if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
1999 0 : if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
2000 0 : if (!do_tx_type_search(tx_type, prune)) continue;
2001 : }
2002 : } else {
2003 0 : if (x->use_default_intra_tx_type &&
2004 0 : tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
2005 0 : continue;
2006 : if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
2007 : if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
2008 : }
2009 0 : if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
2010 : }
2011 :
2012 0 : mbmi->tx_type = tx_type;
2013 :
2014 0 : txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
2015 0 : mbmi->tx_size, cpi->sf.use_fast_coef_costing);
2016 : #if CONFIG_PVQ
2017 : od_encode_rollback(&x->daala_enc, &pre_buf);
2018 : #endif // CONFIG_PVQ
2019 0 : if (this_rd_stats.rate == INT_MAX) continue;
2020 0 : av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
2021 :
2022 0 : if (this_rd_stats.skip)
2023 0 : this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
2024 : else
2025 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
2026 : this_rd_stats.dist);
2027 0 : if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
2028 0 : !this_rd_stats.skip)
2029 0 : this_rd =
2030 0 : AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
2031 :
2032 0 : if (this_rd < best_rd) {
2033 0 : best_rd = this_rd;
2034 0 : best_tx_type = mbmi->tx_type;
2035 0 : *rd_stats = this_rd_stats;
2036 : #if CONFIG_PVQ
2037 : od_encode_checkpoint(&x->daala_enc, &post_buf);
2038 : #endif // CONFIG_PVQ
2039 : }
2040 : }
2041 : #if CONFIG_PVQ
2042 : od_encode_rollback(&x->daala_enc, &post_buf);
2043 : #endif // CONFIG_PVQ
2044 : } else {
2045 0 : mbmi->tx_type = DCT_DCT;
2046 0 : txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2047 : cpi->sf.use_fast_coef_costing);
2048 : }
2049 : #else // CONFIG_EXT_TX
2050 : if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) {
2051 : for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
2052 : RD_STATS this_rd_stats;
2053 : if (!is_inter && x->use_default_intra_tx_type &&
2054 : tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
2055 : continue;
2056 : if (is_inter && x->use_default_inter_tx_type &&
2057 : tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
2058 : continue;
2059 : mbmi->tx_type = tx_type;
2060 : txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
2061 : mbmi->tx_size, cpi->sf.use_fast_coef_costing);
2062 : if (this_rd_stats.rate == INT_MAX) continue;
2063 :
2064 : av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
2065 : if (is_inter) {
2066 : if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
2067 : !do_tx_type_search(tx_type, prune))
2068 : continue;
2069 : }
2070 : if (this_rd_stats.skip)
2071 : this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
2072 : else
2073 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
2074 : this_rd_stats.dist);
2075 : if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip)
2076 : this_rd =
2077 : AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
2078 :
2079 : if (this_rd < best_rd) {
2080 : best_rd = this_rd;
2081 : best_tx_type = mbmi->tx_type;
2082 : *rd_stats = this_rd_stats;
2083 : }
2084 : }
2085 : } else {
2086 : mbmi->tx_type = DCT_DCT;
2087 : txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2088 : cpi->sf.use_fast_coef_costing);
2089 : }
2090 : #endif // CONFIG_EXT_TX
2091 0 : mbmi->tx_type = best_tx_type;
2092 0 : }
2093 :
2094 0 : static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
2095 : RD_STATS *rd_stats, int64_t ref_best_rd,
2096 : BLOCK_SIZE bs) {
2097 0 : MACROBLOCKD *const xd = &x->e_mbd;
2098 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2099 :
2100 0 : mbmi->tx_size = TX_4X4;
2101 0 : mbmi->tx_type = DCT_DCT;
2102 : #if CONFIG_VAR_TX
2103 0 : mbmi->min_tx_size = get_min_tx_size(TX_4X4);
2104 : #endif // CONFIG_VAR_TX
2105 :
2106 0 : txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2107 : cpi->sf.use_fast_coef_costing);
2108 0 : }
2109 :
2110 : #if CONFIG_TXK_SEL || CONFIG_VAR_TX
2111 0 : static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
2112 0 : int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
2113 0 : return num_blk;
2114 : }
2115 : #endif // CONFIG_TXK_SEL || CONFIG_VAR_TX
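// Worked example for bsize_to_num_blk(), assuming 4x4 minimum transform
// units (tx_size_wide_log2[0] == 2): BLOCK_16X16 covers 256 pels, so
// num_pels_log2_lookup[BLOCK_16X16] == 8 and
// num_blk = 1 << (8 - 2 * 2) == 16, i.e. a 4x4 grid of 4x4 blocks.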
2116 :
2117 0 : static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
2118 : MACROBLOCK *x, RD_STATS *rd_stats,
2119 : int64_t ref_best_rd, BLOCK_SIZE bs) {
2120 0 : const AV1_COMMON *const cm = &cpi->common;
2121 0 : MACROBLOCKD *const xd = &x->e_mbd;
2122 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2123 0 : int64_t rd = INT64_MAX;
2124 : int n;
2125 : int start_tx, end_tx;
2126 0 : int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
2127 0 : const TX_SIZE max_tx_size = max_txsize_lookup[bs];
2128 0 : TX_SIZE best_tx_size = max_tx_size;
2129 0 : TX_TYPE best_tx_type = DCT_DCT;
2130 : #if CONFIG_TXK_SEL
2131 : TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
2132 : const int num_blk = bsize_to_num_blk(bs);
2133 : #endif // CONFIG_TXK_SEL
2134 0 : const int tx_select = cm->tx_mode == TX_MODE_SELECT;
2135 0 : const int is_inter = is_inter_block(mbmi);
2136 : #if CONFIG_PVQ
2137 : od_rollback_buffer buf;
2138 : od_encode_checkpoint(&x->daala_enc, &buf);
2139 : #endif // CONFIG_PVQ
2140 :
2141 0 : av1_invalid_rd_stats(rd_stats);
2142 :
2143 : #if CONFIG_EXT_TX && CONFIG_RECT_TX
2144 0 : int evaluate_rect_tx = 0;
2145 0 : if (tx_select) {
2146 0 : evaluate_rect_tx = is_rect_tx_allowed(xd, mbmi);
2147 : } else {
2148 0 : const TX_SIZE chosen_tx_size =
2149 0 : tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
2150 0 : evaluate_rect_tx = is_rect_tx(chosen_tx_size);
2151 0 : assert(IMPLIES(evaluate_rect_tx, is_rect_tx_allowed(xd, mbmi)));
2152 : }
2153 0 : if (evaluate_rect_tx) {
2154 0 : TX_TYPE tx_start = DCT_DCT;
2155 0 : TX_TYPE tx_end = TX_TYPES;
2156 : #if CONFIG_TXK_SEL
2157 : // The tx_type here is a dummy when lv_map is on; the tx_type search is
2158 : // performed inside av1_search_txk_type().
2159 : tx_end = DCT_DCT + 1;
2160 : #endif
2161 : TX_TYPE tx_type;
2162 0 : for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
2163 0 : if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
2164 0 : const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
2165 : RD_STATS this_rd_stats;
2166 0 : int ext_tx_set =
2167 0 : get_ext_tx_set(rect_tx_size, bs, is_inter, cm->reduced_tx_set_used);
2168 0 : if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
2169 0 : (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
2170 0 : rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type,
2171 : rect_tx_size);
2172 0 : if (rd < best_rd) {
2173 : #if CONFIG_TXK_SEL
2174 : memcpy(best_txk_type, mbmi->txk_type,
2175 : sizeof(best_txk_type[0]) * num_blk);
2176 : #endif
2177 0 : best_tx_type = tx_type;
2178 0 : best_tx_size = rect_tx_size;
2179 0 : best_rd = rd;
2180 0 : *rd_stats = this_rd_stats;
2181 : }
2182 : }
2183 : #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2184 : const int is_inter = is_inter_block(mbmi);
2185 : if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
2186 : #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2187 : }
2188 : }
2189 :
2190 : #if CONFIG_RECT_TX_EXT
2191 : // test 1:4/4:1 tx
2192 : int evaluate_quarter_tx = 0;
2193 : if (is_quarter_tx_allowed(xd, mbmi, is_inter)) {
2194 : if (tx_select) {
2195 : evaluate_quarter_tx = 1;
2196 : } else {
2197 : const TX_SIZE chosen_tx_size =
2198 : tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
2199 : evaluate_quarter_tx = chosen_tx_size == quarter_txsize_lookup[bs];
2200 : }
2201 : }
2202 : if (evaluate_quarter_tx) {
2203 : TX_TYPE tx_start = DCT_DCT;
2204 : TX_TYPE tx_end = TX_TYPES;
2205 : #if CONFIG_TXK_SEL
2206 : // The tx_type here is a dummy when lv_map is on; the tx_type search is
2207 : // performed inside av1_search_txk_type().
2208 : tx_end = DCT_DCT + 1;
2209 : #endif
2210 : TX_TYPE tx_type;
2211 : for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
2212 : if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
2213 : const TX_SIZE tx_size = quarter_txsize_lookup[bs];
2214 : RD_STATS this_rd_stats;
2215 : int ext_tx_set =
2216 : get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
2217 : if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
2218 : (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
2219 : rd =
2220 : txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, tx_size);
2221 : if (rd < best_rd) {
2222 : #if CONFIG_TXK_SEL
2223 : memcpy(best_txk_type, mbmi->txk_type,
2224 : sizeof(best_txk_type[0]) * num_blk);
2225 : #endif
2226 : best_tx_type = tx_type;
2227 : best_tx_size = tx_size;
2228 : best_rd = rd;
2229 : *rd_stats = this_rd_stats;
2230 : }
2231 : }
2232 : #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2233 : const int is_inter = is_inter_block(mbmi);
2234 : if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
2235 : #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2236 : }
2237 : }
2238 : #endif // CONFIG_RECT_TX_EXT
2239 : #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2240 :
2241 0 : if (tx_select) {
2242 0 : start_tx = max_tx_size;
2243 0 : end_tx = (max_tx_size >= TX_32X32) ? TX_8X8 : TX_4X4;
2244 : } else {
2245 0 : const TX_SIZE chosen_tx_size =
2246 0 : tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
2247 0 : start_tx = chosen_tx_size;
2248 0 : end_tx = chosen_tx_size;
2249 : }
2250 :
2251 0 : last_rd = INT64_MAX;
2252 0 : for (n = start_tx; n >= end_tx; --n) {
2253 : #if CONFIG_EXT_TX && CONFIG_RECT_TX
2254 0 : if (is_rect_tx(n)) break;
2255 : #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2256 0 : TX_TYPE tx_start = DCT_DCT;
2257 0 : TX_TYPE tx_end = TX_TYPES;
2258 : #if CONFIG_TXK_SEL
2259 : // The tx_type here is a dummy when lv_map is on; the tx_type search is
2260 : // performed inside av1_search_txk_type().
2261 : tx_end = DCT_DCT + 1;
2262 : #endif
2263 : TX_TYPE tx_type;
2264 0 : for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
2265 : RD_STATS this_rd_stats;
2266 0 : if (skip_txfm_search(cpi, x, bs, tx_type, n)) continue;
2267 0 : rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, n);
2268 : #if CONFIG_PVQ
2269 : od_encode_rollback(&x->daala_enc, &buf);
2270 : #endif // CONFIG_PVQ
2271 : // Early termination in transform size search.
2272 0 : if (cpi->sf.tx_size_search_breakout &&
2273 0 : (rd == INT64_MAX ||
2274 0 : (this_rd_stats.skip == 1 && tx_type != DCT_DCT && n < start_tx) ||
2275 0 : (n < (int)max_tx_size && rd > last_rd)))
2276 : break;
2277 :
2278 0 : last_rd = rd;
2279 0 : if (rd < best_rd) {
2280 : #if CONFIG_TXK_SEL
2281 : memcpy(best_txk_type, mbmi->txk_type,
2282 : sizeof(best_txk_type[0]) * num_blk);
2283 : #endif
2284 0 : best_tx_type = tx_type;
2285 0 : best_tx_size = n;
2286 0 : best_rd = rd;
2287 0 : *rd_stats = this_rd_stats;
2288 : }
2289 : #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2290 : const int is_inter = is_inter_block(mbmi);
2291 : if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
2292 : #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
2293 : }
2294 : }
2295 0 : mbmi->tx_size = best_tx_size;
2296 0 : mbmi->tx_type = best_tx_type;
2297 : #if CONFIG_TXK_SEL
2298 : memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * num_blk);
2299 : #endif
2300 :
2301 : #if CONFIG_VAR_TX
2302 0 : mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
2303 : #endif // CONFIG_VAR_TX
2304 :
2305 : #if !CONFIG_EXT_TX
2306 : if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
2307 : #endif // !CONFIG_EXT_TX
2308 : #if CONFIG_PVQ
2309 : if (best_rd != INT64_MAX) {
2310 : txfm_yrd(cpi, x, rd_stats, ref_best_rd, bs, best_tx_type, best_tx_size);
2311 : }
2312 : #endif // CONFIG_PVQ
2313 0 : }
2314 :
2315 0 : static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
2316 : RD_STATS *rd_stats, BLOCK_SIZE bs,
2317 : int64_t ref_best_rd) {
2318 0 : MACROBLOCKD *xd = &x->e_mbd;
2319 0 : av1_init_rd_stats(rd_stats);
2320 :
2321 0 : assert(bs == xd->mi[0]->mbmi.sb_type);
2322 :
2323 0 : if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
2324 0 : choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
2325 0 : } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
2326 0 : choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
2327 : } else {
2328 0 : choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
2329 : }
2330 0 : }
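// super_block_yrd() is the luma dispatch point: lossless segments are pinned
// to 4x4 DCT_DCT by choose_smallest_tx_size(), USE_LARGESTALL keeps the
// largest size allowed by tx_mode and searches only the transform type, and
// the default path searches sizes and types jointly via
// choose_tx_size_type_from_rd().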
2331 :
2332 0 : static int conditional_skipintra(PREDICTION_MODE mode,
2333 : PREDICTION_MODE best_intra_mode) {
2334 0 : if (mode == D117_PRED && best_intra_mode != V_PRED &&
2335 : best_intra_mode != D135_PRED)
2336 0 : return 1;
2337 0 : if (mode == D63_PRED && best_intra_mode != V_PRED &&
2338 : best_intra_mode != D45_PRED)
2339 0 : return 1;
2340 0 : if (mode == D207_PRED && best_intra_mode != H_PRED &&
2341 : best_intra_mode != D45_PRED)
2342 0 : return 1;
2343 0 : if (mode == D153_PRED && best_intra_mode != H_PRED &&
2344 : best_intra_mode != D135_PRED)
2345 0 : return 1;
2346 0 : return 0;
2347 : }
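// conditional_skipintra() prunes a diagonal mode unless the best mode so far
// is one of its angular neighbours: e.g. D117_PRED (~117 degrees) is only
// searched when V_PRED (90) or D135_PRED (135) is leading, since a winner
// far from that direction makes D117 unlikely to prevail.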
2348 :
2349 : // Model-based RD estimation for luma intra blocks.
2350 0 : static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
2351 : BLOCK_SIZE bsize, int mode_cost) {
2352 0 : MACROBLOCKD *const xd = &x->e_mbd;
2353 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2354 0 : assert(!is_inter_block(mbmi));
2355 : RD_STATS this_rd_stats;
2356 : int row, col;
2357 : int64_t temp_sse, this_rd;
2358 0 : const TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cpi->common.tx_mode, 0);
2359 0 : const int stepr = tx_size_high_unit[tx_size];
2360 0 : const int stepc = tx_size_wide_unit[tx_size];
2361 0 : const int max_blocks_wide = max_block_wide(xd, bsize, 0);
2362 0 : const int max_blocks_high = max_block_high(xd, bsize, 0);
2363 0 : mbmi->tx_size = tx_size;
2364 : // Prediction.
2365 0 : const int step = stepr * stepc;
2366 0 : int block = 0;
2367 0 : for (row = 0; row < max_blocks_high; row += stepr) {
2368 0 : for (col = 0; col < max_blocks_wide; col += stepc) {
2369 : #if CONFIG_CFL
2370 : const struct macroblockd_plane *const pd = &xd->plane[0];
2371 : const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
2372 :
2373 : #if CONFIG_EC_ADAPT
2374 : FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
2375 : #else
2376 : FRAME_CONTEXT *const ec_ctx = cpi->common.fc;
2377 : #endif // CONFIG_EC_ADAPT
2378 :
2379 : av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row,
2380 : tx_size, plane_bsize);
2381 : #else
2382 0 : av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
2383 : #endif
2384 0 : block += step;
2385 : }
2386 : }
2387 : // RD estimation.
2388 0 : model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
2389 : &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
2390 : #if CONFIG_EXT_INTRA
2391 0 : if (av1_is_directional_mode(mbmi->mode, bsize)) {
2392 0 : mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
2393 0 : MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
2394 : }
2395 : #endif // CONFIG_EXT_INTRA
2396 : #if CONFIG_FILTER_INTRA
2397 : if (mbmi->mode == DC_PRED) {
2398 : const aom_prob prob = cpi->common.fc->filter_intra_probs[0];
2399 : if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
2400 : const int mode = mbmi->filter_intra_mode_info.filter_intra_mode[0];
2401 : mode_cost += (av1_cost_bit(prob, 1) +
2402 : write_uniform_cost(FILTER_INTRA_MODES, mode));
2403 : } else {
2404 : mode_cost += av1_cost_bit(prob, 0);
2405 : }
2406 : }
2407 : #endif // CONFIG_FILTER_INTRA
2408 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + mode_cost,
2409 : this_rd_stats.dist);
2410 0 : return this_rd;
2411 : }
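// intra_model_yrd() deliberately skips the transform/entropy pipeline: it
// runs prediction only and lets model_rd_for_sb() estimate rate and
// distortion from the residual statistics, which is cheap enough to use as a
// gate before the full super_block_yrd() search.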
2412 :
2413 : #if CONFIG_PALETTE
2414 : // Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
2415 : // new_height'. Extra rows and columns are filled in by copying the
2416 : // last valid row/column.
2417 0 : static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
2418 : int orig_height, int new_width,
2419 : int new_height) {
2420 : int j;
2421 0 : assert(new_width >= orig_width);
2422 0 : assert(new_height >= orig_height);
2423 0 : if (new_width == orig_width && new_height == orig_height) return;
2424 :
2425 0 : for (j = orig_height - 1; j >= 0; --j) {
2426 0 : memmove(color_map + j * new_width, color_map + j * orig_width, orig_width);
2427 : // Copy last column to extra columns.
2428 0 : memset(color_map + j * new_width + orig_width,
2429 0 : color_map[j * new_width + orig_width - 1], new_width - orig_width);
2430 : }
2431 : // Copy last row to extra rows.
2432 0 : for (j = orig_height; j < new_height; ++j) {
2433 0 : memcpy(color_map + j * new_width, color_map + (orig_height - 1) * new_width,
2434 : new_width);
2435 : }
2436 : }
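// Worked example for extend_palette_color_map(): growing a 2x2 map to 4x3
// first replicates the last valid column, then the last valid row
// (values are palette indices):
//
//   in:  1 2     out:  1 2 2 2
//        3 4           3 4 4 4
//                      3 4 4 4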
2437 :
2438 : #if CONFIG_PALETTE_DELTA_ENCODING
2439 : // Bias toward using colors in the cache.
2440 : // TODO(huisu): Try other schemes to improve compression.
2441 : static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
2442 : int n_colors, int stride,
2443 : float *centroids) {
2444 : if (n_cache <= 0) return;
2445 : for (int i = 0; i < n_colors * stride; i += stride) {
2446 : float min_diff = fabsf(centroids[i] - color_cache[0]);
2447 : int idx = 0;
2448 : for (int j = 1; j < n_cache; ++j) {
2449 : float this_diff = fabsf(centroids[i] - color_cache[j]);
2450 : if (this_diff < min_diff) {
2451 : min_diff = this_diff;
2452 : idx = j;
2453 : }
2454 : }
2455 : if (min_diff < 1.5) centroids[i] = color_cache[idx];
2456 : }
2457 : }
2458 : #endif // CONFIG_PALETTE_DELTA_ENCODING
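// Worked example for optimize_palette_colors(): with color_cache = {21, 180}
// and a k-means centroid of 21.9, the nearest cached color is 21
// (diff 0.9 < 1.5), so the centroid snaps to 21 and can be signaled as a
// cache reuse; a centroid of 100.0 (nearest diff 79.0) is left unchanged.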
2459 :
2460 0 : static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
2461 : BLOCK_SIZE bsize, int palette_ctx,
2462 : int dc_mode_cost, MB_MODE_INFO *best_mbmi,
2463 : uint8_t *best_palette_color_map,
2464 : int64_t *best_rd, int64_t *best_model_rd,
2465 : int *rate, int *rate_tokenonly,
2466 : int64_t *distortion, int *skippable) {
2467 0 : int rate_overhead = 0;
2468 0 : MACROBLOCKD *const xd = &x->e_mbd;
2469 0 : MODE_INFO *const mic = xd->mi[0];
2470 0 : MB_MODE_INFO *const mbmi = &mic->mbmi;
2471 0 : assert(!is_inter_block(mbmi));
2472 : int this_rate, colors, n;
2473 0 : const int src_stride = x->plane[0].src.stride;
2474 0 : const uint8_t *const src = x->plane[0].src.buf;
2475 0 : uint8_t *const color_map = xd->plane[0].color_index_map;
2476 : int block_width, block_height, rows, cols;
2477 0 : av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
2478 : &cols);
2479 :
2480 0 : assert(cpi->common.allow_screen_content_tools);
2481 :
2482 : #if CONFIG_HIGHBITDEPTH
2483 0 : if (cpi->common.use_highbitdepth)
2484 0 : colors = av1_count_colors_highbd(src, src_stride, rows, cols,
2485 0 : cpi->common.bit_depth);
2486 : else
2487 : #endif // CONFIG_HIGHBITDEPTH
2488 0 : colors = av1_count_colors(src, src_stride, rows, cols);
2489 : #if CONFIG_FILTER_INTRA
2490 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
2491 : #endif // CONFIG_FILTER_INTRA
2492 :
2493 0 : if (colors > 1 && colors <= 64) {
2494 : int r, c, i, j, k, palette_mode_cost;
2495 0 : const int max_itr = 50;
2496 : uint8_t color_order[PALETTE_MAX_SIZE];
2497 0 : float *const data = x->palette_buffer->kmeans_data_buf;
2498 : float centroids[PALETTE_MAX_SIZE];
2499 : float lb, ub, val;
2500 : RD_STATS tokenonly_rd_stats;
2501 : int64_t this_rd, this_model_rd;
2502 0 : PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
2503 : #if CONFIG_HIGHBITDEPTH
2504 0 : uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
2505 0 : if (cpi->common.use_highbitdepth)
2506 0 : lb = ub = src16[0];
2507 : else
2508 : #endif // CONFIG_HIGHBITDEPTH
2509 0 : lb = ub = src[0];
2510 :
2511 : #if CONFIG_HIGHBITDEPTH
2512 0 : if (cpi->common.use_highbitdepth) {
2513 0 : for (r = 0; r < rows; ++r) {
2514 0 : for (c = 0; c < cols; ++c) {
2515 0 : val = src16[r * src_stride + c];
2516 0 : data[r * cols + c] = val;
2517 0 : if (val < lb)
2518 0 : lb = val;
2519 0 : else if (val > ub)
2520 0 : ub = val;
2521 : }
2522 : }
2523 : } else {
2524 : #endif // CONFIG_HIGHBITDEPTH
2525 0 : for (r = 0; r < rows; ++r) {
2526 0 : for (c = 0; c < cols; ++c) {
2527 0 : val = src[r * src_stride + c];
2528 0 : data[r * cols + c] = val;
2529 0 : if (val < lb)
2530 0 : lb = val;
2531 0 : else if (val > ub)
2532 0 : ub = val;
2533 : }
2534 : }
2535 : #if CONFIG_HIGHBITDEPTH
2536 : }
2537 : #endif // CONFIG_HIGHBITDEPTH
2538 :
2539 0 : mbmi->mode = DC_PRED;
2540 : #if CONFIG_FILTER_INTRA
2541 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
2542 : #endif // CONFIG_FILTER_INTRA
2543 :
2544 0 : if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return 0;
2545 :
2546 : #if CONFIG_PALETTE_DELTA_ENCODING
2547 : const MODE_INFO *above_mi = xd->above_mi;
2548 : const MODE_INFO *left_mi = xd->left_mi;
2549 : uint16_t color_cache[2 * PALETTE_MAX_SIZE];
2550 : const int n_cache =
2551 : av1_get_palette_cache(above_mi, left_mi, 0, color_cache);
2552 : #endif // CONFIG_PALETTE_DELTA_ENCODING
2553 :
2554 0 : for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
2555 0 : --n) {
2556 0 : if (colors == PALETTE_MIN_SIZE) {
2557 : // Special case: These colors automatically become the centroids.
2558 0 : assert(colors == n);
2559 0 : assert(colors == 2);
2560 0 : centroids[0] = lb;
2561 0 : centroids[1] = ub;
2562 0 : k = 2;
2563 : } else {
2564 0 : for (i = 0; i < n; ++i) {
2565 0 : centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
2566 : }
2567 0 : av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
2568 : #if CONFIG_PALETTE_DELTA_ENCODING
2569 : optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
2570 : #endif // CONFIG_PALETTE_DELTA_ENCODING
2571 0 : k = av1_remove_duplicates(centroids, n);
2572 0 : if (k < PALETTE_MIN_SIZE) {
2573 : // Too few unique colors to create a palette; DC_PRED will work
2574 : // well for that case anyway, so skip.
2575 0 : continue;
2576 : }
2577 : }
2578 :
2579 : #if CONFIG_HIGHBITDEPTH
2580 0 : if (cpi->common.use_highbitdepth)
2581 0 : for (i = 0; i < k; ++i)
2582 0 : pmi->palette_colors[i] =
2583 0 : clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth);
2584 : else
2585 : #endif // CONFIG_HIGHBITDEPTH
2586 0 : for (i = 0; i < k; ++i)
2587 0 : pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
2588 0 : pmi->palette_size[0] = k;
2589 :
2590 0 : av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
2591 0 : extend_palette_color_map(color_map, cols, rows, block_width,
2592 : block_height);
2593 0 : palette_mode_cost =
2594 0 : dc_mode_cost +
2595 0 : cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - PALETTE_MIN_SIZE] +
2596 0 : write_uniform_cost(k, color_map[0]) +
2597 0 : av1_cost_bit(
2598 : av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx],
2599 : 1);
2600 0 : palette_mode_cost += av1_palette_color_cost_y(pmi,
2601 : #if CONFIG_PALETTE_DELTA_ENCODING
2602 : color_cache, n_cache,
2603 : #endif // CONFIG_PALETTE_DELTA_ENCODING
2604 0 : cpi->common.bit_depth);
2605 0 : for (i = 0; i < rows; ++i) {
2606 0 : for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
2607 : int color_idx;
2608 0 : const int color_ctx = av1_get_palette_color_index_context(
2609 : color_map, block_width, i, j, k, color_order, &color_idx);
2610 0 : assert(color_idx >= 0 && color_idx < k);
2611 0 : palette_mode_cost += cpi->palette_y_color_cost[k - PALETTE_MIN_SIZE]
2612 0 : [color_ctx][color_idx];
2613 : }
2614 : }
2615 0 : this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
2616 0 : if (*best_model_rd != INT64_MAX &&
2617 0 : this_model_rd > *best_model_rd + (*best_model_rd >> 1))
2618 0 : continue;
2619 0 : if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
2620 0 : super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
2621 0 : if (tokenonly_rd_stats.rate == INT_MAX) continue;
2622 0 : this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
2623 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
2624 0 : if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
2625 0 : tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
2626 : }
2627 0 : if (this_rd < *best_rd) {
2628 0 : *best_rd = this_rd;
2629 0 : memcpy(best_palette_color_map, color_map,
2630 0 : block_width * block_height * sizeof(color_map[0]));
2631 0 : *best_mbmi = *mbmi;
2632 0 : rate_overhead = this_rate - tokenonly_rd_stats.rate;
2633 0 : if (rate) *rate = this_rate;
2634 0 : if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
2635 0 : if (distortion) *distortion = tokenonly_rd_stats.dist;
2636 0 : if (skippable) *skippable = tokenonly_rd_stats.skip;
2637 : }
2638 : }
2639 : }
2640 :
2641 0 : if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
2642 0 : memcpy(color_map, best_palette_color_map,
2643 0 : rows * cols * sizeof(best_palette_color_map[0]));
2644 : }
2645 0 : *mbmi = *best_mbmi;
2646 0 : return rate_overhead;
2647 : }
2648 : #endif // CONFIG_PALETTE
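// The palette search above seeds k-means with centroids at the midpoints of
// n equal luma bins: centroids[i] = lb + (2*i + 1) * (ub - lb) / n / 2,
// e.g. lb = 0, ub = 80, n = 4 gives 10, 30, 50, 70.  It then tries every
// palette size from min(colors, PALETTE_MAX_SIZE) down to 2, keeping the
// candidate with the lowest RD cost and its color_index_map.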
2649 :
2650 0 : static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
2651 : const AV1_COMP *const cpi, MACROBLOCK *x, int row, int col,
2652 : PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
2653 : ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
2654 : BLOCK_SIZE bsize, TX_SIZE tx_size, int *y_skip, int64_t rd_thresh) {
2655 0 : const AV1_COMMON *const cm = &cpi->common;
2656 : PREDICTION_MODE mode;
2657 0 : MACROBLOCKD *const xd = &x->e_mbd;
2658 0 : assert(!is_inter_block(&xd->mi[0]->mbmi));
2659 0 : int64_t best_rd = rd_thresh;
2660 0 : struct macroblock_plane *p = &x->plane[0];
2661 0 : struct macroblockd_plane *pd = &xd->plane[0];
2662 0 : const int src_stride = p->src.stride;
2663 0 : const int dst_stride = pd->dst.stride;
2664 0 : const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
2665 0 : uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
2666 : #if CONFIG_CHROMA_2X2
2667 : // TODO(jingning): This is a temporary change. The whole function should be
2668 : // removed when cb4x4 is enabled.
2669 : ENTROPY_CONTEXT ta[4], tempa[4];
2670 : ENTROPY_CONTEXT tl[4], templ[4];
2671 : #else
2672 : ENTROPY_CONTEXT ta[2], tempa[2];
2673 : ENTROPY_CONTEXT tl[2], templ[2];
2674 : #endif // CONFIG_CHROMA_2X2
2675 :
2676 0 : const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
2677 0 : const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
2678 0 : const int tx_width_unit = tx_size_wide_unit[tx_size];
2679 0 : const int tx_height_unit = tx_size_high_unit[tx_size];
2680 0 : const int pred_block_width = block_size_wide[bsize];
2681 0 : const int pred_block_height = block_size_high[bsize];
2682 0 : const int tx_width = tx_size_wide[tx_size];
2683 0 : const int tx_height = tx_size_high[tx_size];
2684 0 : const int pred_width_in_transform_blocks = pred_block_width / tx_width;
2685 0 : const int pred_height_in_transform_blocks = pred_block_height / tx_height;
2686 : int idx, idy;
2687 0 : int best_can_skip = 0;
2688 : uint8_t best_dst[8 * 8];
2689 : #if CONFIG_HIGHBITDEPTH
2690 : uint16_t best_dst16[8 * 8];
2691 : #endif // CONFIG_HIGHBITDEPTH
2692 0 : const int is_lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
2693 : #if CONFIG_EXT_TX && CONFIG_RECT_TX
2694 0 : const int sub_bsize = bsize;
2695 : #else
2696 : const int sub_bsize = BLOCK_4X4;
2697 : #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2698 :
2699 : #if CONFIG_PVQ
2700 : od_rollback_buffer pre_buf, post_buf;
2701 : od_encode_checkpoint(&x->daala_enc, &pre_buf);
2702 : od_encode_checkpoint(&x->daala_enc, &post_buf);
2703 : #endif // CONFIG_PVQ
2704 :
2705 0 : assert(bsize < BLOCK_8X8);
2706 0 : assert(tx_width < 8 || tx_height < 8);
2707 : #if CONFIG_EXT_TX && CONFIG_RECT_TX
2708 0 : if (is_lossless)
2709 0 : assert(tx_width == 4 && tx_height == 4);
2710 : else
2711 0 : assert(tx_width == pred_block_width && tx_height == pred_block_height);
2712 : #else
2713 : assert(tx_width == 4 && tx_height == 4);
2714 : #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
2715 :
2716 0 : memcpy(ta, a, pred_width_in_transform_blocks * sizeof(a[0]));
2717 0 : memcpy(tl, l, pred_height_in_transform_blocks * sizeof(l[0]));
2718 :
2719 0 : xd->mi[0]->mbmi.tx_size = tx_size;
2720 :
2721 : #if CONFIG_PALETTE
2722 0 : xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
2723 : #endif // CONFIG_PALETTE
2724 :
2725 : #if CONFIG_HIGHBITDEPTH
2726 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2727 : #if CONFIG_PVQ
2728 : od_encode_checkpoint(&x->daala_enc, &pre_buf);
2729 : #endif
2730 0 : for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
2731 : int64_t this_rd;
2732 0 : int ratey = 0;
2733 0 : int64_t distortion = 0;
2734 0 : int rate = bmode_costs[mode];
2735 0 : int can_skip = 1;
2736 :
2737 0 : if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
2738 : (1 << mode)))
2739 0 : continue;
2740 :
2741 : // Only do the oblique modes if the best so far is
2742 : // one of the neighboring directional modes
2743 0 : if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
2744 0 : if (conditional_skipintra(mode, *best_mode)) continue;
2745 : }
2746 :
2747 0 : memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
2748 0 : memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
2749 :
2750 0 : for (idy = 0; idy < pred_height_in_transform_blocks; ++idy) {
2751 0 : for (idx = 0; idx < pred_width_in_transform_blocks; ++idx) {
2752 0 : const int block_raster_idx = (row + idy) * 2 + (col + idx);
2753 0 : const int block =
2754 0 : av1_raster_order_to_block_index(tx_size, block_raster_idx);
2755 0 : const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
2756 0 : uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
2757 : #if !CONFIG_PVQ
2758 0 : int16_t *const src_diff = av1_raster_block_offset_int16(
2759 0 : BLOCK_8X8, block_raster_idx, p->src_diff);
2760 : #endif
2761 : int skip;
2762 0 : assert(block < 4);
2763 0 : assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
2764 : idx == 0 && idy == 0));
2765 0 : assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
2766 : block == 0 || block == 2));
2767 0 : xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
2768 0 : av1_predict_intra_block(
2769 0 : xd, pd->width, pd->height, txsize_to_bsize[tx_size], mode, dst,
2770 : dst_stride, dst, dst_stride, col + idx, row + idy, 0);
2771 : #if !CONFIG_PVQ
2772 0 : aom_highbd_subtract_block(tx_height, tx_width, src_diff, 8, src,
2773 : src_stride, dst, dst_stride, xd->bd);
2774 : #endif
2775 0 : if (is_lossless) {
2776 0 : TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
2777 0 : const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
2778 0 : const int coeff_ctx =
2779 0 : combine_entropy_contexts(tempa[idx], templ[idy]);
2780 : #if !CONFIG_PVQ
2781 0 : av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
2782 : tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
2783 0 : ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
2784 : tempa + idx, templ + idy,
2785 : cpi->sf.use_fast_coef_costing);
2786 0 : skip = (p->eobs[block] == 0);
2787 0 : can_skip &= skip;
2788 0 : tempa[idx] = !skip;
2789 0 : templ[idy] = !skip;
2790 : #if CONFIG_EXT_TX
2791 0 : if (tx_size == TX_8X4) {
2792 0 : tempa[idx + 1] = tempa[idx];
2793 0 : } else if (tx_size == TX_4X8) {
2794 0 : templ[idy + 1] = templ[idy];
2795 : }
2796 : #endif // CONFIG_EXT_TX
2797 : #else
2798 : (void)scan_order;
2799 :
2800 : av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
2801 : tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
2802 :
2803 : ratey += x->rate;
2804 : skip = x->pvq_skip[0];
2805 : tempa[idx] = !skip;
2806 : templ[idy] = !skip;
2807 : can_skip &= skip;
2808 : #endif
2809 0 : if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
2810 0 : goto next_highbd;
2811 : #if CONFIG_PVQ
2812 : if (!skip)
2813 : #endif
2814 0 : av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
2815 : DCT_DCT, tx_size, dst, dst_stride,
2816 0 : p->eobs[block]);
2817 : } else {
2818 : int64_t dist;
2819 : unsigned int tmp;
2820 0 : TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
2821 0 : const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
2822 0 : const int coeff_ctx =
2823 0 : combine_entropy_contexts(tempa[idx], templ[idy]);
2824 : #if !CONFIG_PVQ
2825 0 : av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
2826 : tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
2827 0 : av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
2828 : templ + idy);
2829 0 : ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
2830 : tempa + idx, templ + idy,
2831 : cpi->sf.use_fast_coef_costing);
2832 0 : skip = (p->eobs[block] == 0);
2833 0 : can_skip &= skip;
2834 0 : tempa[idx] = !skip;
2835 0 : templ[idy] = !skip;
2836 : #if CONFIG_EXT_TX
2837 0 : if (tx_size == TX_8X4) {
2838 0 : tempa[idx + 1] = tempa[idx];
2839 0 : } else if (tx_size == TX_4X8) {
2840 0 : templ[idy + 1] = templ[idy];
2841 : }
2842 : #endif // CONFIG_EXT_TX
2843 : #else
2844 : (void)scan_order;
2845 :
2846 : av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
2847 : tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
2848 : ratey += x->rate;
2849 : skip = x->pvq_skip[0];
2850 : tempa[idx] = !skip;
2851 : templ[idy] = !skip;
2852 : can_skip &= skip;
2853 : #endif
2854 : #if CONFIG_PVQ
2855 : if (!skip)
2856 : #endif
2857 0 : av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
2858 : tx_type, tx_size, dst, dst_stride,
2859 0 : p->eobs[block]);
2860 0 : cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
2861 0 : dist = (int64_t)tmp << 4;
2862 0 : distortion += dist;
2863 0 : if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
2864 0 : goto next_highbd;
2865 : }
2866 : }
2867 : }
2868 :
2869 0 : rate += ratey;
2870 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
2871 :
2872 0 : if (this_rd < best_rd) {
2873 0 : *bestrate = rate;
2874 0 : *bestratey = ratey;
2875 0 : *bestdistortion = distortion;
2876 0 : best_rd = this_rd;
2877 0 : best_can_skip = can_skip;
2878 0 : *best_mode = mode;
2879 0 : memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
2880 0 : memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
2881 : #if CONFIG_PVQ
2882 : od_encode_checkpoint(&x->daala_enc, &post_buf);
2883 : #endif
2884 0 : for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
2885 0 : memcpy(best_dst16 + idy * 8,
2886 0 : CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
2887 : pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
2888 : }
2889 : }
2890 : next_highbd : {}
2891 : #if CONFIG_PVQ
2892 : od_encode_rollback(&x->daala_enc, &pre_buf);
2893 : #endif
2894 : }
2895 :
2896 0 : if (best_rd >= rd_thresh) return best_rd;
2897 :
2898 : #if CONFIG_PVQ
2899 : od_encode_rollback(&x->daala_enc, &post_buf);
2900 : #endif
2901 :
2902 0 : if (y_skip) *y_skip &= best_can_skip;
2903 :
2904 0 : for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
2905 0 : memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
2906 : best_dst16 + idy * 8,
2907 : pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
2908 : }
2909 :
2910 0 : return best_rd;
2911 : }
2912 : #endif // CONFIG_HIGHBITDEPTH
2913 :
2914 : #if CONFIG_PVQ
2915 : od_encode_checkpoint(&x->daala_enc, &pre_buf);
2916 : #endif // CONFIG_PVQ
2917 :
2918 0 : for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
2919 : int64_t this_rd;
2920 0 : int ratey = 0;
2921 0 : int64_t distortion = 0;
2922 0 : int rate = bmode_costs[mode];
2923 0 : int can_skip = 1;
2924 :
2925 0 : if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
2926 : (1 << mode))) {
2927 0 : continue;
2928 : }
2929 :
2930 : // Only do the oblique modes if the best so far is
2931 : // one of the neighboring directional modes
2932 0 : if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
2933 0 : if (conditional_skipintra(mode, *best_mode)) continue;
2934 : }
2935 :
2936 0 : memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
2937 0 : memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
2938 :
2939 0 : for (idy = 0; idy < pred_height_in_4x4_blocks; idy += tx_height_unit) {
2940 0 : for (idx = 0; idx < pred_width_in_4x4_blocks; idx += tx_width_unit) {
2941 0 : const int block_raster_idx = (row + idy) * 2 + (col + idx);
2942 0 : int block = av1_raster_order_to_block_index(tx_size, block_raster_idx);
2943 0 : const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
2944 0 : uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
2945 : #if !CONFIG_PVQ
2946 0 : int16_t *const src_diff = av1_raster_block_offset_int16(
2947 0 : BLOCK_8X8, block_raster_idx, p->src_diff);
2948 : #endif // !CONFIG_PVQ
2949 : int skip;
2950 0 : assert(block < 4);
2951 0 : assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
2952 : idx == 0 && idy == 0));
2953 0 : assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
2954 : block == 0 || block == 2));
2955 0 : xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
2956 0 : av1_predict_intra_block(xd, pd->width, pd->height,
2957 0 : txsize_to_bsize[tx_size], mode, dst, dst_stride,
2958 : dst, dst_stride,
2959 : #if CONFIG_CB4X4
2960 0 : 2 * (col + idx), 2 * (row + idy),
2961 : #else
2962 : col + idx, row + idy,
2963 : #endif // CONFIG_CB4X4
2964 : 0);
2965 : #if !CONFIG_PVQ
2966 0 : aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride,
2967 : dst, dst_stride);
2968 : #endif // !CONFIG_PVQ
2969 :
2970 0 : TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
2971 0 : const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
2972 0 : const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]);
2973 : #if CONFIG_CB4X4
2974 0 : block = 4 * block;
2975 : #endif // CONFIG_CB4X4
2976 : #if !CONFIG_PVQ
2977 0 : const AV1_XFORM_QUANT xform_quant =
2978 0 : is_lossless ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP;
2979 0 : av1_xform_quant(cm, x, 0, block,
2980 : #if CONFIG_CB4X4
2981 0 : 2 * (row + idy), 2 * (col + idx),
2982 : #else
2983 : row + idy, col + idx,
2984 : #endif // CONFIG_CB4X4
2985 : BLOCK_8X8, tx_size, coeff_ctx, xform_quant);
2986 :
2987 0 : av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
2988 : templ + idy);
2989 :
2990 0 : ratey +=
2991 0 : av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx,
2992 : templ + idy, cpi->sf.use_fast_coef_costing);
2993 0 : skip = (p->eobs[block] == 0);
2994 0 : can_skip &= skip;
2995 0 : tempa[idx] = !skip;
2996 0 : templ[idy] = !skip;
2997 : #if CONFIG_EXT_TX
2998 0 : if (tx_size == TX_8X4) {
2999 0 : tempa[idx + 1] = tempa[idx];
3000 0 : } else if (tx_size == TX_4X8) {
3001 0 : templ[idy + 1] = templ[idy];
3002 : }
3003 : #endif // CONFIG_EXT_TX
3004 : #else
3005 : (void)scan_order;
3006 :
3007 : av1_xform_quant(cm, x, 0, block,
3008 : #if CONFIG_CB4X4
3009 : 2 * (row + idy), 2 * (col + idx),
3010 : #else
3011 : row + idy, col + idx,
3012 : #endif // CONFIG_CB4X4
3013 : BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
3014 :
3015 : ratey += x->rate;
3016 : skip = x->pvq_skip[0];
3017 : tempa[idx] = !skip;
3018 : templ[idy] = !skip;
3019 : can_skip &= skip;
3020 : #endif // !CONFIG_PVQ
3021 :
3022 0 : if (!is_lossless) { // To use the pixel domain distortion, we need to
3023 : // calculate inverse txfm *before* calculating RD
3024 : // cost. Compared to calculating the distortion in
3025 :                               // the frequency domain, the extra encoding
3026 :                               // effort adds little overhead.
3027 : #if CONFIG_PVQ
3028 : if (!skip)
3029 : #endif // CONFIG_PVQ
3030 0 : av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
3031 : tx_type, tx_size, dst, dst_stride,
3032 0 : p->eobs[block]);
3033 : unsigned int tmp;
3034 0 : cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
3035 0 : const int64_t dist = (int64_t)tmp << 4;
3036 0 : distortion += dist;
3037 : }
3038 :
3039 0 : if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
3040 0 : goto next;
3041 :
3042 0 : if (is_lossless) { // Calculate inverse txfm *after* RD cost.
3043 : #if CONFIG_PVQ
3044 : if (!skip)
3045 : #endif // CONFIG_PVQ
3046 0 : av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
3047 : DCT_DCT, tx_size, dst, dst_stride,
3048 0 : p->eobs[block]);
3049 : }
3050 : }
3051 : }
3052 :
3053 0 : rate += ratey;
3054 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
3055 :
3056 0 : if (this_rd < best_rd) {
3057 0 : *bestrate = rate;
3058 0 : *bestratey = ratey;
3059 0 : *bestdistortion = distortion;
3060 0 : best_rd = this_rd;
3061 0 : best_can_skip = can_skip;
3062 0 : *best_mode = mode;
3063 0 : memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
3064 0 : memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
3065 : #if CONFIG_PVQ
3066 : od_encode_checkpoint(&x->daala_enc, &post_buf);
3067 : #endif // CONFIG_PVQ
3068 0 : for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
3069 0 : memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
3070 0 : pred_width_in_transform_blocks * 4);
3071 : }
3072 : next : {}
3073 : #if CONFIG_PVQ
3074 : od_encode_rollback(&x->daala_enc, &pre_buf);
3075 : #endif // CONFIG_PVQ
3076 : } // mode decision loop
3077 :
3078 0 : if (best_rd >= rd_thresh) return best_rd;
3079 :
3080 : #if CONFIG_PVQ
3081 : od_encode_rollback(&x->daala_enc, &post_buf);
3082 : #endif // CONFIG_PVQ
3083 :
3084 0 : if (y_skip) *y_skip &= best_can_skip;
3085 :
3086 0 : for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
3087 0 : memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
3088 0 : pred_width_in_transform_blocks * 4);
3089 :
3090 0 : return best_rd;
3091 : }
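/*
 * Editorial sketch (not part of the instrumented source): the RD comparison
 * pattern the sub-block mode loop above relies on. The cost form assumed
 * here is the conventional lambda-weighted sum, rate * rdmult / 2^8 plus
 * distortion << rddiv; the real macro is RDCOST() in av1/encoder/rd.h.
 */
#include <stdint.h>

static int64_t sketch_rd_cost(int rdmult, int rddiv, int rate, int64_t dist) {
  return ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
}

/* Returns 1 and updates *best_rd when the candidate wins. The loops above
 * additionally abort a candidate early (goto next / next_highbd) as soon as
 * its partial rate and distortion alone already reach *best_rd. */
static int sketch_update_best(int rdmult, int rddiv, int rate, int64_t dist,
                              int64_t *best_rd) {
  const int64_t this_rd = sketch_rd_cost(rdmult, rddiv, rate, dist);
  if (this_rd >= *best_rd) return 0;
  *best_rd = this_rd;
  return 1;
}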
3092 :
3093 0 : static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
3094 : MACROBLOCK *mb, int *rate,
3095 : int *rate_y, int64_t *distortion,
3096 : int *y_skip, int64_t best_rd) {
3097 0 : const MACROBLOCKD *const xd = &mb->e_mbd;
3098 0 : MODE_INFO *const mic = xd->mi[0];
3099 0 : const MODE_INFO *above_mi = xd->above_mi;
3100 0 : const MODE_INFO *left_mi = xd->left_mi;
3101 0 : MB_MODE_INFO *const mbmi = &mic->mbmi;
3102 0 : assert(!is_inter_block(mbmi));
3103 0 : const BLOCK_SIZE bsize = mbmi->sb_type;
3104 0 : const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
3105 0 : const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
3106 : int idx, idy;
3107 0 : int cost = 0;
3108 0 : int64_t total_distortion = 0;
3109 0 : int tot_rate_y = 0;
3110 0 : int64_t total_rd = 0;
3111 0 : const int *bmode_costs = cpi->mbmode_cost[0];
3112 0 : const int is_lossless = xd->lossless[mbmi->segment_id];
3113 : #if CONFIG_EXT_TX && CONFIG_RECT_TX
3114 0 : const TX_SIZE tx_size = is_lossless ? TX_4X4 : max_txsize_rect_lookup[bsize];
3115 : #else
3116 : const TX_SIZE tx_size = TX_4X4;
3117 : #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
3118 :
3119 : #if CONFIG_EXT_INTRA
3120 : #if CONFIG_INTRA_INTERP
3121 : mbmi->intra_filter = INTRA_FILTER_LINEAR;
3122 : #endif // CONFIG_INTRA_INTERP
3123 : #endif // CONFIG_EXT_INTRA
3124 : #if CONFIG_FILTER_INTRA
3125 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
3126 : #endif // CONFIG_FILTER_INTRA
3127 :
3128 :   // TODO(any): Add a search over tx_type to improve RD performance at the
3129 :   // expense of speed.
3130 0 : mbmi->tx_type = DCT_DCT;
3131 0 : mbmi->tx_size = tx_size;
3132 :
3133 0 : if (y_skip) *y_skip = 1;
3134 :
3135 : // Pick modes for each prediction sub-block (of size 4x4, 4x8, or 8x4) in this
3136 : // 8x8 coding block.
3137 0 : for (idy = 0; idy < 2; idy += pred_height_in_4x4_blocks) {
3138 0 : for (idx = 0; idx < 2; idx += pred_width_in_4x4_blocks) {
3139 0 : PREDICTION_MODE best_mode = DC_PRED;
3140 0 : int r = INT_MAX, ry = INT_MAX;
3141 0 : int64_t d = INT64_MAX, this_rd = INT64_MAX;
3142 : int j;
3143 0 : const int pred_block_idx = idy * 2 + idx;
3144 0 : if (cpi->common.frame_type == KEY_FRAME) {
3145 0 : const PREDICTION_MODE A =
3146 : av1_above_block_mode(mic, above_mi, pred_block_idx);
3147 0 : const PREDICTION_MODE L =
3148 : av1_left_block_mode(mic, left_mi, pred_block_idx);
3149 :
3150 0 : bmode_costs = cpi->y_mode_costs[A][L];
3151 : }
3152 0 : this_rd = rd_pick_intra_sub_8x8_y_subblock_mode(
3153 : cpi, mb, idy, idx, &best_mode, bmode_costs,
3154 0 : xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
3155 : &ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
3156 : #if !CONFIG_DAALA_DIST
3157 0 : if (this_rd >= best_rd - total_rd) return INT64_MAX;
3158 : #endif // !CONFIG_DAALA_DIST
3159 0 : total_rd += this_rd;
3160 0 : cost += r;
3161 0 : total_distortion += d;
3162 0 : tot_rate_y += ry;
3163 :
3164 0 : mic->bmi[pred_block_idx].as_mode = best_mode;
3165 0 : for (j = 1; j < pred_height_in_4x4_blocks; ++j)
3166 0 : mic->bmi[pred_block_idx + j * 2].as_mode = best_mode;
3167 0 : for (j = 1; j < pred_width_in_4x4_blocks; ++j)
3168 0 : mic->bmi[pred_block_idx + j].as_mode = best_mode;
3169 :
3170 0 : if (total_rd >= best_rd) return INT64_MAX;
3171 : }
3172 : }
3173 0 : mbmi->mode = mic->bmi[3].as_mode;
3174 :
3175 : #if CONFIG_DAALA_DIST
3176 : {
3177 : const struct macroblock_plane *p = &mb->plane[0];
3178 : const struct macroblockd_plane *pd = &xd->plane[0];
3179 : const int src_stride = p->src.stride;
3180 : const int dst_stride = pd->dst.stride;
3181 : uint8_t *src = p->src.buf;
3182 : uint8_t *dst = pd->dst.buf;
3183 : int use_activity_masking = 0;
3184 : int qm = OD_HVS_QM;
3185 :
3186 : #if CONFIG_PVQ
3187 : use_activity_masking = mb->daala_enc.use_activity_masking;
3188 : #endif // CONFIG_PVQ
3189 : // Daala-defined distortion computed for the block of 8x8 pixels
3190 : total_distortion = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8,
3191 : qm, use_activity_masking, mb->qindex)
3192 : << 4;
3193 : }
3194 : #endif // CONFIG_DAALA_DIST
3195 : // Add in the cost of the transform type
3196 0 : if (!is_lossless) {
3197 0 : int rate_tx_type = 0;
3198 : #if CONFIG_EXT_TX
3199 0 : if (get_ext_tx_types(tx_size, bsize, 0, cpi->common.reduced_tx_set_used) >
3200 : 1) {
3201 0 : const int eset =
3202 0 : get_ext_tx_set(tx_size, bsize, 0, cpi->common.reduced_tx_set_used);
3203 0 : rate_tx_type = cpi->intra_tx_type_costs[eset][txsize_sqr_map[tx_size]]
3204 0 : [mbmi->mode][mbmi->tx_type];
3205 : }
3206 : #else
3207 : rate_tx_type =
3208 : cpi->intra_tx_type_costs[txsize_sqr_map[tx_size]]
3209 : [intra_mode_to_tx_type_context[mbmi->mode]]
3210 : [mbmi->tx_type];
3211 : #endif // CONFIG_EXT_TX
3212 0 : assert(mbmi->tx_size == tx_size);
3213 0 : cost += rate_tx_type;
3214 0 : tot_rate_y += rate_tx_type;
3215 : }
3216 :
3217 0 : *rate = cost;
3218 0 : *rate_y = tot_rate_y;
3219 0 : *distortion = total_distortion;
3220 :
3221 0 : return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
3222 : }
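/*
 * Editorial sketch: how rd_pick_intra_sub_8x8_y_mode() above walks the 8x8
 * coding block in prediction sub-block units and replicates each winning
 * mode into every 4x4 bmi[] entry that sub-block covers. pred_w/pred_h are
 * the sub-block dimensions in 4x4 units, as num_4x4_blocks_{wide,high}_
 * lookup[] would supply; the int arrays stand in for PREDICTION_MODE.
 */
static void sketch_fill_bmi(int pred_w, int pred_h,
                            const int best_mode[4] /* per sub-block */,
                            int bmi_mode[4] /* one entry per 4x4 block */) {
  int idx, idy, j;
  for (idy = 0; idy < 2; idy += pred_h) {
    for (idx = 0; idx < 2; idx += pred_w) {
      const int pred_block_idx = idy * 2 + idx;
      const int mode = best_mode[pred_block_idx];
      bmi_mode[pred_block_idx] = mode;
      /* A 4x8 sub-block also owns the 4x4 entry below it ... */
      for (j = 1; j < pred_h; ++j) bmi_mode[pred_block_idx + j * 2] = mode;
      /* ... and an 8x4 sub-block owns the 4x4 entry to its right. */
      for (j = 1; j < pred_w; ++j) bmi_mode[pred_block_idx + j] = mode;
    }
  }
}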
3223 :
3224 : #if CONFIG_FILTER_INTRA
3225 : // Return 1 if a filter intra mode is selected; return 0 otherwise.
3226 : static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
3227 : int *rate, int *rate_tokenonly,
3228 : int64_t *distortion, int *skippable,
3229 : BLOCK_SIZE bsize, int mode_cost,
3230 : int64_t *best_rd, int64_t *best_model_rd,
3231 : uint16_t skip_mask) {
3232 : MACROBLOCKD *const xd = &x->e_mbd;
3233 : MODE_INFO *const mic = xd->mi[0];
3234 : MB_MODE_INFO *mbmi = &mic->mbmi;
3235 : int filter_intra_selected_flag = 0;
3236 : FILTER_INTRA_MODE mode;
3237 : TX_SIZE best_tx_size = TX_4X4;
3238 : FILTER_INTRA_MODE_INFO filter_intra_mode_info;
3239 : TX_TYPE best_tx_type;
3240 :
3241 : av1_zero(filter_intra_mode_info);
3242 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
3243 : mbmi->mode = DC_PRED;
3244 : #if CONFIG_PALETTE
3245 : mbmi->palette_mode_info.palette_size[0] = 0;
3246 : #endif // CONFIG_PALETTE
3247 :
3248 : for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
3249 : int this_rate;
3250 : int64_t this_rd, this_model_rd;
3251 : RD_STATS tokenonly_rd_stats;
3252 : if (skip_mask & (1 << mode)) continue;
3253 : mbmi->filter_intra_mode_info.filter_intra_mode[0] = mode;
3254 : this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
3255 : if (*best_model_rd != INT64_MAX &&
3256 : this_model_rd > *best_model_rd + (*best_model_rd >> 1))
3257 : continue;
3258 : if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
3259 : super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
3260 : if (tokenonly_rd_stats.rate == INT_MAX) continue;
3261 : this_rate = tokenonly_rd_stats.rate +
3262 : av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) +
3263 : write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
3264 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
3265 :
3266 : if (this_rd < *best_rd) {
3267 : *best_rd = this_rd;
3268 : best_tx_size = mic->mbmi.tx_size;
3269 : filter_intra_mode_info = mbmi->filter_intra_mode_info;
3270 : best_tx_type = mic->mbmi.tx_type;
3271 : *rate = this_rate;
3272 : *rate_tokenonly = tokenonly_rd_stats.rate;
3273 : *distortion = tokenonly_rd_stats.dist;
3274 : *skippable = tokenonly_rd_stats.skip;
3275 : filter_intra_selected_flag = 1;
3276 : }
3277 : }
3278 :
3279 : if (filter_intra_selected_flag) {
3280 : mbmi->mode = DC_PRED;
3281 : mbmi->tx_size = best_tx_size;
3282 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0] =
3283 : filter_intra_mode_info.use_filter_intra_mode[0];
3284 : mbmi->filter_intra_mode_info.filter_intra_mode[0] =
3285 : filter_intra_mode_info.filter_intra_mode[0];
3286 : mbmi->tx_type = best_tx_type;
3287 : return 1;
3288 : } else {
3289 : return 0;
3290 : }
3291 : }
3292 : #endif // CONFIG_FILTER_INTRA
3293 :
3294 : #if CONFIG_EXT_INTRA
3295 : // Run the RD calculation with the given luma intra prediction angle and
3296 : // return the RD cost. Update the best mode info if this cost is the best so far.
3297 0 : static int64_t calc_rd_given_intra_angle(
3298 : const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
3299 : int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
3300 : RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
3301 : TX_TYPE *best_tx_type,
3302 : #if CONFIG_INTRA_INTERP
3303 : INTRA_FILTER *best_filter,
3304 : #endif // CONFIG_INTRA_INTERP
3305 : int64_t *best_rd, int64_t *best_model_rd) {
3306 : int this_rate;
3307 : RD_STATS tokenonly_rd_stats;
3308 : int64_t this_rd, this_model_rd;
3309 0 : MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
3310 0 : assert(!is_inter_block(mbmi));
3311 :
3312 0 : mbmi->angle_delta[0] = angle_delta;
3313 0 : this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
3314 0 : if (*best_model_rd != INT64_MAX &&
3315 0 : this_model_rd > *best_model_rd + (*best_model_rd >> 1))
3316 0 : return INT64_MAX;
3317 0 : if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
3318 0 : super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
3319 0 : if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;
3320 :
3321 0 : this_rate = tokenonly_rd_stats.rate + mode_cost +
3322 0 : write_uniform_cost(2 * max_angle_delta + 1,
3323 0 : mbmi->angle_delta[0] + max_angle_delta);
3324 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
3325 :
3326 0 : if (this_rd < *best_rd) {
3327 0 : *best_rd = this_rd;
3328 0 : *best_angle_delta = mbmi->angle_delta[0];
3329 0 : *best_tx_size = mbmi->tx_size;
3330 : #if CONFIG_INTRA_INTERP
3331 : *best_filter = mbmi->intra_filter;
3332 : #endif // CONFIG_INTRA_INTERP
3333 0 : *best_tx_type = mbmi->tx_type;
3334 0 : *rate = this_rate;
3335 0 : rd_stats->rate = tokenonly_rd_stats.rate;
3336 0 : rd_stats->dist = tokenonly_rd_stats.dist;
3337 0 : rd_stats->skip = tokenonly_rd_stats.skip;
3338 : }
3339 0 : return this_rd;
3340 : }
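/*
 * Editorial sketch of the model-RD gate used above: a candidate is dropped
 * without running the full transform search once its model cost exceeds
 * 1.5x the best model cost seen so far, i.e. best + (best >> 1).
 */
#include <stdint.h>

static int sketch_prune_by_model_rd(int64_t this_model_rd,
                                    int64_t *best_model_rd) {
  if (*best_model_rd != INT64_MAX &&
      this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    return 1; /* prune this candidate */
  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  return 0; /* run the full search */
}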
3341 :
3342 : // Given a luma directional intra prediction mode, pick the best angle delta.
3343 : // Return the RD cost corresponding to the best angle delta.
3344 0 : static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
3345 : int *rate, RD_STATS *rd_stats,
3346 : BLOCK_SIZE bsize, int mode_cost,
3347 : int64_t best_rd,
3348 : int64_t *best_model_rd) {
3349 0 : MACROBLOCKD *const xd = &x->e_mbd;
3350 0 : MODE_INFO *const mic = xd->mi[0];
3351 0 : MB_MODE_INFO *mbmi = &mic->mbmi;
3352 0 : assert(!is_inter_block(mbmi));
3353 0 : int i, angle_delta, best_angle_delta = 0;
3354 0 : int first_try = 1;
3355 : #if CONFIG_INTRA_INTERP
3356 : int p_angle;
3357 : const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
3358 : INTRA_FILTER filter, best_filter = INTRA_FILTER_LINEAR;
3359 : #endif // CONFIG_INTRA_INTERP
3360 : int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
3361 0 : TX_SIZE best_tx_size = mic->mbmi.tx_size;
3362 0 : TX_TYPE best_tx_type = mbmi->tx_type;
3363 :
3364 0 : for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
3365 :
3366 0 : for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
3367 : #if CONFIG_INTRA_INTERP
3368 : for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
3369 : if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
3370 : mic->mbmi.intra_filter = filter;
3371 : #endif // CONFIG_INTRA_INTERP
3372 0 : for (i = 0; i < 2; ++i) {
3373 0 : best_rd_in = (best_rd == INT64_MAX)
3374 : ? INT64_MAX
3375 0 : : (best_rd + (best_rd >> (first_try ? 3 : 5)));
3376 0 : this_rd = calc_rd_given_intra_angle(
3377 : cpi, x, bsize,
3378 : #if CONFIG_INTRA_INTERP
3379 : mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
3380 : #else
3381 : mode_cost,
3382 : #endif // CONFIG_INTRA_INTERP
3383 0 : best_rd_in, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
3384 : rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
3385 : #if CONFIG_INTRA_INTERP
3386 : &best_filter,
3387 : #endif // CONFIG_INTRA_INTERP
3388 : &best_rd, best_model_rd);
3389 0 : rd_cost[2 * angle_delta + i] = this_rd;
3390 0 : if (first_try && this_rd == INT64_MAX) return best_rd;
3391 0 : first_try = 0;
3392 0 : if (angle_delta == 0) {
3393 0 : rd_cost[1] = this_rd;
3394 0 : break;
3395 : }
3396 : }
3397 : #if CONFIG_INTRA_INTERP
3398 : }
3399 : #endif // CONFIG_INTRA_INTERP
3400 : }
3401 :
3402 0 : assert(best_rd != INT64_MAX);
3403 0 : for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
3404 : int64_t rd_thresh;
3405 : #if CONFIG_INTRA_INTERP
3406 : for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
3407 : if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
3408 : mic->mbmi.intra_filter = filter;
3409 : #endif // CONFIG_INTRA_INTERP
3410 0 : for (i = 0; i < 2; ++i) {
3411 0 : int skip_search = 0;
3412 0 : rd_thresh = best_rd + (best_rd >> 5);
3413 0 : if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
3414 0 : rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
3415 0 : skip_search = 1;
3416 0 : if (!skip_search) {
3417 0 : calc_rd_given_intra_angle(
3418 : cpi, x, bsize,
3419 : #if CONFIG_INTRA_INTERP
3420 : mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
3421 : #else
3422 : mode_cost,
3423 : #endif // CONFIG_INTRA_INTERP
3424 0 : best_rd, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
3425 : rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
3426 : #if CONFIG_INTRA_INTERP
3427 : &best_filter,
3428 : #endif // CONFIG_INTRA_INTERP
3429 : &best_rd, best_model_rd);
3430 : }
3431 : }
3432 : #if CONFIG_INTRA_INTERP
3433 : }
3434 : #endif // CONFIG_INTRA_INTERP
3435 : }
3436 :
3437 : #if CONFIG_INTRA_INTERP
3438 : if (FILTER_FAST_SEARCH && rd_stats->rate < INT_MAX) {
3439 : p_angle = mode_to_angle_map[mbmi->mode] + best_angle_delta * ANGLE_STEP;
3440 : if (av1_is_intra_filter_switchable(p_angle)) {
3441 : for (filter = INTRA_FILTER_LINEAR + 1; filter < INTRA_FILTERS; ++filter) {
3442 : mic->mbmi.intra_filter = filter;
3443 : this_rd = calc_rd_given_intra_angle(
3444 : cpi, x, bsize,
3445 : mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
3446 : best_rd, best_angle_delta, MAX_ANGLE_DELTA, rate, rd_stats,
3447 : &best_angle_delta, &best_tx_size, &best_tx_type, &best_filter,
3448 : &best_rd, best_model_rd);
3449 : }
3450 : }
3451 : }
3452 : #endif // CONFIG_INTRA_INTERP
3453 :
3454 0 : mbmi->tx_size = best_tx_size;
3455 0 : mbmi->angle_delta[0] = best_angle_delta;
3456 : #if CONFIG_INTRA_INTERP
3457 : mic->mbmi.intra_filter = best_filter;
3458 : #endif // CONFIG_INTRA_INTERP
3459 0 : mbmi->tx_type = best_tx_type;
3460 0 : return best_rd;
3461 : }
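/*
 * Editorial sketch of the two-pass angle-delta search above, with the
 * best_rd_in scaling and first_try early-out omitted for brevity. Pass 1
 * evaluates the even deltas 0, +/-2, ...; pass 2 tries an odd delta only
 * when one of its even neighbours came within best_rd + (best_rd >> 5).
 * The eval callback stands in for calc_rd_given_intra_angle(), which also
 * updates *best_rd through its own bookkeeping.
 */
#include <stdint.h>

typedef int64_t (*sketch_eval_fn)(int angle_delta, void *ctx);

enum { SKETCH_MAX_DELTA = 3 }; /* mirrors MAX_ANGLE_DELTA in this snapshot */

static void sketch_two_pass_delta_search(sketch_eval_fn eval, void *ctx,
                                         int64_t *best_rd) {
  int64_t rd_cost[2 * (SKETCH_MAX_DELTA + 2)];
  int delta, i;
  for (i = 0; i < 2 * (SKETCH_MAX_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;

  for (delta = 0; delta <= SKETCH_MAX_DELTA; delta += 2) { /* pass 1 */
    for (i = 0; i < 2; ++i) { /* i selects the sign of the delta */
      rd_cost[2 * delta + i] = eval((1 - 2 * i) * delta, ctx);
      if (delta == 0) { /* delta 0 has only one sign */
        rd_cost[1] = rd_cost[0];
        break;
      }
    }
  }
  for (delta = 1; delta <= SKETCH_MAX_DELTA; delta += 2) { /* pass 2 */
    for (i = 0; i < 2; ++i) {
      const int64_t rd_thresh = *best_rd + (*best_rd >> 5);
      if (rd_cost[2 * (delta + 1) + i] > rd_thresh &&
          rd_cost[2 * (delta - 1) + i] > rd_thresh)
        continue; /* both even neighbours were poor: skip this odd delta */
      eval((1 - 2 * i) * delta, ctx);
    }
  }
}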
3462 :
3463 : // Indexed by the sign, integer part, and fractional part of the gradient value.
3464 : static const uint8_t gradient_to_angle_bin[2][7][16] = {
3465 : {
3466 : { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
3467 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
3468 : { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
3469 : { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
3470 : { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
3471 : { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
3472 : { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
3473 : },
3474 : {
3475 : { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
3476 : { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
3477 : { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
3478 : { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
3479 : { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
3480 : { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
3481 : { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
3482 : },
3483 : };
3484 :
3485 : /* clang-format off */
3486 : static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
3487 : 0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
3488 : #if CONFIG_ALT_INTRA
3489 : 0,
3490 : #endif // CONFIG_ALT_INTRA
3491 : };
3492 : /* clang-format on */
3493 :
3494 0 : static void angle_estimation(const uint8_t *src, int src_stride, int rows,
3495 : int cols, BLOCK_SIZE bsize,
3496 : uint8_t *directional_mode_skip_mask) {
3497 0 : memset(directional_mode_skip_mask, 0,
3498 : INTRA_MODES * sizeof(*directional_mode_skip_mask));
3499 : // Sub-8x8 blocks do not use extra directions.
3500 0 : if (bsize < BLOCK_8X8) return;
3501 : uint64_t hist[DIRECTIONAL_MODES];
3502 0 : memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3503 0 : src += src_stride;
3504 : int r, c, dx, dy;
3505 0 : for (r = 1; r < rows; ++r) {
3506 0 : for (c = 1; c < cols; ++c) {
3507 0 : dx = src[c] - src[c - 1];
3508 0 : dy = src[c] - src[c - src_stride];
3509 : int index;
3510 0 : const int temp = dx * dx + dy * dy;
3511 0 : if (dy == 0) {
3512 0 : index = 2;
3513 : } else {
3514 0 : const int sn = (dx > 0) ^ (dy > 0);
3515 0 : dx = abs(dx);
3516 0 : dy = abs(dy);
3517 0 : const int remd = (dx % dy) * 16 / dy;
3518 0 : const int quot = dx / dy;
3519 0 : index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
3520 : }
3521 0 : hist[index] += temp;
3522 : }
3523 0 : src += src_stride;
3524 : }
3525 :
3526 : int i;
3527 0 : uint64_t hist_sum = 0;
3528 0 : for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3529 0 : for (i = 0; i < INTRA_MODES; ++i) {
3530 0 : if (av1_is_directional_mode(i, bsize)) {
3531 0 : const uint8_t angle_bin = mode_to_angle_bin[i];
3532 0 : uint64_t score = 2 * hist[angle_bin];
3533 0 : int weight = 2;
3534 0 : if (angle_bin > 0) {
3535 0 : score += hist[angle_bin - 1];
3536 0 : ++weight;
3537 : }
3538 0 : if (angle_bin < DIRECTIONAL_MODES - 1) {
3539 0 : score += hist[angle_bin + 1];
3540 0 : ++weight;
3541 : }
3542 0 : if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3543 0 : directional_mode_skip_mask[i] = 1;
3544 : }
3545 : }
3546 : }
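/*
 * Editorial worked example of the histogram indexing above. For a pixel
 * gradient dx = 5, dy = 2: the sign bit is (dx > 0) ^ (dy > 0) = 0, the
 * integer part is |dx| / |dy| = 2 and the fractional part is
 * (|dx| % |dy|) * 16 / |dy| = 8, so the energy dx*dx + dy*dy = 29 is
 * accumulated into hist[gradient_to_angle_bin[0][2][8]], i.e. bin 1.
 * A purely horizontal gradient (dy == 0) maps straight to bin 2.
 */
#include <stdlib.h>

static int sketch_angle_bin(int dx, int dy) {
  if (dy == 0) return 2;
  const int sn = (dx > 0) ^ (dy > 0);
  dx = abs(dx);
  dy = abs(dy);
  const int quot = dx / dy;             /* integer part of |dx| / |dy| */
  const int remd = (dx % dy) * 16 / dy; /* fractional part, in 1/16ths */
  return gradient_to_angle_bin[sn][quot > 6 ? 6 : quot]
                              [remd > 15 ? 15 : remd];
}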
3547 :
3548 : #if CONFIG_HIGHBITDEPTH
3549 0 : static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
3550 : int rows, int cols, BLOCK_SIZE bsize,
3551 : uint8_t *directional_mode_skip_mask) {
3552 0 : memset(directional_mode_skip_mask, 0,
3553 : INTRA_MODES * sizeof(*directional_mode_skip_mask));
3554 : // Sub-8x8 blocks do not use extra directions.
3555 0 : if (bsize < BLOCK_8X8) return;
3556 0 : uint16_t *src = CONVERT_TO_SHORTPTR(src8);
3557 : uint64_t hist[DIRECTIONAL_MODES];
3558 0 : memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3559 0 : src += src_stride;
3560 : int r, c, dx, dy;
3561 0 : for (r = 1; r < rows; ++r) {
3562 0 : for (c = 1; c < cols; ++c) {
3563 0 : dx = src[c] - src[c - 1];
3564 0 : dy = src[c] - src[c - src_stride];
3565 : int index;
3566 0 : const int temp = dx * dx + dy * dy;
3567 0 : if (dy == 0) {
3568 0 : index = 2;
3569 : } else {
3570 0 : const int sn = (dx > 0) ^ (dy > 0);
3571 0 : dx = abs(dx);
3572 0 : dy = abs(dy);
3573 0 : const int remd = (dx % dy) * 16 / dy;
3574 0 : const int quot = dx / dy;
3575 0 : index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
3576 : }
3577 0 : hist[index] += temp;
3578 : }
3579 0 : src += src_stride;
3580 : }
3581 :
3582 : int i;
3583 0 : uint64_t hist_sum = 0;
3584 0 : for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3585 0 : for (i = 0; i < INTRA_MODES; ++i) {
3586 0 : if (av1_is_directional_mode(i, bsize)) {
3587 0 : const uint8_t angle_bin = mode_to_angle_bin[i];
3588 0 : uint64_t score = 2 * hist[angle_bin];
3589 0 : int weight = 2;
3590 0 : if (angle_bin > 0) {
3591 0 : score += hist[angle_bin - 1];
3592 0 : ++weight;
3593 : }
3594 0 : if (angle_bin < DIRECTIONAL_MODES - 1) {
3595 0 : score += hist[angle_bin + 1];
3596 0 : ++weight;
3597 : }
3598 0 : if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3599 0 : directional_mode_skip_mask[i] = 1;
3600 : }
3601 : }
3602 : }
3603 : #endif // CONFIG_HIGHBITDEPTH
3604 : #endif // CONFIG_EXT_INTRA
3605 :
3606 : // This function is used only for intra_only frames.
3607 0 : static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
3608 : int *rate, int *rate_tokenonly,
3609 : int64_t *distortion, int *skippable,
3610 : BLOCK_SIZE bsize, int64_t best_rd) {
3611 : uint8_t mode_idx;
3612 0 : MACROBLOCKD *const xd = &x->e_mbd;
3613 0 : MODE_INFO *const mic = xd->mi[0];
3614 0 : MB_MODE_INFO *const mbmi = &mic->mbmi;
3615 0 : assert(!is_inter_block(mbmi));
3616 0 : MB_MODE_INFO best_mbmi = *mbmi;
3617 0 : int64_t best_model_rd = INT64_MAX;
3618 : #if CONFIG_EXT_INTRA
3619 0 : const int rows = block_size_high[bsize];
3620 0 : const int cols = block_size_wide[bsize];
3621 : #if CONFIG_INTRA_INTERP
3622 : const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
3623 : #endif // CONFIG_INTRA_INTERP
3624 : int is_directional_mode;
3625 : uint8_t directional_mode_skip_mask[INTRA_MODES];
3626 0 : const int src_stride = x->plane[0].src.stride;
3627 0 : const uint8_t *src = x->plane[0].src.buf;
3628 : #endif // CONFIG_EXT_INTRA
3629 : #if CONFIG_FILTER_INTRA
3630 : int beat_best_rd = 0;
3631 : uint16_t filter_intra_mode_skip_mask = (1 << FILTER_INTRA_MODES) - 1;
3632 : #endif // CONFIG_FILTER_INTRA
3633 : const int *bmode_costs;
3634 : #if CONFIG_PALETTE
3635 0 : PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3636 0 : uint8_t *best_palette_color_map =
3637 0 : cpi->common.allow_screen_content_tools
3638 0 : ? x->palette_buffer->best_palette_color_map
3639 0 : : NULL;
3640 0 : int palette_y_mode_ctx = 0;
3641 0 : const int try_palette =
3642 0 : cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
3643 : #endif // CONFIG_PALETTE
3644 0 : const MODE_INFO *above_mi = xd->above_mi;
3645 0 : const MODE_INFO *left_mi = xd->left_mi;
3646 0 : const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, 0);
3647 0 : const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
3648 0 : const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
3649 : #if CONFIG_PVQ
3650 : od_rollback_buffer pre_buf, post_buf;
3651 :
3652 : od_encode_checkpoint(&x->daala_enc, &pre_buf);
3653 : od_encode_checkpoint(&x->daala_enc, &post_buf);
3654 : #endif // CONFIG_PVQ
3655 0 : bmode_costs = cpi->y_mode_costs[A][L];
3656 :
3657 : #if CONFIG_EXT_INTRA
3658 0 : mbmi->angle_delta[0] = 0;
3659 : #if CONFIG_HIGHBITDEPTH
3660 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3661 0 : highbd_angle_estimation(src, src_stride, rows, cols, bsize,
3662 : directional_mode_skip_mask);
3663 : else
3664 : #endif // CONFIG_HIGHBITDEPTH
3665 0 : angle_estimation(src, src_stride, rows, cols, bsize,
3666 : directional_mode_skip_mask);
3667 : #endif // CONFIG_EXT_INTRA
3668 : #if CONFIG_FILTER_INTRA
3669 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
3670 : #endif // CONFIG_FILTER_INTRA
3671 : #if CONFIG_PALETTE
3672 0 : pmi->palette_size[0] = 0;
3673 0 : if (above_mi)
3674 0 : palette_y_mode_ctx +=
3675 0 : (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
3676 0 : if (left_mi)
3677 0 : palette_y_mode_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
3678 : #endif // CONFIG_PALETTE
3679 :
3680 0 : if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
3681 0 : x->use_default_intra_tx_type = 1;
3682 : else
3683 0 : x->use_default_intra_tx_type = 0;
3684 :
3685 :   /* Y: search for the best intra prediction mode. */
3686 0 : for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
3687 : RD_STATS this_rd_stats;
3688 : int this_rate, this_rate_tokenonly, s;
3689 : int64_t this_distortion, this_rd, this_model_rd;
3690 0 : if (mode_idx == FINAL_MODE_SEARCH) {
3691 0 : if (x->use_default_intra_tx_type == 0) break;
3692 0 : mbmi->mode = best_mbmi.mode;
3693 0 : x->use_default_intra_tx_type = 0;
3694 : } else {
3695 0 : mbmi->mode = mode_idx;
3696 : }
3697 : #if CONFIG_PVQ
3698 : od_encode_rollback(&x->daala_enc, &pre_buf);
3699 : #endif // CONFIG_PVQ
3700 : #if CONFIG_EXT_INTRA
3701 0 : mbmi->angle_delta[0] = 0;
3702 : #endif // CONFIG_EXT_INTRA
3703 0 : this_model_rd = intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode]);
3704 0 : if (best_model_rd != INT64_MAX &&
3705 0 : this_model_rd > best_model_rd + (best_model_rd >> 1))
3706 0 : continue;
3707 0 : if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
3708 : #if CONFIG_EXT_INTRA
3709 0 : is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
3710 0 : if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
3711 0 : if (is_directional_mode) {
3712 0 : this_rd_stats.rate = INT_MAX;
3713 0 : rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
3714 0 : bmode_costs[mbmi->mode], best_rd, &best_model_rd);
3715 : } else {
3716 0 : super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
3717 : }
3718 : #else
3719 : super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
3720 : #endif // CONFIG_EXT_INTRA
3721 0 : this_rate_tokenonly = this_rd_stats.rate;
3722 0 : this_distortion = this_rd_stats.dist;
3723 0 : s = this_rd_stats.skip;
3724 :
3725 0 : if (this_rate_tokenonly == INT_MAX) continue;
3726 :
3727 0 : this_rate = this_rate_tokenonly + bmode_costs[mbmi->mode];
3728 :
3729 0 : if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
3730 : // super_block_yrd above includes the cost of the tx_size in the
3731 : // tokenonly rate, but for intra blocks, tx_size is always coded
3732 : // (prediction granularity), so we account for it in the full rate,
3733 : // not the tokenonly rate.
3734 0 : this_rate_tokenonly -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
3735 : }
3736 : #if CONFIG_PALETTE
3737 0 : if (try_palette && mbmi->mode == DC_PRED) {
3738 0 : this_rate +=
3739 0 : av1_cost_bit(av1_default_palette_y_mode_prob[bsize - BLOCK_8X8]
3740 : [palette_y_mode_ctx],
3741 : 0);
3742 : }
3743 : #endif // CONFIG_PALETTE
3744 : #if CONFIG_FILTER_INTRA
3745 : if (mbmi->mode == DC_PRED)
3746 : this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 0);
3747 : #endif // CONFIG_FILTER_INTRA
3748 : #if CONFIG_EXT_INTRA
3749 0 : if (is_directional_mode) {
3750 : #if CONFIG_INTRA_INTERP
3751 : const int p_angle =
3752 : mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
3753 : if (av1_is_intra_filter_switchable(p_angle))
3754 : this_rate +=
3755 : cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
3756 : #endif // CONFIG_INTRA_INTERP
3757 0 : this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
3758 0 : MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
3759 : }
3760 : #endif // CONFIG_EXT_INTRA
3761 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
3762 : #if CONFIG_FILTER_INTRA
3763 : if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) {
3764 : filter_intra_mode_skip_mask ^= (1 << mbmi->mode);
3765 : }
3766 : #endif // CONFIG_FILTER_INTRA
3767 :
3768 0 : if (this_rd < best_rd) {
3769 0 : best_mbmi = *mbmi;
3770 0 : best_rd = this_rd;
3771 : #if CONFIG_FILTER_INTRA
3772 : beat_best_rd = 1;
3773 : #endif // CONFIG_FILTER_INTRA
3774 0 : *rate = this_rate;
3775 0 : *rate_tokenonly = this_rate_tokenonly;
3776 0 : *distortion = this_distortion;
3777 0 : *skippable = s;
3778 : #if CONFIG_PVQ
3779 : od_encode_checkpoint(&x->daala_enc, &post_buf);
3780 : #endif // CONFIG_PVQ
3781 : }
3782 : }
3783 :
3784 : #if CONFIG_PVQ
3785 : od_encode_rollback(&x->daala_enc, &post_buf);
3786 : #endif // CONFIG_PVQ
3787 :
3788 : #if CONFIG_CFL
3789 : // Perform one extra txfm_rd_in_plane() call, this time with the best value,
3790 : // so that the reconstructed luma values can be stored.
3791 : RD_STATS this_rd_stats;
3792 : x->cfl_store_y = 1;
3793 : txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize,
3794 : mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing);
3795 : x->cfl_store_y = 0;
3796 : #endif // CONFIG_CFL
3797 :
3798 : #if CONFIG_PALETTE
3799 0 : if (try_palette) {
3800 0 : rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
3801 : bmode_costs[DC_PRED], &best_mbmi,
3802 : best_palette_color_map, &best_rd, &best_model_rd,
3803 : rate, rate_tokenonly, distortion, skippable);
3804 : }
3805 : #endif // CONFIG_PALETTE
3806 :
3807 : #if CONFIG_FILTER_INTRA
3808 : if (beat_best_rd) {
3809 : if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
3810 : skippable, bsize, bmode_costs[DC_PRED],
3811 : &best_rd, &best_model_rd,
3812 : filter_intra_mode_skip_mask)) {
3813 : best_mbmi = *mbmi;
3814 : }
3815 : }
3816 : #endif // CONFIG_FILTER_INTRA
3817 :
3818 0 : *mbmi = best_mbmi;
3819 0 : return best_rd;
3820 : }
3821 :
3822 : // Return value 0: early termination triggered, no valid rd cost available;
3823 : // 1: rd cost values are valid.
3824 0 : static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
3825 : RD_STATS *rd_stats, BLOCK_SIZE bsize,
3826 : int64_t ref_best_rd) {
3827 0 : MACROBLOCKD *const xd = &x->e_mbd;
3828 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3829 0 : const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
3830 : int plane;
3831 0 : int is_cost_valid = 1;
3832 0 : av1_init_rd_stats(rd_stats);
3833 :
3834 0 : if (ref_best_rd < 0) is_cost_valid = 0;
3835 :
3836 : #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
3837 0 : if (x->skip_chroma_rd) return is_cost_valid;
3838 :
3839 0 : bsize = scale_chroma_bsize(bsize, xd->plane[1].subsampling_x,
3840 : xd->plane[1].subsampling_y);
3841 : #endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
3842 :
3843 : #if !CONFIG_PVQ
3844 0 : if (is_inter_block(mbmi) && is_cost_valid) {
3845 0 : for (plane = 1; plane < MAX_MB_PLANE; ++plane)
3846 0 : av1_subtract_plane(x, bsize, plane);
3847 : }
3848 : #endif // !CONFIG_PVQ
3849 :
3850 0 : if (is_cost_valid) {
3851 0 : for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
3852 : RD_STATS pn_rd_stats;
3853 0 : txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize,
3854 : uv_tx_size, cpi->sf.use_fast_coef_costing);
3855 0 : if (pn_rd_stats.rate == INT_MAX) {
3856 0 : is_cost_valid = 0;
3857 0 : break;
3858 : }
3859 0 : av1_merge_rd_stats(rd_stats, &pn_rd_stats);
3860 0 : if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >
3861 0 : ref_best_rd &&
3862 0 : RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse) > ref_best_rd) {
3863 0 : is_cost_valid = 0;
3864 0 : break;
3865 : }
3866 : }
3867 : }
3868 :
3869 0 : if (!is_cost_valid) {
3870 : // reset cost value
3871 0 : av1_invalid_rd_stats(rd_stats);
3872 : }
3873 :
3874 0 : return is_cost_valid;
3875 : }
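/*
 * Editorial sketch of the chroma early-exit test above: the search is
 * abandoned only when both the realized cost and the best achievable skip
 * cost (zero rate, distortion == sse) exceed the reference budget, since a
 * later skip decision could otherwise still bring the block under budget.
 * The lambda form mirrors RDCOST() as used throughout this file.
 */
#include <stdint.h>

static int sketch_uv_should_abort(int rdmult, int rddiv, int rate,
                                  int64_t dist, int64_t sse,
                                  int64_t ref_best_rd) {
  const int64_t rd_real =
      ((128 + (int64_t)rate * rdmult) >> 8) + (dist << rddiv);
  const int64_t rd_skip = sse << rddiv; /* rate 0, distortion = sse */
  return rd_real > ref_best_rd && rd_skip > ref_best_rd;
}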
3876 :
3877 : #if CONFIG_VAR_TX
3878 : // FIXME crop these calls
3879 0 : static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
3880 : TX_SIZE tx_size) {
3881 0 : return aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size],
3882 : tx_size_high[tx_size]);
3883 : }
3884 :
3885 0 : void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
3886 : int blk_row, int blk_col, int plane, int block,
3887 : int plane_bsize, const ENTROPY_CONTEXT *a,
3888 : const ENTROPY_CONTEXT *l, RD_STATS *rd_stats) {
3889 0 : const AV1_COMMON *const cm = &cpi->common;
3890 0 : MACROBLOCKD *xd = &x->e_mbd;
3891 0 : const struct macroblock_plane *const p = &x->plane[plane];
3892 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
3893 : int64_t tmp;
3894 0 : tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
3895 0 : PLANE_TYPE plane_type = get_plane_type(plane);
3896 0 : TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
3897 0 : const SCAN_ORDER *const scan_order =
3898 0 : get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
3899 0 : BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
3900 0 : int bh = block_size_high[txm_bsize];
3901 0 : int bw = block_size_wide[txm_bsize];
3902 0 : int txb_h = tx_size_high_unit[tx_size];
3903 0 : int txb_w = tx_size_wide_unit[tx_size];
3904 :
3905 0 : int src_stride = p->src.stride;
3906 0 : uint8_t *src =
3907 0 : &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
3908 0 : uint8_t *dst =
3909 : &pd->dst
3910 0 : .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
3911 : #if CONFIG_HIGHBITDEPTH
3912 : DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
3913 : uint8_t *rec_buffer;
3914 : #else
3915 : DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
3916 : #endif // CONFIG_HIGHBITDEPTH
3917 0 : int max_blocks_high = block_size_high[plane_bsize];
3918 0 : int max_blocks_wide = block_size_wide[plane_bsize];
3919 0 : const int diff_stride = max_blocks_wide;
3920 0 : const int16_t *diff =
3921 0 : &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
3922 : int txb_coeff_cost;
3923 :
3924 0 : assert(tx_size < TX_SIZES_ALL);
3925 :
3926 0 : if (xd->mb_to_bottom_edge < 0)
3927 0 : max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
3928 0 : if (xd->mb_to_right_edge < 0)
3929 0 : max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
3930 :
3931 0 : max_blocks_high >>= tx_size_wide_log2[0];
3932 0 : max_blocks_wide >>= tx_size_wide_log2[0];
3933 :
3934 0 : int coeff_ctx = get_entropy_context(tx_size, a, l);
3935 :
3936 0 : av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
3937 : coeff_ctx, AV1_XFORM_QUANT_FP);
3938 :
3939 0 : av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
3940 :
3941 : // TODO(any): Use av1_dist_block to compute distortion
3942 : #if CONFIG_HIGHBITDEPTH
3943 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3944 0 : rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
3945 0 : aom_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL,
3946 : 0, NULL, 0, bw, bh, xd->bd);
3947 : } else {
3948 0 : rec_buffer = (uint8_t *)rec_buffer16;
3949 0 : aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0,
3950 : NULL, 0, bw, bh);
3951 : }
3952 : #else
3953 : aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL,
3954 : 0, bw, bh);
3955 : #endif // CONFIG_HIGHBITDEPTH
3956 :
3957 0 : if (blk_row + txb_h > max_blocks_high || blk_col + txb_w > max_blocks_wide) {
3958 : int idx, idy;
3959 0 : int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
3960 0 : int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
3961 0 : tmp = 0;
3962 0 : for (idy = 0; idy < blocks_height; ++idy) {
3963 0 : for (idx = 0; idx < blocks_width; ++idx) {
3964 0 : const int16_t *d =
3965 0 : diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]);
3966 0 : tmp += sum_squares_2d(d, diff_stride, 0);
3967 : }
3968 : }
3969 : } else {
3970 0 : tmp = sum_squares_2d(diff, diff_stride, tx_size);
3971 : }
3972 :
3973 : #if CONFIG_HIGHBITDEPTH
3974 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3975 0 : tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
3976 : #endif // CONFIG_HIGHBITDEPTH
3977 0 : rd_stats->sse += tmp * 16;
3978 0 : const int eob = p->eobs[block];
3979 :
3980 0 : av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer,
3981 : MAX_TX_SIZE, eob);
3982 0 : if (eob > 0) {
3983 0 : if (txb_w + blk_col > max_blocks_wide ||
3984 0 : txb_h + blk_row > max_blocks_high) {
3985 : int idx, idy;
3986 : unsigned int this_dist;
3987 0 : int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
3988 0 : int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
3989 0 : tmp = 0;
3990 0 : for (idy = 0; idy < blocks_height; ++idy) {
3991 0 : for (idx = 0; idx < blocks_width; ++idx) {
3992 0 : uint8_t *const s =
3993 0 : src + ((idy * src_stride + idx) << tx_size_wide_log2[0]);
3994 0 : uint8_t *const r =
3995 0 : rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]);
3996 0 : cpi->fn_ptr[0].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
3997 0 : tmp += this_dist;
3998 : }
3999 : }
4000 : } else {
4001 : uint32_t this_dist;
4002 0 : cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE,
4003 : &this_dist);
4004 0 : tmp = this_dist;
4005 : }
4006 : }
4007 0 : rd_stats->dist += tmp * 16;
4008 0 : txb_coeff_cost =
4009 0 : av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, 0);
4010 0 : rd_stats->rate += txb_coeff_cost;
4011 0 : rd_stats->skip &= (eob == 0);
4012 :
4013 : #if CONFIG_RD_DEBUG
4014 : av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
4015 : txb_coeff_cost);
4016 : #endif // CONFIG_RD_DEBUG
4017 0 : }
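/*
 * Editorial sketch of the distortion scaling in av1_tx_block_rd_b() above:
 * pixel-domain SSE is multiplied by 16 to sit on the same scale as the
 * transform-domain distortion RDCOST expects, and high-bitdepth SSE is
 * first rounded back to an 8-bit scale, mirroring
 * ROUND_POWER_OF_TWO(tmp, (bd - 8) * 2).
 */
#include <stdint.h>

static int64_t sketch_normalize_sse(uint64_t sse, int bit_depth) {
  if (bit_depth > 8) {
    const int shift = (bit_depth - 8) * 2;
    sse = (sse + (1ull << (shift - 1))) >> shift; /* round to 8-bit scale */
  }
  return (int64_t)sse * 16;
}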
4018 :
4019 0 : static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
4020 : int blk_col, int plane, int block, int block32,
4021 : TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize,
4022 : ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
4023 : TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
4024 : RD_STATS *rd_stats, int64_t ref_best_rd,
4025 : int *is_cost_valid, RD_STATS *rd_stats_stack) {
4026 0 : MACROBLOCKD *const xd = &x->e_mbd;
4027 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4028 0 : struct macroblock_plane *const p = &x->plane[plane];
4029 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
4030 0 : const int tx_row = blk_row >> (1 - pd->subsampling_y);
4031 0 : const int tx_col = blk_col >> (1 - pd->subsampling_x);
4032 0 :   TX_SIZE(*const inter_tx_size)[MAX_MIB_SIZE] =
4033 :       (TX_SIZE(*)[MAX_MIB_SIZE])&mbmi->inter_tx_size[tx_row][tx_col];
4035 0 : const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
4036 0 : const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
4037 0 : const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
4038 0 : int64_t this_rd = INT64_MAX;
4039 0 : ENTROPY_CONTEXT *pta = ta + blk_col;
4040 0 : ENTROPY_CONTEXT *ptl = tl + blk_row;
4041 : int coeff_ctx, i;
4042 0 : int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
4043 0 : mbmi->sb_type, tx_size);
4044 0 : int64_t sum_rd = INT64_MAX;
4045 0 : int tmp_eob = 0;
4046 : int zero_blk_rate;
4047 : RD_STATS sum_rd_stats;
4048 0 : const int tx_size_ctx = txsize_sqr_map[tx_size];
4049 :
4050 0 : av1_init_rd_stats(&sum_rd_stats);
4051 :
4052 0 : assert(tx_size < TX_SIZES_ALL);
4053 :
4054 0 : if (ref_best_rd < 0) {
4055 0 : *is_cost_valid = 0;
4056 0 : return;
4057 : }
4058 :
4059 0 : coeff_ctx = get_entropy_context(tx_size, pta, ptl);
4060 :
4061 0 : av1_init_rd_stats(rd_stats);
4062 :
4063 0 : if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
4064 :
4065 0 : zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
4066 0 : [coeff_ctx][EOB_TOKEN];
4067 :
4068 0 : if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
4069 0 : inter_tx_size[0][0] = tx_size;
4070 :
4071 0 : if (tx_size == TX_32X32 && mbmi->tx_type != DCT_DCT &&
4072 0 : rd_stats_stack[block32].rate != INT_MAX) {
4073 0 : *rd_stats = rd_stats_stack[block32];
4074 0 : p->eobs[block] = !rd_stats->skip;
4075 0 : x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
4076 : } else {
4077 0 : av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
4078 : plane_bsize, pta, ptl, rd_stats);
4079 0 : if (tx_size == TX_32X32) {
4080 0 : rd_stats_stack[block32] = *rd_stats;
4081 : }
4082 : }
4083 :
4084 0 : if ((RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >=
4085 0 : RDCOST(x->rdmult, x->rddiv, zero_blk_rate, rd_stats->sse) ||
4086 0 : rd_stats->skip == 1) &&
4087 0 : !xd->lossless[mbmi->segment_id]) {
4088 : #if CONFIG_RD_DEBUG
4089 : av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
4090 : zero_blk_rate - rd_stats->rate);
4091 : #endif // CONFIG_RD_DEBUG
4092 0 : rd_stats->rate = zero_blk_rate;
4093 0 : rd_stats->dist = rd_stats->sse;
4094 0 : rd_stats->skip = 1;
4095 0 : x->blk_skip[plane][blk_row * bw + blk_col] = 1;
4096 0 : p->eobs[block] = 0;
4097 : } else {
4098 0 : x->blk_skip[plane][blk_row * bw + blk_col] = 0;
4099 0 : rd_stats->skip = 0;
4100 : }
4101 :
4102 0 : if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
4103 0 : rd_stats->rate +=
4104 0 : av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
4105 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
4106 0 : tmp_eob = p->eobs[block];
4107 : }
4108 :
4109 0 : if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
4110 0 : const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
4111 0 : const int bsl = tx_size_wide_unit[sub_txs];
4112 0 : int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
4113 : RD_STATS this_rd_stats;
4114 0 : int this_cost_valid = 1;
4115 0 : int64_t tmp_rd = 0;
4116 :
4117 0 : sum_rd_stats.rate =
4118 0 : av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
4119 :
4120 0 : assert(tx_size < TX_SIZES_ALL);
4121 :
4122 0 : for (i = 0; i < 4 && this_cost_valid; ++i) {
4123 0 : int offsetr = blk_row + (i >> 1) * bsl;
4124 0 : int offsetc = blk_col + (i & 0x01) * bsl;
4125 :
4126 0 : if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
4127 :
4128 0 : select_tx_block(cpi, x, offsetr, offsetc, plane, block, block32, sub_txs,
4129 : depth + 1, plane_bsize, ta, tl, tx_above, tx_left,
4130 : &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid,
4131 : rd_stats_stack);
4132 :
4133 0 : av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);
4134 :
4135 0 : tmp_rd =
4136 0 : RDCOST(x->rdmult, x->rddiv, sum_rd_stats.rate, sum_rd_stats.dist);
4137 0 : if (this_rd < tmp_rd) break;
4138 0 : block += sub_step;
4139 : }
4140 0 : if (this_cost_valid) sum_rd = tmp_rd;
4141 : }
4142 :
4143 0 : if (this_rd < sum_rd) {
4144 : int idx, idy;
4145 0 : for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0);
4146 0 : for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0);
4147 0 : txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
4148 : tx_size);
4149 0 : inter_tx_size[0][0] = tx_size;
4150 0 : for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
4151 0 : for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
4152 0 : inter_tx_size[idy][idx] = tx_size;
4153 0 : mbmi->tx_size = tx_size;
4154 0 : if (this_rd == INT64_MAX) *is_cost_valid = 0;
4155 0 : x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
4156 : } else {
4157 0 : *rd_stats = sum_rd_stats;
4158 0 : if (sum_rd == INT64_MAX) *is_cost_valid = 0;
4159 : }
4160 : }
4161 :
4162 0 : static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
4163 : RD_STATS *rd_stats, BLOCK_SIZE bsize,
4164 : int64_t ref_best_rd, RD_STATS *rd_stats_stack) {
4165 0 : MACROBLOCKD *const xd = &x->e_mbd;
4166 0 : int is_cost_valid = 1;
4167 0 : int64_t this_rd = 0;
4168 :
4169 0 : if (ref_best_rd < 0) is_cost_valid = 0;
4170 :
4171 0 : av1_init_rd_stats(rd_stats);
4172 :
4173 0 : if (is_cost_valid) {
4174 0 : const struct macroblockd_plane *const pd = &xd->plane[0];
4175 0 : const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
4176 0 : const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
4177 0 : const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
4178 0 : const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
4179 0 : const int bh = tx_size_high_unit[max_tx_size];
4180 0 : const int bw = tx_size_wide_unit[max_tx_size];
4181 : int idx, idy;
4182 0 : int block = 0;
4183 0 : int block32 = 0;
4184 0 : int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
4185 : ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
4186 : ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
4187 : TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
4188 : TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
4189 :
4190 : RD_STATS pn_rd_stats;
4191 0 : av1_init_rd_stats(&pn_rd_stats);
4192 :
4193 0 : av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
4194 0 : memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
4195 0 : memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
4196 :
4197 0 : for (idy = 0; idy < mi_height; idy += bh) {
4198 0 : for (idx = 0; idx < mi_width; idx += bw) {
4199 0 : select_tx_block(cpi, x, idy, idx, 0, block, block32, max_tx_size,
4200 : mi_height != mi_width, plane_bsize, ctxa, ctxl,
4201 : tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd,
4202 : &is_cost_valid, rd_stats_stack);
4203 0 : av1_merge_rd_stats(rd_stats, &pn_rd_stats);
4204 0 : this_rd += AOMMIN(
4205 : RDCOST(x->rdmult, x->rddiv, pn_rd_stats.rate, pn_rd_stats.dist),
4206 : RDCOST(x->rdmult, x->rddiv, 0, pn_rd_stats.sse));
4207 0 : block += step;
4208 0 : ++block32;
4209 : }
4210 : }
4211 : }
4212 :
4213 0 : this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
4214 : RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
4215 0 : if (this_rd > ref_best_rd) is_cost_valid = 0;
4216 :
4217 0 : if (!is_cost_valid) {
4218 : // reset cost value
4219 0 : av1_invalid_rd_stats(rd_stats);
4220 : }
4221 0 : }
4222 :
4223 0 : static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
4224 : RD_STATS *rd_stats, BLOCK_SIZE bsize,
4225 : int64_t ref_best_rd, TX_TYPE tx_type,
4226 : RD_STATS *rd_stats_stack) {
4227 0 : const AV1_COMMON *const cm = &cpi->common;
4228 0 : MACROBLOCKD *const xd = &x->e_mbd;
4229 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4230 0 : const int is_inter = is_inter_block(mbmi);
4231 0 : aom_prob skip_prob = av1_get_skip_prob(cm, xd);
4232 0 : int s0 = av1_cost_bit(skip_prob, 0);
4233 0 : int s1 = av1_cost_bit(skip_prob, 1);
4234 : int64_t rd;
4235 : int row, col;
4236 0 : const int max_blocks_high = max_block_high(xd, bsize, 0);
4237 0 : const int max_blocks_wide = max_block_wide(xd, bsize, 0);
4238 :
4239 0 : mbmi->tx_type = tx_type;
4240 0 : inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, rd_stats_stack);
4241 0 : mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]);
4242 :
4243 0 : if (rd_stats->rate == INT_MAX) return INT64_MAX;
4244 :
4245 0 : for (row = 0; row < max_blocks_high / 2; ++row)
4246 0 : for (col = 0; col < max_blocks_wide / 2; ++col)
4247 0 : mbmi->min_tx_size = AOMMIN(
4248 : mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col]));
4249 :
4250 : #if CONFIG_EXT_TX
4251 0 : if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
4252 0 : cm->reduced_tx_set_used) > 1 &&
4253 0 : !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
4254 0 : const int ext_tx_set = get_ext_tx_set(mbmi->min_tx_size, bsize, is_inter,
4255 : cm->reduced_tx_set_used);
4256 0 : if (is_inter) {
4257 0 : if (ext_tx_set > 0)
4258 0 : rd_stats->rate +=
4259 : cpi->inter_tx_type_costs[ext_tx_set]
4260 0 : [txsize_sqr_map[mbmi->min_tx_size]]
4261 0 : [mbmi->tx_type];
4262 : } else {
4263 0 : if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
4264 0 : rd_stats->rate +=
4265 0 : cpi->intra_tx_type_costs[ext_tx_set][mbmi->min_tx_size][mbmi->mode]
4266 0 : [mbmi->tx_type];
4267 : }
4268 : }
4269 : #else // CONFIG_EXT_TX
4270 : if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
4271 : rd_stats->rate +=
4272 : cpi->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
4273 : #endif // CONFIG_EXT_TX
4274 :
4275 0 : if (rd_stats->skip)
4276 0 : rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
4277 : else
4278 0 : rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist);
4279 :
4280 0 : if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
4281 0 : !(rd_stats->skip))
4282 0 : rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
4283 :
4284 0 : return rd;
4285 : }
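/*
 * Editorial sketch of the final cost computed above: s0/s1 are the costs
 * of signalling skip = 0/1. A skipped block pays s1 plus its sse; a coded
 * block pays its token rate plus s0; and a lossy inter block may still
 * fall back to the skip cost when that turns out cheaper.
 */
#include <stdint.h>

static int64_t sketch_final_rd(int rdmult, int rddiv, int s0, int s1,
                               int rate, int64_t dist, int64_t sse, int skip,
                               int may_fall_back_to_skip) {
#define SKETCH_RDCOST(R, D) \
  (((128 + (int64_t)(R) * (rdmult)) >> 8) + ((int64_t)(D) << (rddiv)))
  int64_t rd = skip ? SKETCH_RDCOST(s1, sse) : SKETCH_RDCOST(rate + s0, dist);
  if (may_fall_back_to_skip && !skip) {
    const int64_t rd_skip = SKETCH_RDCOST(s1, sse);
    if (rd_skip < rd) rd = rd_skip;
  }
  return rd;
#undef SKETCH_RDCOST
}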
4286 :
4287 0 : static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
4288 : RD_STATS *rd_stats, BLOCK_SIZE bsize,
4289 : int64_t ref_best_rd) {
4290 0 : const AV1_COMMON *cm = &cpi->common;
4291 0 : const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
4292 0 : MACROBLOCKD *const xd = &x->e_mbd;
4293 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4294 0 : int64_t rd = INT64_MAX;
4295 0 : int64_t best_rd = INT64_MAX;
4296 0 : TX_TYPE tx_type, best_tx_type = DCT_DCT;
4297 0 : const int is_inter = is_inter_block(mbmi);
4298 : TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
4299 0 : TX_SIZE best_tx = max_txsize_lookup[bsize];
4300 0 : TX_SIZE best_min_tx_size = TX_SIZES_ALL;
4301 : uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
4302 0 : const int n4 = bsize_to_num_blk(bsize);
4303 : int idx, idy;
4304 0 : int prune = 0;
4305 0 : const int count32 =
4306 0 : 1 << (2 * (cm->mib_size_log2 - mi_width_log2_lookup[BLOCK_32X32]));
4307 : #if CONFIG_EXT_PARTITION
4308 : RD_STATS rd_stats_stack[16];
4309 : #else
4310 : RD_STATS rd_stats_stack[4];
4311 : #endif // CONFIG_EXT_PARTITION
4312 : #if CONFIG_EXT_TX
4313 0 : const int ext_tx_set =
4314 0 : get_ext_tx_set(max_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
4315 : #endif // CONFIG_EXT_TX
4316 :
4317 0 : if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
4318 : #if CONFIG_EXT_TX
4319 0 : prune = prune_tx_types(cpi, bsize, x, xd, ext_tx_set);
4320 : #else
4321 : prune = prune_tx_types(cpi, bsize, x, xd, 0);
4322 : #endif // CONFIG_EXT_TX
4323 :
4324 0 : av1_invalid_rd_stats(rd_stats);
4325 :
4326 0 : for (idx = 0; idx < count32; ++idx)
4327 0 : av1_invalid_rd_stats(&rd_stats_stack[idx]);
4328 :
4329 0 : for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
4330 : RD_STATS this_rd_stats;
4331 0 : av1_init_rd_stats(&this_rd_stats);
4332 : #if CONFIG_EXT_TX
4333 0 : if (is_inter) {
4334 0 : if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
4335 0 : if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
4336 0 : if (!do_tx_type_search(tx_type, prune)) continue;
4337 : }
4338 : } else {
4339 : if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
4340 : if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
4341 : }
4342 0 : if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
4343 : }
4344 : #else // CONFIG_EXT_TX
4345 : if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
4346 : !do_tx_type_search(tx_type, prune))
4347 : continue;
4348 : #endif // CONFIG_EXT_TX
4349 0 : if (is_inter && x->use_default_inter_tx_type &&
4350 0 : tx_type != get_default_tx_type(0, xd, 0, max_tx_size))
4351 0 : continue;
4352 :
4353 0 : if (xd->lossless[mbmi->segment_id])
4354 0 : if (tx_type != DCT_DCT) continue;
4355 :
4356 0 : rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
4357 : tx_type, rd_stats_stack);
4358 :
4359 0 : if (rd < best_rd) {
4360 0 : best_rd = rd;
4361 0 : *rd_stats = this_rd_stats;
4362 0 : best_tx_type = mbmi->tx_type;
4363 0 : best_tx = mbmi->tx_size;
4364 0 : best_min_tx_size = mbmi->min_tx_size;
4365 0 : memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
4366 0 : for (idy = 0; idy < xd->n8_h; ++idy)
4367 0 : for (idx = 0; idx < xd->n8_w; ++idx)
4368 0 : best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
4369 : }
4370 : }
4371 :
4372 0 : mbmi->tx_type = best_tx_type;
4373 0 : for (idy = 0; idy < xd->n8_h; ++idy)
4374 0 : for (idx = 0; idx < xd->n8_w; ++idx)
4375 0 : mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
4376 0 : mbmi->tx_size = best_tx;
4377 0 : mbmi->min_tx_size = best_min_tx_size;
4378 0 : memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
4379 0 : }
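/*
 * Editorial sketch of the candidate filter at the top of the tx_type loop
 * in select_tx_type_yrd() above: a transform type is evaluated only if it
 * survives the ext-tx set for this block size, the optional prune mask,
 * the "default type only" speed feature, and the rule that lossless
 * segments may use DCT_DCT only. All flags are precomputed booleans here;
 * in the real loop they come from ext_tx_used_*, do_tx_type_search(), etc.
 */
static int sketch_try_tx_type(int allowed_in_set, int pruned,
                              int default_only, int is_default_type,
                              int is_lossless, int is_dct_dct) {
  if (!allowed_in_set) return 0;
  if (pruned) return 0;
  if (default_only && !is_default_type) return 0;
  if (is_lossless && !is_dct_dct) return 0;
  return 1;
}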
4380 :
4381 0 : static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
4382 : int blk_col, int plane, int block, TX_SIZE tx_size,
4383 : BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
4384 : ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) {
4385 0 : MACROBLOCKD *const xd = &x->e_mbd;
4386 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4387 0 : struct macroblock_plane *const p = &x->plane[plane];
4388 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
4389 0 : BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
4390 0 : const int tx_row = blk_row >> (1 - pd->subsampling_y);
4391 0 : const int tx_col = blk_col >> (1 - pd->subsampling_x);
4392 : TX_SIZE plane_tx_size;
4393 0 : const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
4394 0 : const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
4395 :
4396 0 : assert(tx_size < TX_SIZES_ALL);
4397 :
4398 0 : if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
4399 :
4400 0 : plane_tx_size =
4401 0 : plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
4402 0 : : mbmi->inter_tx_size[tx_row][tx_col];
4403 :
4404 0 : if (tx_size == plane_tx_size) {
4405 : int i;
4406 0 : ENTROPY_CONTEXT *ta = above_ctx + blk_col;
4407 0 : ENTROPY_CONTEXT *tl = left_ctx + blk_row;
4408 0 : av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
4409 : plane_bsize, ta, tl, rd_stats);
4410 :
4411 0 : for (i = 0; i < tx_size_wide_unit[tx_size]; ++i)
4412 0 : ta[i] = !(p->eobs[block] == 0);
4413 0 : for (i = 0; i < tx_size_high_unit[tx_size]; ++i)
4414 0 : tl[i] = !(p->eobs[block] == 0);
4415 : } else {
4416 0 : const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
4417 0 : const int bsl = tx_size_wide_unit[sub_txs];
4418 0 : int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
4419 : int i;
4420 :
4421 0 : assert(bsl > 0);
4422 :
4423 0 : for (i = 0; i < 4; ++i) {
4424 0 : int offsetr = blk_row + (i >> 1) * bsl;
4425 0 : int offsetc = blk_col + (i & 0x01) * bsl;
4426 :
4427 0 : if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
4428 :
4429 0 : tx_block_rd(cpi, x, offsetr, offsetc, plane, block, sub_txs, plane_bsize,
4430 : above_ctx, left_ctx, rd_stats);
4431 0 : block += step;
4432 : }
4433 : }
4434 : }
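// ----------------------------------------------------------------------------
// tx_block_rd() above walks a transform quadtree: a node whose size matches
// the coded transform size is a leaf (its rate/distortion are accumulated);
// otherwise it splits into four half-size children indexed by (i >> 1, i & 1).
// A sketch of the same traversal in isolation, with hypothetical is_leaf()
// and visit() callbacks; not aomedia code.
static void walk_tx_tree(int row, int col, int size_log2, int min_size_log2,
                         int (*is_leaf)(int row, int col, int size_log2),
                         void (*visit)(int row, int col, int size_log2)) {
  if (size_log2 == min_size_log2 || is_leaf(row, col, size_log2)) {
    visit(row, col, size_log2);  // leaf: accumulate this block's RD stats
    return;
  }
  const int half = 1 << (size_log2 - 1);  // child offset, like bsl above
  for (int i = 0; i < 4; ++i)
    walk_tx_tree(row + (i >> 1) * half, col + (i & 1) * half, size_log2 - 1,
                 min_size_log2, is_leaf, visit);
}
// ----------------------------------------------------------------------------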
4435 :
4437 : // Return 0 if early termination is triggered (no valid rd cost is
4438 : // available); return 1 if the rd cost values are valid.
4438 0 : static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
4439 : RD_STATS *rd_stats, BLOCK_SIZE bsize,
4440 : int64_t ref_best_rd) {
4441 0 : MACROBLOCKD *const xd = &x->e_mbd;
4442 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4443 : int plane;
4444 0 : int is_cost_valid = 1;
4445 : int64_t this_rd;
4446 :
4447 0 : if (ref_best_rd < 0) is_cost_valid = 0;
4448 :
4449 0 : av1_init_rd_stats(rd_stats);
4450 :
4451 : #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
4452 0 : if (x->skip_chroma_rd) return is_cost_valid;
4453 0 : bsize = scale_chroma_bsize(mbmi->sb_type, xd->plane[1].subsampling_x,
4454 : xd->plane[1].subsampling_y);
4455 : #endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
4456 :
4457 : #if CONFIG_EXT_TX && CONFIG_RECT_TX
4458 0 : if (is_rect_tx(mbmi->tx_size)) {
4459 0 : return super_block_uvrd(cpi, x, rd_stats, bsize, ref_best_rd);
4460 : }
4461 : #endif // CONFIG_EXT_TX && CONFIG_RECT_TX
4462 :
4463 0 : if (is_inter_block(mbmi) && is_cost_valid) {
4464 0 : for (plane = 1; plane < MAX_MB_PLANE; ++plane)
4465 0 : av1_subtract_plane(x, bsize, plane);
4466 : }
4467 :
4468 0 : for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
4469 0 : const struct macroblockd_plane *const pd = &xd->plane[plane];
4470 0 : const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
4471 0 : const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
4472 0 : const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
4473 0 : const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
4474 0 : const int bh = tx_size_high_unit[max_tx_size];
4475 0 : const int bw = tx_size_wide_unit[max_tx_size];
4476 : int idx, idy;
4477 0 : int block = 0;
4478 0 : const int step = bh * bw;
4479 : ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
4480 : ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
4481 : RD_STATS pn_rd_stats;
4482 0 : av1_init_rd_stats(&pn_rd_stats);
4483 :
4484 0 : av1_get_entropy_contexts(bsize, 0, pd, ta, tl);
4485 :
4486 0 : for (idy = 0; idy < mi_height; idy += bh) {
4487 0 : for (idx = 0; idx < mi_width; idx += bw) {
4488 0 : tx_block_rd(cpi, x, idy, idx, plane, block, max_tx_size, plane_bsize,
4489 : ta, tl, &pn_rd_stats);
4490 0 : block += step;
4491 : }
4492 : }
4493 :
4494 0 : if (pn_rd_stats.rate == INT_MAX) {
4495 0 : is_cost_valid = 0;
4496 0 : break;
4497 : }
4498 :
4499 0 : av1_merge_rd_stats(rd_stats, &pn_rd_stats);
4500 :
4501 0 : this_rd =
4502 0 : AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
4503 : RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
4504 :
4505 0 : if (this_rd > ref_best_rd) {
4506 0 : is_cost_valid = 0;
4507 0 : break;
4508 : }
4509 : }
4510 :
4511 0 : if (!is_cost_valid) {
4512 : // reset cost value
4513 0 : av1_invalid_rd_stats(rd_stats);
4514 : }
4515 :
4516 0 : return is_cost_valid;
4517 : }
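// ----------------------------------------------------------------------------
// The early exit above keeps the cheaper of "code the residual" and "skip the
// residual" -- min(lambda*rate + dist, lambda*0 + sse) -- and compares it
// against the best RD cost so far. A simplified model of that decision; the
// real RDCOST() macro also applies fixed-point scaling constants that are not
// reproduced here.
#include <stdint.h>

static int64_t rd_cost_model(int64_t lambda, int rate, int64_t dist) {
  return lambda * rate + dist;  // Lagrangian cost J = lambda * R + D
}

static int keep_searching(int64_t lambda, int rate, int64_t dist, int64_t sse,
                          int64_t ref_best_rd) {
  const int64_t coded = rd_cost_model(lambda, rate, dist);  // code residual
  const int64_t skipped = rd_cost_model(lambda, 0, sse);    // skip residual
  const int64_t this_rd = coded < skipped ? coded : skipped;
  return this_rd <= ref_best_rd;  // above: this_rd > ref_best_rd terminates
}
// ----------------------------------------------------------------------------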
4518 : #endif // CONFIG_VAR_TX
4519 :
4520 : #if CONFIG_PALETTE
4521 0 : static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
4522 : int dc_mode_cost,
4523 : uint8_t *best_palette_color_map,
4524 : MB_MODE_INFO *const best_mbmi,
4525 : int64_t *best_rd, int *rate,
4526 : int *rate_tokenonly, int64_t *distortion,
4527 : int *skippable) {
4528 0 : MACROBLOCKD *const xd = &x->e_mbd;
4529 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4530 0 : assert(!is_inter_block(mbmi));
4531 0 : PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4532 0 : const BLOCK_SIZE bsize = mbmi->sb_type;
4533 : int this_rate;
4534 : int64_t this_rd;
4535 : int colors_u, colors_v, colors;
4536 0 : const int src_stride = x->plane[1].src.stride;
4537 0 : const uint8_t *const src_u = x->plane[1].src.buf;
4538 0 : const uint8_t *const src_v = x->plane[2].src.buf;
4539 0 : uint8_t *const color_map = xd->plane[1].color_index_map;
4540 : RD_STATS tokenonly_rd_stats;
4541 : int plane_block_width, plane_block_height, rows, cols;
4542 0 : av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
4543 : &plane_block_height, &rows, &cols);
4544 0 : if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return;
4545 :
4546 0 : mbmi->uv_mode = DC_PRED;
4547 : #if CONFIG_FILTER_INTRA
4548 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
4549 : #endif // CONFIG_FILTER_INTRA
4550 :
4551 : #if CONFIG_HIGHBITDEPTH
4552 0 : if (cpi->common.use_highbitdepth) {
4553 0 : colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
4554 0 : cpi->common.bit_depth);
4555 0 : colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
4556 0 : cpi->common.bit_depth);
4557 : } else {
4558 : #endif // CONFIG_HIGHBITDEPTH
4559 0 : colors_u = av1_count_colors(src_u, src_stride, rows, cols);
4560 0 : colors_v = av1_count_colors(src_v, src_stride, rows, cols);
4561 : #if CONFIG_HIGHBITDEPTH
4562 : }
4563 : #endif // CONFIG_HIGHBITDEPTH
4564 :
4565 : #if CONFIG_PALETTE_DELTA_ENCODING
4566 : const MODE_INFO *above_mi = xd->above_mi;
4567 : const MODE_INFO *left_mi = xd->left_mi;
4568 : uint16_t color_cache[2 * PALETTE_MAX_SIZE];
4569 : const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache);
4570 : #endif // CONFIG_PALETTE_DELTA_ENCODING
4571 :
4572 0 : colors = colors_u > colors_v ? colors_u : colors_v;
4573 0 : if (colors > 1 && colors <= 64) {
4574 : int r, c, n, i, j;
4575 0 : const int max_itr = 50;
4576 : uint8_t color_order[PALETTE_MAX_SIZE];
4577 : float lb_u, ub_u, val_u;
4578 : float lb_v, ub_v, val_v;
4579 0 : float *const data = x->palette_buffer->kmeans_data_buf;
4580 : float centroids[2 * PALETTE_MAX_SIZE];
4581 :
4582 : #if CONFIG_HIGHBITDEPTH
4583 0 : uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
4584 0 : uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
4585 0 : if (cpi->common.use_highbitdepth) {
4586 0 : lb_u = src_u16[0];
4587 0 : ub_u = src_u16[0];
4588 0 : lb_v = src_v16[0];
4589 0 : ub_v = src_v16[0];
4590 : } else {
4591 : #endif // CONFIG_HIGHBITDEPTH
4592 0 : lb_u = src_u[0];
4593 0 : ub_u = src_u[0];
4594 0 : lb_v = src_v[0];
4595 0 : ub_v = src_v[0];
4596 : #if CONFIG_HIGHBITDEPTH
4597 : }
4598 : #endif // CONFIG_HIGHBITDEPTH
4599 :
4600 0 : for (r = 0; r < rows; ++r) {
4601 0 : for (c = 0; c < cols; ++c) {
4602 : #if CONFIG_HIGHBITDEPTH
4603 0 : if (cpi->common.use_highbitdepth) {
4604 0 : val_u = src_u16[r * src_stride + c];
4605 0 : val_v = src_v16[r * src_stride + c];
4606 0 : data[(r * cols + c) * 2] = val_u;
4607 0 : data[(r * cols + c) * 2 + 1] = val_v;
4608 : } else {
4609 : #endif // CONFIG_HIGHBITDEPTH
4610 0 : val_u = src_u[r * src_stride + c];
4611 0 : val_v = src_v[r * src_stride + c];
4612 0 : data[(r * cols + c) * 2] = val_u;
4613 0 : data[(r * cols + c) * 2 + 1] = val_v;
4614 : #if CONFIG_HIGHBITDEPTH
4615 : }
4616 : #endif // CONFIG_HIGHBITDEPTH
4617 0 : if (val_u < lb_u)
4618 0 : lb_u = val_u;
4619 0 : else if (val_u > ub_u)
4620 0 : ub_u = val_u;
4621 0 : if (val_v < lb_v)
4622 0 : lb_v = val_v;
4623 0 : else if (val_v > ub_v)
4624 0 : ub_v = val_v;
4625 : }
4626 : }
4627 :
4628 0 : for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
4629 0 : --n) {
4630 0 : for (i = 0; i < n; ++i) {
4631 0 : centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
4632 0 : centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
4633 : }
4634 0 : av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
4635 : #if CONFIG_PALETTE_DELTA_ENCODING
4636 : optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
4637 : // Sort the U channel colors in ascending order.
4638 : for (i = 0; i < 2 * (n - 1); i += 2) {
4639 : int min_idx = i;
4640 : float min_val = centroids[i];
4641 : for (j = i + 2; j < 2 * n; j += 2)
4642 : if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
4643 : if (min_idx != i) {
4644 : float temp_u = centroids[i], temp_v = centroids[i + 1];
4645 : centroids[i] = centroids[min_idx];
4646 : centroids[i + 1] = centroids[min_idx + 1];
4647 : centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
4648 : }
4649 : }
4650 : av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
4651 : #endif // CONFIG_PALETTE_DELTA_ENCODING
4652 0 : extend_palette_color_map(color_map, cols, rows, plane_block_width,
4653 : plane_block_height);
4654 0 : pmi->palette_size[1] = n;
4655 0 : for (i = 1; i < 3; ++i) {
4656 0 : for (j = 0; j < n; ++j) {
4657 : #if CONFIG_HIGHBITDEPTH
4658 0 : if (cpi->common.use_highbitdepth)
4659 0 : pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
4660 0 : (int)centroids[j * 2 + i - 1], cpi->common.bit_depth);
4661 : else
4662 : #endif // CONFIG_HIGHBITDEPTH
4663 0 : pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
4664 0 : clip_pixel((int)centroids[j * 2 + i - 1]);
4665 : }
4666 : }
4667 :
4668 0 : super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
4669 0 : if (tokenonly_rd_stats.rate == INT_MAX) continue;
4670 0 : this_rate =
4671 0 : tokenonly_rd_stats.rate + dc_mode_cost +
4672 0 : cpi->palette_uv_size_cost[bsize - BLOCK_8X8][n - PALETTE_MIN_SIZE] +
4673 0 : write_uniform_cost(n, color_map[0]) +
4674 0 : av1_cost_bit(
4675 : av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 1);
4676 0 : this_rate += av1_palette_color_cost_uv(pmi,
4677 : #if CONFIG_PALETTE_DELTA_ENCODING
4678 : color_cache, n_cache,
4679 : #endif // CONFIG_PALETTE_DELTA_ENCODING
4680 0 : cpi->common.bit_depth);
4681 0 : for (i = 0; i < rows; ++i) {
4682 0 : for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
4683 : int color_idx;
4684 0 : const int color_ctx = av1_get_palette_color_index_context(
4685 : color_map, plane_block_width, i, j, n, color_order, &color_idx);
4686 0 : assert(color_idx >= 0 && color_idx < n);
4687 0 : this_rate += cpi->palette_uv_color_cost[n - PALETTE_MIN_SIZE]
4688 0 : [color_ctx][color_idx];
4689 : }
4690 : }
4691 :
4692 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
4693 0 : if (this_rd < *best_rd) {
4694 0 : *best_rd = this_rd;
4695 0 : *best_mbmi = *mbmi;
4696 0 : memcpy(best_palette_color_map, color_map,
4697 0 : plane_block_width * plane_block_height *
4698 : sizeof(best_palette_color_map[0]));
4699 0 : *rate = this_rate;
4700 0 : *distortion = tokenonly_rd_stats.dist;
4701 0 : *rate_tokenonly = tokenonly_rd_stats.rate;
4702 0 : *skippable = tokenonly_rd_stats.skip;
4703 : }
4704 : }
4705 : }
4706 0 : if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
4707 0 : memcpy(color_map, best_palette_color_map,
4708 0 : rows * cols * sizeof(best_palette_color_map[0]));
4709 : }
4710 : }
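// ----------------------------------------------------------------------------
// The palette search above seeds each k-means run with centroids at the
// midpoints of n equal-width bins across the observed [lb, ub] range of each
// channel: lb + (2*i + 1) * (ub - lb) / (2*n). The seeding rule in isolation,
// for a single channel (illustration only):
static void seed_centroids(float lb, float ub, int n, float *centroids) {
  for (int i = 0; i < n; ++i) {
    // Bin i spans [lb + i*w, lb + (i+1)*w) with w = (ub - lb) / n; the
    // centroid is placed at that bin's midpoint.
    centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
  }
}
// e.g. lb = 0, ub = 8, n = 4 seeds the centroids at 1, 3, 5 and 7.
// ----------------------------------------------------------------------------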
4711 : #endif // CONFIG_PALETTE
4712 :
4713 : #if CONFIG_FILTER_INTRA
4714 : // Return 1 if a filter intra mode is selected; return 0 otherwise.
4715 : static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
4716 : int *rate, int *rate_tokenonly,
4717 : int64_t *distortion, int *skippable,
4718 : BLOCK_SIZE bsize, int64_t *best_rd) {
4719 : MACROBLOCKD *const xd = &x->e_mbd;
4720 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
4721 : int filter_intra_selected_flag = 0;
4722 : int this_rate;
4723 : int64_t this_rd;
4724 : FILTER_INTRA_MODE mode;
4725 : FILTER_INTRA_MODE_INFO filter_intra_mode_info;
4726 : RD_STATS tokenonly_rd_stats;
4727 :
4728 : av1_zero(filter_intra_mode_info);
4729 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1;
4730 : mbmi->uv_mode = DC_PRED;
4731 : #if CONFIG_PALETTE
4732 : mbmi->palette_mode_info.palette_size[1] = 0;
4733 : #endif // CONFIG_PALETTE
4734 :
4735 : for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
4736 : mbmi->filter_intra_mode_info.filter_intra_mode[1] = mode;
4737 : if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd))
4738 : continue;
4739 :
4740 : this_rate = tokenonly_rd_stats.rate +
4741 : av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) +
4742 : cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
4743 : write_uniform_cost(FILTER_INTRA_MODES, mode);
4744 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
4745 : if (this_rd < *best_rd) {
4746 : *best_rd = this_rd;
4747 : *rate = this_rate;
4748 : *rate_tokenonly = tokenonly_rd_stats.rate;
4749 : *distortion = tokenonly_rd_stats.dist;
4750 : *skippable = tokenonly_rd_stats.skip;
4751 : filter_intra_mode_info = mbmi->filter_intra_mode_info;
4752 : filter_intra_selected_flag = 1;
4753 : }
4754 : }
4755 :
4756 : if (filter_intra_selected_flag) {
4757 : mbmi->uv_mode = DC_PRED;
4758 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
4759 : filter_intra_mode_info.use_filter_intra_mode[1];
4760 : mbmi->filter_intra_mode_info.filter_intra_mode[1] =
4761 : filter_intra_mode_info.filter_intra_mode[1];
4762 : return 1;
4763 : } else {
4764 : return 0;
4765 : }
4766 : }
4767 : #endif // CONFIG_FILTER_INTRA
4768 :
4769 : #if CONFIG_EXT_INTRA
4770 : // Run RD calculation with a given chroma intra prediction angle, and return
4771 : // the RD cost. Update the best mode info if the RD cost is the best so far.
4772 0 : static int64_t pick_intra_angle_routine_sbuv(
4773 : const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
4774 : int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
4775 : int *best_angle_delta, int64_t *best_rd) {
4776 0 : MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
4777 0 : assert(!is_inter_block(mbmi));
4778 : int this_rate;
4779 : int64_t this_rd;
4780 : RD_STATS tokenonly_rd_stats;
4781 :
4782 0 : if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
4783 0 : return INT64_MAX;
4784 0 : this_rate = tokenonly_rd_stats.rate + rate_overhead;
4785 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
4786 0 : if (this_rd < *best_rd) {
4787 0 : *best_rd = this_rd;
4788 0 : *best_angle_delta = mbmi->angle_delta[1];
4789 0 : *rate = this_rate;
4790 0 : rd_stats->rate = tokenonly_rd_stats.rate;
4791 0 : rd_stats->dist = tokenonly_rd_stats.dist;
4792 0 : rd_stats->skip = tokenonly_rd_stats.skip;
4793 : }
4794 0 : return this_rd;
4795 : }
4796 :
4797 : // With a given chroma directional intra prediction mode, pick the best angle
4798 : // delta. Return true if an RD cost smaller than the input one is found.
4799 0 : static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
4800 : BLOCK_SIZE bsize, int rate_overhead,
4801 : int64_t best_rd, int *rate,
4802 : RD_STATS *rd_stats) {
4803 0 : MACROBLOCKD *const xd = &x->e_mbd;
4804 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
4805 0 : assert(!is_inter_block(mbmi));
4806 0 : int i, angle_delta, best_angle_delta = 0;
4807 : int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
4808 :
4809 0 : rd_stats->rate = INT_MAX;
4810 0 : rd_stats->skip = 0;
4811 0 : rd_stats->dist = INT64_MAX;
4812 0 : for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
4813 :
4814 0 : for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
4815 0 : for (i = 0; i < 2; ++i) {
4816 0 : best_rd_in = (best_rd == INT64_MAX)
4817 : ? INT64_MAX
4818 0 : : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
4819 0 : mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
4820 0 : this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
4821 : best_rd_in, rate, rd_stats,
4822 : &best_angle_delta, &best_rd);
4823 0 : rd_cost[2 * angle_delta + i] = this_rd;
4824 0 : if (angle_delta == 0) {
4825 0 : if (this_rd == INT64_MAX) return 0;
4826 0 : rd_cost[1] = this_rd;
4827 0 : break;
4828 : }
4829 : }
4830 : }
4831 :
4832 0 : assert(best_rd != INT64_MAX);
4833 0 : for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
4834 : int64_t rd_thresh;
4835 0 : for (i = 0; i < 2; ++i) {
4836 0 : int skip_search = 0;
4837 0 : rd_thresh = best_rd + (best_rd >> 5);
4838 0 : if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
4839 0 : rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
4840 0 : skip_search = 1;
4841 0 : if (!skip_search) {
4842 0 : mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
4843 0 : pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
4844 : rate, rd_stats, &best_angle_delta,
4845 : &best_rd);
4846 : }
4847 : }
4848 : }
4849 :
4850 0 : mbmi->angle_delta[1] = best_angle_delta;
4851 0 : return rd_stats->rate != INT_MAX;
4852 : }
4853 : #endif // CONFIG_EXT_INTRA
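// ----------------------------------------------------------------------------
// rd_pick_intra_angle_sbuv() above is a two-stage 1-D search: stage 1 scores
// the even angle-delta magnitudes; stage 2 scores an odd magnitude only if
// one of its even neighbours (same sign) came within best + best/32 of the
// best cost so far. The same schedule with a hypothetical score() callback;
// not aomedia code. max_delta is assumed <= 15 (it is 3 in the codec).
#include <stdint.h>

static int64_t min64(int64_t a, int64_t b) { return a < b ? a : b; }

static int64_t two_stage_search(int max_delta, int64_t (*score)(int delta)) {
  int64_t cost[16][2];  // cost[m][s]: magnitude m with sign s (0: +, 1: -)
  for (int m = 0; m <= max_delta; ++m) cost[m][0] = cost[m][1] = INT64_MAX;
  int64_t best = cost[0][0] = cost[0][1] = score(0);
  for (int m = 2; m <= max_delta; m += 2)  // stage 1: even magnitudes
    for (int s = 0; s < 2; ++s)
      best = min64(best, cost[m][s] = score(s ? -m : m));
  for (int m = 1; m <= max_delta; m += 2) {  // stage 2: odd magnitudes
    const int64_t thresh = best + (best >> 5);  // ~3% slack
    for (int s = 0; s < 2; ++s)
      if (cost[m - 1][s] <= thresh ||
          (m + 1 <= max_delta && cost[m + 1][s] <= thresh))
        best = min64(best, score(s ? -m : m));
  }
  return best;
}
// ----------------------------------------------------------------------------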
4854 :
4855 0 : static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
4856 0 : mbmi->uv_mode = DC_PRED;
4857 : #if CONFIG_PALETTE
4858 0 : mbmi->palette_mode_info.palette_size[1] = 0;
4859 : #endif // CONFIG_PALETTE
4860 : #if CONFIG_FILTER_INTRA
4861 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
4862 : #endif // CONFIG_FILTER_INTRA
4863 0 : }
4864 :
4865 0 : static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
4866 : int *rate, int *rate_tokenonly,
4867 : int64_t *distortion, int *skippable,
4868 : BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
4869 0 : MACROBLOCKD *xd = &x->e_mbd;
4870 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
4871 0 : assert(!is_inter_block(mbmi));
4872 0 : MB_MODE_INFO best_mbmi = *mbmi;
4873 : PREDICTION_MODE mode;
4874 0 : int64_t best_rd = INT64_MAX, this_rd;
4875 : int this_rate;
4876 : RD_STATS tokenonly_rd_stats;
4877 : #if CONFIG_PVQ
4878 : od_rollback_buffer buf;
4879 : od_encode_checkpoint(&x->daala_enc, &buf);
4880 : #endif // CONFIG_PVQ
4881 : #if CONFIG_PALETTE
4882 0 : PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4883 0 : uint8_t *best_palette_color_map = NULL;
4884 : #endif // CONFIG_PALETTE
4885 :
4886 0 : for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
4887 : #if CONFIG_EXT_INTRA
4888 0 : const int is_directional_mode =
4889 0 : av1_is_directional_mode(mode, mbmi->sb_type);
4890 : #endif // CONFIG_EXT_INTRA
4891 0 : if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
4892 : (1 << mode)))
4893 0 : continue;
4894 :
4895 0 : mbmi->uv_mode = mode;
4896 : #if CONFIG_EXT_INTRA
4897 0 : mbmi->angle_delta[1] = 0;
4898 0 : if (is_directional_mode) {
4899 0 : const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
4900 0 : write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
4901 0 : if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
4902 : &this_rate, &tokenonly_rd_stats))
4903 0 : continue;
4904 : } else {
4905 : #endif // CONFIG_EXT_INTRA
4906 0 : if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
4907 : #if CONFIG_PVQ
4908 : od_encode_rollback(&x->daala_enc, &buf);
4909 : #endif // CONFIG_PVQ
4910 0 : continue;
4911 : }
4912 : #if CONFIG_EXT_INTRA
4913 : }
4914 : #endif // CONFIG_EXT_INTRA
4915 0 : this_rate =
4916 0 : tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];
4917 :
4918 : #if CONFIG_EXT_INTRA
4919 0 : if (is_directional_mode) {
4920 0 : this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
4921 0 : MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
4922 : }
4923 : #endif // CONFIG_EXT_INTRA
4924 : #if CONFIG_FILTER_INTRA
4925 : if (mbmi->sb_type >= BLOCK_8X8 && mode == DC_PRED)
4926 : this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 0);
4927 : #endif // CONFIG_FILTER_INTRA
4928 : #if CONFIG_PALETTE
4929 0 : if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 &&
4930 : mode == DC_PRED)
4931 0 : this_rate += av1_cost_bit(
4932 : av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
4933 : #endif // CONFIG_PALETTE
4934 :
4935 : #if CONFIG_PVQ
4936 : od_encode_rollback(&x->daala_enc, &buf);
4937 : #endif // CONFIG_PVQ
4938 0 : this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
4939 :
4940 0 : if (this_rd < best_rd) {
4941 0 : best_mbmi = *mbmi;
4942 0 : best_rd = this_rd;
4943 0 : *rate = this_rate;
4944 0 : *rate_tokenonly = tokenonly_rd_stats.rate;
4945 0 : *distortion = tokenonly_rd_stats.dist;
4946 0 : *skippable = tokenonly_rd_stats.skip;
4947 : }
4948 : }
4949 :
4950 : #if CONFIG_PALETTE
4951 0 : if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) {
4952 0 : best_palette_color_map = x->palette_buffer->best_palette_color_map;
4953 0 : rd_pick_palette_intra_sbuv(cpi, x,
4954 0 : cpi->intra_uv_mode_cost[mbmi->mode][DC_PRED],
4955 : best_palette_color_map, &best_mbmi, &best_rd,
4956 : rate, rate_tokenonly, distortion, skippable);
4957 : }
4958 : #endif // CONFIG_PALETTE
4959 :
4960 : #if CONFIG_FILTER_INTRA
4961 : if (mbmi->sb_type >= BLOCK_8X8) {
4962 : if (rd_pick_filter_intra_sbuv(cpi, x, rate, rate_tokenonly, distortion,
4963 : skippable, bsize, &best_rd))
4964 : best_mbmi = *mbmi;
4965 : }
4966 : #endif // CONFIG_FILTER_INTRA
4967 :
4968 0 : *mbmi = best_mbmi;
4969 : // Make sure we actually chose a mode
4970 0 : assert(best_rd < INT64_MAX);
4971 0 : return best_rd;
4972 : }
4973 :
4974 0 : static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
4975 : PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
4976 : TX_SIZE max_tx_size, int *rate_uv,
4977 : int *rate_uv_tokenonly, int64_t *dist_uv,
4978 : int *skip_uv, PREDICTION_MODE *mode_uv) {
4979 : // Use an estimated rd for uv_intra based on DC_PRED if the
4980 : // appropriate speed flag is set.
4981 : (void)ctx;
4982 0 : init_sbuv_mode(&x->e_mbd.mi[0]->mbmi);
4983 : #if CONFIG_CB4X4
4984 : #if CONFIG_CHROMA_2X2
4985 : rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
4986 : bsize, max_tx_size);
4987 : #else
4988 0 : if (x->skip_chroma_rd) {
4989 0 : *rate_uv = 0;
4990 0 : *rate_uv_tokenonly = 0;
4991 0 : *dist_uv = 0;
4992 0 : *skip_uv = 1;
4993 0 : *mode_uv = DC_PRED;
4994 0 : return;
4995 : }
4996 0 : BLOCK_SIZE bs = scale_chroma_bsize(bsize, x->e_mbd.plane[1].subsampling_x,
4997 : x->e_mbd.plane[1].subsampling_y);
4998 0 : rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
4999 : bs, max_tx_size);
5000 : #endif // CONFIG_CHROMA_2X2
5001 : #else
5002 : rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
5003 : bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
5004 : #endif // CONFIG_CB4X4
5005 0 : *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
5006 : }
5007 :
5008 0 : static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,
5009 : int16_t mode_context) {
5010 : #if CONFIG_EXT_INTER
5011 0 : if (is_inter_compound_mode(mode)) {
5012 : return cpi
5013 0 : ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
5014 : }
5015 : #endif
5016 :
5017 0 : int mode_cost = 0;
5018 0 : int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
5019 0 : int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);
5020 :
5021 0 : assert(is_inter_mode(mode));
5022 :
5023 0 : if (mode == NEWMV) {
5024 0 : mode_cost = cpi->newmv_mode_cost[mode_ctx][0];
5025 0 : return mode_cost;
5026 : } else {
5027 0 : mode_cost = cpi->newmv_mode_cost[mode_ctx][1];
5028 0 : mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
5029 :
5030 0 : if (is_all_zero_mv) return mode_cost;
5031 :
5032 0 : if (mode == ZEROMV) {
5033 0 : mode_cost += cpi->zeromv_mode_cost[mode_ctx][0];
5034 0 : return mode_cost;
5035 : } else {
5036 0 : mode_cost += cpi->zeromv_mode_cost[mode_ctx][1];
5037 0 : mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
5038 :
5039 0 : if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;
5040 0 : if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;
5041 0 : if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;
5042 :
5043 0 : mode_cost += cpi->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
5044 0 : return mode_cost;
5045 : }
5046 : }
5047 : }
5048 :
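// ----------------------------------------------------------------------------
// cost_mv_ref() above prices an inter mode by walking a binary decision
// cascade -- "NEWMV?", then "ZEROMV?", then "NEARESTMV or NEARMV?" -- and
// summing the flag cost at each step along the path. A minimal model with
// hypothetical per-flag cost tables (context selection omitted):
enum { MODEL_NEWMV, MODEL_ZEROMV, MODEL_NEARESTMV, MODEL_NEARMV };

static int cascade_mode_cost(int mode, const int newmv_cost[2],
                             const int zeromv_cost[2],
                             const int refmv_cost[2]) {
  if (mode == MODEL_NEWMV) return newmv_cost[0];
  int cost = newmv_cost[1];  // price of signalling "not NEWMV"
  if (mode == MODEL_ZEROMV) return cost + zeromv_cost[0];
  cost += zeromv_cost[1];    // price of signalling "not ZEROMV"
  return cost + refmv_cost[mode != MODEL_NEARESTMV];
}
// ----------------------------------------------------------------------------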
5049 : #if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
5050 0 : static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
5051 : COMPOUND_TYPE comp_type) {
5052 : (void)bsize;
5053 0 : switch (comp_type) {
5054 0 : case COMPOUND_AVERAGE: return 0;
5055 : #if CONFIG_WEDGE
5056 0 : case COMPOUND_WEDGE: return get_interinter_wedge_bits(bsize);
5057 : #endif // CONFIG_WEDGE
5058 : #if CONFIG_COMPOUND_SEGMENT
5059 0 : case COMPOUND_SEG: return 1;
5060 : #endif // CONFIG_COMPOUND_SEGMENT
5061 0 : default: assert(0); return 0;
5062 : }
5063 : }
5064 : #endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
5065 :
5066 : typedef struct {
5067 : int eobs;
5068 : int brate;
5069 : int byrate;
5070 : int64_t bdist;
5071 : int64_t bsse;
5072 : int64_t brdcost;
5073 : int_mv mvs[2];
5074 : int_mv pred_mv[2];
5075 : #if CONFIG_EXT_INTER
5076 : int_mv ref_mv[2];
5077 : #endif // CONFIG_EXT_INTER
5078 :
5079 : #if CONFIG_CHROMA_2X2
5080 : ENTROPY_CONTEXT ta[4];
5081 : ENTROPY_CONTEXT tl[4];
5082 : #else
5083 : ENTROPY_CONTEXT ta[2];
5084 : ENTROPY_CONTEXT tl[2];
5085 : #endif // CONFIG_CHROMA_2X2
5086 : } SEG_RDSTAT;
5087 :
5088 : typedef struct {
5089 : int_mv *ref_mv[2];
5090 : int_mv mvp;
5091 :
5092 : int64_t segment_rd;
5093 : int r;
5094 : int64_t d;
5095 : int64_t sse;
5096 : int segment_yrate;
5097 : PREDICTION_MODE modes[4];
5098 : #if CONFIG_EXT_INTER
5099 : SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
5100 : #else
5101 : SEG_RDSTAT rdstat[4][INTER_MODES];
5102 : #endif // CONFIG_EXT_INTER
5103 : int mvthresh;
5104 : } BEST_SEG_INFO;
5105 :
5106 0 : static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
5107 0 : return (mv->row >> 3) < mv_limits->row_min ||
5108 0 : (mv->row >> 3) > mv_limits->row_max ||
5109 0 : (mv->col >> 3) < mv_limits->col_min ||
5110 0 : (mv->col >> 3) > mv_limits->col_max;
5111 : }
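// ----------------------------------------------------------------------------
// Motion vectors here are stored in 1/8-pel units while the search limits are
// in full pixels, hence the >> 3 above. The per-component check in isolation
// (illustration only):
static int component_out_of_range(int v_q3, int vmin_fullpel,
                                  int vmax_fullpel) {
  const int v_fullpel = v_q3 >> 3;  // e.g. 20 (2.5 px in q3 units) -> 2
  return v_fullpel < vmin_fullpel || v_fullpel > vmax_fullpel;
}
// ----------------------------------------------------------------------------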
5112 :
5113 : // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
5114 : // TODO(aconverse): Find out if this is still productive, then clean up or remove
5115 0 : static int check_best_zero_mv(
5116 : const AV1_COMP *const cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
5117 : #if CONFIG_EXT_INTER
5118 : const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
5119 : #endif // CONFIG_EXT_INTER
5120 : int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
5121 : const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
5122 : int mi_row, int mi_col) {
5123 : int_mv zeromv[2];
5124 0 : int comp_pred_mode = ref_frames[1] > INTRA_FRAME;
5125 : int cur_frm;
5126 : (void)mi_row;
5127 : (void)mi_col;
5128 0 : for (cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
5129 : #if CONFIG_GLOBAL_MOTION
5130 0 : if (this_mode == ZEROMV
5131 : #if CONFIG_EXT_INTER
5132 0 : || this_mode == ZERO_ZEROMV
5133 : #endif // CONFIG_EXT_INTER
5134 : )
5135 0 : zeromv[cur_frm].as_int =
5136 0 : gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
5137 : cpi->common.allow_high_precision_mv, bsize,
5138 : mi_col, mi_row, block)
5139 0 : .as_int;
5140 : else
5141 : #endif // CONFIG_GLOBAL_MOTION
5142 0 : zeromv[cur_frm].as_int = 0;
5143 : }
5144 : #if !CONFIG_EXT_INTER
5145 : assert(ref_frames[1] != INTRA_FRAME); // Just sanity check
5146 : #endif // !CONFIG_EXT_INTER
5147 0 : if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
5148 0 : frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
5149 0 : (ref_frames[1] <= INTRA_FRAME ||
5150 0 : frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
5151 0 : int16_t rfc =
5152 0 : av1_mode_context_analyzer(mode_context, ref_frames, bsize, block);
5153 0 : int c1 = cost_mv_ref(cpi, NEARMV, rfc);
5154 0 : int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
5155 0 : int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
5156 :
5157 0 : if (this_mode == NEARMV) {
5158 0 : if (c1 > c3) return 0;
5159 0 : } else if (this_mode == NEARESTMV) {
5160 0 : if (c2 > c3) return 0;
5161 : } else {
5162 0 : assert(this_mode == ZEROMV);
5163 0 : if (ref_frames[1] <= INTRA_FRAME) {
5164 0 : if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
5165 0 : (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
5166 0 : return 0;
5167 : } else {
5168 0 : if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
5169 0 : frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
5170 0 : (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
5171 0 : frame_mv[NEARMV][ref_frames[1]].as_int == 0))
5172 0 : return 0;
5173 : }
5174 : }
5175 : }
5176 : #if CONFIG_EXT_INTER
5177 0 : else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
5178 0 : this_mode == ZERO_ZEROMV) &&
5179 0 : frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
5180 0 : frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
5181 0 : int16_t rfc = compound_mode_context[ref_frames[0]];
5182 0 : int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, rfc);
5183 0 : int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, rfc);
5184 0 : int c5 = cost_mv_ref(cpi, NEAR_NEARMV, rfc);
5185 :
5186 0 : if (this_mode == NEAREST_NEARESTMV) {
5187 0 : if (c2 > c3) return 0;
5188 0 : } else if (this_mode == NEAR_NEARMV) {
5189 0 : if (c5 > c3) return 0;
5190 : } else {
5191 0 : assert(this_mode == ZERO_ZEROMV);
5192 0 : if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
5193 0 : frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
5194 0 : (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
5195 0 : frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0))
5196 0 : return 0;
5197 : }
5198 : }
5199 : #endif // CONFIG_EXT_INTER
5200 0 : return 1;
5201 : }
5202 :
5203 0 : static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
5204 : BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
5205 : int mi_col,
5206 : #if CONFIG_EXT_INTER
5207 : int_mv *ref_mv_sub8x8[2], const uint8_t *mask,
5208 : int mask_stride,
5209 : #endif // CONFIG_EXT_INTER
5210 : int *rate_mv, const int block) {
5211 0 : const AV1_COMMON *const cm = &cpi->common;
5212 0 : const int pw = block_size_wide[bsize];
5213 0 : const int ph = block_size_high[bsize];
5214 0 : MACROBLOCKD *xd = &x->e_mbd;
5215 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
5216 : // This function should only ever be called for compound modes
5217 0 : assert(has_second_ref(mbmi));
5218 0 : const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
5219 : int_mv ref_mv[2];
5220 : int ite, ref;
5221 : #if CONFIG_DUAL_FILTER
5222 0 : InterpFilter interp_filter[4] = {
5223 0 : mbmi->interp_filter[0], mbmi->interp_filter[1], mbmi->interp_filter[2],
5224 0 : mbmi->interp_filter[3],
5225 : };
5226 : #else
5227 : const InterpFilter interp_filter = mbmi->interp_filter;
5228 : #endif // CONFIG_DUAL_FILTER
5229 : struct scale_factors sf;
5230 0 : struct macroblockd_plane *const pd = &xd->plane[0];
5231 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5232 : // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
5233 0 : const int ic = block & 1;
5234 0 : const int ir = (block - ic) >> 1;
5235 0 : const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
5236 0 : const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
5237 : #if CONFIG_GLOBAL_MOTION
5238 : int is_global[2];
5239 0 : for (ref = 0; ref < 2; ++ref) {
5240 0 : WarpedMotionParams *const wm =
5241 0 : &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
5242 0 : is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
5243 : }
5244 : #endif // CONFIG_GLOBAL_MOTION
5245 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5246 :
5247 : // Do joint motion search in compound mode to get more accurate mv.
5248 : struct buf_2d backup_yv12[2][MAX_MB_PLANE];
5249 0 : int last_besterr[2] = { INT_MAX, INT_MAX };
5250 0 : const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
5251 0 : av1_get_scaled_ref_frame(cpi, refs[0]),
5252 0 : av1_get_scaled_ref_frame(cpi, refs[1])
5253 : };
5254 :
5255 : // Prediction buffer from second frame.
5256 : #if CONFIG_HIGHBITDEPTH
5257 : DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
5258 : uint8_t *second_pred;
5259 : #else
5260 : DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
5261 : #endif // CONFIG_HIGHBITDEPTH
5262 :
5263 : #if CONFIG_EXT_INTER && CONFIG_CB4X4
5264 : (void)ref_mv_sub8x8;
5265 : #endif // CONFIG_EXT_INTER && CONFIG_CB4X4
5266 :
5267 0 : for (ref = 0; ref < 2; ++ref) {
5268 : #if CONFIG_EXT_INTER && !CONFIG_CB4X4
5269 : if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
5270 : ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
5271 : else
5272 : #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
5273 0 : ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
5274 :
5275 0 : if (scaled_ref_frame[ref]) {
5276 : int i;
5277 : // Swap out the reference frame for a version that's been scaled to
5278 : // match the resolution of the current frame, allowing the existing
5279 : // motion search code to be used without additional modifications.
5280 0 : for (i = 0; i < MAX_MB_PLANE; i++)
5281 0 : backup_yv12[ref][i] = xd->plane[i].pre[ref];
5282 0 : av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
5283 : NULL);
5284 : }
5285 : }
5286 :
5287 : // Since we have scaled the reference frames to match the size of the current
5288 : // frame, we must use a unit scaling factor during mode selection.
5289 : #if CONFIG_HIGHBITDEPTH
5290 0 : av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5291 : cm->height, cm->use_highbitdepth);
5292 : #else
5293 : av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5294 : cm->height);
5295 : #endif // CONFIG_HIGHBITDEPTH
5296 :
5297 : // Run the joint search iteratively, alternating between the two reference
5298 : // frames, and break out of the search loop once no better mv is found.
5299 0 : for (ite = 0; ite < 4; ite++) {
5300 : struct buf_2d ref_yv12[2];
5301 0 : int bestsme = INT_MAX;
5302 0 : int sadpb = x->sadperbit16;
5303 0 : MV *const best_mv = &x->best_mv.as_mv;
5304 0 : int search_range = 3;
5305 :
5306 0 : MvLimits tmp_mv_limits = x->mv_limits;
5307 0 : int id = ite % 2; // Even iterations search in the first reference frame,
5308 : // odd iterations search in the second. The predictor
5309 : // found for the 'other' reference frame is factored in.
5310 0 : const int plane = 0;
5311 0 : ConvolveParams conv_params = get_conv_params(0, plane);
5312 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5313 : WarpTypesAllowed warp_types;
5314 : #if CONFIG_GLOBAL_MOTION
5315 0 : warp_types.global_warp_allowed = is_global[!id];
5316 : #endif // CONFIG_GLOBAL_MOTION
5317 : #if CONFIG_WARPED_MOTION
5318 0 : warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
5319 : #endif // CONFIG_WARPED_MOTION
5320 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5321 :
5322 : // Initialized here because of a compiler problem in Visual Studio.
5323 0 : ref_yv12[0] = xd->plane[plane].pre[0];
5324 0 : ref_yv12[1] = xd->plane[plane].pre[1];
5325 :
5326 : #if CONFIG_DUAL_FILTER
5327 : // reload the filter types
5328 0 : interp_filter[0] =
5329 0 : (id == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0];
5330 0 : interp_filter[1] =
5331 0 : (id == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1];
5332 : #endif // CONFIG_DUAL_FILTER
5333 :
5334 : // Get the prediction block from the 'other' reference frame.
5335 : #if CONFIG_HIGHBITDEPTH
5336 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5337 0 : second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
5338 0 : av1_highbd_build_inter_predictor(
5339 0 : ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
5340 0 : &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter,
5341 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5342 : &warp_types, p_col, p_row,
5343 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5344 : plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
5345 : } else {
5346 0 : second_pred = (uint8_t *)second_pred_alloc_16;
5347 : #endif // CONFIG_HIGHBITDEPTH
5348 0 : av1_build_inter_predictor(
5349 0 : ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
5350 0 : &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter,
5351 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5352 : &warp_types, p_col, p_row, plane, !id,
5353 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5354 : MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
5355 : #if CONFIG_HIGHBITDEPTH
5356 : }
5357 : #endif // CONFIG_HIGHBITDEPTH
5358 :
5359 : // Do compound motion search on the current reference frame.
5360 0 : if (id) xd->plane[plane].pre[0] = ref_yv12[id];
5361 0 : av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
5362 :
5363 : // Use the mv result from the single-reference mode as the mv predictor.
5364 0 : *best_mv = frame_mv[refs[id]].as_mv;
5365 :
5366 0 : best_mv->col >>= 3;
5367 0 : best_mv->row >>= 3;
5368 :
5369 0 : av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
5370 :
5371 : // Small-range full-pixel motion search.
5372 0 : bestsme =
5373 0 : av1_refining_search_8p_c(x, sadpb, search_range, &cpi->fn_ptr[bsize],
5374 : #if CONFIG_EXT_INTER
5375 : mask, mask_stride, id,
5376 : #endif
5377 0 : &ref_mv[id].as_mv, second_pred);
5378 0 : if (bestsme < INT_MAX) {
5379 : #if CONFIG_EXT_INTER
5380 0 : if (mask)
5381 0 : bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
5382 : second_pred, mask, mask_stride, id,
5383 0 : &cpi->fn_ptr[bsize], 1);
5384 : else
5385 : #endif
5386 0 : bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
5387 0 : second_pred, &cpi->fn_ptr[bsize], 1);
5388 : }
5389 :
5390 0 : x->mv_limits = tmp_mv_limits;
5391 :
5392 0 : if (bestsme < INT_MAX) {
5393 : int dis; /* TODO: use dis in distortion calculation later. */
5394 : unsigned int sse;
5395 0 : if (cpi->sf.use_upsampled_references) {
5396 : // Use up-sampled reference frames.
5397 0 : struct buf_2d backup_pred = pd->pre[0];
5398 0 : const YV12_BUFFER_CONFIG *upsampled_ref =
5399 0 : get_upsampled_ref(cpi, refs[id]);
5400 :
5401 : // Set pred for Y plane
5402 0 : setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
5403 : upsampled_ref->y_crop_width,
5404 : upsampled_ref->y_crop_height, upsampled_ref->y_stride,
5405 : (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
5406 : pd->subsampling_y);
5407 :
5408 : // If bsize < BLOCK_8X8, adjust pred pointer for this block
5409 : #if !CONFIG_CB4X4
5410 : if (bsize < BLOCK_8X8)
5411 : pd->pre[0].buf =
5412 : &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
5413 : pd->pre[0].stride))
5414 : << 3];
5415 : #endif // !CONFIG_CB4X4
5416 :
5417 0 : bestsme = cpi->find_fractional_mv_step(
5418 0 : x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
5419 0 : x->errorperbit, &cpi->fn_ptr[bsize], 0,
5420 : cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
5421 : &dis, &sse, second_pred,
5422 : #if CONFIG_EXT_INTER
5423 : mask, mask_stride, id,
5424 : #endif
5425 : pw, ph, 1);
5426 :
5427 : // Restore the reference frames.
5428 0 : pd->pre[0] = backup_pred;
5429 : } else {
5430 : (void)block;
5431 0 : bestsme = cpi->find_fractional_mv_step(
5432 0 : x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
5433 0 : x->errorperbit, &cpi->fn_ptr[bsize], 0,
5434 : cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
5435 : &dis, &sse, second_pred,
5436 : #if CONFIG_EXT_INTER
5437 : mask, mask_stride, id,
5438 : #endif
5439 : pw, ph, 0);
5440 : }
5441 : }
5442 :
5443 : // Restore the pointer to the first (possibly scaled) prediction buffer.
5444 0 : if (id) xd->plane[plane].pre[0] = ref_yv12[0];
5445 :
5446 0 : if (bestsme < last_besterr[id]) {
5447 0 : frame_mv[refs[id]].as_mv = *best_mv;
5448 0 : last_besterr[id] = bestsme;
5449 : } else {
5450 0 : break;
5451 : }
5452 : }
5453 :
5454 0 : *rate_mv = 0;
5455 :
5456 0 : for (ref = 0; ref < 2; ++ref) {
5457 0 : if (scaled_ref_frame[ref]) {
5458 : // Restore the prediction frame pointers to their unscaled versions.
5459 : int i;
5460 0 : for (i = 0; i < MAX_MB_PLANE; i++)
5461 0 : xd->plane[i].pre[ref] = backup_yv12[ref][i];
5462 : }
5463 0 : av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
5464 : #if CONFIG_EXT_INTER && !CONFIG_CB4X4
5465 : if (bsize >= BLOCK_8X8)
5466 : #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
5467 0 : *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
5468 0 : &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
5469 0 : x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
5470 : #if CONFIG_EXT_INTER && !CONFIG_CB4X4
5471 : else
5472 : *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
5473 : &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
5474 : x->mvcost, MV_COST_WEIGHT);
5475 : #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4
5476 : }
5477 0 : }
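// ----------------------------------------------------------------------------
// The loop above is coordinate descent over the two MVs of a compound
// prediction: hold one reference's MV fixed, build its prediction, re-search
// the other side against it, and stop as soon as an iteration stops
// improving. The control flow in isolation, with a hypothetical refine()
// returning the new error for one side; not aomedia code.
#include <limits.h>

static void alternate_refine(int (*refine)(int side), int max_iters) {
  int best_err[2] = { INT_MAX, INT_MAX };
  for (int ite = 0; ite < max_iters; ++ite) {
    const int side = ite & 1;          // even iterations: side 0; odd: side 1
    const int err = refine(side);      // the other side's prediction is fixed
    if (err >= best_err[side]) break;  // no improvement: treat as converged
    best_err[side] = err;
  }
}
// ----------------------------------------------------------------------------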
5478 :
5479 0 : static void estimate_ref_frame_costs(const AV1_COMMON *cm,
5480 : const MACROBLOCKD *xd, int segment_id,
5481 : unsigned int *ref_costs_single,
5482 : unsigned int *ref_costs_comp,
5483 : aom_prob *comp_mode_p) {
5484 0 : int seg_ref_active =
5485 0 : segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
5486 0 : if (seg_ref_active) {
5487 0 : memset(ref_costs_single, 0,
5488 : TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
5489 0 : memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
5490 0 : *comp_mode_p = 128;
5491 : } else {
5492 0 : aom_prob intra_inter_p = av1_get_intra_inter_prob(cm, xd);
5493 0 : aom_prob comp_inter_p = 128;
5494 :
5495 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT) {
5496 0 : comp_inter_p = av1_get_reference_mode_prob(cm, xd);
5497 0 : *comp_mode_p = comp_inter_p;
5498 : } else {
5499 0 : *comp_mode_p = 128;
5500 : }
5501 :
5502 0 : ref_costs_single[INTRA_FRAME] = av1_cost_bit(intra_inter_p, 0);
5503 :
5504 0 : if (cm->reference_mode != COMPOUND_REFERENCE) {
5505 0 : aom_prob ref_single_p1 = av1_get_pred_prob_single_ref_p1(cm, xd);
5506 0 : aom_prob ref_single_p2 = av1_get_pred_prob_single_ref_p2(cm, xd);
5507 : #if CONFIG_EXT_REFS
5508 0 : aom_prob ref_single_p3 = av1_get_pred_prob_single_ref_p3(cm, xd);
5509 0 : aom_prob ref_single_p4 = av1_get_pred_prob_single_ref_p4(cm, xd);
5510 0 : aom_prob ref_single_p5 = av1_get_pred_prob_single_ref_p5(cm, xd);
5511 : #endif // CONFIG_EXT_REFS
5512 :
5513 0 : unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
5514 :
5515 0 : ref_costs_single[LAST_FRAME] =
5516 : #if CONFIG_EXT_REFS
5517 0 : ref_costs_single[LAST2_FRAME] = ref_costs_single[LAST3_FRAME] =
5518 0 : ref_costs_single[BWDREF_FRAME] =
5519 : #endif // CONFIG_EXT_REFS
5520 0 : ref_costs_single[GOLDEN_FRAME] =
5521 0 : ref_costs_single[ALTREF_FRAME] = base_cost;
5522 :
5523 : #if CONFIG_EXT_REFS
5524 0 : ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
5525 0 : ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p1, 0);
5526 0 : ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
5527 0 : ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
5528 0 : ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
5529 0 : ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
5530 :
5531 0 : ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
5532 0 : ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p3, 0);
5533 0 : ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p3, 1);
5534 0 : ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);
5535 :
5536 0 : ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
5537 0 : ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
5538 :
5539 0 : ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
5540 0 : ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p4, 1);
5541 :
5542 0 : ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
5543 0 : ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);
5544 : #else
5545 : ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
5546 : ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1);
5547 : ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
5548 :
5549 : ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p2, 0);
5550 : ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
5551 : #endif // CONFIG_EXT_REFS
5552 : } else {
5553 0 : ref_costs_single[LAST_FRAME] = 512;
5554 : #if CONFIG_EXT_REFS
5555 0 : ref_costs_single[LAST2_FRAME] = 512;
5556 0 : ref_costs_single[LAST3_FRAME] = 512;
5557 0 : ref_costs_single[BWDREF_FRAME] = 512;
5558 : #endif // CONFIG_EXT_REFS
5559 0 : ref_costs_single[GOLDEN_FRAME] = 512;
5560 0 : ref_costs_single[ALTREF_FRAME] = 512;
5561 : }
5562 :
5563 0 : if (cm->reference_mode != SINGLE_REFERENCE) {
5564 0 : aom_prob ref_comp_p = av1_get_pred_prob_comp_ref_p(cm, xd);
5565 : #if CONFIG_EXT_REFS
5566 0 : aom_prob ref_comp_p1 = av1_get_pred_prob_comp_ref_p1(cm, xd);
5567 0 : aom_prob ref_comp_p2 = av1_get_pred_prob_comp_ref_p2(cm, xd);
5568 0 : aom_prob bwdref_comp_p = av1_get_pred_prob_comp_bwdref_p(cm, xd);
5569 : #endif // CONFIG_EXT_REFS
5570 :
5571 0 : unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
5572 :
5573 0 : ref_costs_comp[LAST_FRAME] =
5574 : #if CONFIG_EXT_REFS
5575 0 : ref_costs_comp[LAST2_FRAME] = ref_costs_comp[LAST3_FRAME] =
5576 : #endif // CONFIG_EXT_REFS
5577 0 : ref_costs_comp[GOLDEN_FRAME] = base_cost;
5578 :
5579 : #if CONFIG_EXT_REFS
5580 0 : ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF_FRAME] = 0;
5581 : #endif // CONFIG_EXT_REFS
5582 :
5583 : #if CONFIG_EXT_REFS
5584 0 : ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
5585 0 : ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
5586 0 : ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
5587 0 : ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
5588 :
5589 0 : ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
5590 0 : ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);
5591 :
5592 0 : ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
5593 0 : ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);
5594 :
5595 : // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
5596 : // more bit.
5597 0 : ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
5598 0 : ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
5599 : #else
5600 : ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
5601 : ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
5602 : #endif // CONFIG_EXT_REFS
5603 : } else {
5604 0 : ref_costs_comp[LAST_FRAME] = 512;
5605 : #if CONFIG_EXT_REFS
5606 0 : ref_costs_comp[LAST2_FRAME] = 512;
5607 0 : ref_costs_comp[LAST3_FRAME] = 512;
5608 0 : ref_costs_comp[BWDREF_FRAME] = 512;
5609 0 : ref_costs_comp[ALTREF_FRAME] = 512;
5610 : #endif // CONFIG_EXT_REFS
5611 0 : ref_costs_comp[GOLDEN_FRAME] = 512;
5612 : }
5613 : }
5614 0 : }
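// ----------------------------------------------------------------------------
// Each reference cost above is assembled by summing av1_cost_bit() along that
// reference's path through a binary decision tree (e.g. with EXT_REFS,
// LAST_FRAME takes branch 0 at ref_single_p1, p3 and p4). The general shape,
// with a hypothetical cost_bit() and an explicit path; illustration only:
static unsigned tree_path_cost(unsigned base_cost,
                               unsigned (*cost_bit)(unsigned char prob,
                                                    int bit),
                               const unsigned char *probs, const int *bits,
                               int n) {
  unsigned cost = base_cost;  // cost of signalling "inter, single reference"
  for (int i = 0; i < n; ++i) cost += cost_bit(probs[i], bits[i]);
  return cost;
}
// ----------------------------------------------------------------------------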
5615 :
5616 0 : static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
5617 : int mode_index,
5618 : int64_t comp_pred_diff[REFERENCE_MODES],
5619 : int skippable) {
5620 0 : MACROBLOCKD *const xd = &x->e_mbd;
5621 :
5622 : // Take a snapshot of the coding context so it can be
5623 : // restored if we decide to encode this way
5624 0 : ctx->skip = x->skip;
5625 0 : ctx->skippable = skippable;
5626 0 : ctx->best_mode_index = mode_index;
5627 0 : ctx->mic = *xd->mi[0];
5628 0 : ctx->mbmi_ext = *x->mbmi_ext;
5629 0 : ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
5630 0 : ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
5631 0 : ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
5632 0 : }
5633 :
5634 0 : static void setup_buffer_inter(
5635 : const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
5636 : BLOCK_SIZE block_size, int mi_row, int mi_col,
5637 : int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
5638 : int_mv frame_near_mv[TOTAL_REFS_PER_FRAME],
5639 : struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
5640 0 : const AV1_COMMON *cm = &cpi->common;
5641 0 : const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
5642 0 : MACROBLOCKD *const xd = &x->e_mbd;
5643 0 : MODE_INFO *const mi = xd->mi[0];
5644 0 : int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
5645 0 : const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
5646 0 : MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
5647 :
5648 0 : assert(yv12 != NULL);
5649 :
5650 : // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
5651 : // use the UV scaling factors.
5652 0 : av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
5653 :
5654 : // Gets an initial list of candidate vectors from neighbours and orders them
5655 0 : av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
5656 0 : mbmi_ext->ref_mv_stack[ref_frame],
5657 : #if CONFIG_EXT_INTER
5658 0 : mbmi_ext->compound_mode_context,
5659 : #endif // CONFIG_EXT_INTER
5660 : candidates, mi_row, mi_col, NULL, NULL,
5661 0 : mbmi_ext->mode_context);
5662 :
5663 : // Candidate refinement carried out at encoder and decoder
5664 0 : av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
5665 0 : &frame_nearest_mv[ref_frame],
5666 0 : &frame_near_mv[ref_frame]);
5667 :
5668 : // Further refinement that is encode side only to test the top few candidates
5669 : // Further refinement that is encode-side only, to test the top few candidates
5670 : // The current implementation doesn't support scaling.
5671 : #if CONFIG_CB4X4
5672 0 : av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
5673 : block_size);
5674 : #else
5675 : if (!av1_is_scaled(sf) && block_size >= BLOCK_8X8)
5676 : av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
5677 : block_size);
5678 : #endif // CONFIG_CB4X4
5679 0 : }
5680 :
5681 0 : static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
5682 : BLOCK_SIZE bsize, int mi_row, int mi_col,
5683 : #if CONFIG_EXT_INTER
5684 : int ref_idx,
5685 : #endif // CONFIG_EXT_INTER
5686 : int *rate_mv) {
5687 0 : MACROBLOCKD *xd = &x->e_mbd;
5688 0 : const AV1_COMMON *cm = &cpi->common;
5689 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
5690 0 : struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
5691 0 : int bestsme = INT_MAX;
5692 : int step_param;
5693 0 : int sadpb = x->sadperbit16;
5694 : MV mvp_full;
5695 : #if CONFIG_EXT_INTER
5696 0 : int ref = mbmi->ref_frame[ref_idx];
5697 : #else
5698 : int ref = mbmi->ref_frame[0];
5699 : int ref_idx = 0;
5700 : #endif // CONFIG_EXT_INTER
5701 0 : MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
5702 :
5703 0 : MvLimits tmp_mv_limits = x->mv_limits;
5704 : int cost_list[5];
5705 :
5706 0 : const YV12_BUFFER_CONFIG *scaled_ref_frame =
5707 : av1_get_scaled_ref_frame(cpi, ref);
5708 :
5709 : MV pred_mv[3];
5710 0 : pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
5711 0 : pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
5712 0 : pred_mv[2] = x->pred_mv[ref];
5713 :
5714 0 : if (scaled_ref_frame) {
5715 : int i;
5716 : // Swap out the reference frame for a version that's been scaled to
5717 : // match the resolution of the current frame, allowing the existing
5718 : // motion search code to be used without additional modifications.
5719 0 : for (i = 0; i < MAX_MB_PLANE; i++)
5720 0 : backup_yv12[i] = xd->plane[i].pre[ref_idx];
5721 :
5722 0 : av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
5723 : }
5724 :
5725 0 : av1_set_mv_search_range(&x->mv_limits, &ref_mv);
5726 :
5727 0 : av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
5728 :
5729 : // Work out the size of the first step in the mv step search.
5730 : // 0 here is the maximum-length first step; 1 is AOMMAX >> 1, etc.
5731 0 : if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
5732 : // Take a weighted average of the step_param based on the last frame's
5733 : // max mv magnitude and the one based on the best ref mvs of the current
5734 : // block for the given reference.
5735 0 : step_param =
5736 0 : (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
5737 : 2;
5738 : } else {
5739 0 : step_param = cpi->mv_step_param;
5740 : }
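    // Worked example (hypothetical values): if av1_init_search_range() yields
    // 6 from the last frame's max mv magnitude and cpi->mv_step_param is 2,
    // the blended first step is (6 + 2) / 2 = 4.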
5741 :
5742 0 : if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
5743 0 : int boffset =
5744 0 : 2 * (b_width_log2_lookup[cm->sb_size] -
5745 0 : AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
5746 0 : step_param = AOMMAX(step_param, boffset);
5747 : }
5748 :
5749 0 : if (cpi->sf.adaptive_motion_search) {
5750 0 : int bwl = b_width_log2_lookup[bsize];
5751 0 : int bhl = b_height_log2_lookup[bsize];
5752 0 : int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
5753 :
5754 0 : if (tlevel < 5) step_param += 2;
5755 :
5756 : // prev_mv_sad is not set up for dynamically scaled frames.
5757 0 : if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
5758 : int i;
5759 0 : for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
5760 0 : if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
5761 0 : x->pred_mv[ref].row = 0;
5762 0 : x->pred_mv[ref].col = 0;
5763 0 : x->best_mv.as_int = INVALID_MV;
5764 :
5765 0 : if (scaled_ref_frame) {
5766 : int j;
5767 0 : for (j = 0; j < MAX_MB_PLANE; ++j)
5768 0 : xd->plane[j].pre[ref_idx] = backup_yv12[j];
5769 : }
5770 0 : return;
5771 : }
5772 : }
5773 : }
5774 : }
5775 :
5776 0 : av1_set_mv_search_range(&x->mv_limits, &ref_mv);
5777 :
5778 : #if CONFIG_MOTION_VAR
5779 0 : if (mbmi->motion_mode != SIMPLE_TRANSLATION)
5780 0 : mvp_full = mbmi->mv[0].as_mv;
5781 : else
5782 : #endif // CONFIG_MOTION_VAR
5783 0 : mvp_full = pred_mv[x->mv_best_ref_index[ref]];
5784 :
5785 0 : mvp_full.col >>= 3;
5786 0 : mvp_full.row >>= 3;
5787 :
5788 0 : x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
5789 :
5790 : #if CONFIG_MOTION_VAR
5791 0 : switch (mbmi->motion_mode) {
5792 : case SIMPLE_TRANSLATION:
5793 : #endif // CONFIG_MOTION_VAR
5794 0 : bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
5795 : sadpb, cond_cost_list(cpi, cost_list),
5796 : &ref_mv, INT_MAX, 1);
5797 : #if CONFIG_MOTION_VAR
5798 0 : break;
5799 : case OBMC_CAUSAL:
5800 0 : bestsme = av1_obmc_full_pixel_diamond(
5801 : cpi, x, &mvp_full, step_param, sadpb,
5802 0 : MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
5803 : &(x->best_mv.as_mv), 0);
5804 0 : break;
5805 : default: assert(0 && "Invalid motion mode!");
5806 : }
5807 : #endif // CONFIG_MOTION_VAR
5808 :
5809 0 : x->mv_limits = tmp_mv_limits;
5810 :
5811 0 : if (bestsme < INT_MAX) {
5812 : int dis; /* TODO: use dis in distortion calculation later. */
5813 : #if CONFIG_MOTION_VAR
5814 0 : switch (mbmi->motion_mode) {
5815 : case SIMPLE_TRANSLATION:
5816 : #endif // CONFIG_MOTION_VAR
5817 0 : if (cpi->sf.use_upsampled_references) {
5818 : int best_mv_var;
5819 0 : const int try_second = x->second_best_mv.as_int != INVALID_MV &&
5820 0 : x->second_best_mv.as_int != x->best_mv.as_int;
5821 0 : const int pw = block_size_wide[bsize];
5822 0 : const int ph = block_size_high[bsize];
5823 : // Use up-sampled reference frames.
5824 0 : struct macroblockd_plane *const pd = &xd->plane[0];
5825 0 : struct buf_2d backup_pred = pd->pre[ref_idx];
5826 0 : const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
5827 :
5828 : // Set pred for Y plane
5829 0 : setup_pred_plane(
5830 : &pd->pre[ref_idx], bsize, upsampled_ref->y_buffer,
5831 : upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
5832 : upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
5833 : pd->subsampling_x, pd->subsampling_y);
5834 :
5835 0 : best_mv_var = cpi->find_fractional_mv_step(
5836 : x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
5837 0 : &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
5838 : cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
5839 : x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
5840 : #if CONFIG_EXT_INTER
5841 : NULL, 0, 0,
5842 : #endif
5843 : pw, ph, 1);
5844 :
5845 0 : if (try_second) {
5846 0 : const int minc =
5847 0 : AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
5848 0 : const int maxc =
5849 0 : AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
5850 0 : const int minr =
5851 0 : AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
5852 0 : const int maxr =
5853 0 : AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
5854 : int this_var;
5855 0 : MV best_mv = x->best_mv.as_mv;
5856 :
5857 0 : x->best_mv = x->second_best_mv;
5858 0 : if (x->best_mv.as_mv.row * 8 <= maxr &&
5859 0 : x->best_mv.as_mv.row * 8 >= minr &&
5860 0 : x->best_mv.as_mv.col * 8 <= maxc &&
5861 0 : x->best_mv.as_mv.col * 8 >= minc) {
5862 0 : this_var = cpi->find_fractional_mv_step(
5863 : x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
5864 0 : &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
5865 : cpi->sf.mv.subpel_iters_per_step,
5866 : cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
5867 : &dis, &x->pred_sse[ref], NULL,
5868 : #if CONFIG_EXT_INTER
5869 : NULL, 0, 0,
5870 : #endif
5871 : pw, ph, 1);
5872 0 : if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
5873 0 : x->best_mv.as_mv = best_mv;
5874 : }
5875 : }
5876 :
5877 : // Restore the reference frames.
5878 0 : pd->pre[ref_idx] = backup_pred;
5879 : } else {
5880 0 : cpi->find_fractional_mv_step(
5881 : x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
5882 0 : &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
5883 : cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
5884 : x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
5885 : #if CONFIG_EXT_INTER
5886 : NULL, 0, 0,
5887 : #endif
5888 : 0, 0, 0);
5889 : }
5890 : #if CONFIG_MOTION_VAR
5891 0 : break;
5892 : case OBMC_CAUSAL:
5893 0 : av1_find_best_obmc_sub_pixel_tree_up(
5894 : cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
5895 0 : cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
5896 : cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
5897 : x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
5898 : cpi->sf.use_upsampled_references);
5899 0 : break;
5900 : default: assert(0 && "Invalid motion mode!");
5901 : }
5902 : #endif // CONFIG_MOTION_VAR
5903 : }
5904 0 : *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
5905 : x->mvcost, MV_COST_WEIGHT);
5906 :
5907 : #if CONFIG_MOTION_VAR
5908 0 : if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
5909 : #else
5910 : if (cpi->sf.adaptive_motion_search)
5911 : #endif // CONFIG_MOTION_VAR
5912 0 : x->pred_mv[ref] = x->best_mv.as_mv;
5913 :
5914 0 : if (scaled_ref_frame) {
5915 : int i;
5916 0 : for (i = 0; i < MAX_MB_PLANE; i++)
5917 0 : xd->plane[i].pre[ref_idx] = backup_yv12[i];
5918 : }
5919 : }
5920 :
5921 0 : static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) {
5922 : int i;
5923 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
5924 0 : xd->plane[i].dst.buf = dst.plane[i];
5925 0 : xd->plane[i].dst.stride = dst.stride[i];
5926 : }
5927 0 : }
5928 :
5929 : #if CONFIG_EXT_INTER
5930 0 : static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
5931 : BLOCK_SIZE bsize, const MV *other_mv,
5932 : int mi_row, int mi_col, const int block,
5933 : int ref_idx, uint8_t *second_pred) {
5934 0 : const AV1_COMMON *const cm = &cpi->common;
5935 0 : const int pw = block_size_wide[bsize];
5936 0 : const int ph = block_size_high[bsize];
5937 0 : MACROBLOCKD *xd = &x->e_mbd;
5938 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
5939 0 : const int other_ref = mbmi->ref_frame[!ref_idx];
5940 : #if CONFIG_DUAL_FILTER
5941 0 : InterpFilter interp_filter[2] = {
5942 0 : (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0],
5943 0 : (ref_idx == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1]
5944 : };
5945 : #else
5946 : const InterpFilter interp_filter = mbmi->interp_filter;
5947 : #endif // CONFIG_DUAL_FILTER
5948 : struct scale_factors sf;
5949 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5950 0 : struct macroblockd_plane *const pd = &xd->plane[0];
5951 : // ic and ir are the 4x4 coordinates of the sub8x8 block at index "block"
5952 0 : const int ic = block & 1;
5953 0 : const int ir = (block - ic) >> 1;
5954 0 : const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
5955 0 : const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
5956 : #if CONFIG_GLOBAL_MOTION
5957 0 : WarpedMotionParams *const wm = &xd->global_motion[other_ref];
5958 0 : int is_global = is_global_mv_block(xd->mi[0], block, wm->wmtype);
5959 : #endif // CONFIG_GLOBAL_MOTION
5960 : #else
5961 : (void)block;
5962 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5963 :
5964 : // This function should only ever be called for compound modes
5965 0 : assert(has_second_ref(mbmi));
5966 :
5967 : struct buf_2d backup_yv12[MAX_MB_PLANE];
5968 0 : const YV12_BUFFER_CONFIG *const scaled_ref_frame =
5969 : av1_get_scaled_ref_frame(cpi, other_ref);
5970 :
5971 0 : if (scaled_ref_frame) {
5972 : int i;
5973 : // Swap out the reference frame for a version that's been scaled to
5974 : // match the resolution of the current frame, allowing the existing
5975 : // motion search code to be used without additional modifications.
5976 0 : for (i = 0; i < MAX_MB_PLANE; i++)
5977 0 : backup_yv12[i] = xd->plane[i].pre[!ref_idx];
5978 0 : av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
5979 : }
5980 :
5981 : // Since we have scaled the reference frames to match the size of the current
5982 : // frame we must use a unit scaling factor during mode selection.
5983 : #if CONFIG_HIGHBITDEPTH
5984 0 : av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5985 : cm->height, cm->use_highbitdepth);
5986 : #else
5987 : av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5988 : cm->height);
5989 : #endif // CONFIG_HIGHBITDEPTH
5990 :
5991 : struct buf_2d ref_yv12;
5992 :
5993 0 : const int plane = 0;
5994 0 : ConvolveParams conv_params = get_conv_params(0, plane);
5995 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
5996 : WarpTypesAllowed warp_types;
5997 : #if CONFIG_GLOBAL_MOTION
5998 0 : warp_types.global_warp_allowed = is_global;
5999 : #endif // CONFIG_GLOBAL_MOTION
6000 : #if CONFIG_WARPED_MOTION
6001 0 : warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
6002 : #endif // CONFIG_WARPED_MOTION
6003 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6004 :
6005 : // Initialized here because of compiler problem in Visual Studio.
6006 0 : ref_yv12 = xd->plane[plane].pre[!ref_idx];
6007 :
6008 : // Get the prediction block from the 'other' reference frame.
6009 : #if CONFIG_HIGHBITDEPTH
6010 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6011 0 : av1_highbd_build_inter_predictor(
6012 0 : ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
6013 : 0, interp_filter,
6014 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6015 : &warp_types, p_col, p_row,
6016 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6017 : plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
6018 : } else {
6019 : #endif // CONFIG_HIGHBITDEPTH
6020 0 : av1_build_inter_predictor(
6021 0 : ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
6022 : &conv_params, interp_filter,
6023 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6024 : &warp_types, p_col, p_row, plane, !ref_idx,
6025 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
6026 : MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
6027 : #if CONFIG_HIGHBITDEPTH
6028 : }
6029 : #endif // CONFIG_HIGHBITDEPTH
6030 :
6031 0 : if (scaled_ref_frame) {
6032 : // Restore the prediction frame pointers to their unscaled versions.
6033 : int i;
6034 0 : for (i = 0; i < MAX_MB_PLANE; i++)
6035 0 : xd->plane[i].pre[!ref_idx] = backup_yv12[i];
6036 : }
6037 0 : }
6038 :
6039 : // Search for the best mv for one component of a compound,
6040 : // given that the other component is fixed.
6041 0 : static void compound_single_motion_search(
6042 : const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv,
6043 : int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask,
6044 : int mask_stride, int *rate_mv, const int block, int ref_idx) {
6045 0 : const int pw = block_size_wide[bsize];
6046 0 : const int ph = block_size_high[bsize];
6047 0 : MACROBLOCKD *xd = &x->e_mbd;
6048 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6049 0 : const int ref = mbmi->ref_frame[ref_idx];
6050 0 : int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
6051 0 : struct macroblockd_plane *const pd = &xd->plane[0];
6052 :
6053 : struct buf_2d backup_yv12[MAX_MB_PLANE];
6054 0 : const YV12_BUFFER_CONFIG *const scaled_ref_frame =
6055 : av1_get_scaled_ref_frame(cpi, ref);
6056 :
6057 : // Check that this is either an interinter or an interintra block
6058 0 : assert(has_second_ref(mbmi) ||
6059 : (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
6060 :
6061 0 : if (scaled_ref_frame) {
6062 : int i;
6063 : // Swap out the reference frame for a version that's been scaled to
6064 : // match the resolution of the current frame, allowing the existing
6065 : // motion search code to be used without additional modifications.
6066 0 : for (i = 0; i < MAX_MB_PLANE; i++)
6067 0 : backup_yv12[i] = xd->plane[i].pre[ref_idx];
6068 0 : av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
6069 : }
6070 :
6071 : struct buf_2d orig_yv12;
6072 0 : int bestsme = INT_MAX;
6073 0 : int sadpb = x->sadperbit16;
6074 0 : MV *const best_mv = &x->best_mv.as_mv;
6075 0 : int search_range = 3;
6076 :
6077 0 : MvLimits tmp_mv_limits = x->mv_limits;
6078 :
6079 : // Initialized here because of compiler problem in Visual Studio.
6080 0 : if (ref_idx) {
6081 0 : orig_yv12 = pd->pre[0];
6082 0 : pd->pre[0] = pd->pre[ref_idx];
6083 : }
6084 :
6085 : // Do compound motion search on the current reference frame.
6086 0 : av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);
6087 :
6088 : // Use the mv result from the single mode as mv predictor.
6089 0 : *best_mv = *this_mv;
6090 :
6091 0 : best_mv->col >>= 3;
6092 0 : best_mv->row >>= 3;
6093 :
6094 0 : av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
6095 :
6096 : // Small-range full-pixel motion search.
6097 0 : bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
6098 0 : &cpi->fn_ptr[bsize], mask, mask_stride,
6099 : ref_idx, &ref_mv.as_mv, second_pred);
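     : // With search_range == 3, this refinement moves best_mv by at most
     : // +/-3 full pels around the single-prediction result copied in above.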
6100 0 : if (bestsme < INT_MAX) {
6101 0 : if (mask)
6102 0 : bestsme =
6103 0 : av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
6104 0 : mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
6105 : else
6106 0 : bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
6107 0 : &cpi->fn_ptr[bsize], 1);
6108 : }
6109 :
6110 0 : x->mv_limits = tmp_mv_limits;
6111 :
6112 0 : if (bestsme < INT_MAX) {
6113 : int dis; /* TODO: use dis in distortion calculation later. */
6114 : unsigned int sse;
6115 0 : if (cpi->sf.use_upsampled_references) {
6116 : // Use up-sampled reference frames.
6117 0 : struct buf_2d backup_pred = pd->pre[0];
6118 0 : const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
6119 :
6120 : // Set pred for Y plane
6121 0 : setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
6122 : upsampled_ref->y_crop_width,
6123 : upsampled_ref->y_crop_height, upsampled_ref->y_stride,
6124 : (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
6125 : pd->subsampling_y);
6126 :
6127 : // If bsize < BLOCK_8X8, adjust pred pointer for this block
6128 : #if !CONFIG_CB4X4
6129 : if (bsize < BLOCK_8X8)
6130 : pd->pre[0].buf =
6131 : &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
6132 : pd->pre[0].stride))
6133 : << 3];
6134 : #endif // !CONFIG_CB4X4
6135 :
6136 0 : bestsme = cpi->find_fractional_mv_step(
6137 : x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
6138 0 : &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
6139 : x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
6140 : mask_stride, ref_idx, pw, ph, 1);
6141 :
6142 : // Restore the reference frames.
6143 0 : pd->pre[0] = backup_pred;
6144 : } else {
6145 : (void)block;
6146 0 : bestsme = cpi->find_fractional_mv_step(
6147 : x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
6148 0 : &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
6149 : x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
6150 : mask_stride, ref_idx, pw, ph, 0);
6151 : }
6152 : }
6153 :
6154 : // Restore the pointer to the first (possibly scaled) prediction buffer.
6155 0 : if (ref_idx) pd->pre[0] = orig_yv12;
6156 :
6157 0 : if (bestsme < INT_MAX) *this_mv = *best_mv;
6158 :
6159 0 : *rate_mv = 0;
6160 :
6161 0 : if (scaled_ref_frame) {
6162 : // Restore the prediction frame pointers to their unscaled versions.
6163 : int i;
6164 0 : for (i = 0; i < MAX_MB_PLANE; i++)
6165 0 : xd->plane[i].pre[ref_idx] = backup_yv12[i];
6166 : }
6167 :
6168 0 : av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
6169 0 : *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
6170 : x->mvcost, MV_COST_WEIGHT);
6171 0 : }
6172 :
6173 : // Wrapper for compound_single_motion_search, for the common case
6174 : // where the second prediction is also an inter mode.
6175 0 : static void compound_single_motion_search_interinter(
6176 : const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
6177 : int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
6178 : const int block, int ref_idx) {
6179 0 : MACROBLOCKD *xd = &x->e_mbd;
6180 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6181 :
6182 : // This function should only ever be called for compound modes
6183 0 : assert(has_second_ref(mbmi));
6184 :
6185 : // Prediction buffer from second frame.
6186 : #if CONFIG_HIGHBITDEPTH
6187 : DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
6188 : uint8_t *second_pred;
6189 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
6190 0 : second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
6191 : else
6192 0 : second_pred = (uint8_t *)second_pred_alloc_16;
6193 : #else
6194 : DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
6195 : #endif // CONFIG_HIGHBITDEPTH
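     : // Note: for high bit depth the 16-bit storage is aliased through
     : // CONVERT_TO_BYTEPTR() so the same uint8_t * interface can carry
     : // either 8-bit or 16-bit prediction samples.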
6196 :
6197 0 : MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
6198 0 : const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;
6199 :
6200 0 : build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
6201 : ref_idx, second_pred);
6202 :
6203 0 : compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
6204 : second_pred, mask, mask_stride, rate_mv, block,
6205 : ref_idx);
6206 0 : }
6207 :
6208 : #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
6209 0 : static void do_masked_motion_search_indexed(
6210 : const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
6211 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
6212 : int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
6213 : // NOTE: 'which' selects the reference to search: 0 - ref 0 only,
     : // 1 - ref 1 only, 2 - both.
6214 0 : MACROBLOCKD *xd = &x->e_mbd;
6215 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6216 0 : BLOCK_SIZE sb_type = mbmi->sb_type;
6217 : const uint8_t *mask;
6218 0 : const int mask_stride = block_size_wide[bsize];
6219 :
6220 0 : mask = av1_get_compound_type_mask(comp_data, sb_type);
6221 :
6222 : int_mv frame_mv[TOTAL_REFS_PER_FRAME];
6223 0 : MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
6224 : assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4);
6225 :
6226 0 : frame_mv[rf[0]].as_int = cur_mv[0].as_int;
6227 0 : frame_mv[rf[1]].as_int = cur_mv[1].as_int;
6228 0 : if (which == 0 || which == 1) {
6229 0 : compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
6230 : mi_col, mask, mask_stride, rate_mv,
6231 : 0, which);
6232 0 : } else if (which == 2) {
6233 0 : joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
6234 : mask_stride, rate_mv, 0);
6235 : }
6236 0 : tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
6237 0 : tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
6238 0 : }
6239 : #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
6240 : #endif // CONFIG_EXT_INTER
6241 :
6242 : // In some situations we want to discount the apparent cost of a new motion
6243 : // vector. Where there is a subtle motion field, and especially where there
6244 : // is low spatial complexity, it can be hard to cover the cost of a new
6245 : // motion vector in a single block, even if that motion vector reduces
6246 : // distortion. However, once established, that vector may be usable through
6247 : // the nearest and near mv modes to reduce distortion in subsequent blocks
6248 : // and also improve visual quality.
6249 0 : static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
6250 : int_mv this_mv,
6251 : int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
6252 : int ref_frame) {
6253 0 : return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
6254 0 : (this_mv.as_int != 0) &&
6255 0 : ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
6256 0 : (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
6257 0 : ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
6258 0 : (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
6259 : }
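     : // Callers apply the discount by scaling the NEWMV rate, e.g. (as in
     : // handle_newmv() below):
     : //   if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0]))
     : //     *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);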
6260 :
6261 : #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
6262 : #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
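     : // Both margins are in 1/8-pel units: the usable border in whole pixels
     : // (frame border minus the interpolation filter's extension) is shifted
     : // left by 3 to match the 1/8-pel precision of the mb_to_*_edge values
     : // clamped against in clamp_mv2() below.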
6263 :
6264 : // TODO(jingning): this mv clamping function should be block size dependent.
6265 0 : static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
6266 0 : clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
6267 0 : xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
6268 0 : xd->mb_to_top_edge - LEFT_TOP_MARGIN,
6269 0 : xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
6270 0 : }
6271 :
6272 : #if CONFIG_EXT_INTER
6273 : #if CONFIG_WEDGE
6274 0 : static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
6275 : const BLOCK_SIZE bsize, const uint8_t *pred0,
6276 : int stride0, const uint8_t *pred1, int stride1) {
6277 0 : const struct macroblock_plane *const p = &x->plane[0];
6278 0 : const uint8_t *src = p->src.buf;
6279 0 : int src_stride = p->src.stride;
6280 0 : const int f_index = bsize - BLOCK_8X8;
6281 0 : const int bw = block_size_wide[bsize];
6282 0 : const int bh = block_size_high[bsize];
6283 : uint32_t esq[2][4];
6284 : int64_t tl, br;
6285 :
6286 : #if CONFIG_HIGHBITDEPTH
6287 0 : if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6288 0 : pred0 = CONVERT_TO_BYTEPTR(pred0);
6289 0 : pred1 = CONVERT_TO_BYTEPTR(pred1);
6290 : }
6291 : #endif // CONFIG_HIGHBITDEPTH
6292 :
6293 0 : cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
6294 0 : cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
6295 : &esq[0][1]);
6296 0 : cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6297 0 : pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
6298 0 : cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6299 0 : pred0 + bh / 2 * stride0 + bw / 2, stride0,
6300 : &esq[0][3]);
6301 0 : cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
6302 0 : cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
6303 : &esq[1][1]);
6304 0 : cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6305 0 : pred1 + bh / 2 * stride1, stride1, &esq[1][2]);
6306 0 : cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6307 0 : pred1 + bh / 2 * stride1 + bw / 2, stride1,
6308 0 : &esq[1][3]);
6309 :
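     : // Each pred's error is summed over three of the four quadrants; the
     : // two sums telescope to (esq[0][0] - esq[1][0]) + (esq[1][3] - esq[0][3]),
     : // so the result is 1 when p1 fits the top-left quadrant relatively
     : // better and p0 the bottom-right, and 0 otherwise.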
6310 0 : tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
6311 0 : (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
6312 0 : br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
6313 0 : (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
6314 0 : return (tl + br > 0);
6315 : }
6316 : #endif // CONFIG_WEDGE
6317 : #endif // CONFIG_EXT_INTER
6318 :
6319 : #if !CONFIG_DUAL_FILTER
6320 : static InterpFilter predict_interp_filter(
6321 : const AV1_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize,
6322 : const int mi_row, const int mi_col,
6323 : InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME]) {
6324 : InterpFilter best_filter = SWITCHABLE;
6325 : const AV1_COMMON *cm = &cpi->common;
6326 : const MACROBLOCKD *xd = &x->e_mbd;
6327 : int bsl = mi_width_log2_lookup[bsize];
6328 : int pred_filter_search =
6329 : cpi->sf.cb_pred_filter_search
6330 : ? (((mi_row + mi_col) >> bsl) +
6331 : get_chessboard_index(cm->current_video_frame)) &
6332 : 0x1
6333 : : 0;
6334 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6335 : const int is_comp_pred = has_second_ref(mbmi);
6336 : const int this_mode = mbmi->mode;
6337 : int refs[2] = { mbmi->ref_frame[0],
6338 : (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
6339 : if (pred_filter_search) {
6340 : InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
6341 : if (xd->up_available) af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
6342 : if (xd->left_available) lf = xd->mi[-1]->mbmi.interp_filter;
6343 :
6344 : #if CONFIG_EXT_INTER
6345 : if ((this_mode != NEWMV && this_mode != NEW_NEWMV) || (af == lf))
6346 : #else
6347 : if ((this_mode != NEWMV) || (af == lf))
6348 : #endif // CONFIG_EXT_INTER
6349 : best_filter = af;
6350 : }
6351 : if (is_comp_pred) {
6352 : if (cpi->sf.adaptive_mode_search) {
6353 : #if CONFIG_EXT_INTER
6354 : switch (this_mode) {
6355 : case NEAREST_NEARESTMV:
6356 : if (single_filter[NEARESTMV][refs[0]] ==
6357 : single_filter[NEARESTMV][refs[1]])
6358 : best_filter = single_filter[NEARESTMV][refs[0]];
6359 : break;
6360 : case NEAR_NEARMV:
6361 : if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]])
6362 : best_filter = single_filter[NEARMV][refs[0]];
6363 : break;
6364 : case ZERO_ZEROMV:
6365 : if (single_filter[ZEROMV][refs[0]] == single_filter[ZEROMV][refs[1]])
6366 : best_filter = single_filter[ZEROMV][refs[0]];
6367 : break;
6368 : case NEW_NEWMV:
6369 : if (single_filter[NEWMV][refs[0]] == single_filter[NEWMV][refs[1]])
6370 : best_filter = single_filter[NEWMV][refs[0]];
6371 : break;
6372 : case NEAREST_NEWMV:
6373 : if (single_filter[NEARESTMV][refs[0]] ==
6374 : single_filter[NEWMV][refs[1]])
6375 : best_filter = single_filter[NEARESTMV][refs[0]];
6376 : break;
6377 : case NEAR_NEWMV:
6378 : if (single_filter[NEARMV][refs[0]] == single_filter[NEWMV][refs[1]])
6379 : best_filter = single_filter[NEARMV][refs[0]];
6380 : break;
6381 : case NEW_NEARESTMV:
6382 : if (single_filter[NEWMV][refs[0]] ==
6383 : single_filter[NEARESTMV][refs[1]])
6384 : best_filter = single_filter[NEWMV][refs[0]];
6385 : break;
6386 : case NEW_NEARMV:
6387 : if (single_filter[NEWMV][refs[0]] == single_filter[NEARMV][refs[1]])
6388 : best_filter = single_filter[NEWMV][refs[0]];
6389 : break;
6390 : default:
6391 : if (single_filter[this_mode][refs[0]] ==
6392 : single_filter[this_mode][refs[1]])
6393 : best_filter = single_filter[this_mode][refs[0]];
6394 : break;
6395 : }
6396 : #else
6397 : if (single_filter[this_mode][refs[0]] ==
6398 : single_filter[this_mode][refs[1]])
6399 : best_filter = single_filter[this_mode][refs[0]];
6400 : #endif // CONFIG_EXT_INTER
6401 : }
6402 : }
6403 : if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
6404 : best_filter = EIGHTTAP_REGULAR;
6405 : }
6406 : return best_filter;
6407 : }
6408 : #endif // !CONFIG_DUAL_FILTER
6409 :
6410 : #if CONFIG_EXT_INTER
6411 : // Choose the best wedge index and sign
6412 : #if CONFIG_WEDGE
6413 0 : static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
6414 : const BLOCK_SIZE bsize, const uint8_t *const p0,
6415 : const uint8_t *const p1, int *const best_wedge_sign,
6416 : int *const best_wedge_index) {
6417 0 : const MACROBLOCKD *const xd = &x->e_mbd;
6418 0 : const struct buf_2d *const src = &x->plane[0].src;
6419 0 : const int bw = block_size_wide[bsize];
6420 0 : const int bh = block_size_high[bsize];
6421 0 : const int N = bw * bh;
6422 : int rate;
6423 : int64_t dist;
6424 0 : int64_t rd, best_rd = INT64_MAX;
6425 : int wedge_index;
6426 : int wedge_sign;
6427 0 : int wedge_types = (1 << get_wedge_bits_lookup(bsize));
6428 : const uint8_t *mask;
6429 : uint64_t sse;
6430 : #if CONFIG_HIGHBITDEPTH
6431 0 : const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
6432 0 : const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
6433 : #else
6434 : const int bd_round = 0;
6435 : #endif // CONFIG_HIGHBITDEPTH
6436 :
6437 : DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
6438 : DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
6439 : DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
6440 : DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
6441 :
6442 : int64_t sign_limit;
6443 :
6444 : #if CONFIG_HIGHBITDEPTH
6445 0 : if (hbd) {
6446 0 : aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
6447 0 : CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6448 0 : aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
6449 0 : CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
6450 0 : aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
6451 0 : CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6452 : } else // NOLINT
6453 : #endif // CONFIG_HIGHBITDEPTH
6454 : {
6455 0 : aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
6456 0 : aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
6457 0 : aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
6458 : }
6459 :
6460 0 : sign_limit = ((int64_t)aom_sum_squares_i16(r0, N) -
6461 0 : (int64_t)aom_sum_squares_i16(r1, N)) *
6462 : (1 << WEDGE_WEIGHT_BITS) / 2;
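     : // sign_limit is half the total residual-energy difference between the
     : // two single predictions, scaled into the wedge-weight domain; the
     : // sign search below compares the mask-weighted per-pixel difference
     : // of squared residuals (ds) against this threshold to pick the side
     : // of the wedge each predictor covers.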
6463 :
6464 0 : if (N < 64)
6465 0 : av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
6466 : else
6467 0 : av1_wedge_compute_delta_squares(ds, r0, r1, N);
6468 :
6469 0 : for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
6470 0 : mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);
6471 :
6472 : // TODO(jingning): Make sse2 functions support N = 16 case
6473 0 : if (N < 64)
6474 0 : wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
6475 : else
6476 0 : wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
6477 :
6478 0 : mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
6479 0 : if (N < 64)
6480 0 : sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
6481 : else
6482 0 : sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
6483 0 : sse = ROUND_POWER_OF_TWO(sse, bd_round);
6484 :
6485 0 : model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
6486 0 : rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
6487 :
6488 0 : if (rd < best_rd) {
6489 0 : *best_wedge_index = wedge_index;
6490 0 : *best_wedge_sign = wedge_sign;
6491 0 : best_rd = rd;
6492 : }
6493 : }
6494 :
6495 0 : return best_rd;
6496 : }
6497 :
6498 : // Choose the best wedge index for the specified sign
6499 0 : static int64_t pick_wedge_fixed_sign(
6500 : const AV1_COMP *const cpi, const MACROBLOCK *const x,
6501 : const BLOCK_SIZE bsize, const uint8_t *const p0, const uint8_t *const p1,
6502 : const int wedge_sign, int *const best_wedge_index) {
6503 0 : const MACROBLOCKD *const xd = &x->e_mbd;
6504 0 : const struct buf_2d *const src = &x->plane[0].src;
6505 0 : const int bw = block_size_wide[bsize];
6506 0 : const int bh = block_size_high[bsize];
6507 0 : const int N = bw * bh;
6508 : int rate;
6509 : int64_t dist;
6510 0 : int64_t rd, best_rd = INT64_MAX;
6511 : int wedge_index;
6512 0 : int wedge_types = (1 << get_wedge_bits_lookup(bsize));
6513 : const uint8_t *mask;
6514 : uint64_t sse;
6515 : #if CONFIG_HIGHBITDEPTH
6516 0 : const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
6517 0 : const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
6518 : #else
6519 : const int bd_round = 0;
6520 : #endif // CONFIG_HIGHBITDEPTH
6521 :
6522 : DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
6523 : DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
6524 :
6525 : #if CONFIG_HIGHBITDEPTH
6526 0 : if (hbd) {
6527 0 : aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
6528 0 : CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
6529 0 : aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
6530 0 : CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6531 : } else // NOLINT
6532 : #endif // CONFIG_HIGHBITDEPTH
6533 : {
6534 0 : aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
6535 0 : aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
6536 : }
6537 :
6538 0 : for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
6539 0 : mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
6540 0 : if (N < 64)
6541 0 : sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
6542 : else
6543 0 : sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
6544 0 : sse = ROUND_POWER_OF_TWO(sse, bd_round);
6545 :
6546 0 : model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
6547 0 : rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
6548 :
6549 0 : if (rd < best_rd) {
6550 0 : *best_wedge_index = wedge_index;
6551 0 : best_rd = rd;
6552 : }
6553 : }
6554 :
6555 0 : return best_rd;
6556 : }
6557 :
6558 0 : static int64_t pick_interinter_wedge(const AV1_COMP *const cpi,
6559 : MACROBLOCK *const x,
6560 : const BLOCK_SIZE bsize,
6561 : const uint8_t *const p0,
6562 : const uint8_t *const p1) {
6563 0 : MACROBLOCKD *const xd = &x->e_mbd;
6564 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6565 0 : const int bw = block_size_wide[bsize];
6566 :
6567 : int64_t rd;
6568 0 : int wedge_index = -1;
6569 0 : int wedge_sign = 0;
6570 :
6571 0 : assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
6572 0 : assert(cpi->common.allow_masked_compound);
6573 :
6574 0 : if (cpi->sf.fast_wedge_sign_estimate) {
6575 0 : wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
6576 0 : rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, wedge_sign, &wedge_index);
6577 : } else {
6578 0 : rd = pick_wedge(cpi, x, bsize, p0, p1, &wedge_sign, &wedge_index);
6579 : }
6580 :
6581 0 : mbmi->wedge_sign = wedge_sign;
6582 0 : mbmi->wedge_index = wedge_index;
6583 0 : return rd;
6584 : }
6585 : #endif // CONFIG_WEDGE
6586 :
6587 : #if CONFIG_COMPOUND_SEGMENT
6588 0 : static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
6589 : MACROBLOCK *const x, const BLOCK_SIZE bsize,
6590 : const uint8_t *const p0,
6591 : const uint8_t *const p1) {
6592 0 : MACROBLOCKD *const xd = &x->e_mbd;
6593 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6594 0 : const struct buf_2d *const src = &x->plane[0].src;
6595 0 : const int bw = block_size_wide[bsize];
6596 0 : const int bh = block_size_high[bsize];
6597 0 : const int N = bw * bh;
6598 : int rate;
6599 : uint64_t sse;
6600 : int64_t dist;
6601 : int64_t rd0;
6602 : SEG_MASK_TYPE cur_mask_type;
6603 0 : int64_t best_rd = INT64_MAX;
6604 0 : SEG_MASK_TYPE best_mask_type = 0;
6605 : #if CONFIG_HIGHBITDEPTH
6606 0 : const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
6607 0 : const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
6608 : #else
6609 : const int bd_round = 0;
6610 : #endif // CONFIG_HIGHBITDEPTH
6611 : DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
6612 : DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
6613 : DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
6614 :
6615 : #if CONFIG_HIGHBITDEPTH
6616 0 : if (hbd) {
6617 0 : aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
6618 0 : CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6619 0 : aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
6620 0 : CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
6621 0 : aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
6622 0 : CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6623 : } else // NOLINT
6624 : #endif // CONFIG_HIGHBITDEPTH
6625 : {
6626 0 : aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
6627 0 : aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
6628 0 : aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
6629 : }
6630 :
6631 : // try each mask type and its inverse
6632 0 : for (cur_mask_type = 0; cur_mask_type < SEG_MASK_TYPES; cur_mask_type++) {
6633 : // build mask and inverse
6634 : #if CONFIG_HIGHBITDEPTH
6635 0 : if (hbd)
6636 0 : build_compound_seg_mask_highbd(
6637 0 : xd->seg_mask, cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
6638 0 : CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
6639 : else
6640 : #endif // CONFIG_HIGHBITDEPTH
6641 0 : build_compound_seg_mask(xd->seg_mask, cur_mask_type, p0, bw, p1, bw,
6642 : bsize, bh, bw);
6643 :
6644 : // compute rd for mask
6645 0 : sse = av1_wedge_sse_from_residuals(r1, d10, xd->seg_mask, N);
6646 0 : sse = ROUND_POWER_OF_TWO(sse, bd_round);
6647 :
6648 0 : model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
6649 0 : rd0 = RDCOST(x->rdmult, x->rddiv, rate, dist);
6650 :
6651 0 : if (rd0 < best_rd) {
6652 0 : best_mask_type = cur_mask_type;
6653 0 : best_rd = rd0;
6654 : }
6655 : }
6656 :
6657 : // make final mask
6658 0 : mbmi->mask_type = best_mask_type;
6659 : #if CONFIG_HIGHBITDEPTH
6660 0 : if (hbd)
6661 0 : build_compound_seg_mask_highbd(
6662 0 : xd->seg_mask, mbmi->mask_type, CONVERT_TO_BYTEPTR(p0), bw,
6663 0 : CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
6664 : else
6665 : #endif // CONFIG_HIGHBITDEPTH
6666 0 : build_compound_seg_mask(xd->seg_mask, mbmi->mask_type, p0, bw, p1, bw,
6667 : bsize, bh, bw);
6668 :
6669 0 : return best_rd;
6670 : }
6671 : #endif // CONFIG_COMPOUND_SEGMENT
6672 :
6673 : #if CONFIG_WEDGE && CONFIG_INTERINTRA
6674 0 : static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
6675 : const MACROBLOCK *const x,
6676 : const BLOCK_SIZE bsize,
6677 : const uint8_t *const p0,
6678 : const uint8_t *const p1) {
6679 0 : const MACROBLOCKD *const xd = &x->e_mbd;
6680 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6681 :
6682 : int64_t rd;
6683 0 : int wedge_index = -1;
6684 :
6685 0 : assert(is_interintra_wedge_used(bsize));
6686 0 : assert(cpi->common.allow_interintra_compound);
6687 :
6688 0 : rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index);
6689 :
6690 0 : mbmi->interintra_wedge_sign = 0;
6691 0 : mbmi->interintra_wedge_index = wedge_index;
6692 0 : return rd;
6693 : }
6694 : #endif // CONFIG_WEDGE && CONFIG_INTERINTRA
6695 :
6696 : #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
6697 0 : static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
6698 : const BLOCK_SIZE bsize,
6699 : const uint8_t *const p0,
6700 : const uint8_t *const p1) {
6701 0 : const COMPOUND_TYPE compound_type =
6702 0 : x->e_mbd.mi[0]->mbmi.interinter_compound_type;
6703 0 : switch (compound_type) {
6704 : #if CONFIG_WEDGE
6705 0 : case COMPOUND_WEDGE: return pick_interinter_wedge(cpi, x, bsize, p0, p1);
6706 : #endif // CONFIG_WEDGE
6707 : #if CONFIG_COMPOUND_SEGMENT
6708 0 : case COMPOUND_SEG: return pick_interinter_seg(cpi, x, bsize, p0, p1);
6709 : #endif // CONFIG_COMPOUND_SEGMENT
6710 0 : default: assert(0); return 0;
6711 : }
6712 : }
6713 :
6714 0 : static int interinter_compound_motion_search(
6715 : const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
6716 : const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) {
6717 0 : MACROBLOCKD *const xd = &x->e_mbd;
6718 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6719 : int_mv tmp_mv[2];
6720 0 : int tmp_rate_mv = 0;
6721 0 : const INTERINTER_COMPOUND_DATA compound_data = {
6722 : #if CONFIG_WEDGE
6723 0 : mbmi->wedge_index,
6724 0 : mbmi->wedge_sign,
6725 : #endif // CONFIG_WEDGE
6726 : #if CONFIG_COMPOUND_SEGMENT
6727 0 : mbmi->mask_type,
6728 0 : xd->seg_mask,
6729 : #endif // CONFIG_COMPOUND_SEGMENT
6730 0 : mbmi->interinter_compound_type
6731 : };
6732 0 : if (this_mode == NEW_NEWMV) {
6733 0 : do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
6734 : mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
6735 0 : mbmi->mv[0].as_int = tmp_mv[0].as_int;
6736 0 : mbmi->mv[1].as_int = tmp_mv[1].as_int;
6737 0 : } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
6738 0 : do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
6739 : mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
6740 0 : mbmi->mv[0].as_int = tmp_mv[0].as_int;
6741 0 : } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
6742 0 : do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
6743 : mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
6744 0 : mbmi->mv[1].as_int = tmp_mv[1].as_int;
6745 : }
6746 0 : return tmp_rate_mv;
6747 : }
6748 :
6749 0 : static int64_t build_and_cost_compound_type(
6750 : const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
6751 : const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv,
6752 : BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1,
6753 : int *strides, int mi_row, int mi_col) {
6754 0 : const AV1_COMMON *const cm = &cpi->common;
6755 0 : MACROBLOCKD *xd = &x->e_mbd;
6756 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6757 : int rate_sum;
6758 : int64_t dist_sum;
6759 0 : int64_t best_rd_cur = INT64_MAX;
6760 0 : int64_t rd = INT64_MAX;
6761 : int tmp_skip_txfm_sb;
6762 : int64_t tmp_skip_sse_sb;
6763 0 : const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;
6764 :
6765 0 : best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
6766 0 : best_rd_cur += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0);
6767 :
6768 0 : if (have_newmv_in_inter_mode(this_mode) &&
6769 0 : use_masked_motion_search(compound_type)) {
6770 0 : *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
6771 : this_mode, mi_row, mi_col);
6772 0 : av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
6773 0 : model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
6774 : &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
6775 0 : rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
6776 0 : if (rd >= best_rd_cur) {
6777 0 : mbmi->mv[0].as_int = cur_mv[0].as_int;
6778 0 : mbmi->mv[1].as_int = cur_mv[1].as_int;
6779 0 : *out_rate_mv = rate_mv;
6780 0 : av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
6781 : #if CONFIG_SUPERTX
6782 : 0, 0,
6783 : #endif // CONFIG_SUPERTX
6784 : preds0, strides, preds1,
6785 : strides);
6786 : }
6787 0 : av1_subtract_plane(x, bsize, 0);
6788 0 : rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
6789 : &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
6790 0 : if (rd != INT64_MAX)
6791 0 : rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
6792 0 : best_rd_cur = rd;
6793 :
6794 : } else {
6795 0 : av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
6796 : #if CONFIG_SUPERTX
6797 : 0, 0,
6798 : #endif // CONFIG_SUPERTX
6799 : preds0, strides, preds1, strides);
6800 0 : av1_subtract_plane(x, bsize, 0);
6801 0 : rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
6802 : &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
6803 0 : if (rd != INT64_MAX)
6804 0 : rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
6805 0 : best_rd_cur = rd;
6806 : }
6807 0 : return best_rd_cur;
6808 : }
6809 : #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
6810 : #endif // CONFIG_EXT_INTER
6811 :
6812 : typedef struct {
6813 : #if CONFIG_MOTION_VAR
6814 : // Inter prediction buffers and respective strides
6815 : uint8_t *above_pred_buf[MAX_MB_PLANE];
6816 : int above_pred_stride[MAX_MB_PLANE];
6817 : uint8_t *left_pred_buf[MAX_MB_PLANE];
6818 : int left_pred_stride[MAX_MB_PLANE];
6819 : #endif // CONFIG_MOTION_VAR
6820 : int_mv *single_newmv;
6821 : #if CONFIG_EXT_INTER
6822 : // Pointer to array of motion vectors to use for each ref and their rates
6823 : // Should point to first of 2 arrays in 2D array
6824 : int *single_newmv_rate;
6825 : // Pointer to array of predicted rate-distortion
6826 : // Should point to first of 2 arrays in 2D array
6827 : int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
6828 : #endif // CONFIG_EXT_INTER
6829 : InterpFilter single_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
6830 : } HandleInterModeArgs;
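     : // A single HandleInterModeArgs instance is shared across the per-mode
     : // handlers, so the NEWMV results (mv and rate) found while evaluating
     : // single-reference modes can be reused later by compound and
     : // interintra modes (see handle_newmv() below).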
6831 :
6832 0 : static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
6833 : const BLOCK_SIZE bsize,
6834 : int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
6835 : const int mi_row, const int mi_col,
6836 : int *const rate_mv, int_mv *const single_newmv,
6837 : HandleInterModeArgs *const args) {
6838 0 : const MACROBLOCKD *const xd = &x->e_mbd;
6839 0 : const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6840 0 : const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
6841 0 : const int is_comp_pred = has_second_ref(mbmi);
6842 0 : const PREDICTION_MODE this_mode = mbmi->mode;
6843 : #if CONFIG_EXT_INTER
6844 0 : const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
6845 : #endif // CONFIG_EXT_INTER
6846 0 : int_mv *const frame_mv = mode_mv[this_mode];
6847 0 : const int refs[2] = { mbmi->ref_frame[0],
6848 0 : mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
6849 : int i;
6850 :
6851 : (void)args;
6852 :
6853 0 : if (is_comp_pred) {
6854 : #if CONFIG_EXT_INTER
6855 0 : for (i = 0; i < 2; ++i) {
6856 0 : single_newmv[refs[i]].as_int = args->single_newmv[refs[i]].as_int;
6857 : }
6858 :
6859 0 : if (this_mode == NEW_NEWMV) {
6860 0 : frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
6861 0 : frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
6862 :
6863 0 : if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
6864 0 : joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
6865 : 0, rate_mv, 0);
6866 : } else {
6867 0 : *rate_mv = 0;
6868 0 : for (i = 0; i < 2; ++i) {
6869 0 : av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
6870 0 : *rate_mv += av1_mv_bit_cost(
6871 0 : &frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
6872 0 : x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6873 : }
6874 : }
6875 0 : } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
6876 0 : frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
6877 0 : if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
6878 0 : frame_mv[refs[0]].as_int =
6879 0 : mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
6880 0 : compound_single_motion_search_interinter(
6881 : cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
6882 : } else {
6883 0 : av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
6884 0 : *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
6885 0 : &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
6886 0 : x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6887 : }
6888 : } else {
6889 0 : assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
6890 0 : frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
6891 0 : if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
6892 0 : frame_mv[refs[1]].as_int =
6893 0 : mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
6894 0 : compound_single_motion_search_interinter(
6895 : cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
6896 : } else {
6897 0 : av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
6898 0 : *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
6899 0 : &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
6900 0 : x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6901 : }
6902 : }
6903 : #else
6904 : // Initialize mv using single prediction mode result.
6905 : frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
6906 : frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
6907 :
6908 : if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
6909 : joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, rate_mv, 0);
6910 : } else {
6911 : *rate_mv = 0;
6912 : for (i = 0; i < 2; ++i) {
6913 : av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
6914 : *rate_mv += av1_mv_bit_cost(&frame_mv[refs[i]].as_mv,
6915 : &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
6916 : x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
6917 : }
6918 : }
6919 : #endif // CONFIG_EXT_INTER
6920 : } else {
6921 : #if CONFIG_EXT_INTER
6922 0 : if (is_comp_interintra_pred) {
6923 0 : x->best_mv = args->single_newmv[refs[0]];
6924 0 : *rate_mv = args->single_newmv_rate[refs[0]];
6925 : } else {
6926 0 : single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
6927 0 : args->single_newmv[refs[0]] = x->best_mv;
6928 0 : args->single_newmv_rate[refs[0]] = *rate_mv;
6929 : }
6930 : #else
6931 : single_motion_search(cpi, x, bsize, mi_row, mi_col, rate_mv);
6932 : single_newmv[refs[0]] = x->best_mv;
6933 : #endif // CONFIG_EXT_INTER
6934 :
6935 0 : if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
6936 :
6937 0 : frame_mv[refs[0]] = x->best_mv;
6938 0 : xd->mi[0]->bmi[0].as_mv[0] = x->best_mv;
6939 :
6940 : // Estimate the rate implications of a new mv but discount this
6941 : // under certain circumstances where we want to help initiate a weak
6942 : // motion field, where the distortion gain for a single block may not
6943 : // be enough to overcome the cost of a new mv.
6944 0 : if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0])) {
6945 0 : *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
6946 : }
6947 : }
6948 :
6949 0 : return 0;
6950 : }
6951 :
6952 0 : int64_t interpolation_filter_search(
6953 : MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
6954 : int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
6955 : BUFFER_SET *const orig_dst,
6956 : InterpFilter (*const single_filter)[TOTAL_REFS_PER_FRAME],
6957 : int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
6958 : int64_t *const skip_sse_sb) {
6959 0 : const AV1_COMMON *cm = &cpi->common;
6960 0 : MACROBLOCKD *const xd = &x->e_mbd;
6961 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
6962 : int i;
6963 : int tmp_rate;
6964 : int64_t tmp_dist;
6965 :
6966 : (void)single_filter;
6967 :
6968 0 : InterpFilter assign_filter = SWITCHABLE;
6969 :
6970 0 : if (cm->interp_filter == SWITCHABLE) {
6971 : #if !CONFIG_DUAL_FILTER
6972 : assign_filter = av1_is_interp_needed(xd)
6973 : ? predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
6974 : single_filter)
6975 : : cm->interp_filter;
6976 : #endif // !CONFIG_DUAL_FILTER
6977 : } else {
6978 0 : assign_filter = cm->interp_filter;
6979 : }
6980 :
6981 0 : set_default_interp_filters(mbmi, assign_filter);
6982 :
6983 0 : *switchable_rate = av1_get_switchable_rate(cpi, xd);
6984 0 : av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
6985 0 : model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
6986 : skip_txfm_sb, skip_sse_sb);
6987 0 : *rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist);
6988 :
6989 0 : if (assign_filter == SWITCHABLE) {
6990 : // do interp_filter search
6991 0 : if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd)) {
6992 : #if CONFIG_DUAL_FILTER
6993 0 : const int filter_set_size = DUAL_FILTER_SET_SIZE;
6994 : #else
6995 : const int filter_set_size = SWITCHABLE_FILTERS;
6996 : #endif // CONFIG_DUAL_FILTER
6997 0 : int best_in_temp = 0;
6998 : #if CONFIG_DUAL_FILTER
6999 : InterpFilter best_filter[4];
7000 0 : av1_copy(best_filter, mbmi->interp_filter);
7001 : #else
7002 : InterpFilter best_filter = mbmi->interp_filter;
7003 : #endif // CONFIG_DUAL_FILTER
7004 0 : restore_dst_buf(xd, *tmp_dst);
7005 : // The EIGHTTAP_REGULAR combination (i == 0) was already evaluated
     : // above, so start at i = 1.
7006 0 : for (i = 1; i < filter_set_size; ++i) {
7007 0 : int tmp_skip_sb = 0;
7008 0 : int64_t tmp_skip_sse = INT64_MAX;
7009 : int tmp_rs;
7010 : int64_t tmp_rd;
7011 : #if CONFIG_DUAL_FILTER
7012 0 : mbmi->interp_filter[0] = filter_sets[i][0];
7013 0 : mbmi->interp_filter[1] = filter_sets[i][1];
7014 0 : mbmi->interp_filter[2] = filter_sets[i][0];
7015 0 : mbmi->interp_filter[3] = filter_sets[i][1];
7016 : #else
7017 : mbmi->interp_filter = (InterpFilter)i;
7018 : #endif // CONFIG_DUAL_FILTER
7019 0 : tmp_rs = av1_get_switchable_rate(cpi, xd);
7020 0 : av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
7021 0 : model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7022 : &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
7023 0 : tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist);
7024 :
7025 0 : if (tmp_rd < *rd) {
7026 0 : *rd = tmp_rd;
7027 0 : *switchable_rate = av1_get_switchable_rate(cpi, xd);
7028 : #if CONFIG_DUAL_FILTER
7029 0 : av1_copy(best_filter, mbmi->interp_filter);
7030 : #else
7031 : best_filter = mbmi->interp_filter;
7032 : #endif // CONFIG_DUAL_FILTER
7033 0 : *skip_txfm_sb = tmp_skip_sb;
7034 0 : *skip_sse_sb = tmp_skip_sse;
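     : // Ping-pong between tmp_dst and orig_dst: the buffer holding the best
     : // prediction so far stays intact while the next candidate filter
     : // writes into the other one; restore_dst_buf() just swaps pointers.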
7035 0 : best_in_temp = !best_in_temp;
7036 0 : if (best_in_temp) {
7037 0 : restore_dst_buf(xd, *orig_dst);
7038 : } else {
7039 0 : restore_dst_buf(xd, *tmp_dst);
7040 : }
7041 : }
7042 : }
7043 0 : if (best_in_temp) {
7044 0 : restore_dst_buf(xd, *tmp_dst);
7045 : } else {
7046 0 : restore_dst_buf(xd, *orig_dst);
7047 : }
7048 : #if CONFIG_DUAL_FILTER
7049 0 : av1_copy(mbmi->interp_filter, best_filter);
7050 : #else
7051 : mbmi->interp_filter = best_filter;
7052 : #endif // CONFIG_DUAL_FILTER
7053 : } else {
7054 : #if CONFIG_DUAL_FILTER
7055 0 : for (i = 0; i < 4; ++i)
7056 0 : assert(mbmi->interp_filter[i] == EIGHTTAP_REGULAR);
7057 : #else
7058 : assert(mbmi->interp_filter == EIGHTTAP_REGULAR);
7059 : #endif // CONFIG_DUAL_FILTER
7060 : }
7061 : }
7062 :
7063 0 : return 0;
7064 : }
7065 :
7066 : // TODO(afergs): Refactor the MBMI references in here - there are four
7067 : // TODO(afergs): Refactor optional args - add them to a struct or remove
7068 0 : static int64_t motion_mode_rd(
7069 : const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
7070 : RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
7071 : int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
7072 : int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
7073 : const int *refs, int rate_mv,
7074 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7075 : int_mv *const single_newmv,
7076 : #if CONFIG_EXT_INTER
7077 : int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi,
7078 : #if CONFIG_MOTION_VAR
7079 : int rate_mv_bmc,
7080 : #endif // CONFIG_MOTION_VAR
7081 : #endif // CONFIG_EXT_INTER
7082 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7083 : int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
7084 0 : const AV1_COMMON *const cm = &cpi->common;
7085 0 : MACROBLOCKD *xd = &x->e_mbd;
7086 0 : MODE_INFO *mi = xd->mi[0];
7087 0 : MB_MODE_INFO *mbmi = &mi->mbmi;
7088 0 : const int is_comp_pred = has_second_ref(mbmi);
7089 0 : const PREDICTION_MODE this_mode = mbmi->mode;
7090 :
7091 : (void)mode_mv;
7092 : (void)mi_row;
7093 : (void)mi_col;
7094 : (void)args;
7095 : (void)refs;
7096 : (void)rate_mv;
7097 : (void)is_comp_pred;
7098 : (void)this_mode;
7099 :
7100 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7101 : MOTION_MODE motion_mode, last_motion_mode_allowed;
7102 0 : int rate2_nocoeff = 0, best_xskip, best_disable_skip = 0;
7103 : RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
7104 : MB_MODE_INFO base_mbmi, best_mbmi;
7105 : #if CONFIG_VAR_TX
7106 : uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
7107 : #endif // CONFIG_VAR_TX
7108 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7109 :
7110 : #if CONFIG_WARPED_MOTION
7111 : int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
7112 : #endif // CONFIG_WARPED_MOTION
7113 :
7114 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7115 0 : av1_invalid_rd_stats(&best_rd_stats);
7116 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7117 :
7118 0 : if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
7119 : #if CONFIG_WARPED_MOTION
7120 0 : aom_clear_system_state();
7121 0 : mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
7122 : #if CONFIG_EXT_INTER
7123 0 : best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
7124 : #endif // CONFIG_EXT_INTER
7125 : #endif // CONFIG_WARPED_MOTION
7126 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7127 0 : rate2_nocoeff = rd_stats->rate;
7128 0 : last_motion_mode_allowed = motion_mode_allowed(
7129 : #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
7130 0 : 0, xd->global_motion,
7131 : #endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
7132 : mi);
7133 0 : base_mbmi = *mbmi;
7134 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7135 :
7136 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7137 0 : int64_t best_rd = INT64_MAX;
7138 0 : for (motion_mode = SIMPLE_TRANSLATION;
7139 0 : motion_mode <= last_motion_mode_allowed; motion_mode++) {
7140 0 : int64_t tmp_rd = INT64_MAX;
7141 : int tmp_rate;
7142 : int64_t tmp_dist;
7143 : #if CONFIG_EXT_INTER
7144 0 : int tmp_rate2 =
7145 0 : motion_mode != SIMPLE_TRANSLATION ? rate2_bmc_nocoeff : rate2_nocoeff;
7146 : #else
7147 : int tmp_rate2 = rate2_nocoeff;
7148 : #endif // CONFIG_EXT_INTER
7149 :
7150 0 : *mbmi = base_mbmi;
7151 0 : mbmi->motion_mode = motion_mode;
7152 : #if CONFIG_MOTION_VAR
7153 0 : if (mbmi->motion_mode == OBMC_CAUSAL) {
7154 : #if CONFIG_EXT_INTER
7155 0 : *mbmi = *best_bmc_mbmi;
7156 0 : mbmi->motion_mode = OBMC_CAUSAL;
7157 : #endif // CONFIG_EXT_INTER
7158 0 : if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
7159 0 : int tmp_rate_mv = 0;
7160 :
7161 0 : single_motion_search(cpi, x, bsize, mi_row, mi_col,
7162 : #if CONFIG_EXT_INTER
7163 : 0,
7164 : #endif // CONFIG_EXT_INTER
7165 : &tmp_rate_mv);
7166 0 : mbmi->mv[0].as_int = x->best_mv.as_int;
7167 0 : if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7168 : refs[0])) {
7169 0 : tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
7170 : }
7171 : #if CONFIG_EXT_INTER
7172 0 : tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
7173 : #else
7174 : tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
7175 : #endif // CONFIG_EXT_INTER
7176 : #if CONFIG_DUAL_FILTER
7177 0 : if (!has_subpel_mv_component(xd->mi[0], xd, 0))
7178 0 : mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
7179 0 : if (!has_subpel_mv_component(xd->mi[0], xd, 1))
7180 0 : mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
7181 : #endif // CONFIG_DUAL_FILTER
7182 0 : av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
7183 : #if CONFIG_EXT_INTER
7184 : } else {
7185 0 : av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
7186 : #endif // CONFIG_EXT_INTER
7187 : }
7188 0 : av1_build_obmc_inter_prediction(
7189 0 : cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
7190 0 : args->left_pred_buf, args->left_pred_stride);
7191 0 : model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7192 : &tmp_dist, skip_txfm_sb, skip_sse_sb);
7193 : }
7194 : #endif // CONFIG_MOTION_VAR
7195 :
7196 : #if CONFIG_WARPED_MOTION
7197 0 : if (mbmi->motion_mode == WARPED_CAUSAL) {
7198 : #if CONFIG_EXT_INTER
7199 0 : *mbmi = *best_bmc_mbmi;
7200 0 : mbmi->motion_mode = WARPED_CAUSAL;
7201 : #endif // CONFIG_EXT_INTER
7202 0 : mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
7203 : #if CONFIG_DUAL_FILTER
7204 0 : for (int dir = 0; dir < 4; ++dir)
7205 0 : mbmi->interp_filter[dir] = cm->interp_filter == SWITCHABLE
7206 : ? EIGHTTAP_REGULAR
7207 0 : : cm->interp_filter;
7208 : #else
7209 : mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
7210 : : cm->interp_filter;
7211 : #endif // CONFIG_DUAL_FILTER
7212 :
7213 0 : if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
7214 0 : mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
7215 : &mbmi->wm_params[0], mi_row, mi_col)) {
7216 : // Refine MV for NEWMV mode
7217 0 : if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
7218 0 : int tmp_rate_mv = 0;
7219 0 : const int_mv mv0 = mbmi->mv[0];
7220 0 : WarpedMotionParams wm_params0 = mbmi->wm_params[0];
7221 :
7222 : // Refine MV in a small range.
7223 0 : av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
7224 :
7225 : // Keep the refined MV and WM parameters.
7226 0 : if (mv0.as_int != mbmi->mv[0].as_int) {
7227 0 : const int ref = refs[0];
7228 0 : const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
7229 :
7230 0 : tmp_rate_mv =
7231 0 : av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv, x->nmvjointcost,
7232 : x->mvcost, MV_COST_WEIGHT);
7233 :
7234 0 : if (cpi->sf.adaptive_motion_search)
7235 0 : x->pred_mv[ref] = mbmi->mv[0].as_mv;
7236 :
7237 0 : single_newmv[ref] = mbmi->mv[0];
7238 :
7239 0 : if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7240 : refs[0])) {
7241 0 : tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
7242 : }
7243 : #if CONFIG_EXT_INTER
7244 0 : tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
7245 : #else
7246 : tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
7247 : #endif // CONFIG_EXT_INTER
7248 : #if CONFIG_DUAL_FILTER
7249 0 : if (!has_subpel_mv_component(xd->mi[0], xd, 0))
7250 0 : mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
7251 0 : if (!has_subpel_mv_component(xd->mi[0], xd, 1))
7252 0 : mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
7253 : #endif // CONFIG_DUAL_FILTER
7254 : } else {
7255 : // Restore the old MV and WM parameters.
7256 0 : mbmi->mv[0] = mv0;
7257 0 : mbmi->wm_params[0] = wm_params0;
7258 : }
7259 : }
7260 :
7261 0 : av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
7262 0 : model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7263 : &tmp_dist, skip_txfm_sb, skip_sse_sb);
7264 : } else {
7265 0 : continue;
7266 : }
7267 : }
7268 : #endif // CONFIG_WARPED_MOTION
7269 0 : x->skip = 0;
7270 :
7271 0 : rd_stats->dist = 0;
7272 0 : rd_stats->sse = 0;
7273 0 : rd_stats->skip = 1;
7274 0 : rd_stats->rate = tmp_rate2;
7275 0 : if (last_motion_mode_allowed > SIMPLE_TRANSLATION) {
7276 : #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7277 0 : if (last_motion_mode_allowed == WARPED_CAUSAL)
7278 : #endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7279 0 : rd_stats->rate += cpi->motion_mode_cost[bsize][mbmi->motion_mode];
7280 : #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7281 : else
7282 0 : rd_stats->rate += cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
7283 : #endif // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
7284 : }
7285 : #if CONFIG_WARPED_MOTION
7286 0 : if (mbmi->motion_mode == WARPED_CAUSAL) {
7287 0 : rd_stats->rate -= rs;
7288 : }
7289 : #endif // CONFIG_WARPED_MOTION
7290 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7291 0 : if (!*skip_txfm_sb) {
7292 0 : int64_t rdcosty = INT64_MAX;
7293 0 : int is_cost_valid_uv = 0;
7294 :
7295 : // cost and distortion
7296 0 : av1_subtract_plane(x, bsize, 0);
7297 : #if CONFIG_VAR_TX
7298 0 : if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
7299 0 : select_tx_type_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
7300 : } else {
7301 : int idx, idy;
7302 0 : super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
7303 0 : for (idy = 0; idy < xd->n8_h; ++idy)
7304 0 : for (idx = 0; idx < xd->n8_w; ++idx)
7305 0 : mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
7306 0 : memset(x->blk_skip[0], rd_stats_y->skip,
7307 0 : sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
7308 : }
7309 : #else
7310 : /* clang-format off */
7311 : super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
7312 : /* clang-format on */
7313 : #endif // CONFIG_VAR_TX
7314 :
7315 0 : if (rd_stats_y->rate == INT_MAX) {
7316 0 : av1_invalid_rd_stats(rd_stats);
7317 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7318 0 : if (mbmi->motion_mode != SIMPLE_TRANSLATION) {
7319 0 : continue;
7320 : } else {
7321 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7322 0 : restore_dst_buf(xd, *orig_dst);
7323 0 : return INT64_MAX;
7324 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7325 : }
7326 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7327 : }
7328 :
7329 0 : av1_merge_rd_stats(rd_stats, rd_stats_y);
7330 :
7331 0 : rdcosty = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
7332 0 : rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
7333 : /* clang-format off */
7334 : #if CONFIG_VAR_TX
7335 0 : is_cost_valid_uv =
7336 0 : inter_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty);
7337 : #else
7338 : is_cost_valid_uv =
7339 : super_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty);
7340 : #endif // CONFIG_VAR_TX
7341 0 : if (!is_cost_valid_uv) {
7342 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7343 0 : continue;
7344 : #else
7345 : restore_dst_buf(xd, *orig_dst);
7346 : return INT64_MAX;
7347 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7348 : }
7349 : /* clang-format on */
7350 0 : av1_merge_rd_stats(rd_stats, rd_stats_uv);
7351 : #if CONFIG_RD_DEBUG
7352 : // Record the transform block coefficient cost.
7353 : // TODO(angiebird): So far the rd_debug tool only detects discrepancies in
7354 : // the coefficient cost, so it is fine to copy rd_stats into mbmi here,
7355 : // because the coefficient cost has already been collected. Move this
7356 : // elsewhere when we need to compare non-coefficient costs.
7357 : mbmi->rd_stats = *rd_stats;
7358 : #endif // CONFIG_RD_DEBUG
7359 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
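: // Skip-mode decision (a sketch): when the residual is not already all-skip,
: // compare coding it, i.e.
: //   RDCOST(rate_y + rate_uv + cost(skip = 0), dist)
: // against dropping it entirely, i.e.
: //   RDCOST(cost(skip = 1), sse)
: // and signal the skip flag when the latter is cheaper.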
7360 0 : if (rd_stats->skip) {
7361 0 : rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
7362 0 : rd_stats_y->rate = 0;
7363 0 : rd_stats_uv->rate = 0;
7364 0 : rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
7365 0 : mbmi->skip = 0;
7366 : // Here mbmi->skip temporarily plays the role that this_skip2 plays elsewhere.
7367 0 : } else if (!xd->lossless[mbmi->segment_id] &&
7368 0 : (RDCOST(x->rdmult, x->rddiv,
7369 : rd_stats_y->rate + rd_stats_uv->rate +
7370 : av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
7371 : rd_stats->dist) >=
7372 0 : RDCOST(x->rdmult, x->rddiv,
7373 : av1_cost_bit(av1_get_skip_prob(cm, xd), 1),
7374 : rd_stats->sse))) {
7375 0 : rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
7376 0 : rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
7377 0 : rd_stats->dist = rd_stats->sse;
7378 0 : rd_stats_y->rate = 0;
7379 0 : rd_stats_uv->rate = 0;
7380 0 : mbmi->skip = 1;
7381 : } else {
7382 0 : rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
7383 0 : mbmi->skip = 0;
7384 : }
7385 0 : *disable_skip = 0;
7386 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7387 : } else {
7388 0 : x->skip = 1;
7389 0 : *disable_skip = 1;
7390 0 : mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
7391 :
7392 : // The cost of the skip bit needs to be added.
7393 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7394 0 : mbmi->skip = 0;
7395 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7396 0 : rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
7397 :
7398 0 : rd_stats->dist = *skip_sse_sb;
7399 0 : rd_stats->sse = *skip_sse_sb;
7400 0 : rd_stats_y->rate = 0;
7401 0 : rd_stats_uv->rate = 0;
7402 0 : rd_stats->skip = 1;
7403 : }
7404 :
7405 : #if CONFIG_GLOBAL_MOTION
7406 0 : if (this_mode == ZEROMV
7407 : #if CONFIG_EXT_INTER
7408 0 : || this_mode == ZERO_ZEROMV
7409 : #endif // CONFIG_EXT_INTER
7410 : ) {
7411 0 : if (is_nontrans_global_motion(xd)) {
7412 0 : rd_stats->rate -= rs;
7413 : #if CONFIG_DUAL_FILTER
7414 0 : mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
7415 : ? EIGHTTAP_REGULAR
7416 0 : : cm->interp_filter;
7417 0 : mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
7418 : ? EIGHTTAP_REGULAR
7419 0 : : cm->interp_filter;
7420 : #else
7421 : mbmi->interp_filter = cm->interp_filter == SWITCHABLE
7422 : ? EIGHTTAP_REGULAR
7423 : : cm->interp_filter;
7424 : #endif // CONFIG_DUAL_FILTER
7425 : }
7426 : }
7427 : #endif // CONFIG_GLOBAL_MOTION
7428 :
7429 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7430 0 : tmp_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
7431 0 : if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) {
7432 0 : best_mbmi = *mbmi;
7433 0 : best_rd = tmp_rd;
7434 0 : best_rd_stats = *rd_stats;
7435 0 : best_rd_stats_y = *rd_stats_y;
7436 0 : best_rd_stats_uv = *rd_stats_uv;
7437 : #if CONFIG_VAR_TX
7438 0 : for (int i = 0; i < MAX_MB_PLANE; ++i)
7439 0 : memcpy(best_blk_skip[i], x->blk_skip[i],
7440 0 : sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
7441 : #endif // CONFIG_VAR_TX
7442 0 : best_xskip = x->skip;
7443 0 : best_disable_skip = *disable_skip;
7444 : }
7445 : }
7446 :
7447 0 : if (best_rd == INT64_MAX) {
7448 0 : av1_invalid_rd_stats(rd_stats);
7449 0 : restore_dst_buf(xd, *orig_dst);
7450 0 : return INT64_MAX;
7451 : }
7452 0 : *mbmi = best_mbmi;
7453 0 : *rd_stats = best_rd_stats;
7454 0 : *rd_stats_y = best_rd_stats_y;
7455 0 : *rd_stats_uv = best_rd_stats_uv;
7456 : #if CONFIG_VAR_TX
7457 0 : for (int i = 0; i < MAX_MB_PLANE; ++i)
7458 0 : memcpy(x->blk_skip[i], best_blk_skip[i],
7459 0 : sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
7460 : #endif // CONFIG_VAR_TX
7461 0 : x->skip = best_xskip;
7462 0 : *disable_skip = best_disable_skip;
7463 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7464 :
7465 0 : restore_dst_buf(xd, *orig_dst);
7466 0 : return 0;
7467 : }
7468 :
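: // handle_inter_mode() evaluates one inter prediction mode. In outline:
: //   1) search or assign the motion vectors (handle_newmv / ref MV stack),
: //   2) pick the interpolation filter(s),
: //   3) with CONFIG_EXT_INTER, search compound types and inter-intra modes,
: //   4) hand off to motion_mode_rd() for OBMC / warped-motion refinement
: //      and the final transform and skip costing.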
7469 0 : static int64_t handle_inter_mode(
7470 : const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
7471 : RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
7472 : int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
7473 : int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) {
7474 0 : const AV1_COMMON *cm = &cpi->common;
7475 : (void)cm;
7476 0 : MACROBLOCKD *xd = &x->e_mbd;
7477 0 : MODE_INFO *mi = xd->mi[0];
7478 0 : MB_MODE_INFO *mbmi = &mi->mbmi;
7479 0 : MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
7480 0 : const int is_comp_pred = has_second_ref(mbmi);
7481 0 : const int this_mode = mbmi->mode;
7482 0 : int_mv *frame_mv = mode_mv[this_mode];
7483 : int i;
7484 0 : int refs[2] = { mbmi->ref_frame[0],
7485 0 : (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
7486 : int_mv cur_mv[2];
7487 0 : int rate_mv = 0;
7488 : #if CONFIG_EXT_INTER
7489 0 : int pred_exists = 1;
7490 : #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7491 0 : const int bw = block_size_wide[bsize];
7492 : #endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7493 : int_mv single_newmv[TOTAL_REFS_PER_FRAME];
7494 : #if CONFIG_INTERINTRA
7495 0 : const unsigned int *const interintra_mode_cost =
7496 0 : cpi->interintra_mode_cost[size_group_lookup[bsize]];
7497 : #endif // CONFIG_INTERINTRA
7498 0 : const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
7499 0 : uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
7500 : #else
7501 : int_mv *const single_newmv = args->single_newmv;
7502 : #endif // CONFIG_EXT_INTER
7503 : #if CONFIG_HIGHBITDEPTH
7504 : DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
7505 : #else
7506 : DECLARE_ALIGNED(16, uint8_t, tmp_buf_[MAX_MB_PLANE * MAX_SB_SQUARE]);
7507 : #endif // CONFIG_HIGHBITDEPTH
7508 : uint8_t *tmp_buf;
7509 :
7510 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7511 : #if CONFIG_EXT_INTER
7512 : int rate2_bmc_nocoeff;
7513 : MB_MODE_INFO best_bmc_mbmi;
7514 : #if CONFIG_MOTION_VAR
7515 : int rate_mv_bmc;
7516 : #endif // CONFIG_MOTION_VAR
7517 : #endif // CONFIG_EXT_INTER
7518 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7519 0 : int64_t rd = INT64_MAX;
7520 : BUFFER_SET orig_dst, tmp_dst;
7521 0 : int rs = 0;
7522 :
7523 0 : int skip_txfm_sb = 0;
7524 0 : int64_t skip_sse_sb = INT64_MAX;
7525 : int16_t mode_ctx;
7526 :
7527 : #if CONFIG_EXT_INTER
7528 : #if CONFIG_INTERINTRA
7529 0 : int compmode_interintra_cost = 0;
7530 0 : mbmi->use_wedge_interintra = 0;
7531 : #endif
7532 : #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7533 0 : int compmode_interinter_cost = 0;
7534 0 : mbmi->interinter_compound_type = COMPOUND_AVERAGE;
7535 : #endif
7536 :
7537 : #if CONFIG_INTERINTRA
7538 0 : if (!cm->allow_interintra_compound && is_comp_interintra_pred)
7539 0 : return INT64_MAX;
7540 : #endif // CONFIG_INTERINTRA
7541 :
7542 : // is_comp_interintra_pred implies !is_comp_pred
7543 0 : assert(!is_comp_interintra_pred || (!is_comp_pred));
7544 : // is_comp_interintra_pred implies is_interintra_allowed(mbmi->sb_type)
7545 0 : assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
7546 : #endif // CONFIG_EXT_INTER
7547 :
7548 : #if CONFIG_EXT_INTER
7549 0 : if (is_comp_pred)
7550 0 : mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
7551 : else
7552 : #endif // CONFIG_EXT_INTER
7553 0 : mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
7554 0 : mbmi->ref_frame, bsize, -1);
7555 :
7556 : #if CONFIG_HIGHBITDEPTH
7557 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
7558 0 : tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf_);
7559 : else
7560 : #endif // CONFIG_HIGHBITDEPTH
7561 0 : tmp_buf = tmp_buf_;
7562 : // Make sure that we didn't leave the plane destination buffers set
7563 : // to tmp_buf at the end of the last iteration.
7564 0 : assert(xd->plane[0].dst.buf != tmp_buf);
7565 :
7566 : #if CONFIG_WARPED_MOTION
7567 0 : mbmi->num_proj_ref[0] = 0;
7568 0 : mbmi->num_proj_ref[1] = 0;
7569 : #endif // CONFIG_WARPED_MOTION
7570 :
7571 0 : if (is_comp_pred) {
7572 0 : if (frame_mv[refs[0]].as_int == INVALID_MV ||
7573 0 : frame_mv[refs[1]].as_int == INVALID_MV)
7574 0 : return INT64_MAX;
7575 : }
7576 :
7577 0 : mbmi->motion_mode = SIMPLE_TRANSLATION;
7578 0 : if (have_newmv_in_inter_mode(this_mode)) {
7579 0 : const int64_t ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col,
7580 : &rate_mv, single_newmv, args);
7581 0 : if (ret_val != 0)
7582 0 : return ret_val;
7583 : else
7584 0 : rd_stats->rate += rate_mv;
7585 : }
7586 0 : for (i = 0; i < is_comp_pred + 1; ++i) {
7587 0 : cur_mv[i] = frame_mv[refs[i]];
7588 : // Clip "next_nearest" so that it does not extend too far out of the image.
7589 0 : if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);
7590 0 : if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
7591 0 : mbmi->mv[i].as_int = cur_mv[i].as_int;
7592 : }
7593 :
7594 : #if CONFIG_EXT_INTER
7595 0 : if (this_mode == NEAREST_NEARESTMV)
7596 : #else
7597 : if (this_mode == NEARESTMV && is_comp_pred)
7598 : #endif // CONFIG_EXT_INTER
7599 : {
7600 : #if !CONFIG_EXT_INTER
7601 : uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
7602 : #endif // !CONFIG_EXT_INTER
7603 0 : if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
7604 0 : cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
7605 0 : cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
7606 :
7607 0 : for (i = 0; i < 2; ++i) {
7608 0 : clamp_mv2(&cur_mv[i].as_mv, xd);
7609 0 : if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
7610 0 : mbmi->mv[i].as_int = cur_mv[i].as_int;
7611 : }
7612 : }
7613 : }
7614 :
7615 : #if CONFIG_EXT_INTER
7616 0 : if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
7617 0 : if (this_mode == NEAREST_NEWMV) {
7618 0 : cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
7619 :
7620 0 : lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
7621 0 : clamp_mv2(&cur_mv[0].as_mv, xd);
7622 0 : if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
7623 0 : mbmi->mv[0].as_int = cur_mv[0].as_int;
7624 : }
7625 :
7626 0 : if (this_mode == NEW_NEARESTMV) {
7627 0 : cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
7628 :
7629 0 : lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
7630 0 : clamp_mv2(&cur_mv[1].as_mv, xd);
7631 0 : if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
7632 0 : mbmi->mv[1].as_int = cur_mv[1].as_int;
7633 : }
7634 : }
7635 :
7636 0 : if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
7637 0 : int ref_mv_idx = mbmi->ref_mv_idx + 1;
7638 0 : if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) {
7639 0 : cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
7640 :
7641 0 : lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
7642 0 : clamp_mv2(&cur_mv[0].as_mv, xd);
7643 0 : if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
7644 0 : mbmi->mv[0].as_int = cur_mv[0].as_int;
7645 : }
7646 :
7647 0 : if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) {
7648 0 : cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
7649 :
7650 0 : lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
7651 0 : clamp_mv2(&cur_mv[1].as_mv, xd);
7652 0 : if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
7653 0 : mbmi->mv[1].as_int = cur_mv[1].as_int;
7654 : }
7655 : }
7656 : #else
7657 : if (this_mode == NEARMV && is_comp_pred) {
7658 : uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
7659 : if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
7660 : int ref_mv_idx = mbmi->ref_mv_idx + 1;
7661 : cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
7662 : cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
7663 :
7664 : for (i = 0; i < 2; ++i) {
7665 : clamp_mv2(&cur_mv[i].as_mv, xd);
7666 : if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
7667 : mbmi->mv[i].as_int = cur_mv[i].as_int;
7668 : }
7669 : }
7670 : }
7671 : #endif // CONFIG_EXT_INTER
7672 :
7673 : // Do the first prediction into the destination buffer and the next
7674 : // prediction into a temporary buffer. Then keep track of which one
7675 : // of these currently holds the best predictor, and use the other
7676 : // one for future predictions. In the end, copy from tmp_buf to
7677 : // dst if necessary.
7678 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
7679 0 : tmp_dst.plane[i] = tmp_buf + i * MAX_SB_SQUARE;
7680 0 : tmp_dst.stride[i] = MAX_SB_SIZE;
7681 : }
7682 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
7683 0 : orig_dst.plane[i] = xd->plane[i].dst.buf;
7684 0 : orig_dst.stride[i] = xd->plane[i].dst.stride;
7685 : }
7686 :
7687 : // We don't include the cost of the second reference here, because there
7688 : // are only three options (Last/Golden, ARF/Last or Golden/ARF); in other
7689 : // words, once the first reference is known, the second one is always
7690 : // determined.
7691 : //
7692 : // Under some circumstances we discount the cost of the NEWMV mode to
7693 : // encourage the initiation of a motion field.
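: // (Sketch: when the discount applies, the NEWMV candidate is charged
: // min(cost(this_mode), cost(NEARESTMV)), i.e. no more than the cheapest
: // nearest-MV mode.)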
7694 0 : if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
7695 : refs[0])) {
7696 : #if CONFIG_EXT_INTER
7697 0 : rd_stats->rate +=
7698 0 : AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
7699 : cost_mv_ref(cpi, is_comp_pred ? NEAREST_NEARESTMV : NEARESTMV,
7700 : mode_ctx));
7701 : #else
7702 : rd_stats->rate += AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
7703 : cost_mv_ref(cpi, NEARESTMV, mode_ctx));
7704 : #endif // CONFIG_EXT_INTER
7705 : } else {
7706 0 : rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx);
7707 : }
7708 :
7709 0 : if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, 0) > ref_best_rd &&
7710 : #if CONFIG_EXT_INTER
7711 0 : mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV
7712 : #else
7713 : mbmi->mode != NEARESTMV
7714 : #endif // CONFIG_EXT_INTER
7715 : )
7716 0 : return INT64_MAX;
7717 :
7718 0 : int64_t ret_val = interpolation_filter_search(
7719 0 : x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst, args->single_filter,
7720 : &rd, &rs, &skip_txfm_sb, &skip_sse_sb);
7721 0 : if (ret_val != 0) return ret_val;
7722 :
7723 : #if CONFIG_EXT_INTER
7724 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7725 0 : best_bmc_mbmi = *mbmi;
7726 0 : rate2_bmc_nocoeff = rd_stats->rate;
7727 0 : if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs;
7728 : #if CONFIG_MOTION_VAR
7729 0 : rate_mv_bmc = rate_mv;
7730 : #endif // CONFIG_MOTION_VAR
7731 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
7732 :
7733 : #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7734 0 : if (is_comp_pred) {
7735 : int rate_sum, rs2;
7736 : int64_t dist_sum;
7737 0 : int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX;
7738 : INTERINTER_COMPOUND_DATA best_compound_data;
7739 : int_mv best_mv[2];
7740 0 : int best_tmp_rate_mv = rate_mv;
7741 : int tmp_skip_txfm_sb;
7742 : int64_t tmp_skip_sse_sb;
7743 : int compound_type_cost[COMPOUND_TYPES];
7744 : uint8_t pred0[2 * MAX_SB_SQUARE];
7745 : uint8_t pred1[2 * MAX_SB_SQUARE];
7746 0 : uint8_t *preds0[1] = { pred0 };
7747 0 : uint8_t *preds1[1] = { pred1 };
7748 0 : int strides[1] = { bw };
7749 : int tmp_rate_mv;
7750 0 : int masked_compound_used = is_any_masked_compound_used(bsize);
7751 : #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
7752 0 : masked_compound_used = masked_compound_used && cm->allow_masked_compound;
7753 : #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
7754 : COMPOUND_TYPE cur_type;
7755 :
7756 0 : best_mv[0].as_int = cur_mv[0].as_int;
7757 0 : best_mv[1].as_int = cur_mv[1].as_int;
7758 0 : memset(&best_compound_data, 0, sizeof(best_compound_data));
7759 : #if CONFIG_COMPOUND_SEGMENT
7760 : uint8_t tmp_mask_buf[2 * MAX_SB_SQUARE];
7761 0 : best_compound_data.seg_mask = tmp_mask_buf;
7762 : #endif // CONFIG_COMPOUND_SEGMENT
7763 :
7764 0 : if (masked_compound_used) {
7765 0 : av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize],
7766 : av1_compound_type_tree);
7767 : // Get the inter predictors to use for the masked compound modes.
7768 0 : av1_build_inter_predictors_for_planes_single_buf(
7769 : xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
7770 0 : av1_build_inter_predictors_for_planes_single_buf(
7771 : xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
7772 : }
7773 :
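: // Compound-type search: COMPOUND_AVERAGE is always evaluated; the masked
: // types (wedge, segment) are tried only when masked compounds are enabled
: // for this block size, the source variance is high enough, and the
: // average-compound RD leaves headroom (best_rd_compound / 3 < ref_best_rd).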
7774 0 : for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
7775 0 : if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
7776 0 : if (!is_interinter_compound_used(cur_type, bsize)) break;
7777 0 : tmp_rate_mv = rate_mv;
7778 0 : best_rd_cur = INT64_MAX;
7779 0 : mbmi->interinter_compound_type = cur_type;
7780 0 : rs2 = av1_cost_literal(get_interinter_compound_type_bits(
7781 : bsize, mbmi->interinter_compound_type)) +
7782 : (masked_compound_used
7783 0 : ? compound_type_cost[mbmi->interinter_compound_type]
7784 0 : : 0);
7785 :
7786 0 : switch (cur_type) {
7787 : case COMPOUND_AVERAGE:
7788 0 : av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
7789 : bsize);
7790 0 : av1_subtract_plane(x, bsize, 0);
7791 0 : rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
7792 : &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
7793 : INT64_MAX);
7794 0 : if (rd != INT64_MAX)
7795 0 : best_rd_cur =
7796 0 : RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
7797 0 : best_rd_compound = best_rd_cur;
7798 0 : break;
7799 : #if CONFIG_WEDGE
7800 : case COMPOUND_WEDGE:
7801 0 : if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
7802 0 : best_rd_compound / 3 < ref_best_rd) {
7803 0 : best_rd_cur = build_and_cost_compound_type(
7804 : cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
7805 : &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
7806 : }
7807 0 : break;
7808 : #endif // CONFIG_WEDGE
7809 : #if CONFIG_COMPOUND_SEGMENT
7810 : case COMPOUND_SEG:
7811 0 : if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
7812 0 : best_rd_compound / 3 < ref_best_rd) {
7813 0 : best_rd_cur = build_and_cost_compound_type(
7814 : cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
7815 : &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
7816 : }
7817 0 : break;
7818 : #endif // CONFIG_COMPOUND_SEGMENT
7819 0 : default: assert(0); return 0;
7820 : }
7821 :
7822 0 : if (best_rd_cur < best_rd_compound) {
7823 0 : best_rd_compound = best_rd_cur;
7824 : #if CONFIG_WEDGE
7825 0 : best_compound_data.wedge_index = mbmi->wedge_index;
7826 0 : best_compound_data.wedge_sign = mbmi->wedge_sign;
7827 : #endif // CONFIG_WEDGE
7828 : #if CONFIG_COMPOUND_SEGMENT
7829 0 : best_compound_data.mask_type = mbmi->mask_type;
7830 0 : memcpy(best_compound_data.seg_mask, xd->seg_mask,
7831 : 2 * MAX_SB_SQUARE * sizeof(uint8_t));
7832 : #endif // CONFIG_COMPOUND_SEGMENT
7833 0 : best_compound_data.interinter_compound_type =
7834 0 : mbmi->interinter_compound_type;
7835 0 : if (have_newmv_in_inter_mode(this_mode)) {
7836 0 : if (use_masked_motion_search(cur_type)) {
7837 0 : best_tmp_rate_mv = tmp_rate_mv;
7838 0 : best_mv[0].as_int = mbmi->mv[0].as_int;
7839 0 : best_mv[1].as_int = mbmi->mv[1].as_int;
7840 : } else {
7841 0 : best_mv[0].as_int = cur_mv[0].as_int;
7842 0 : best_mv[1].as_int = cur_mv[1].as_int;
7843 : }
7844 : }
7845 : }
7846 : // Reset to the original MVs for the next iteration.
7847 0 : mbmi->mv[0].as_int = cur_mv[0].as_int;
7848 0 : mbmi->mv[1].as_int = cur_mv[1].as_int;
7849 : }
7850 : #if CONFIG_WEDGE
7851 0 : mbmi->wedge_index = best_compound_data.wedge_index;
7852 0 : mbmi->wedge_sign = best_compound_data.wedge_sign;
7853 : #endif // CONFIG_WEDGE
7854 : #if CONFIG_COMPOUND_SEGMENT
7855 0 : mbmi->mask_type = best_compound_data.mask_type;
7856 0 : memcpy(xd->seg_mask, best_compound_data.seg_mask,
7857 : 2 * MAX_SB_SQUARE * sizeof(uint8_t));
7858 : #endif // CONFIG_COMPOUND_SEGMENT
7859 0 : mbmi->interinter_compound_type =
7860 0 : best_compound_data.interinter_compound_type;
7861 0 : if (have_newmv_in_inter_mode(this_mode)) {
7862 0 : mbmi->mv[0].as_int = best_mv[0].as_int;
7863 0 : mbmi->mv[1].as_int = best_mv[1].as_int;
7864 0 : xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
7865 0 : xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
7866 0 : if (use_masked_motion_search(mbmi->interinter_compound_type)) {
7867 0 : rd_stats->rate += best_tmp_rate_mv - rate_mv;
7868 0 : rate_mv = best_tmp_rate_mv;
7869 : }
7870 : }
7871 :
7872 0 : if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) {
7873 0 : restore_dst_buf(xd, orig_dst);
7874 0 : return INT64_MAX;
7875 : }
7876 :
7877 0 : pred_exists = 0;
7878 :
7879 0 : compmode_interinter_cost =
7880 0 : av1_cost_literal(get_interinter_compound_type_bits(
7881 : bsize, mbmi->interinter_compound_type)) +
7882 : (masked_compound_used
7883 0 : ? compound_type_cost[mbmi->interinter_compound_type]
7884 0 : : 0);
7885 : }
7886 : #endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
7887 :
7888 : #if CONFIG_INTERINTRA
7889 0 : if (is_comp_interintra_pred) {
7890 0 : INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
7891 0 : int64_t best_interintra_rd = INT64_MAX;
7892 : int rmode, rate_sum;
7893 : int64_t dist_sum;
7894 : int j;
7895 0 : int tmp_rate_mv = 0;
7896 : int tmp_skip_txfm_sb;
7897 : int64_t tmp_skip_sse_sb;
7898 : DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_SB_SQUARE]);
7899 : uint8_t *intrapred;
7900 :
7901 : #if CONFIG_HIGHBITDEPTH
7902 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
7903 0 : intrapred = CONVERT_TO_BYTEPTR(intrapred_);
7904 : else
7905 : #endif // CONFIG_HIGHBITDEPTH
7906 0 : intrapred = intrapred_;
7907 :
7908 0 : mbmi->ref_frame[1] = NONE_FRAME;
7909 0 : for (j = 0; j < MAX_MB_PLANE; j++) {
7910 0 : xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
7911 0 : xd->plane[j].dst.stride = bw;
7912 : }
7913 0 : av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize);
7914 0 : restore_dst_buf(xd, orig_dst);
7915 0 : mbmi->ref_frame[1] = INTRA_FRAME;
7916 0 : mbmi->use_wedge_interintra = 0;
7917 :
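: // Inter-intra mode search: blend the inter predictor in tmp_buf with each
: // intra predictor candidate and keep the mode with the lowest model RD
: // cost; the winner is then re-costed below via estimate_yrd_for_sb().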
7918 0 : for (j = 0; j < INTERINTRA_MODES; ++j) {
7919 0 : mbmi->interintra_mode = (INTERINTRA_MODE)j;
7920 0 : rmode = interintra_mode_cost[mbmi->interintra_mode];
7921 0 : av1_build_intra_predictors_for_interintra(xd, bsize, 0, &orig_dst,
7922 : intrapred, bw);
7923 0 : av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7924 0 : model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
7925 : &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
7926 0 : rd =
7927 0 : RDCOST(x->rdmult, x->rddiv, tmp_rate_mv + rate_sum + rmode, dist_sum);
7928 0 : if (rd < best_interintra_rd) {
7929 0 : best_interintra_rd = rd;
7930 0 : best_interintra_mode = mbmi->interintra_mode;
7931 : }
7932 : }
7933 0 : mbmi->interintra_mode = best_interintra_mode;
7934 0 : rmode = interintra_mode_cost[mbmi->interintra_mode];
7935 0 : av1_build_intra_predictors_for_interintra(xd, bsize, 0, &orig_dst,
7936 : intrapred, bw);
7937 0 : av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7938 0 : av1_subtract_plane(x, bsize, 0);
7939 0 : rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
7940 : &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
7941 0 : if (rd != INT64_MAX)
7942 0 : rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum);
7943 0 : best_interintra_rd = rd;
7944 :
7945 0 : if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) {
7946 : // Don't need to call restore_dst_buf here
7947 0 : return INT64_MAX;
7948 : }
7949 : #if CONFIG_WEDGE
7950 0 : if (is_interintra_wedge_used(bsize)) {
7951 0 : int64_t best_interintra_rd_nowedge = INT64_MAX;
7952 0 : int64_t best_interintra_rd_wedge = INT64_MAX;
7953 : int_mv tmp_mv;
7954 0 : int rwedge = av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0);
7955 0 : if (rd != INT64_MAX)
7956 0 : rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum,
7957 : dist_sum);
7958 0 : best_interintra_rd_nowedge = best_interintra_rd;
7959 :
7960 : // Disable wedge search if source variance is small
7961 0 : if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
7962 0 : mbmi->use_wedge_interintra = 1;
7963 :
7964 0 : rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) +
7965 0 : av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1);
7966 :
7967 0 : best_interintra_rd_wedge =
7968 0 : pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
7969 :
7970 0 : best_interintra_rd_wedge +=
7971 0 : RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge, 0);
7972 : // Refine motion vector.
7973 0 : if (have_newmv_in_inter_mode(this_mode)) {
7974 : // Get the negative (inverted-sign) wedge mask.
7975 0 : const uint8_t *mask = av1_get_contiguous_soft_mask(
7976 : mbmi->interintra_wedge_index, 1, bsize);
7977 0 : tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
7978 0 : compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
7979 : mi_col, intrapred, mask, bw,
7980 : &tmp_rate_mv, 0, 0);
7981 0 : mbmi->mv[0].as_int = tmp_mv.as_int;
7982 0 : av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
7983 : bsize);
7984 0 : model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
7985 : &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
7986 0 : rd = RDCOST(x->rdmult, x->rddiv,
7987 : rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
7988 0 : if (rd >= best_interintra_rd_wedge) {
7989 0 : tmp_mv.as_int = cur_mv[0].as_int;
7990 0 : tmp_rate_mv = rate_mv;
7991 : }
7992 : } else {
7993 0 : tmp_mv.as_int = cur_mv[0].as_int;
7994 0 : tmp_rate_mv = rate_mv;
7995 0 : av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7996 : }
7997 : // Evaluate with an estimate closer to the true RD cost.
7998 0 : av1_subtract_plane(x, bsize, 0);
7999 0 : rd =
8000 0 : estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
8001 : &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
8002 0 : if (rd != INT64_MAX)
8003 0 : rd = RDCOST(x->rdmult, x->rddiv,
8004 : rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
8005 0 : best_interintra_rd_wedge = rd;
8006 0 : if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
8007 0 : mbmi->use_wedge_interintra = 1;
8008 0 : mbmi->mv[0].as_int = tmp_mv.as_int;
8009 0 : rd_stats->rate += tmp_rate_mv - rate_mv;
8010 0 : rate_mv = tmp_rate_mv;
8011 : } else {
8012 0 : mbmi->use_wedge_interintra = 0;
8013 0 : mbmi->mv[0].as_int = cur_mv[0].as_int;
8014 : }
8015 : } else {
8016 0 : mbmi->use_wedge_interintra = 0;
8017 : }
8018 : }
8019 : #endif // CONFIG_WEDGE
8020 :
8021 0 : pred_exists = 0;
8022 0 : compmode_interintra_cost =
8023 0 : av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1) +
8024 0 : interintra_mode_cost[mbmi->interintra_mode];
8025 0 : if (is_interintra_wedge_used(bsize)) {
8026 0 : compmode_interintra_cost += av1_cost_bit(
8027 : cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
8028 0 : if (mbmi->use_wedge_interintra) {
8029 0 : compmode_interintra_cost +=
8030 0 : av1_cost_literal(get_interintra_wedge_bits(bsize));
8031 : }
8032 : }
8033 0 : } else if (is_interintra_allowed(mbmi)) {
8034 0 : compmode_interintra_cost =
8035 0 : av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 0);
8036 : }
8037 : #endif // CONFIG_INTERINTRA
8038 :
8039 0 : if (pred_exists == 0) {
8040 : int tmp_rate;
8041 : int64_t tmp_dist;
8042 0 : av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
8043 0 : model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
8044 : &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
8045 0 : rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
8046 : }
8047 : #endif // CONFIG_EXT_INTER
8048 :
8049 0 : if (!is_comp_pred)
8050 : #if CONFIG_DUAL_FILTER
8051 0 : args->single_filter[this_mode][refs[0]] = mbmi->interp_filter[0];
8052 : #else
8053 : args->single_filter[this_mode][refs[0]] = mbmi->interp_filter;
8054 : #endif // CONFIG_DUAL_FILTER
8055 :
8056 : #if CONFIG_EXT_INTER
8057 0 : if (args->modelled_rd != NULL) {
8058 0 : if (is_comp_pred) {
8059 0 : const int mode0 = compound_ref0_mode(this_mode);
8060 0 : const int mode1 = compound_ref1_mode(this_mode);
8061 0 : const int64_t mrd = AOMMIN(args->modelled_rd[mode0][refs[0]],
8062 : args->modelled_rd[mode1][refs[1]]);
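: // Prune when this compound mode's modeled RD exceeds 4/3 of the better
: // of the two constituent single-reference modeled RDs.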
8063 0 : if (rd / 4 * 3 > mrd && ref_best_rd < INT64_MAX) {
8064 0 : restore_dst_buf(xd, orig_dst);
8065 0 : return INT64_MAX;
8066 : }
8067 0 : } else if (!is_comp_interintra_pred) {
8068 0 : args->modelled_rd[this_mode][refs[0]] = rd;
8069 : }
8070 : }
8071 : #endif // CONFIG_EXT_INTER
8072 :
8073 0 : if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
8074 : // If the current modeled RD (based on pred_error) is substantially worse
8075 : // than the best so far, do not bother doing the full RD search.
8076 0 : if (rd / 2 > ref_best_rd) {
8077 0 : restore_dst_buf(xd, orig_dst);
8078 0 : return INT64_MAX;
8079 : }
8080 : }
8081 :
8082 : #if CONFIG_EXT_INTER
8083 : #if CONFIG_INTERINTRA
8084 0 : rd_stats->rate += compmode_interintra_cost;
8085 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
8086 0 : rate2_bmc_nocoeff += compmode_interintra_cost;
8087 : #endif
8088 : #endif
8089 : #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
8090 0 : rd_stats->rate += compmode_interinter_cost;
8091 : #endif
8092 : #endif
8093 :
8094 0 : ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
8095 : disable_skip, mode_mv, mi_row, mi_col, args,
8096 : ref_best_rd, refs, rate_mv,
8097 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
8098 : single_newmv,
8099 : #if CONFIG_EXT_INTER
8100 : rate2_bmc_nocoeff, &best_bmc_mbmi,
8101 : #if CONFIG_MOTION_VAR
8102 : rate_mv_bmc,
8103 : #endif // CONFIG_MOTION_VAR
8104 : #endif // CONFIG_EXT_INTER
8105 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
8106 : rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst);
8107 0 : if (ret_val != 0) return ret_val;
8108 :
8109 0 : return 0; // The rate-distortion cost will be recalculated by the caller.
8110 : }
8111 :
8112 : #if CONFIG_INTRABC
8113 : static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
8114 : RD_STATS *rd_cost, BLOCK_SIZE bsize,
8115 : int64_t best_rd) {
8116 : const AV1_COMMON *const cm = &cpi->common;
8117 : if (bsize < BLOCK_8X8 || !cm->allow_screen_content_tools) return INT64_MAX;
8118 :
8119 : MACROBLOCKD *const xd = &x->e_mbd;
8120 : const TileInfo *tile = &xd->tile;
8121 : #if CONFIG_EC_ADAPT
8122 : FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
8123 : #else
8124 : FRAME_CONTEXT *const ec_ctx = cm->fc;
8125 : #endif // CONFIG_EC_ADAPT
8126 : MODE_INFO *const mi = xd->mi[0];
8127 : const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
8128 : const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
8129 : const int w = block_size_wide[bsize];
8130 : const int h = block_size_high[bsize];
8131 : const int sb_row = mi_row / MAX_MIB_SIZE;
8132 : const int sb_col = mi_col / MAX_MIB_SIZE;
8133 :
8134 : MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
8135 : MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
8136 : int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
8137 : av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
8138 : mbmi_ext->ref_mv_stack[ref_frame],
8139 : #if CONFIG_EXT_INTER
8140 : mbmi_ext->compound_mode_context,
8141 : #endif // CONFIG_EXT_INTER
8142 : candidates, mi_row, mi_col, NULL, NULL,
8143 : mbmi_ext->mode_context);
8144 :
8145 : int_mv nearestmv, nearmv;
8146 : av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);
8147 :
8148 : int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
8149 : if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, mi_row, mi_col);
8150 : mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref;
8151 :
8152 : struct buf_2d yv12_mb[MAX_MB_PLANE];
8153 : av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL);
8154 : for (int i = 0; i < MAX_MB_PLANE; ++i) {
8155 : xd->plane[i].pre[0] = yv12_mb[i];
8156 : }
8157 :
8158 : enum IntrabcMotionDirection {
8159 : IBC_MOTION_ABOVE,
8160 : IBC_MOTION_LEFT,
8161 : IBC_MOTION_DIRECTIONS
8162 : };
8163 :
8164 : MB_MODE_INFO *mbmi = &mi->mbmi;
8165 : MB_MODE_INFO best_mbmi = *mbmi;
8166 : RD_STATS best_rdcost = *rd_cost;
8167 : int best_skip = x->skip;
8168 :
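: // IntraBC DV search: restrict the search area to already-reconstructed
: // parts of the tile, split into an "above" region and a "left" region,
: // and run an independent full-pel search in each.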
8169 : for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
8170 : dir < IBC_MOTION_DIRECTIONS; ++dir) {
8171 : const MvLimits tmp_mv_limits = x->mv_limits;
8172 : switch (dir) {
8173 : case IBC_MOTION_ABOVE:
8174 : x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
8175 : x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
8176 : x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
8177 : x->mv_limits.row_max = (sb_row * MAX_MIB_SIZE - mi_row) * MI_SIZE - h;
8178 : break;
8179 : case IBC_MOTION_LEFT:
8180 : x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
8181 : x->mv_limits.col_max = (sb_col * MAX_MIB_SIZE - mi_col) * MI_SIZE - w;
8182 : // TODO(aconverse@google.com): Minimize the overlap between above and
8183 : // left areas.
8184 : x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
8185 : int bottom_coded_mi_edge =
8186 : AOMMIN((sb_row + 1) * MAX_MIB_SIZE, tile->mi_row_end);
8187 : x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
8188 : break;
8189 : default: assert(0);
8190 : }
8191 : assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
8192 : assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
8193 : assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
8194 : assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
8195 : av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);
8196 :
8197 : if (x->mv_limits.col_max < x->mv_limits.col_min ||
8198 : x->mv_limits.row_max < x->mv_limits.row_min) {
8199 : x->mv_limits = tmp_mv_limits;
8200 : continue;
8201 : }
8202 :
8203 : int step_param = cpi->mv_step_param;
8204 : MV mvp_full = dv_ref.as_mv;
8205 : mvp_full.col >>= 3;
8206 : mvp_full.row >>= 3;
8207 : int sadpb = x->sadperbit16;
8208 : int cost_list[5];
8209 : int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
8210 : sadpb, cond_cost_list(cpi, cost_list),
8211 : &dv_ref.as_mv, INT_MAX, 1);
8212 :
8213 : x->mv_limits = tmp_mv_limits;
8214 : if (bestsme == INT_MAX) continue;
8215 : mvp_full = x->best_mv.as_mv;
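: // The full-pel search result is converted to 1/8-pel units (x8) below,
: // since block vectors are stored at the same precision as regular MVs.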
8216 : MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
8217 : if (mv_check_bounds(&x->mv_limits, &dv)) continue;
8218 : if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) continue;
8219 :
8220 : #if CONFIG_PALETTE
8221 : memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
8222 : #endif
8223 : mbmi->use_intrabc = 1;
8224 : mbmi->mode = DC_PRED;
8225 : mbmi->uv_mode = DC_PRED;
8226 : mbmi->mv[0].as_mv = dv;
8227 : #if CONFIG_DUAL_FILTER
8228 : for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
8229 : #else
8230 : mbmi->interp_filter = BILINEAR;
8231 : #endif
8232 : mbmi->skip = 0;
8233 : x->skip = 0;
8234 : av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
8235 :
8236 : int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost,
8237 : x->mvcost, MV_COST_WEIGHT);
8238 : const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0);
8239 : const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0);
8240 : const int rate_mode = cpi->y_mode_costs[A][L][DC_PRED] +
8241 : av1_cost_bit(ec_ctx->intrabc_prob, 1);
8242 :
8243 : RD_STATS rd_stats, rd_stats_uv;
8244 : av1_subtract_plane(x, bsize, 0);
8245 : super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
8246 : super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
8247 : av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
8248 : #if CONFIG_RD_DEBUG
8249 : mbmi->rd_stats = rd_stats;
8250 : #endif
8251 :
8252 : #if CONFIG_VAR_TX
8253 : // TODO(aconverse@google.com): Evaluate allowing VAR TX on intrabc blocks
8254 : const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
8255 : const int height = block_size_high[bsize] >> tx_size_high_log2[0];
8256 : int idx, idy;
8257 : for (idy = 0; idy < height; ++idy)
8258 : for (idx = 0; idx < width; ++idx)
8259 : mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
8260 : mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
8261 : #endif // CONFIG_VAR_TX
8262 :
8263 : const aom_prob skip_prob = av1_get_skip_prob(cm, xd);
8264 :
8265 : RD_STATS rdc_noskip;
8266 : av1_init_rd_stats(&rdc_noskip);
8267 : rdc_noskip.rate =
8268 : rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0);
8269 : rdc_noskip.dist = rd_stats.dist;
8270 : rdc_noskip.rdcost =
8271 : RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist);
8272 : if (rdc_noskip.rdcost < best_rd) {
8273 : best_rd = rdc_noskip.rdcost;
8274 : best_mbmi = *mbmi;
8275 : best_skip = x->skip;
8276 : best_rdcost = rdc_noskip;
8277 : }
8278 :
8279 : x->skip = 1;
8280 : mbmi->skip = 1;
8281 : RD_STATS rdc_skip;
8282 : av1_init_rd_stats(&rdc_skip);
8283 : rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1);
8284 : rdc_skip.dist = rd_stats.sse;
8285 : rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist);
8286 : if (rdc_skip.rdcost < best_rd) {
8287 : best_rd = rdc_skip.rdcost;
8288 : best_mbmi = *mbmi;
8289 : best_skip = x->skip;
8290 : best_rdcost = rdc_skip;
8291 : }
8292 : }
8293 : *mbmi = best_mbmi;
8294 : *rd_cost = best_rdcost;
8295 : x->skip = best_skip;
8296 : return best_rd;
8297 : }
8298 : #endif // CONFIG_INTRABC
8299 :
8300 0 : void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
8301 : RD_STATS *rd_cost, BLOCK_SIZE bsize,
8302 : PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
8303 0 : const AV1_COMMON *const cm = &cpi->common;
8304 0 : MACROBLOCKD *const xd = &x->e_mbd;
8305 0 : struct macroblockd_plane *const pd = xd->plane;
8306 0 : int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
8307 0 : int y_skip = 0, uv_skip = 0;
8308 0 : int64_t dist_y = 0, dist_uv = 0;
8309 : TX_SIZE max_uv_tx_size;
8310 0 : const int unify_bsize = CONFIG_CB4X4;
8311 :
8312 0 : ctx->skip = 0;
8313 0 : xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
8314 0 : xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
8315 : #if CONFIG_INTRABC
8316 : xd->mi[0]->mbmi.use_intrabc = 0;
8317 : xd->mi[0]->mbmi.mv[0].as_int = 0;
8318 : #endif // CONFIG_INTRABC
8319 :
8320 0 : const int64_t intra_yrd =
8321 0 : (bsize >= BLOCK_8X8 || unify_bsize)
8322 0 : ? rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
8323 : &y_skip, bsize, best_rd)
8324 0 : : rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
8325 : &dist_y, &y_skip, best_rd);
8326 :
8327 0 : if (intra_yrd < best_rd) {
8328 0 : max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size]
8329 0 : [pd[1].subsampling_x][pd[1].subsampling_y];
8330 0 : init_sbuv_mode(&xd->mi[0]->mbmi);
8331 : #if CONFIG_CB4X4
8332 0 : if (!x->skip_chroma_rd)
8333 0 : rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
8334 : &uv_skip, bsize, max_uv_tx_size);
8335 : #else
8336 : rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
8337 : &uv_skip, AOMMAX(BLOCK_8X8, bsize), max_uv_tx_size);
8338 : #endif // CONFIG_CB4X4
8339 :
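: // If both planes are fully skippable, drop the token-only rates and pay
: // for the skip flag instead; otherwise pay for a zero skip flag.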
8340 0 : if (y_skip && uv_skip) {
8341 0 : rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
8342 0 : av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
8343 0 : rd_cost->dist = dist_y + dist_uv;
8344 : } else {
8345 0 : rd_cost->rate =
8346 0 : rate_y + rate_uv + av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
8347 0 : rd_cost->dist = dist_y + dist_uv;
8348 : }
8349 0 : rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
8350 : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
8351 : rd_cost->dist_y = dist_y;
8352 : #endif
8353 : } else {
8354 0 : rd_cost->rate = INT_MAX;
8355 : }
8356 :
8357 : #if CONFIG_INTRABC
8358 : if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
8359 : best_rd = rd_cost->rdcost;
8360 : if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
8361 : ctx->skip = x->skip; // FIXME where is the proper place to set this?!
8362 : assert(rd_cost->rate != INT_MAX);
8363 : rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
8364 : }
8365 : #endif
8366 0 : if (rd_cost->rate == INT_MAX) return;
8367 :
8368 0 : ctx->mic = *xd->mi[0];
8369 0 : ctx->mbmi_ext = *x->mbmi_ext;
8370 : }
8371 :
8372 : // Do we have an internal image edge (e.g. formatting bars)?
8373 0 : int av1_internal_image_edge(const AV1_COMP *cpi) {
8374 0 : return (cpi->oxcf.pass == 2) &&
8375 0 : ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
8376 0 : (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
8377 : }
8378 :
8379 : // Checks to see if a super block is on a horizontal image edge.
8380 : // In most cases this is the "real" edge unless there are formatting
8381 : // bars embedded in the stream.
8382 0 : int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
8383 0 : int top_edge = 0;
8384 0 : int bottom_edge = cpi->common.mi_rows;
8385 0 : int is_active_h_edge = 0;
8386 :
8387 : // For two pass account for any formatting bars detected.
8388 0 : if (cpi->oxcf.pass == 2) {
8389 0 : const TWO_PASS *const twopass = &cpi->twopass;
8390 :
8391 : // The inactive region is specified in MBs, not mi units.
8392 : // The image edge is in the following MB row.
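: // (Assuming 8x8 mi units, one 16x16 MB spans two mi rows, hence the
: // factor of 2 below.)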
8393 0 : top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
8394 :
8395 0 : bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
8396 0 : bottom_edge = AOMMAX(top_edge, bottom_edge);
8397 : }
8398 :
8399 0 : if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
8400 0 : ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
8401 0 : is_active_h_edge = 1;
8402 : }
8403 0 : return is_active_h_edge;
8404 : }
8405 :
8406 : // Checks to see if a super block is on a vertical image edge.
8407 : // In most cases this is the "real" edge unless there are formatting
8408 : // bars embedded in the stream.
8409 0 : int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
8410 0 : int left_edge = 0;
8411 0 : int right_edge = cpi->common.mi_cols;
8412 0 : int is_active_v_edge = 0;
8413 :
8414 : // For two pass account for any formatting bars detected.
8415 0 : if (cpi->oxcf.pass == 2) {
8416 0 : const TWO_PASS *const twopass = &cpi->twopass;
8417 :
8418 : // The inactive region is specified in MBs, not mi units.
8419 : // The image edge is in the following MB column.
8420 0 : left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
8421 :
8422 0 : right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
8423 0 : right_edge = AOMMAX(left_edge, right_edge);
8424 : }
8425 :
8426 0 : if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
8427 0 : ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
8428 0 : is_active_v_edge = 1;
8429 : }
8430 0 : return is_active_v_edge;
8431 : }
8432 :
8433 : // Checks to see if a super block is at the edge of the active image.
8434 : // In most cases this is the "real" edge unless there are formatting
8435 : // bars embedded in the stream.
8436 0 : int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
8437 0 : return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
8438 0 : av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
8439 : }
8440 :
8441 : #if CONFIG_PALETTE
8442 0 : static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
8443 0 : MACROBLOCKD *const xd = &x->e_mbd;
8444 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8445 0 : PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
8446 0 : const BLOCK_SIZE bsize = mbmi->sb_type;
8447 0 : int src_stride = x->plane[1].src.stride;
8448 0 : const uint8_t *const src_u = x->plane[1].src.buf;
8449 0 : const uint8_t *const src_v = x->plane[2].src.buf;
8450 0 : float *const data = x->palette_buffer->kmeans_data_buf;
8451 : float centroids[2 * PALETTE_MAX_SIZE];
8452 0 : uint8_t *const color_map = xd->plane[1].color_index_map;
8453 : int r, c;
8454 : #if CONFIG_HIGHBITDEPTH
8455 0 : const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
8456 0 : const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
8457 : #endif // CONFIG_HIGHBITDEPTH
8458 : int plane_block_width, plane_block_height, rows, cols;
8459 0 : av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
8460 : &plane_block_height, &rows, &cols);
8461 : (void)cpi;
8462 :
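: // Repack the source U/V samples into interleaved (u, v) pairs so that
: // av1_calc_indices() can rebuild the color-index map from the stored
: // palette centroids.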
8463 0 : for (r = 0; r < rows; ++r) {
8464 0 : for (c = 0; c < cols; ++c) {
8465 : #if CONFIG_HIGHBITDEPTH
8466 0 : if (cpi->common.use_highbitdepth) {
8467 0 : data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
8468 0 : data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
8469 : } else {
8470 : #endif // CONFIG_HIGHBITDEPTH
8471 0 : data[(r * cols + c) * 2] = src_u[r * src_stride + c];
8472 0 : data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
8473 : #if CONFIG_HIGHBITDEPTH
8474 : }
8475 : #endif // CONFIG_HIGHBITDEPTH
8476 : }
8477 : }
8478 :
8479 0 : for (r = 1; r < 3; ++r) {
8480 0 : for (c = 0; c < pmi->palette_size[1]; ++c) {
8481 0 : centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
8482 : }
8483 : }
8484 :
8485 0 : av1_calc_indices(data, centroids, color_map, rows * cols,
8486 0 : pmi->palette_size[1], 2);
8487 0 : extend_palette_color_map(color_map, cols, rows, plane_block_width,
8488 : plane_block_height);
8489 0 : }
8490 : #endif // CONFIG_PALETTE
8491 :
8492 : #if CONFIG_FILTER_INTRA
8493 : static void pick_filter_intra_interframe(
8494 : const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
8495 : BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra,
8496 : int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
8497 : PREDICTION_MODE *mode_uv, FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,
8498 : #if CONFIG_EXT_INTRA
8499 : int8_t *uv_angle_delta,
8500 : #endif // CONFIG_EXT_INTRA
8501 : #if CONFIG_PALETTE
8502 : PALETTE_MODE_INFO *pmi_uv, int palette_ctx,
8503 : #endif // CONFIG_PALETTE
8504 : int skip_mask, unsigned int *ref_costs_single, int64_t *best_rd,
8505 : int64_t *best_intra_rd, PREDICTION_MODE *best_intra_mode,
8506 : int *best_mode_index, int *best_skip2, int *best_mode_skippable,
8507 : #if CONFIG_SUPERTX
8508 : int *returnrate_nocoef,
8509 : #endif // CONFIG_SUPERTX
8510 : int64_t *best_pred_rd, MB_MODE_INFO *best_mbmode, RD_STATS *rd_cost) {
8511 : const AV1_COMMON *const cm = &cpi->common;
8512 : MACROBLOCKD *const xd = &x->e_mbd;
8513 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8514 : #if CONFIG_PALETTE
8515 : PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
8516 : #endif // CONFIG_PALETTE
8517 : int rate2 = 0, rate_y = INT_MAX, skippable = 0, rate_uv, rate_dummy, i;
8518 : int dc_mode_index;
8519 : const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
8520 : int64_t distortion2 = 0, distortion_y = 0, this_rd = *best_rd;
8521 : int64_t distortion_uv, model_rd = INT64_MAX;
8522 : TX_SIZE uv_tx;
8523 :
8524 : for (i = 0; i < MAX_MODES; ++i)
8525 : if (av1_mode_order[i].mode == DC_PRED &&
8526 : av1_mode_order[i].ref_frame[0] == INTRA_FRAME)
8527 : break;
8528 : dc_mode_index = i;
8529 : assert(i < MAX_MODES);
8530 :
8531 : // TODO(huisu): use skip_mask for further speedup.
8532 : (void)skip_mask;
8533 : mbmi->mode = DC_PRED;
8534 : mbmi->uv_mode = DC_PRED;
8535 : mbmi->ref_frame[0] = INTRA_FRAME;
8536 : mbmi->ref_frame[1] = NONE_FRAME;
8537 : if (!rd_pick_filter_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,
8538 : &skippable, bsize, intra_mode_cost[mbmi->mode],
8539 : &this_rd, &model_rd, 0)) {
8540 : return;
8541 : }
8542 : if (rate_y == INT_MAX) return;
8543 :
8544 : uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
8545 : [xd->plane[1].subsampling_y];
8546 : if (rate_uv_intra[uv_tx] == INT_MAX) {
8547 : choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
8548 : &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
8549 : &skip_uv[uv_tx], &mode_uv[uv_tx]);
8550 : #if CONFIG_PALETTE
8551 : if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
8552 : #endif // CONFIG_PALETTE
8553 : filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
8554 : #if CONFIG_EXT_INTRA
8555 : uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
8556 : #endif // CONFIG_EXT_INTRA
8557 : }
8558 :
8559 : rate_uv = rate_uv_tokenonly[uv_tx];
8560 : distortion_uv = dist_uv[uv_tx];
8561 : skippable = skippable && skip_uv[uv_tx];
8562 : mbmi->uv_mode = mode_uv[uv_tx];
8563 : #if CONFIG_PALETTE
8564 : if (cm->allow_screen_content_tools) {
8565 : pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
8566 : memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
8567 : pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
8568 : 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
8569 : }
8570 : #endif // CONFIG_PALETTE
8571 : #if CONFIG_EXT_INTRA
8572 : mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
8573 : #endif // CONFIG_EXT_INTRA
8574 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
8575 : filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
8576 : if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
8577 : mbmi->filter_intra_mode_info.filter_intra_mode[1] =
8578 : filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
8579 : }
8580 :
8581 : rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
8582 : cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
8583 : #if CONFIG_PALETTE
8584 : if (cpi->common.allow_screen_content_tools && mbmi->mode == DC_PRED &&
8585 : bsize >= BLOCK_8X8)
8586 : rate2 += av1_cost_bit(
8587 : av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
8588 : #endif // CONFIG_PALETTE
8589 :
8590 : if (!xd->lossless[mbmi->segment_id]) {
8591 : // super_block_yrd above includes the cost of the tx_size in the
8592 : // tokenonly rate, but for intra blocks, tx_size is always coded
8593 : // (prediction granularity), so we account for it in the full rate,
8594 : // not the tokenonly rate.
8595 : rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
8596 : }
8597 :
8598 : rate2 += av1_cost_bit(cm->fc->filter_intra_probs[0],
8599 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
8600 : rate2 += write_uniform_cost(
8601 : FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
8602 : #if CONFIG_EXT_INTRA
8603 : if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
8604 : rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
8605 : MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
8606 : }
8607 : #endif // CONFIG_EXT_INTRA
8608 : if (mbmi->mode == DC_PRED) {
8609 : rate2 +=
8610 : av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
8611 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
8612 : if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
8613 : rate2 +=
8614 : write_uniform_cost(FILTER_INTRA_MODES,
8615 : mbmi->filter_intra_mode_info.filter_intra_mode[1]);
8616 : }
8617 : distortion2 = distortion_y + distortion_uv;
8618 : av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, 0, 0, mi_row,
8619 : mi_col);
8620 :
8621 : rate2 += ref_costs_single[INTRA_FRAME];
8622 :
8623 : if (skippable) {
8624 : rate2 -= (rate_y + rate_uv);
8625 : rate_y = 0;
8626 : rate_uv = 0;
8627 : rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
8628 : } else {
8629 : rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
8630 : }
8631 : this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
8632 :
8633 : if (this_rd < *best_intra_rd) {
8634 : *best_intra_rd = this_rd;
8635 : *best_intra_mode = mbmi->mode;
8636 : }
8637 : for (i = 0; i < REFERENCE_MODES; ++i)
8638 : best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
8639 :
8640 : if (this_rd < *best_rd) {
8641 : *best_mode_index = dc_mode_index;
8642 : mbmi->mv[0].as_int = 0;
8643 : rd_cost->rate = rate2;
8644 : #if CONFIG_SUPERTX
8645 : if (x->skip)
8646 : *returnrate_nocoef = rate2;
8647 : else
8648 : *returnrate_nocoef = rate2 - rate_y - rate_uv;
8649 : *returnrate_nocoef -= av1_cost_bit(av1_get_skip_prob(cm, xd), skippable);
8650 : *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
8651 : mbmi->ref_frame[0] != INTRA_FRAME);
8652 : #endif // CONFIG_SUPERTX
8653 : rd_cost->dist = distortion2;
8654 : rd_cost->rdcost = this_rd;
8655 : *best_rd = this_rd;
8656 : *best_mbmode = *mbmi;
8657 : *best_skip2 = 0;
8658 : *best_mode_skippable = skippable;
8659 : }
8660 : }
8661 : #endif // CONFIG_FILTER_INTRA
8662 :
8663 : #if CONFIG_MOTION_VAR
8664 : static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
8665 : const MACROBLOCKD *xd, int mi_row,
8666 : int mi_col, const uint8_t *above,
8667 : int above_stride, const uint8_t *left,
8668 : int left_stride);
8669 : #endif // CONFIG_MOTION_VAR
8670 :
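     : // Top-level inter/intra mode decision for a superblock: scan every
     : // (mode, ref_frame[0], ref_frame[1]) entry of av1_mode_order, prune with
     : // the speed-feature masks built below, evaluate survivors with
     : // handle_inter_mode()/super_block_yrd(), and keep the lowest-RDCOST
     : // winner in best_mbmode while reporting rate/distortion through rd_cost.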
8671 0 : void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
8672 : MACROBLOCK *x, int mi_row, int mi_col,
8673 : RD_STATS *rd_cost,
8674 : #if CONFIG_SUPERTX
8675 : int *returnrate_nocoef,
8676 : #endif // CONFIG_SUPERTX
8677 : BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
8678 : int64_t best_rd_so_far) {
8679 0 : const AV1_COMMON *const cm = &cpi->common;
8680 0 : const RD_OPT *const rd_opt = &cpi->rd;
8681 0 : const SPEED_FEATURES *const sf = &cpi->sf;
8682 0 : MACROBLOCKD *const xd = &x->e_mbd;
8683 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8684 : #if CONFIG_PALETTE
8685 0 : const int try_palette =
8686 0 : cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
8687 0 : PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
8688 : #endif // CONFIG_PALETTE
8689 0 : MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
8690 0 : const struct segmentation *const seg = &cm->seg;
8691 : PREDICTION_MODE this_mode;
8692 : MV_REFERENCE_FRAME ref_frame, second_ref_frame;
8693 0 : unsigned char segment_id = mbmi->segment_id;
8694 : int comp_pred, i, k;
8695 : int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
8696 : struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
8697 0 : int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
8698 : #if CONFIG_EXT_INTER
8699 0 : int single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
8700 : int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
8701 : #endif // CONFIG_EXT_INTER
8702 : static const int flag_list[TOTAL_REFS_PER_FRAME] = {
8703 : 0,
8704 : AOM_LAST_FLAG,
8705 : #if CONFIG_EXT_REFS
8706 : AOM_LAST2_FLAG,
8707 : AOM_LAST3_FLAG,
8708 : #endif // CONFIG_EXT_REFS
8709 : AOM_GOLD_FLAG,
8710 : #if CONFIG_EXT_REFS
8711 : AOM_BWD_FLAG,
8712 : #endif // CONFIG_EXT_REFS
8713 : AOM_ALT_FLAG
8714 : };
8715 0 : int64_t best_rd = best_rd_so_far;
8716 0 : int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
8717 : int64_t best_pred_diff[REFERENCE_MODES];
8718 : int64_t best_pred_rd[REFERENCE_MODES];
8719 : MB_MODE_INFO best_mbmode;
8720 0 : int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
8721 0 : int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
8722 0 : int best_mode_skippable = 0;
8723 0 : int midx, best_mode_index = -1;
8724 : unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
8725 : unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
8726 : aom_prob comp_mode_p;
8727 0 : int64_t best_intra_rd = INT64_MAX;
8728 0 : unsigned int best_pred_sse = UINT_MAX;
8729 0 : PREDICTION_MODE best_intra_mode = DC_PRED;
8730 : int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
8731 : int64_t dist_uvs[TX_SIZES_ALL];
8732 : int skip_uvs[TX_SIZES_ALL];
8733 : PREDICTION_MODE mode_uv[TX_SIZES_ALL];
8734 : #if CONFIG_PALETTE
8735 : PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
8736 : #endif // CONFIG_PALETTE
8737 : #if CONFIG_EXT_INTRA
8738 : int8_t uv_angle_delta[TX_SIZES_ALL];
8739 0 : int is_directional_mode, angle_stats_ready = 0;
8740 : uint8_t directional_mode_skip_mask[INTRA_MODES];
8741 : #endif // CONFIG_EXT_INTRA
8742 : #if CONFIG_FILTER_INTRA
8743 : int8_t dc_skipped = 1;
8744 : FILTER_INTRA_MODE_INFO filter_intra_mode_info_uv[TX_SIZES_ALL];
8745 : #endif // CONFIG_FILTER_INTRA
8746 0 : const int intra_cost_penalty = av1_get_intra_cost_penalty(
8747 : cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
8748 0 : const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
8749 0 : int best_skip2 = 0;
8750 0 : uint8_t ref_frame_skip_mask[2] = { 0 };
8751 0 : uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
8752 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
8753 0 : MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
8754 0 : int64_t best_single_inter_rd = INT64_MAX;
8755 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
8756 0 : int mode_skip_start = sf->mode_skip_start + 1;
8757 0 : const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
8758 0 : const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
8759 : int64_t mode_threshold[MAX_MODES];
8760 0 : int *mode_map = tile_data->mode_map[bsize];
8761 0 : const int mode_search_skip_flags = sf->mode_search_skip_flags;
8762 : #if CONFIG_PVQ
8763 : od_rollback_buffer pre_buf;
8764 : #endif // CONFIG_PVQ
8765 :
8766 0 : HandleInterModeArgs args = {
8767 : #if CONFIG_MOTION_VAR
8768 : { NULL },
8769 : { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
8770 : { NULL },
8771 : { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
8772 : #endif // CONFIG_MOTION_VAR
8773 : #if CONFIG_EXT_INTER
8774 : NULL,
8775 : NULL,
8776 : NULL,
8777 : #else // CONFIG_EXT_INTER
8778 : NULL,
8779 : #endif // CONFIG_EXT_INTER
8780 : { { 0 } },
8781 : };
8782 :
8783 : #if CONFIG_PALETTE || CONFIG_EXT_INTRA
8784 0 : const int rows = block_size_high[bsize];
8785 0 : const int cols = block_size_wide[bsize];
8786 : #endif // CONFIG_PALETTE || CONFIG_EXT_INTRA
8787 : #if CONFIG_PALETTE
8788 0 : int palette_ctx = 0;
8789 0 : const MODE_INFO *above_mi = xd->above_mi;
8790 0 : const MODE_INFO *left_mi = xd->left_mi;
8791 : #endif // CONFIG_PALETTE
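     :   // Scratch buffers for overlapped block motion compensation (OBMC):
     :   // predictions built from the above/left neighbors' motion are staged
     :   // in x->above_pred_buf / x->left_pred_buf, one MAX_SB_SQUARE region
     :   // per plane. With high bit depth the samples are uint16_t, hence the
     :   // sizeof(uint16_t)-scaled offsets wrapped in CONVERT_TO_BYTEPTR below.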
8792 : #if CONFIG_MOTION_VAR
8793 0 : int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
8794 0 : int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
8795 0 : int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
8796 0 : int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
8797 :
8798 : #if CONFIG_HIGHBITDEPTH
8799 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
8800 0 : int len = sizeof(uint16_t);
8801 0 : args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
8802 0 : args.above_pred_buf[1] =
8803 0 : CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
8804 0 : args.above_pred_buf[2] =
8805 0 : CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len);
8806 0 : args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
8807 0 : args.left_pred_buf[1] =
8808 0 : CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
8809 0 : args.left_pred_buf[2] =
8810 0 : CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len);
8811 : } else {
8812 : #endif // CONFIG_HIGHBITDEPTH
8813 0 : args.above_pred_buf[0] = x->above_pred_buf;
8814 0 : args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE;
8815 0 : args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE;
8816 0 : args.left_pred_buf[0] = x->left_pred_buf;
8817 0 : args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE;
8818 0 : args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE;
8819 : #if CONFIG_HIGHBITDEPTH
8820 : }
8821 : #endif // CONFIG_HIGHBITDEPTH
8822 : #endif // CONFIG_MOTION_VAR
8823 :
8824 0 : av1_zero(best_mbmode);
8825 :
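     :   // palette_ctx counts how many of the above/left neighbors coded a Y
     :   // palette (0..2); it later indexes av1_default_palette_y_mode_prob
     :   // when the palette-mode bit is costed.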
8826 : #if CONFIG_PALETTE
8827 0 : av1_zero(pmi_uv);
8828 0 : if (try_palette) {
8829 0 : if (above_mi)
8830 0 : palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
8831 0 : if (left_mi)
8832 0 : palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
8833 : }
8834 : #endif // CONFIG_PALETTE
8835 :
8836 0 : estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
8837 : &comp_mode_p);
8838 :
8839 0 : for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
8840 0 : for (i = 0; i < TX_SIZES_ALL; i++) rate_uv_intra[i] = INT_MAX;
8841 0 : for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
8842 0 : for (i = 0; i < MB_MODE_COUNT; ++i) {
8843 0 : for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
8844 0 : args.single_filter[i][k] = SWITCHABLE;
8845 : }
8846 : }
8847 :
8848 0 : rd_cost->rate = INT_MAX;
8849 : #if CONFIG_SUPERTX
8850 : *returnrate_nocoef = INT_MAX;
8851 : #endif // CONFIG_SUPERTX
8852 :
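     :   // Prime per-reference motion candidates: for each available reference,
     :   // setup_buffer_inter() fills frame_mv[NEARESTMV]/frame_mv[NEARMV] and
     :   // the (possibly scaled) prediction planes in yv12_mb, while ZEROMV
     :   // comes from the global motion model (or is literally zero without
     :   // CONFIG_GLOBAL_MOTION).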
8853 0 : for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
8854 0 : x->pred_mv_sad[ref_frame] = INT_MAX;
8855 0 : x->mbmi_ext->mode_context[ref_frame] = 0;
8856 : #if CONFIG_EXT_INTER
8857 0 : x->mbmi_ext->compound_mode_context[ref_frame] = 0;
8858 : #endif // CONFIG_EXT_INTER
8859 0 : if (cpi->ref_frame_flags & flag_list[ref_frame]) {
8860 0 : assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
8861 0 : setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
8862 : frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
8863 : }
8864 0 : frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
8865 : #if CONFIG_GLOBAL_MOTION
8866 0 : frame_mv[ZEROMV][ref_frame].as_int =
8867 0 : gm_get_motion_vector(&cm->global_motion[ref_frame],
8868 : cm->allow_high_precision_mv, bsize, mi_col, mi_row,
8869 : 0)
8870 0 : .as_int;
8871 : #else // CONFIG_GLOBAL_MOTION
8872 : frame_mv[ZEROMV][ref_frame].as_int = 0;
8873 : #endif // CONFIG_GLOBAL_MOTION
8874 : #if CONFIG_EXT_INTER
8875 0 : frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
8876 : #if CONFIG_GLOBAL_MOTION
8877 0 : frame_mv[ZERO_ZEROMV][ref_frame].as_int =
8878 0 : gm_get_motion_vector(&cm->global_motion[ref_frame],
8879 : cm->allow_high_precision_mv, bsize, mi_col, mi_row,
8880 : 0)
8881 0 : .as_int;
8882 : #else // CONFIG_GLOBAL_MOTION
8883 : frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0;
8884 : #endif // CONFIG_GLOBAL_MOTION
8885 : #endif // CONFIG_EXT_INTER
8886 : }
8887 :
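     :   // Extend the scan to compound reference types: gather their MV
     :   // candidate stacks and, when fewer than two candidates exist and any
     :   // candidate deviates from the zero MV, clear the ALL_ZERO hint from
     :   // the mode context.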
8888 0 : for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
8889 0 : MODE_INFO *const mi = xd->mi[0];
8890 0 : int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
8891 0 : x->mbmi_ext->mode_context[ref_frame] = 0;
8892 0 : av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
8893 0 : mbmi_ext->ref_mv_stack[ref_frame],
8894 : #if CONFIG_EXT_INTER
8895 0 : mbmi_ext->compound_mode_context,
8896 : #endif // CONFIG_EXT_INTER
8897 : candidates, mi_row, mi_col, NULL, NULL,
8898 0 : mbmi_ext->mode_context);
8899 0 : if (mbmi_ext->ref_mv_count[ref_frame] < 2) {
8900 : MV_REFERENCE_FRAME rf[2];
8901 0 : av1_set_ref_frame(rf, ref_frame);
8902 0 : if (mbmi_ext->ref_mvs[rf[0]][0].as_int !=
8903 0 : frame_mv[ZEROMV][rf[0]].as_int ||
8904 0 : mbmi_ext->ref_mvs[rf[0]][1].as_int !=
8905 0 : frame_mv[ZEROMV][rf[0]].as_int ||
8906 0 : mbmi_ext->ref_mvs[rf[1]][0].as_int !=
8907 0 : frame_mv[ZEROMV][rf[1]].as_int ||
8908 0 : mbmi_ext->ref_mvs[rf[1]][1].as_int != frame_mv[ZEROMV][rf[1]].as_int)
8909 0 : mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
8910 : }
8911 : }
8912 :
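     :   // Build the above/left neighbor predictions once up front; OBMC
     :   // candidates in the main loop blend against these, and
     :   // calc_target_weighted_pred() precomputes the weighted target signal
     :   // they use.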
8913 : #if CONFIG_MOTION_VAR
8914 0 : av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
8915 :
8916 0 : if (check_num_overlappable_neighbors(mbmi) &&
8917 0 : is_motion_variation_allowed_bsize(bsize)) {
8918 0 : av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
8919 : args.above_pred_buf, dst_width1,
8920 : dst_height1, args.above_pred_stride);
8921 0 : av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
8922 : args.left_pred_buf, dst_width2,
8923 : dst_height2, args.left_pred_stride);
8924 0 : av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
8925 : mi_col);
8926 0 : calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0],
8927 0 : args.above_pred_stride[0], args.left_pred_buf[0],
8928 : args.left_pred_stride[0]);
8929 : }
8930 : #endif // CONFIG_MOTION_VAR
8931 :
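     :   // ref_frame_skip_mask[0] gates a mode's first reference and
     :   // ref_frame_skip_mask[1] its second; a mode is dropped only when
     :   // *both* are masked (single-reference modes map their missing second
     :   // reference to bit 0 via AOMMAX(0, second_ref_frame)). For example,
     :   // mask[0] = (1 << GOLDEN_FRAME) with mask[1] = SECOND_REF_FRAME_MASK
     :   // drops single-reference GOLDEN modes, while (LAST, ALTREF) compound
     :   // modes survive through their unmasked first reference.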
8932 0 : for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
8933 0 : if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
8934 : // Skip checking missing references in both single and compound reference
8935 : // modes. Note that a mode will be skipped iff both reference frames
8936 : // are masked out.
8937 : #if CONFIG_EXT_REFS
8938 0 : if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) {
8939 0 : ref_frame_skip_mask[0] |= (1 << ref_frame);
8940 0 : ref_frame_skip_mask[1] |= ((1 << ref_frame) | 0x01);
8941 : } else {
8942 : #endif // CONFIG_EXT_REFS
8943 0 : ref_frame_skip_mask[0] |= (1 << ref_frame);
8944 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
8945 : #if CONFIG_EXT_REFS
8946 : }
8947 : #endif // CONFIG_EXT_REFS
8948 : } else {
8949 0 : for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
8950 : // Skip fixed mv modes for poor references
8951 0 : if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
8952 0 : mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
8953 0 : break;
8954 : }
8955 : }
8956 : }
8957 :     // If the segment reference frame feature is enabled, then do nothing
8958 :     // if the current ref frame is not allowed.
8959 0 : if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
8960 0 : get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
8961 0 : ref_frame_skip_mask[0] |= (1 << ref_frame);
8962 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
8963 : }
8964 : }
8965 :
8966 : // Disable this drop out case if the ref frame
8967 : // segment level feature is enabled for this segment. This is to
8968 : // prevent the possibility that we end up unable to pick any mode.
8969 0 : if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
8970 : // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
8971 : // unless ARNR filtering is enabled in which case we want
8972 : // an unfiltered alternative. We allow near/nearest as well
8973 : // because they may result in zero-zero MVs but be cheaper.
8974 0 : if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
8975 : int_mv zeromv;
8976 0 : ref_frame_skip_mask[0] = (1 << LAST_FRAME) |
8977 : #if CONFIG_EXT_REFS
8978 : (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
8979 : (1 << BWDREF_FRAME) |
8980 : #endif // CONFIG_EXT_REFS
8981 : (1 << GOLDEN_FRAME);
8982 0 : ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
8983 :       // TODO(zoeliu): Further explore whether the following needs to be
8984 :       // done for BWDREF_FRAME as well.
8985 0 : mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
8986 : #if CONFIG_GLOBAL_MOTION
8987 0 : zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME],
8988 : cm->allow_high_precision_mv, bsize,
8989 : mi_col, mi_row, 0)
8990 0 : .as_int;
8991 : #else
8992 : zeromv.as_int = 0;
8993 : #endif // CONFIG_GLOBAL_MOTION
8994 0 : if (frame_mv[NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
8995 0 : mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
8996 0 : if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
8997 0 : mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
8998 : #if CONFIG_EXT_INTER
8999 0 : if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
9000 0 : mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
9001 0 : if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
9002 0 : mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
9003 : #endif // CONFIG_EXT_INTER
9004 : }
9005 : }
9006 :
9007 0 : if (cpi->rc.is_src_frame_alt_ref) {
9008 0 : if (sf->alt_ref_search_fp) {
9009 0 : assert(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
9010 0 : mode_skip_mask[ALTREF_FRAME] = 0;
9011 0 : ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
9012 0 : ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
9013 : }
9014 : }
9015 :
9016 0 : if (sf->alt_ref_search_fp)
9017 0 : if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
9018 0 : if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
9019 0 : mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
9020 :
9021 0 : if (sf->adaptive_mode_search) {
9022 0 : if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
9023 0 : cpi->rc.frames_since_golden >= 3)
9024 0 : if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
9025 0 : mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
9026 : }
9027 :
9028 0 : if (bsize > sf->max_intra_bsize) {
9029 0 : ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
9030 0 : ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
9031 : }
9032 :
9033 0 : mode_skip_mask[INTRA_FRAME] |=
9034 0 : ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
9035 :
9036 0 : for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
9037 0 : for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
9038 0 : mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
9039 :
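     :   // mode_threshold[] scales each mode's base RD threshold by how often
     :   // that mode has recently been chosen (rd_thresh_freq_fact). When
     :   // schedule_mode_search is on, the loop below bubble-sorts mode_map
     :   // entries 5..mode_skip_start by ascending threshold so the most
     :   // promising modes are visited first; the first five entries stay put.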
9040 0 : midx = sf->schedule_mode_search ? mode_skip_start : 0;
9041 0 : while (midx > 4) {
9042 0 : uint8_t end_pos = 0;
9043 0 : for (i = 5; i < midx; ++i) {
9044 0 : if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
9045 0 : uint8_t tmp = mode_map[i];
9046 0 : mode_map[i] = mode_map[i - 1];
9047 0 : mode_map[i - 1] = tmp;
9048 0 : end_pos = i;
9049 : }
9050 : }
9051 0 : midx = end_pos;
9052 : }
9053 :
9054 0 : if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
9055 0 : x->use_default_intra_tx_type = 1;
9056 : else
9057 0 : x->use_default_intra_tx_type = 0;
9058 :
9059 0 : if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
9060 0 : x->use_default_inter_tx_type = 1;
9061 : else
9062 0 : x->use_default_inter_tx_type = 0;
9063 : #if CONFIG_PVQ
9064 : od_encode_checkpoint(&x->daala_enc, &pre_buf);
9065 : #endif // CONFIG_PVQ
9066 : #if CONFIG_EXT_INTER
9067 0 : for (i = 0; i < MB_MODE_COUNT; ++i)
9068 0 : for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
9069 0 : modelled_rd[i][ref_frame] = INT64_MAX;
9070 : #endif // CONFIG_EXT_INTER
9071 :
9072 0 : for (midx = 0; midx < MAX_MODES; ++midx) {
9073 : int mode_index;
9074 0 : int mode_excluded = 0;
9075 0 : int64_t this_rd = INT64_MAX;
9076 0 : int disable_skip = 0;
9077 0 : int compmode_cost = 0;
9078 0 : int rate2 = 0, rate_y = 0, rate_uv = 0;
9079 0 : int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
9080 : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9081 : int64_t distortion2_y = 0;
9082 : int64_t total_sse_y = INT64_MAX;
9083 : #endif
9084 0 : int skippable = 0;
9085 0 : int this_skip2 = 0;
9086 0 : int64_t total_sse = INT64_MAX;
9087 : uint8_t ref_frame_type;
9088 : #if CONFIG_PVQ
9089 : od_encode_rollback(&x->daala_enc, &pre_buf);
9090 : #endif // CONFIG_PVQ
9091 0 : mode_index = mode_map[midx];
9092 0 : this_mode = av1_mode_order[mode_index].mode;
9093 0 : ref_frame = av1_mode_order[mode_index].ref_frame[0];
9094 0 : second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
9095 0 : mbmi->ref_mv_idx = 0;
9096 :
9097 : #if CONFIG_EXT_INTER
9098 0 : if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
9099 :       // Mode must be compatible
9100 0 : if (!is_interintra_allowed_mode(this_mode)) continue;
9101 0 : if (!is_interintra_allowed_bsize(bsize)) continue;
9102 : }
9103 :
9104 0 : if (is_inter_compound_mode(this_mode)) {
9105 0 : frame_mv[this_mode][ref_frame].as_int =
9106 0 : frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
9107 0 : frame_mv[this_mode][second_ref_frame].as_int =
9108 0 : frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
9109 : }
9110 : #endif // CONFIG_EXT_INTER
9111 :
9112 : // Look at the reference frame of the best mode so far and set the
9113 : // skip mask to look at a subset of the remaining modes.
9114 0 : if (midx == mode_skip_start && best_mode_index >= 0) {
9115 0 : switch (best_mbmode.ref_frame[0]) {
9116 0 : case INTRA_FRAME: break;
9117 : case LAST_FRAME:
9118 0 : ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
9119 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9120 0 : break;
9121 : #if CONFIG_EXT_REFS
9122 : case LAST2_FRAME:
9123 0 : ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
9124 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9125 0 : break;
9126 : case LAST3_FRAME:
9127 0 : ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
9128 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9129 0 : break;
9130 : #endif // CONFIG_EXT_REFS
9131 : case GOLDEN_FRAME:
9132 0 : ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
9133 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9134 0 : break;
9135 : #if CONFIG_EXT_REFS
9136 : case BWDREF_FRAME:
9137 0 : ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
9138 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9139 0 : break;
9140 : #endif // CONFIG_EXT_REFS
9141 0 : case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
9142 : #if CONFIG_EXT_REFS
9143 0 : ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9144 : #endif // CONFIG_EXT_REFS
9145 0 : break;
9146 : case NONE_FRAME:
9147 : case TOTAL_REFS_PER_FRAME:
9148 0 : assert(0 && "Invalid Reference frame");
9149 : break;
9150 : }
9151 : }
9152 :
9153 0 : if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
9154 0 : (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
9155 0 : continue;
9156 :
9157 0 : if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
9158 :
9159 : // Test best rd so far against threshold for trying this mode.
9160 0 : if (best_mode_skippable && sf->schedule_mode_search)
9161 0 : mode_threshold[mode_index] <<= 1;
9162 :
9163 0 : if (best_rd < mode_threshold[mode_index]) continue;
9164 :
9165 :     // This is only used in the motion vector unit test.
9166 0 : if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
9167 :
9168 : #if CONFIG_ONE_SIDED_COMPOUND // Changes LL bitstream
9169 : #if CONFIG_EXT_REFS
9170 0 : if (cpi->oxcf.pass == 0) {
9171 : // Complexity-compression trade-offs
9172 : // if (ref_frame == ALTREF_FRAME) continue;
9173 : // if (ref_frame == BWDREF_FRAME) continue;
9174 0 : if (second_ref_frame == ALTREF_FRAME) continue;
9175 : // if (second_ref_frame == BWDREF_FRAME) continue;
9176 : }
9177 : #endif
9178 : #endif
9179 0 : comp_pred = second_ref_frame > INTRA_FRAME;
9180 0 : if (comp_pred) {
9181 0 : if (!cpi->allow_comp_inter_inter) continue;
9182 :
9183 : // Skip compound inter modes if ARF is not available.
9184 0 : if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
9185 :
9186 : // Do not allow compound prediction if the segment level reference frame
9187 :       // feature is in use, since in that case there can only be one reference.
9188 0 : if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
9189 :
9190 0 : if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
9191 0 : best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
9192 0 : continue;
9193 :
9194 0 : mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
9195 : } else {
9196 0 : if (ref_frame != INTRA_FRAME)
9197 0 : mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
9198 : }
9199 :
9200 0 : if (ref_frame == INTRA_FRAME) {
9201 0 : if (sf->adaptive_mode_search)
9202 0 : if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
9203 0 : continue;
9204 :
9205 0 : if (this_mode != DC_PRED) {
9206 : // Disable intra modes other than DC_PRED for blocks with low variance
9207 : // Threshold for intra skipping based on source variance
9208 : // TODO(debargha): Specialize the threshold for super block sizes
9209 0 : const unsigned int skip_intra_var_thresh = 64;
9210 0 : if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
9211 0 : x->source_variance < skip_intra_var_thresh)
9212 0 : continue;
9213 : // Only search the oblique modes if the best so far is
9214 : // one of the neighboring directional modes
9215 0 : if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
9216 0 : (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
9217 0 : if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
9218 0 : continue;
9219 : }
9220 0 : if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
9221 0 : if (conditional_skipintra(this_mode, best_intra_mode)) continue;
9222 : }
9223 : }
9224 : #if CONFIG_GLOBAL_MOTION
9225 0 : } else if (cm->global_motion[ref_frame].wmtype == IDENTITY &&
9226 0 : (!comp_pred ||
9227 0 : cm->global_motion[second_ref_frame].wmtype == IDENTITY)) {
9228 : #else // CONFIG_GLOBAL_MOTION
9229 : } else {
9230 : #endif // CONFIG_GLOBAL_MOTION
9231 0 : const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
9232 0 : if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
9233 : #if CONFIG_EXT_INTER
9234 0 : mbmi_ext->compound_mode_context,
9235 : #endif // CONFIG_EXT_INTER
9236 : frame_mv, this_mode, ref_frames, bsize, -1,
9237 : mi_row, mi_col))
9238 0 : continue;
9239 : }
9240 :
9241 0 : mbmi->mode = this_mode;
9242 0 : mbmi->uv_mode = DC_PRED;
9243 0 : mbmi->ref_frame[0] = ref_frame;
9244 0 : mbmi->ref_frame[1] = second_ref_frame;
9245 : #if CONFIG_PALETTE
9246 0 : pmi->palette_size[0] = 0;
9247 0 : pmi->palette_size[1] = 0;
9248 : #endif // CONFIG_PALETTE
9249 : #if CONFIG_FILTER_INTRA
9250 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9251 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
9252 : #endif // CONFIG_FILTER_INTRA
9253 : // Evaluate all sub-pel filters irrespective of whether we can use
9254 : // them for this frame.
9255 :
9256 0 : set_default_interp_filters(mbmi, cm->interp_filter);
9257 :
9258 0 : mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
9259 0 : mbmi->motion_mode = SIMPLE_TRANSLATION;
9260 :
9261 0 : x->skip = 0;
9262 0 : set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
9263 :
9264 : // Select prediction reference frames.
9265 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
9266 0 : xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
9267 0 : if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
9268 : }
9269 :
9270 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
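     :     // (II_DC_PRED - 1) appears to serve as an "unset" sentinel; a
     :     // concrete interintra mode is assigned further down only when the
     :     // compound-intra path is actually taken.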
9271 0 : mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
9272 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
9273 :
9274 0 : if (ref_frame == INTRA_FRAME) {
9275 : RD_STATS rd_stats_y;
9276 : TX_SIZE uv_tx;
9277 0 : struct macroblockd_plane *const pd = &xd->plane[1];
9278 : #if CONFIG_EXT_INTRA
9279 0 : is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
9280 0 : if (is_directional_mode) {
9281 : int rate_dummy;
9282 0 : int64_t model_rd = INT64_MAX;
9283 0 : if (!angle_stats_ready) {
9284 0 : const int src_stride = x->plane[0].src.stride;
9285 0 : const uint8_t *src = x->plane[0].src.buf;
9286 : #if CONFIG_HIGHBITDEPTH
9287 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
9288 0 : highbd_angle_estimation(src, src_stride, rows, cols, bsize,
9289 : directional_mode_skip_mask);
9290 : else
9291 : #endif // CONFIG_HIGHBITDEPTH
9292 0 : angle_estimation(src, src_stride, rows, cols, bsize,
9293 : directional_mode_skip_mask);
9294 0 : angle_stats_ready = 1;
9295 : }
9296 0 : if (directional_mode_skip_mask[mbmi->mode]) continue;
9297 0 : rd_stats_y.rate = INT_MAX;
9298 0 : rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rd_stats_y, bsize,
9299 0 : intra_mode_cost[mbmi->mode], best_rd,
9300 : &model_rd);
9301 : } else {
9302 0 : mbmi->angle_delta[0] = 0;
9303 0 : super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
9304 : }
9305 : #else
9306 : super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
9307 : #endif // CONFIG_EXT_INTRA
9308 0 : rate_y = rd_stats_y.rate;
9309 0 : distortion_y = rd_stats_y.dist;
9310 0 : skippable = rd_stats_y.skip;
9311 :
9312 0 : if (rate_y == INT_MAX) continue;
9313 :
9314 : #if CONFIG_FILTER_INTRA
9315 : if (mbmi->mode == DC_PRED) dc_skipped = 0;
9316 : #endif // CONFIG_FILTER_INTRA
9317 :
9318 0 : uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][pd->subsampling_x]
9319 0 : [pd->subsampling_y];
9320 0 : if (rate_uv_intra[uv_tx] == INT_MAX) {
9321 0 : choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
9322 0 : &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
9323 0 : &skip_uvs[uv_tx], &mode_uv[uv_tx]);
9324 : #if CONFIG_PALETTE
9325 0 : if (try_palette) pmi_uv[uv_tx] = *pmi;
9326 : #endif // CONFIG_PALETTE
9327 :
9328 : #if CONFIG_EXT_INTRA
9329 0 : uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
9330 : #endif // CONFIG_EXT_INTRA
9331 : #if CONFIG_FILTER_INTRA
9332 : filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
9333 : #endif // CONFIG_FILTER_INTRA
9334 : }
9335 :
9336 0 : rate_uv = rate_uv_tokenonly[uv_tx];
9337 0 : distortion_uv = dist_uvs[uv_tx];
9338 0 : skippable = skippable && skip_uvs[uv_tx];
9339 0 : mbmi->uv_mode = mode_uv[uv_tx];
9340 : #if CONFIG_PALETTE
9341 0 : if (try_palette) {
9342 0 : pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
9343 0 : memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
9344 0 : pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
9345 : 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
9346 : }
9347 : #endif // CONFIG_PALETTE
9348 :
9349 : #if CONFIG_EXT_INTRA
9350 0 : mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
9351 : #endif // CONFIG_EXT_INTRA
9352 : #if CONFIG_FILTER_INTRA
9353 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
9354 : filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
9355 : if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
9356 : mbmi->filter_intra_mode_info.filter_intra_mode[1] =
9357 : filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
9358 : }
9359 : #endif // CONFIG_FILTER_INTRA
9360 :
9361 : #if CONFIG_CB4X4
9362 0 : rate2 = rate_y + intra_mode_cost[mbmi->mode];
9363 0 : if (!x->skip_chroma_rd)
9364 0 : rate2 += rate_uv + cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
9365 : #else
9366 : rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
9367 : cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
9368 : #endif // CONFIG_CB4X4
9369 :
9370 : #if CONFIG_PALETTE
9371 0 : if (try_palette && mbmi->mode == DC_PRED) {
9372 0 : rate2 += av1_cost_bit(
9373 : av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
9374 : }
9375 : #endif // CONFIG_PALETTE
9376 :
9377 0 : if (!xd->lossless[mbmi->segment_id] && bsize >= BLOCK_8X8) {
9378 : // super_block_yrd above includes the cost of the tx_size in the
9379 : // tokenonly rate, but for intra blocks, tx_size is always coded
9380 : // (prediction granularity), so we account for it in the full rate,
9381 : // not the tokenonly rate.
9382 0 : rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
9383 : }
9384 : #if CONFIG_EXT_INTRA
9385 0 : if (is_directional_mode) {
9386 : #if CONFIG_INTRA_INTERP
9387 : const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
9388 : const int p_angle =
9389 : mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
9390 : if (av1_is_intra_filter_switchable(p_angle))
9391 : rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
9392 : #endif // CONFIG_INTRA_INTERP
9393 0 : rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
9394 0 : MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
9395 : }
9396 0 : if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
9397 0 : rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
9398 0 : MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
9399 : }
9400 : #endif // CONFIG_EXT_INTRA
9401 : #if CONFIG_FILTER_INTRA
9402 : if (mbmi->mode == DC_PRED) {
9403 : rate2 +=
9404 : av1_cost_bit(cm->fc->filter_intra_probs[0],
9405 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
9406 : if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
9407 : rate2 += write_uniform_cost(
9408 : FILTER_INTRA_MODES,
9409 : mbmi->filter_intra_mode_info.filter_intra_mode[0]);
9410 : }
9411 : }
9412 : if (mbmi->uv_mode == DC_PRED) {
9413 : rate2 +=
9414 : av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
9415 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
9416 : if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
9417 : rate2 += write_uniform_cost(
9418 : FILTER_INTRA_MODES,
9419 : mbmi->filter_intra_mode_info.filter_intra_mode[1]);
9420 : }
9421 : #endif // CONFIG_FILTER_INTRA
9422 0 : if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
9423 0 : rate2 += intra_cost_penalty;
9424 0 : distortion2 = distortion_y + distortion_uv;
9425 : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9426 : if (bsize < BLOCK_8X8) distortion2_y = distortion_y;
9427 : #endif
9428 : } else {
9429 : int_mv backup_ref_mv[2];
9430 :
9431 : #if !SUB8X8_COMP_REF
9432 0 : if (bsize == BLOCK_4X4 && mbmi->ref_frame[1] > INTRA_FRAME) continue;
9433 : #endif // !SUB8X8_COMP_REF
9434 :
9435 0 : backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
9436 0 : if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
9437 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
9438 0 : if (second_ref_frame == INTRA_FRAME) {
9439 0 : if (best_single_inter_ref != ref_frame) continue;
9440 0 : mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode];
9441 : // TODO(debargha|geza.lore):
9442 : // Should we use ext_intra modes for interintra?
9443 : #if CONFIG_EXT_INTRA
9444 0 : mbmi->angle_delta[0] = 0;
9445 0 : mbmi->angle_delta[1] = 0;
9446 : #if CONFIG_INTRA_INTERP
9447 : mbmi->intra_filter = INTRA_FILTER_LINEAR;
9448 : #endif // CONFIG_INTRA_INTERP
9449 : #endif // CONFIG_EXT_INTRA
9450 : #if CONFIG_FILTER_INTRA
9451 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9452 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
9453 : #endif // CONFIG_FILTER_INTRA
9454 : }
9455 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
9456 0 : mbmi->ref_mv_idx = 0;
9457 0 : ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
9458 :
9459 : #if CONFIG_EXT_INTER
9460 0 : if (comp_pred) {
9461 0 : if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
9462 0 : int ref_mv_idx = 0;
9463 : // Special case: NEAR_NEWMV and NEW_NEARMV modes use
9464 : // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
9465 : // mbmi->ref_mv_idx (like NEWMV)
9466 0 : if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
9467 0 : ref_mv_idx = 1;
9468 :
9469 0 : if (compound_ref0_mode(mbmi->mode) == NEWMV) {
9470 0 : int_mv this_mv =
9471 0 : mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
9472 0 : clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9473 0 : xd->n8_h << MI_SIZE_LOG2, xd);
9474 0 : mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
9475 : }
9476 0 : if (compound_ref1_mode(mbmi->mode) == NEWMV) {
9477 0 : int_mv this_mv =
9478 0 : mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
9479 0 : clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9480 0 : xd->n8_h << MI_SIZE_LOG2, xd);
9481 0 : mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
9482 : }
9483 : }
9484 : } else {
9485 : #endif // CONFIG_EXT_INTER
9486 0 : if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
9487 : int ref;
9488 0 : for (ref = 0; ref < 1 + comp_pred; ++ref) {
9489 0 : int_mv this_mv =
9490 0 : (ref == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
9491 0 : : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
9492 0 : clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9493 0 : xd->n8_h << MI_SIZE_LOG2, xd);
9494 0 : mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
9495 : }
9496 : }
9497 : #if CONFIG_EXT_INTER
9498 : }
9499 : #endif // CONFIG_EXT_INTER
9500 : {
9501 : RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
9502 0 : av1_init_rd_stats(&rd_stats);
9503 0 : rd_stats.rate = rate2;
9504 :
9505 : // Point to variables that are maintained between loop iterations
9506 0 : args.single_newmv = single_newmv;
9507 : #if CONFIG_EXT_INTER
9508 0 : args.single_newmv_rate = single_newmv_rate;
9509 0 : args.modelled_rd = modelled_rd;
9510 : #endif // CONFIG_EXT_INTER
9511 0 : this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
9512 : &rd_stats_uv, &disable_skip, frame_mv,
9513 : mi_row, mi_col, &args, best_rd);
9514 :
9515 0 : rate2 = rd_stats.rate;
9516 0 : skippable = rd_stats.skip;
9517 0 : distortion2 = rd_stats.dist;
9518 0 : total_sse = rd_stats.sse;
9519 0 : rate_y = rd_stats_y.rate;
9520 0 : rate_uv = rd_stats_uv.rate;
9521 : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9522 : if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist;
9523 : #endif
9524 : }
9525 :
9526 : // TODO(jingning): This needs some refactoring to improve code quality
9527 : // and reduce redundant steps.
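     :     // DRL (dynamic reference list) refinement: when the reference MV
     :     // stack holds more candidates than the default index, re-run
     :     // handle_inter_mode() with ref_mv_idx = 1..2 and keep whichever
     :     // candidate yields the lower RD cost, including the drl_mode_cost0
     :     // signaling bits added below.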
9528 : #if CONFIG_EXT_INTER
9529 0 : if ((have_nearmv_in_inter_mode(mbmi->mode) &&
9530 0 : mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
9531 0 : ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
9532 0 : mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
9533 : #else
9534 : if ((mbmi->mode == NEARMV &&
9535 : mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
9536 : (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
9537 : #endif
9538 0 : int_mv backup_mv = frame_mv[NEARMV][ref_frame];
9539 0 : MB_MODE_INFO backup_mbmi = *mbmi;
9540 0 : int backup_skip = x->skip;
9541 0 : int64_t tmp_ref_rd = this_rd;
9542 : int ref_idx;
9543 :
9544 : // TODO(jingning): This should be deprecated shortly.
9545 : #if CONFIG_EXT_INTER
9546 0 : int idx_offset = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
9547 : #else
9548 : int idx_offset = (mbmi->mode == NEARMV) ? 1 : 0;
9549 : #endif // CONFIG_EXT_INTER
9550 0 : int ref_set =
9551 0 : AOMMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
9552 :
9553 0 : uint8_t drl_ctx =
9554 0 : av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx_offset);
9555 :       // Back up the NEWMV frame MVs; they are restored after the
9555 :       // ref_mv_idx search below.
9556 : int_mv backup_fmv[2];
9557 0 : backup_fmv[0] = frame_mv[NEWMV][ref_frame];
9558 0 : if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
9559 :
9560 0 : rate2 += (rate2 < INT_MAX ? cpi->drl_mode_cost0[drl_ctx][0] : 0);
9561 :
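     :       // Re-derive the reference RD cost under the cheaper of two
     :       // signalings: either pay the no-skip flag plus the coefficient
     :       // rate, or pay the skip flag, drop rate_y/rate_uv, and take
     :       // total_sse as the distortion.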
9562 0 : if (this_rd < INT64_MAX) {
9563 0 : if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
9564 0 : RDCOST(x->rdmult, x->rddiv, 0, total_sse))
9565 0 : tmp_ref_rd =
9566 0 : RDCOST(x->rdmult, x->rddiv,
9567 : rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
9568 : distortion2);
9569 : else
9570 0 : tmp_ref_rd =
9571 0 : RDCOST(x->rdmult, x->rddiv,
9572 : rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
9573 : rate_y - rate_uv,
9574 : total_sse);
9575 : }
9576 : #if CONFIG_VAR_TX
9577 0 : for (i = 0; i < MAX_MB_PLANE; ++i)
9578 0 : memcpy(x->blk_skip_drl[i], x->blk_skip[i],
9579 0 : sizeof(uint8_t) * ctx->num_4x4_blk);
9580 : #endif // CONFIG_VAR_TX
9581 :
9582 0 : for (ref_idx = 0; ref_idx < ref_set; ++ref_idx) {
9583 0 : int64_t tmp_alt_rd = INT64_MAX;
9584 0 : int dummy_disable_skip = 0;
9585 : int ref;
9586 : int_mv cur_mv;
9587 : RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
9588 :
9589 0 : av1_invalid_rd_stats(&tmp_rd_stats);
9590 0 : x->skip = 0;
9591 :
9592 0 : mbmi->ref_mv_idx = 1 + ref_idx;
9593 :
9594 : #if CONFIG_EXT_INTER
9595 0 : if (comp_pred) {
9596 0 : int ref_mv_idx = mbmi->ref_mv_idx;
9597 : // Special case: NEAR_NEWMV and NEW_NEARMV modes use
9598 : // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
9599 : // mbmi->ref_mv_idx (like NEWMV)
9600 0 : if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
9601 0 : ref_mv_idx = 1 + mbmi->ref_mv_idx;
9602 :
9603 0 : if (compound_ref0_mode(mbmi->mode) == NEWMV) {
9604 0 : int_mv this_mv =
9605 0 : mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
9606 0 : clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9607 0 : xd->n8_h << MI_SIZE_LOG2, xd);
9608 0 : mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
9609 0 : } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV) {
9610 0 : int_mv this_mv =
9611 0 : mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
9612 0 : clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9613 0 : xd->n8_h << MI_SIZE_LOG2, xd);
9614 0 : mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
9615 : }
9616 :
9617 0 : if (compound_ref1_mode(mbmi->mode) == NEWMV) {
9618 0 : int_mv this_mv =
9619 0 : mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
9620 0 : clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9621 0 : xd->n8_h << MI_SIZE_LOG2, xd);
9622 0 : mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
9623 0 : } else if (compound_ref1_mode(mbmi->mode) == NEARESTMV) {
9624 0 : int_mv this_mv =
9625 0 : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
9626 0 : clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9627 0 : xd->n8_h << MI_SIZE_LOG2, xd);
9628 0 : mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
9629 : }
9630 : } else {
9631 : #endif // CONFIG_EXT_INTER
9632 0 : for (ref = 0; ref < 1 + comp_pred; ++ref) {
9633 0 : int_mv this_mv =
9634 : (ref == 0)
9635 0 : ? mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
9636 0 : .this_mv
9637 0 : : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
9638 0 : .comp_mv;
9639 0 : clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9640 0 : xd->n8_h << MI_SIZE_LOG2, xd);
9641 0 : mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
9642 : }
9643 : #if CONFIG_EXT_INTER
9644 : }
9645 : #endif
9646 :
9647 0 : cur_mv =
9648 0 : mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
9649 0 : .this_mv;
9650 0 : clamp_mv2(&cur_mv.as_mv, xd);
9651 :
9652 0 : if (!mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
9653 0 : int_mv dummy_single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
9654 : #if CONFIG_EXT_INTER
9655 0 : int dummy_single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
9656 : #endif // CONFIG_EXT_INTER
9657 :
9658 0 : frame_mv[NEARMV][ref_frame] = cur_mv;
9659 0 : av1_init_rd_stats(&tmp_rd_stats);
9660 :
9661 : // Point to variables that are not maintained between iterations
9662 0 : args.single_newmv = dummy_single_newmv;
9663 : #if CONFIG_EXT_INTER
9664 0 : args.single_newmv_rate = dummy_single_newmv_rate;
9665 0 : args.modelled_rd = NULL;
9666 : #endif // CONFIG_EXT_INTER
9667 0 : tmp_alt_rd = handle_inter_mode(
9668 : cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv,
9669 : &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
9670 : // Prevent pointers from escaping local scope
9671 0 : args.single_newmv = NULL;
9672 : #if CONFIG_EXT_INTER
9673 0 : args.single_newmv_rate = NULL;
9674 : #endif // CONFIG_EXT_INTER
9675 : }
9676 :
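     :         // DRL index signaling cost: one cost0[ctx][1] bit for every
     :         // earlier candidate passed over, plus a terminating
     :         // cost0[ctx][0] bit when further candidates remain in the stack.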
9677 0 : for (i = 0; i < mbmi->ref_mv_idx; ++i) {
9678 0 : uint8_t drl1_ctx = 0;
9679 0 : drl1_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
9680 : i + idx_offset);
9681 0 : tmp_rd_stats.rate +=
9682 0 : (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][1]
9683 0 : : 0);
9684 : }
9685 :
9686 0 : if (mbmi_ext->ref_mv_count[ref_frame_type] >
9687 0 : mbmi->ref_mv_idx + idx_offset + 1 &&
9688 0 : ref_idx < ref_set - 1) {
9689 0 : uint8_t drl1_ctx =
9690 0 : av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
9691 0 : mbmi->ref_mv_idx + idx_offset);
9692 0 : tmp_rd_stats.rate +=
9693 0 : (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][0]
9694 0 : : 0);
9695 : }
9696 :
9697 0 : if (tmp_alt_rd < INT64_MAX) {
9698 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9699 0 : tmp_alt_rd = RDCOST(x->rdmult, x->rddiv, tmp_rd_stats.rate,
9700 : tmp_rd_stats.dist);
9701 : #else
9702 : if (RDCOST(x->rdmult, x->rddiv,
9703 : tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
9704 : tmp_rd_stats.dist) <
9705 : RDCOST(x->rdmult, x->rddiv, 0, tmp_rd_stats.sse))
9706 : tmp_alt_rd =
9707 : RDCOST(x->rdmult, x->rddiv,
9708 : tmp_rd_stats.rate +
9709 : av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
9710 : tmp_rd_stats.dist);
9711 : else
9712 : tmp_alt_rd =
9713 : RDCOST(x->rdmult, x->rddiv,
9714 : tmp_rd_stats.rate +
9715 : av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
9716 : tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
9717 : tmp_rd_stats.sse);
9718 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9719 : }
9720 :
9721 0 : if (tmp_ref_rd > tmp_alt_rd) {
9722 0 : rate2 = tmp_rd_stats.rate;
9723 0 : disable_skip = dummy_disable_skip;
9724 0 : distortion2 = tmp_rd_stats.dist;
9725 0 : skippable = tmp_rd_stats.skip;
9726 0 : rate_y = tmp_rd_stats_y.rate;
9727 0 : rate_uv = tmp_rd_stats_uv.rate;
9728 0 : total_sse = tmp_rd_stats.sse;
9729 0 : this_rd = tmp_alt_rd;
9730 0 : tmp_ref_rd = tmp_alt_rd;
9731 0 : backup_mbmi = *mbmi;
9732 0 : backup_skip = x->skip;
9733 : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9734 : if (bsize < BLOCK_8X8) {
9735 : total_sse_y = tmp_rd_stats_y.sse;
9736 : distortion2_y = tmp_rd_stats_y.dist;
9737 : }
9738 : #endif
9739 : #if CONFIG_VAR_TX
9740 0 : for (i = 0; i < MAX_MB_PLANE; ++i)
9741 0 : memcpy(x->blk_skip_drl[i], x->blk_skip[i],
9742 0 : sizeof(uint8_t) * ctx->num_4x4_blk);
9743 : #endif // CONFIG_VAR_TX
9744 : } else {
9745 0 : *mbmi = backup_mbmi;
9746 0 : x->skip = backup_skip;
9747 : }
9748 : }
9749 :
9750 0 : frame_mv[NEARMV][ref_frame] = backup_mv;
9751 0 : frame_mv[NEWMV][ref_frame] = backup_fmv[0];
9752 0 : if (comp_pred) frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
9753 : #if CONFIG_VAR_TX
9754 0 : for (i = 0; i < MAX_MB_PLANE; ++i)
9755 0 : memcpy(x->blk_skip[i], x->blk_skip_drl[i],
9756 0 : sizeof(uint8_t) * ctx->num_4x4_blk);
9757 : #endif // CONFIG_VAR_TX
9758 : }
9759 0 : mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
9760 0 : if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
9761 :
9762 0 : if (this_rd == INT64_MAX) continue;
9763 :
9764 : #if SUB8X8_COMP_REF
9765 : compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
9766 : #else
9767 0 : if (mbmi->sb_type != BLOCK_4X4)
9768 0 : compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
9769 : #endif // SUB8X8_COMP_REF
9770 :
9771 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
9772 : }
9773 :
9774 : // Estimate the reference frame signaling cost and add it
9775 : // to the rolling cost variable.
9776 0 : if (comp_pred) {
9777 0 : rate2 += ref_costs_comp[ref_frame];
9778 : #if CONFIG_EXT_REFS
9779 0 : rate2 += ref_costs_comp[second_ref_frame];
9780 : #endif // CONFIG_EXT_REFS
9781 : } else {
9782 0 : rate2 += ref_costs_single[ref_frame];
9783 : }
9784 :
9785 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
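     :     // Skip-flag accounting: if every coded block is skippable, back the
     :     // coefficient rate out and cost the skip flag; otherwise keep
     :     // whichever of "coefficients + no-skip flag" and "skip flag + SSE
     :     // distortion" is cheaper in RDCOST terms. With
     :     // MOTION_VAR/WARPED_MOTION this branch runs only for the intra
     :     // candidate; inter candidates fall to the else branch, which reuses
     :     // the skip decision already stored in mbmi->skip.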
9786 0 : if (ref_frame == INTRA_FRAME) {
9787 : #else
9788 : if (!disable_skip) {
9789 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9790 0 : if (skippable) {
9791 : // Back out the coefficient coding costs
9792 0 : rate2 -= (rate_y + rate_uv);
9793 0 : rate_y = 0;
9794 0 : rate_uv = 0;
9795 : // Cost the skip mb case
9796 0 : rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
9797 0 : } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
9798 0 : if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0,
9799 : distortion2) <
9800 0 : RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) {
9801 : // Add in the cost of the no skip flag.
9802 0 : rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
9803 : } else {
9804 : // FIXME(rbultje) make this work for splitmv also
9805 0 : rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
9806 0 : distortion2 = total_sse;
9807 0 : assert(total_sse >= 0);
9808 0 : rate2 -= (rate_y + rate_uv);
9809 0 : this_skip2 = 1;
9810 0 : rate_y = 0;
9811 0 : rate_uv = 0;
9812 : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9813 : if (bsize < BLOCK_8X8) distortion2_y = total_sse_y;
9814 : #endif
9815 : }
9816 : } else {
9817 : // Add in the cost of the no skip flag.
9818 0 : rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
9819 : }
9820 :
9821 : // Calculate the final RD estimate for this mode.
9822 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
9823 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9824 : } else {
9825 0 : this_skip2 = mbmi->skip;
9826 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
9827 0 : if (this_skip2) {
9828 0 : rate_y = 0;
9829 0 : rate_uv = 0;
9830 : }
9831 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9832 : }
9833 :
9834 0 : if (ref_frame == INTRA_FRAME) {
9835 : // Keep record of best intra rd
9836 0 : if (this_rd < best_intra_rd) {
9837 0 : best_intra_rd = this_rd;
9838 0 : best_intra_mode = mbmi->mode;
9839 : }
9840 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
9841 0 : } else if (second_ref_frame == NONE_FRAME) {
9842 0 : if (this_rd < best_single_inter_rd) {
9843 0 : best_single_inter_rd = this_rd;
9844 0 : best_single_inter_ref = mbmi->ref_frame[0];
9845 : }
9846 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
9847 : }
9848 :
9849 0 : if (!disable_skip && ref_frame == INTRA_FRAME) {
9850 0 : for (i = 0; i < REFERENCE_MODES; ++i)
9851 0 : best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
9852 : }
9853 :
9854 :     // Did this mode help, i.e. is it the new best mode so far?
9855 0 : if (this_rd < best_rd || x->skip) {
9856 0 : if (!mode_excluded) {
9857 : // Note index of best mode so far
9858 0 : best_mode_index = mode_index;
9859 :
9860 0 : if (ref_frame == INTRA_FRAME) {
9861 : /* required for left and above block mv */
9862 0 : mbmi->mv[0].as_int = 0;
9863 : } else {
9864 0 : best_pred_sse = x->pred_sse[ref_frame];
9865 : }
9866 :
9867 0 : rd_cost->rate = rate2;
9868 : #if CONFIG_SUPERTX
9869 : if (x->skip)
9870 : *returnrate_nocoef = rate2;
9871 : else
9872 : *returnrate_nocoef = rate2 - rate_y - rate_uv;
9873 : *returnrate_nocoef -= av1_cost_bit(
9874 : av1_get_skip_prob(cm, xd), disable_skip || skippable || this_skip2);
9875 : *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
9876 : mbmi->ref_frame[0] != INTRA_FRAME);
9877 : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9878 : #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
9879 : MODE_INFO *const mi = xd->mi[0];
9880 : const MOTION_MODE motion_allowed = motion_mode_allowed(
9881 : #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
9882 : 0, xd->global_motion,
9883 : #endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
9884 : mi);
9885 : if (motion_allowed == WARPED_CAUSAL)
9886 : *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
9887 : else if (motion_allowed == OBMC_CAUSAL)
9888 : *returnrate_nocoef -=
9889 : cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
9890 : #else
9891 : *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
9892 : #endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
9893 : #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
9894 : #endif // CONFIG_SUPERTX
9895 0 : rd_cost->dist = distortion2;
9896 0 : rd_cost->rdcost = this_rd;
9897 0 : best_rd = this_rd;
9898 0 : best_mbmode = *mbmi;
9899 0 : best_skip2 = this_skip2;
9900 0 : best_mode_skippable = skippable;
9901 0 : best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
9902 : this_skip2 || skippable);
9903 0 : best_rate_uv = rate_uv;
9904 : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
9905 : if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y;
9906 : #endif
9907 : #if CONFIG_VAR_TX
9908 0 : for (i = 0; i < MAX_MB_PLANE; ++i)
9909 0 : memcpy(ctx->blk_skip[i], x->blk_skip[i],
9910 0 : sizeof(uint8_t) * ctx->num_4x4_blk);
9911 : #endif // CONFIG_VAR_TX
9912 : }
9913 : }
9914 :
9915 : /* keep record of best compound/single-only prediction */
9916 0 : if (!disable_skip && ref_frame != INTRA_FRAME) {
9917 : int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
9918 :
9919 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT) {
9920 0 : single_rate = rate2 - compmode_cost;
9921 0 : hybrid_rate = rate2;
9922 : } else {
9923 0 : single_rate = rate2;
9924 0 : hybrid_rate = rate2 + compmode_cost;
9925 : }
9926 :
9927 0 : single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
9928 0 : hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
9929 :
9930 0 : if (!comp_pred) {
9931 0 : if (single_rd < best_pred_rd[SINGLE_REFERENCE])
9932 0 : best_pred_rd[SINGLE_REFERENCE] = single_rd;
9933 : } else {
9934 0 : if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
9935 0 : best_pred_rd[COMPOUND_REFERENCE] = single_rd;
9936 : }
9937 0 : if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
9938 0 : best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
9939 : }
9940 :
9941 0 : if (x->skip && !comp_pred) break;
9942 : }
9943 :
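     :   // Refinement pass: when the winner was found with the fast
     :   // transform-type search, rebuild its prediction, redo the Y/UV
     :   // transform RD with the full search, re-decide the skip flag, and keep
     :   // the refined tx_type/tx_size if that lowers the overall RDCOST.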
9944 0 : if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
9945 0 : ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
9946 0 : is_inter_mode(best_mbmode.mode)) ||
9947 0 : (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
9948 0 : !is_inter_mode(best_mbmode.mode)))) {
9949 0 : int skip_blk = 0;
9950 : RD_STATS rd_stats_y, rd_stats_uv;
9951 :
9952 0 : x->use_default_inter_tx_type = 0;
9953 0 : x->use_default_intra_tx_type = 0;
9954 :
9955 0 : *mbmi = best_mbmode;
9956 :
9957 0 : set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
9958 :
9959 : // Select prediction reference frames.
9960 0 : for (i = 0; i < MAX_MB_PLANE; i++) {
9961 0 : xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
9962 0 : if (has_second_ref(mbmi))
9963 0 : xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
9964 : }
9965 :
9966 0 : if (is_inter_mode(mbmi->mode)) {
9967 0 : av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
9968 : #if CONFIG_MOTION_VAR
9969 0 : if (mbmi->motion_mode == OBMC_CAUSAL) {
9970 0 : av1_build_obmc_inter_prediction(
9971 : cm, xd, mi_row, mi_col, args.above_pred_buf, args.above_pred_stride,
9972 : args.left_pred_buf, args.left_pred_stride);
9973 : }
9974 : #endif // CONFIG_MOTION_VAR
9975 0 : av1_subtract_plane(x, bsize, 0);
9976 : #if CONFIG_VAR_TX
9977 0 : if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
9978 0 : select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
9979 : } else {
9980 : int idx, idy;
9981 0 : super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
9982 0 : for (idy = 0; idy < xd->n8_h; ++idy)
9983 0 : for (idx = 0; idx < xd->n8_w; ++idx)
9984 0 : mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
9985 0 : memset(x->blk_skip[0], rd_stats_y.skip,
9986 0 : sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
9987 : }
9988 :
9989 0 : inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
9990 : #else
9991 : super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
9992 : super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
9993 : #endif // CONFIG_VAR_TX
9994 : } else {
9995 0 : super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
9996 0 : super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
9997 : }
9998 :
9999 0 : if (RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
10000 : (rd_stats_y.dist + rd_stats_uv.dist)) >
10001 0 : RDCOST(x->rdmult, x->rddiv, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
10002 0 : skip_blk = 1;
10003 0 : rd_stats_y.rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
10004 0 : rd_stats_uv.rate = 0;
10005 0 : rd_stats_y.dist = rd_stats_y.sse;
10006 0 : rd_stats_uv.dist = rd_stats_uv.sse;
10007 : } else {
10008 0 : skip_blk = 0;
10009 0 : rd_stats_y.rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
10010 : }
10011 :
10012 0 : if (RDCOST(x->rdmult, x->rddiv, best_rate_y + best_rate_uv, rd_cost->dist) >
10013 0 : RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
10014 : (rd_stats_y.dist + rd_stats_uv.dist))) {
10015 : #if CONFIG_VAR_TX
10016 : int idx, idy;
10017 : #endif // CONFIG_VAR_TX
10018 0 : best_mbmode.tx_type = mbmi->tx_type;
10019 0 : best_mbmode.tx_size = mbmi->tx_size;
10020 : #if CONFIG_VAR_TX
10021 0 : for (idy = 0; idy < xd->n8_h; ++idy)
10022 0 : for (idx = 0; idx < xd->n8_w; ++idx)
10023 0 : best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
10024 :
10025 0 : for (i = 0; i < MAX_MB_PLANE; ++i)
10026 0 : memcpy(ctx->blk_skip[i], x->blk_skip[i],
10027 0 : sizeof(uint8_t) * ctx->num_4x4_blk);
10028 :
10029 0 : best_mbmode.min_tx_size = mbmi->min_tx_size;
10030 : #endif // CONFIG_VAR_TX
10031 0 : rd_cost->rate +=
10032 0 : (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
10033 0 : rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
10034 : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
10035 : if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist;
10036 : #endif
10037 0 : rd_cost->rdcost =
10038 0 : RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
10039 0 : best_skip2 = skip_blk;
10040 : }
10041 : }
10042 :
10043 : #if CONFIG_PALETTE
10044 : // Only try palette mode when the best mode so far is an intra mode.
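     :   // rd_pick_palette_intra_sby() selects the Y palette and color map; the
     :   // code below then re-costs the complete DC_PRED + palette candidate,
     :   // reusing the cached UV intra decision, and adopts it if it beats
     :   // best_rd.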
10045 0 : if (try_palette && !is_inter_mode(best_mbmode.mode)) {
10046 0 : int rate2 = 0;
10047 : #if CONFIG_SUPERTX
10048 : int best_rate_nocoef;
10049 : #endif // CONFIG_SUPERTX
10050 0 : int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
10051 0 : best_model_rd_palette = INT64_MAX;
10052 0 : int skippable = 0, rate_overhead_palette = 0;
10053 : RD_STATS rd_stats_y;
10054 : TX_SIZE uv_tx;
10055 0 : uint8_t *const best_palette_color_map =
10056 0 : x->palette_buffer->best_palette_color_map;
10057 0 : uint8_t *const color_map = xd->plane[0].color_index_map;
10058 0 : MB_MODE_INFO best_mbmi_palette = best_mbmode;
10059 :
10060 0 : mbmi->mode = DC_PRED;
10061 0 : mbmi->uv_mode = DC_PRED;
10062 0 : mbmi->ref_frame[0] = INTRA_FRAME;
10063 0 : mbmi->ref_frame[1] = NONE_FRAME;
10064 0 : rate_overhead_palette = rd_pick_palette_intra_sby(
10065 : cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
10066 : &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
10067 : &best_model_rd_palette, NULL, NULL, NULL, NULL);
10068 0 : if (pmi->palette_size[0] == 0) goto PALETTE_EXIT;
10069 0 : memcpy(color_map, best_palette_color_map,
10070 0 : rows * cols * sizeof(best_palette_color_map[0]));
10071 0 : super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
10072 0 : if (rd_stats_y.rate == INT_MAX) goto PALETTE_EXIT;
10073 0 : uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
10074 0 : [xd->plane[1].subsampling_y];
10075 0 : if (rate_uv_intra[uv_tx] == INT_MAX) {
10076 0 : choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
10077 0 : &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
10078 0 : &skip_uvs[uv_tx], &mode_uv[uv_tx]);
10079 0 : pmi_uv[uv_tx] = *pmi;
10080 : #if CONFIG_EXT_INTRA
10081 0 : uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
10082 : #endif // CONFIG_EXT_INTRA
10083 : #if CONFIG_FILTER_INTRA
10084 : filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
10085 : #endif // CONFIG_FILTER_INTRA
10086 : }
10087 0 : mbmi->uv_mode = mode_uv[uv_tx];
10088 0 : pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
10089 0 : if (pmi->palette_size[1] > 0) {
10090 0 : memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
10091 0 : pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
10092 : 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
10093 : }
10094 : #if CONFIG_EXT_INTRA
10095 0 : mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
10096 : #endif // CONFIG_EXT_INTRA
10097 : #if CONFIG_FILTER_INTRA
10098 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
10099 : filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
10100 : if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
10101 : mbmi->filter_intra_mode_info.filter_intra_mode[1] =
10102 : filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
10103 : }
10104 : #endif // CONFIG_FILTER_INTRA
10105 0 : skippable = rd_stats_y.skip && skip_uvs[uv_tx];
10106 0 : distortion2 = rd_stats_y.dist + dist_uvs[uv_tx];
10107 0 : rate2 = rd_stats_y.rate + rate_overhead_palette + rate_uv_intra[uv_tx];
10108 0 : rate2 += ref_costs_single[INTRA_FRAME];
10109 :
10110 0 : if (skippable) {
10111 0 : rate2 -= (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
10112 : #if CONFIG_SUPERTX
10113 : best_rate_nocoef = rate2;
10114 : #endif // CONFIG_SUPERTX
10115 0 : rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
10116 : } else {
10117 : #if CONFIG_SUPERTX
10118 : best_rate_nocoef = rate2 - (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
10119 : #endif // CONFIG_SUPERTX
10120 0 : rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
10121 : }
10122 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
10123 0 : if (this_rd < best_rd) {
10124 0 : best_mode_index = 3;
10125 0 : mbmi->mv[0].as_int = 0;
10126 0 : rd_cost->rate = rate2;
10127 : #if CONFIG_SUPERTX
10128 : *returnrate_nocoef = best_rate_nocoef;
10129 : #endif // CONFIG_SUPERTX
10130 0 : rd_cost->dist = distortion2;
10131 0 : rd_cost->rdcost = this_rd;
10132 0 : best_rd = this_rd;
10133 0 : best_mbmode = *mbmi;
10134 0 : best_skip2 = 0;
10135 0 : best_mode_skippable = skippable;
10136 : }
10137 : }
10138 : PALETTE_EXIT:
10139 : #endif // CONFIG_PALETTE
10140 :
10141 : #if CONFIG_FILTER_INTRA
10142 : // TODO(huisu): filter-intra is turned off in lossless mode for now to
10143 : // avoid a unit test failure
10144 : if (!xd->lossless[mbmi->segment_id] &&
10145 : #if CONFIG_PALETTE
10146 : pmi->palette_size[0] == 0 &&
10147 : #endif // CONFIG_PALETTE
10148 : !dc_skipped && best_mode_index >= 0 &&
10149 : best_intra_rd < (best_rd + (best_rd >> 3))) {
10150 : pick_filter_intra_interframe(
10151 : cpi, x, ctx, bsize, mi_row, mi_col, rate_uv_intra, rate_uv_tokenonly,
10152 : dist_uvs, skip_uvs, mode_uv, filter_intra_mode_info_uv,
10153 : #if CONFIG_EXT_INTRA
10154 : uv_angle_delta,
10155 : #endif // CONFIG_EXT_INTRA
10156 : #if CONFIG_PALETTE
10157 : pmi_uv, palette_ctx,
10158 : #endif // CONFIG_PALETTE
10159 : 0, ref_costs_single, &best_rd, &best_intra_rd, &best_intra_mode,
10160 : &best_mode_index, &best_skip2, &best_mode_skippable,
10161 : #if CONFIG_SUPERTX
10162 : returnrate_nocoef,
10163 : #endif // CONFIG_SUPERTX
10164 : best_pred_rd, &best_mbmode, rd_cost);
10165 : }
10166 : #endif // CONFIG_FILTER_INTRA
10167 :
10168 : // The inter modes' rate costs are not calculated precisely in some cases.
10169 : // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, or
10170 : // ZEROMV. Here, checks are added for those cases, and the mode decisions
10171 : // are corrected.
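      : // For example, if the NEWMV motion search converges on exactly the same
      : // vector as the NEARESTMV candidate, the block is re-labelled NEARESTMV
      : // below: the prediction is unchanged but the cheaper mode signalling is
      : // used.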
10172 0 : if (best_mbmode.mode == NEWMV
10173 : #if CONFIG_EXT_INTER
10174 0 : || best_mbmode.mode == NEW_NEWMV
10175 : #endif // CONFIG_EXT_INTER
10176 : ) {
10177 0 : const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
10178 0 : best_mbmode.ref_frame[1] };
10179 0 : int comp_pred_mode = refs[1] > INTRA_FRAME;
10180 : int_mv zeromv[2];
10181 0 : const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
10182 : #if CONFIG_GLOBAL_MOTION
10183 0 : zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
10184 : cm->allow_high_precision_mv, bsize,
10185 : mi_col, mi_row, 0)
10186 0 : .as_int;
10187 0 : zeromv[1].as_int = comp_pred_mode
10188 0 : ? gm_get_motion_vector(&cm->global_motion[refs[1]],
10189 : cm->allow_high_precision_mv,
10190 : bsize, mi_col, mi_row, 0)
10191 : .as_int
10192 0 : : 0;
10193 : #else
10194 : zeromv[0].as_int = 0;
10195 : zeromv[1].as_int = 0;
10196 : #endif // CONFIG_GLOBAL_MOTION
10197 0 : if (!comp_pred_mode) {
10198 0 : int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
10199 0 : ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
10200 0 : : INT_MAX;
10201 :
10202 0 : for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10203 0 : int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10204 0 : if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
10205 0 : best_mbmode.mode = NEARMV;
10206 0 : best_mbmode.ref_mv_idx = i;
10207 : }
10208 : }
10209 :
10210 0 : if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
10211 0 : best_mbmode.mode = NEARESTMV;
10212 0 : else if (best_mbmode.mv[0].as_int == zeromv[0].as_int)
10213 0 : best_mbmode.mode = ZEROMV;
10214 : } else {
10215 : int_mv nearestmv[2];
10216 : int_mv nearmv[2];
10217 :
10218 : #if CONFIG_EXT_INTER
10219 0 : if (mbmi_ext->ref_mv_count[rf_type] > 1) {
10220 0 : nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
10221 0 : nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
10222 : } else {
10223 0 : nearmv[0] = frame_mv[NEARMV][refs[0]];
10224 0 : nearmv[1] = frame_mv[NEARMV][refs[1]];
10225 : }
10226 : #else
10227 : int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
10228 : ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
10229 : : INT_MAX;
10230 :
10231 : for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10232 : nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10233 : nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
10234 :
10235 : if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
10236 : nearmv[1].as_int == best_mbmode.mv[1].as_int) {
10237 : best_mbmode.mode = NEARMV;
10238 : best_mbmode.ref_mv_idx = i;
10239 : }
10240 : }
10241 : #endif // CONFIG_EXT_INTER
10242 0 : if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
10243 0 : nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
10244 0 : nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
10245 : } else {
10246 0 : nearestmv[0] = frame_mv[NEARESTMV][refs[0]];
10247 0 : nearestmv[1] = frame_mv[NEARESTMV][refs[1]];
10248 : }
10249 :
10250 0 : if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
10251 0 : nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
10252 : #if CONFIG_EXT_INTER
10253 0 : best_mbmode.mode = NEAREST_NEARESTMV;
10254 : } else {
10255 0 : int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
10256 0 : ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
10257 0 : : INT_MAX;
10258 :
10259 0 : for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10260 0 : nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10261 0 : nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
10262 :
10263 : // Try switching to the NEAR_NEARMV mode
10264 0 : if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
10265 0 : nearmv[1].as_int == best_mbmode.mv[1].as_int) {
10266 0 : best_mbmode.mode = NEAR_NEARMV;
10267 0 : best_mbmode.ref_mv_idx = i;
10268 : }
10269 : }
10270 :
10271 0 : if (best_mbmode.mode == NEW_NEWMV &&
10272 0 : best_mbmode.mv[0].as_int == zeromv[0].as_int &&
10273 0 : best_mbmode.mv[1].as_int == zeromv[1].as_int)
10274 0 : best_mbmode.mode = ZERO_ZEROMV;
10275 : }
10276 : #else
10277 : best_mbmode.mode = NEARESTMV;
10278 : } else if (best_mbmode.mv[0].as_int == zeromv[0].as_int &&
10279 : best_mbmode.mv[1].as_int == zeromv[1].as_int) {
10280 : best_mbmode.mode = ZEROMV;
10281 : }
10282 : #endif // CONFIG_EXT_INTER
10283 : }
10284 : }
10285 :
10286 : // Make sure that the ref_mv_idx is only nonzero when we're
10287 : // using a mode which can support ref_mv_idx
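      : // (Only the NEWMV- and NEARMV-class modes index into the candidate MV
      : // stack; every other mode must leave the index at 0.)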
10288 0 : if (best_mbmode.ref_mv_idx != 0 &&
10289 : #if CONFIG_EXT_INTER
10290 0 : !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
10291 0 : have_nearmv_in_inter_mode(best_mbmode.mode))) {
10292 : #else
10293 : !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) {
10294 : #endif // CONFIG_EXT_INTER
10295 0 : best_mbmode.ref_mv_idx = 0;
10296 : }
10297 :
10298 : {
10299 0 : int8_t ref_frame_type = av1_ref_frame_type(best_mbmode.ref_frame);
10300 0 : int16_t mode_ctx = mbmi_ext->mode_context[ref_frame_type];
10301 0 : if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
10302 : int_mv zeromv[2];
10303 : #if CONFIG_GLOBAL_MOTION
10304 0 : const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
10305 0 : best_mbmode.ref_frame[1] };
10306 0 : zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
10307 : cm->allow_high_precision_mv,
10308 : bsize, mi_col, mi_row, 0)
10309 0 : .as_int;
10310 0 : zeromv[1].as_int = (refs[1] != NONE_FRAME)
10311 0 : ? gm_get_motion_vector(&cm->global_motion[refs[1]],
10312 : cm->allow_high_precision_mv,
10313 : bsize, mi_col, mi_row, 0)
10314 : .as_int
10315 0 : : 0;
10316 0 : lower_mv_precision(&zeromv[0].as_mv, cm->allow_high_precision_mv);
10317 0 : lower_mv_precision(&zeromv[1].as_mv, cm->allow_high_precision_mv);
10318 : #else
10319 : zeromv[0].as_int = zeromv[1].as_int = 0;
10320 : #endif // CONFIG_GLOBAL_MOTION
10321 0 : if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
10322 0 : best_mbmode.mv[0].as_int == zeromv[0].as_int &&
10323 : #if CONFIG_EXT_INTER
10324 0 : (best_mbmode.ref_frame[1] <= INTRA_FRAME)
10325 : #else
10326 : (best_mbmode.ref_frame[1] == NONE_FRAME ||
10327 : best_mbmode.mv[1].as_int == zeromv[1].as_int)
10328 : #endif // CONFIG_EXT_INTER
10329 : ) {
10330 0 : best_mbmode.mode = ZEROMV;
10331 : }
10332 : }
10333 : }
10334 :
10335 0 : if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
10336 0 : rd_cost->rate = INT_MAX;
10337 0 : rd_cost->rdcost = INT64_MAX;
10338 0 : return;
10339 : }
10340 :
10341 : #if CONFIG_DUAL_FILTER
10342 0 : assert((cm->interp_filter == SWITCHABLE) ||
10343 : (cm->interp_filter == best_mbmode.interp_filter[0]) ||
10344 : !is_inter_block(&best_mbmode));
10345 0 : assert((cm->interp_filter == SWITCHABLE) ||
10346 : (cm->interp_filter == best_mbmode.interp_filter[1]) ||
10347 : !is_inter_block(&best_mbmode));
10348 0 : if (best_mbmode.ref_frame[1] > INTRA_FRAME) {
10349 0 : assert((cm->interp_filter == SWITCHABLE) ||
10350 : (cm->interp_filter == best_mbmode.interp_filter[2]) ||
10351 : !is_inter_block(&best_mbmode));
10352 0 : assert((cm->interp_filter == SWITCHABLE) ||
10353 : (cm->interp_filter == best_mbmode.interp_filter[3]) ||
10354 : !is_inter_block(&best_mbmode));
10355 : }
10356 : #else
10357 : assert((cm->interp_filter == SWITCHABLE) ||
10358 : (cm->interp_filter == best_mbmode.interp_filter) ||
10359 : !is_inter_block(&best_mbmode));
10360 : #endif // CONFIG_DUAL_FILTER
10361 :
10362 0 : if (!cpi->rc.is_src_frame_alt_ref)
10363 0 : av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
10364 : sf->adaptive_rd_thresh, bsize, best_mode_index);
10365 :
10366 : // macroblock modes
10367 0 : *mbmi = best_mbmode;
10368 0 : x->skip |= best_skip2;
10369 :
10370 : // Note: this section is needed since the mode may have been forced to
10371 : // ZEROMV by the all-zero mode handling of ref-mv.
10372 : #if CONFIG_GLOBAL_MOTION
10373 0 : if (mbmi->mode == ZEROMV
10374 : #if CONFIG_EXT_INTER
10375 0 : || mbmi->mode == ZERO_ZEROMV
10376 : #endif // CONFIG_EXT_INTER
10377 : ) {
10378 : #if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
10379 : // Correct the motion mode for ZEROMV
10380 0 : const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
10381 : #if SEPARATE_GLOBAL_MOTION
10382 0 : 0, xd->global_motion,
10383 : #endif // SEPARATE_GLOBAL_MOTION
10384 0 : xd->mi[0]);
10385 0 : if (mbmi->motion_mode > last_motion_mode_allowed)
10386 0 : mbmi->motion_mode = last_motion_mode_allowed;
10387 : #endif // CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
10388 :
10389 : // Correct the interpolation filter for ZEROMV
10390 0 : if (is_nontrans_global_motion(xd)) {
10391 : #if CONFIG_DUAL_FILTER
10392 0 : mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
10393 : ? EIGHTTAP_REGULAR
10394 0 : : cm->interp_filter;
10395 0 : mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
10396 : ? EIGHTTAP_REGULAR
10397 0 : : cm->interp_filter;
10398 : #else
10399 : mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
10400 : : cm->interp_filter;
10401 : #endif // CONFIG_DUAL_FILTER
10402 : }
10403 : }
10404 : #endif // CONFIG_GLOBAL_MOTION
10405 :
10406 0 : for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
10407 0 : if (mbmi->mode != NEWMV)
10408 0 : mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
10409 : else
10410 0 : mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
10411 : }
10412 :
10413 0 : for (i = 0; i < REFERENCE_MODES; ++i) {
10414 0 : if (best_pred_rd[i] == INT64_MAX)
10415 0 : best_pred_diff[i] = INT_MIN;
10416 : else
10417 0 : best_pred_diff[i] = best_rd - best_pred_rd[i];
10418 : }
10419 :
10420 0 : x->skip |= best_mode_skippable;
10421 :
10422 0 : assert(best_mode_index >= 0);
10423 :
10424 0 : store_coding_context(x, ctx, best_mode_index, best_pred_diff,
10425 : best_mode_skippable);
10426 :
10427 : #if CONFIG_PALETTE
10428 0 : if (cm->allow_screen_content_tools && pmi->palette_size[1] > 0) {
10429 0 : restore_uv_color_map(cpi, x);
10430 : }
10431 : #endif // CONFIG_PALETTE
10432 : }
10433 :
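      : // Mode decision for a block whose segment has SEG_LVL_SKIP active: the
      : // block is forced to ZEROMV on LAST_FRAME with the entire residual
      : // skipped, so only the interpolation filter and the reference/mode
      : // signalling costs need to be estimated.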
10434 0 : void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
10435 : TileDataEnc *tile_data, MACROBLOCK *x,
10436 : int mi_row, int mi_col,
10437 : RD_STATS *rd_cost, BLOCK_SIZE bsize,
10438 : PICK_MODE_CONTEXT *ctx,
10439 : int64_t best_rd_so_far) {
10440 0 : const AV1_COMMON *const cm = &cpi->common;
10441 0 : MACROBLOCKD *const xd = &x->e_mbd;
10442 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
10443 0 : unsigned char segment_id = mbmi->segment_id;
10444 0 : const int comp_pred = 0;
10445 : int i;
10446 : int64_t best_pred_diff[REFERENCE_MODES];
10447 : unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
10448 : unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
10449 : aom_prob comp_mode_p;
10450 0 : InterpFilter best_filter = SWITCHABLE;
10451 0 : int64_t this_rd = INT64_MAX;
10452 0 : int rate2 = 0;
10453 0 : const int64_t distortion2 = 0;
10454 : (void)mi_row;
10455 : (void)mi_col;
10456 :
10457 0 : estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
10458 : &comp_mode_p);
10459 :
10460 0 : for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
10461 0 : for (i = LAST_FRAME; i < TOTAL_REFS_PER_FRAME; ++i)
10462 0 : x->pred_mv_sad[i] = INT_MAX;
10463 :
10464 0 : rd_cost->rate = INT_MAX;
10465 :
10466 0 : assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
10467 :
10468 : #if CONFIG_PALETTE
10469 0 : mbmi->palette_mode_info.palette_size[0] = 0;
10470 0 : mbmi->palette_mode_info.palette_size[1] = 0;
10471 : #endif // CONFIG_PALETTE
10472 :
10473 : #if CONFIG_FILTER_INTRA
10474 : mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
10475 : mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
10476 : #endif // CONFIG_FILTER_INTRA
10477 0 : mbmi->mode = ZEROMV;
10478 0 : mbmi->motion_mode = SIMPLE_TRANSLATION;
10479 0 : mbmi->uv_mode = DC_PRED;
10480 0 : mbmi->ref_frame[0] = LAST_FRAME;
10481 0 : mbmi->ref_frame[1] = NONE_FRAME;
10482 : #if CONFIG_GLOBAL_MOTION
10483 0 : mbmi->mv[0].as_int =
10484 0 : gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
10485 : cm->allow_high_precision_mv, bsize, mi_col, mi_row,
10486 : 0)
10487 0 : .as_int;
10488 : #else // CONFIG_GLOBAL_MOTION
10489 : mbmi->mv[0].as_int = 0;
10490 : #endif // CONFIG_GLOBAL_MOTION
10491 0 : mbmi->tx_size = max_txsize_lookup[bsize];
10492 0 : x->skip = 1;
10493 :
10494 0 : mbmi->ref_mv_idx = 0;
10495 0 : mbmi->pred_mv[0].as_int = 0;
10496 :
10497 0 : mbmi->motion_mode = SIMPLE_TRANSLATION;
10498 : #if CONFIG_MOTION_VAR
10499 0 : av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
10500 : #endif // CONFIG_MOTION_VAR
10501 : #if CONFIG_WARPED_MOTION
10502 0 : if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
10503 : int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
10504 0 : mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
10505 : }
10506 : #endif // CONFIG_WARPED_MOTION
10507 :
10508 0 : set_default_interp_filters(mbmi, cm->interp_filter);
10509 :
10510 0 : if (cm->interp_filter != SWITCHABLE) {
10511 0 : best_filter = cm->interp_filter;
10512 : } else {
10513 0 : best_filter = EIGHTTAP_REGULAR;
10514 0 : if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
10515 0 : x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
10516 : int rs;
10517 0 : int best_rs = INT_MAX;
10518 0 : for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
10519 : #if CONFIG_DUAL_FILTER
10520 : int k;
10521 0 : for (k = 0; k < 4; ++k) mbmi->interp_filter[k] = i;
10522 : #else
10523 : mbmi->interp_filter = i;
10524 : #endif // CONFIG_DUAL_FILTER
10525 0 : rs = av1_get_switchable_rate(cpi, xd);
10526 0 : if (rs < best_rs) {
10527 0 : best_rs = rs;
10528 : #if CONFIG_DUAL_FILTER
10529 0 : best_filter = mbmi->interp_filter[0];
10530 : #else
10531 : best_filter = mbmi->interp_filter;
10532 : #endif // CONFIG_DUAL_FILTER
10533 : }
10534 : }
10535 : }
10536 : }
10537 : // Set the appropriate filter
10538 : #if CONFIG_DUAL_FILTER
10539 0 : for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = best_filter;
10540 : #else
10541 : mbmi->interp_filter = best_filter;
10542 : #endif // CONFIG_DUAL_FILTER
10543 0 : rate2 += av1_get_switchable_rate(cpi, xd);
10544 :
10545 0 : if (cm->reference_mode == REFERENCE_MODE_SELECT)
10546 0 : rate2 += av1_cost_bit(comp_mode_p, comp_pred);
10547 :
10548 : // Estimate the reference frame signaling cost and add it
10549 : // to the rolling cost variable.
10550 0 : rate2 += ref_costs_single[LAST_FRAME];
10551 0 : this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
10552 :
10553 0 : rd_cost->rate = rate2;
10554 0 : rd_cost->dist = distortion2;
10555 0 : rd_cost->rdcost = this_rd;
10556 : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
10557 : if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
10558 : #endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
10559 0 : if (this_rd >= best_rd_so_far) {
10560 0 : rd_cost->rate = INT_MAX;
10561 0 : rd_cost->rdcost = INT64_MAX;
10562 0 : return;
10563 : }
10564 :
10565 : #if CONFIG_DUAL_FILTER
10566 0 : assert((cm->interp_filter == SWITCHABLE) ||
10567 : (cm->interp_filter == mbmi->interp_filter[0]));
10568 : #else
10569 : assert((cm->interp_filter == SWITCHABLE) ||
10570 : (cm->interp_filter == mbmi->interp_filter));
10571 : #endif // CONFIG_DUAL_FILTER
10572 :
10573 0 : av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
10574 : cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
10575 :
10576 0 : av1_zero(best_pred_diff);
10577 :
10578 0 : store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0);
10579 : }
10580 :
10581 : #if CONFIG_MOTION_VAR
10582 : // This function has a structure similar to av1_build_obmc_inter_prediction
10583 : //
10584 : // The OBMC predictor is computed as:
10585 : //
10586 : // PObmc(x,y) =
10587 : // AOM_BLEND_A64(Mh(x),
10588 : // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
10589 : // PLeft(x, y))
10590 : //
10591 : // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
10592 : // rounding, this can be written as:
10593 : //
10594 : // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * PObmc(x,y) =
10595 : // Mh(x) * Mv(y) * P(x,y) +
10596 : // Mh(x) * Cv(y) * PAbove(x,y) +
10597 : // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
10598 : //
10599 : // Where:
10600 : //
10601 : // Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
10602 : // Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
10603 : //
10604 : // This function computes 'wsrc' and 'mask' as:
10605 : //
10606 : // wsrc(x, y) =
10607 : // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
10608 : // (Mh(x) * Cv(y) * PAbove(x,y) +
10609 : // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y))
10610 : //
10611 : // mask(x, y) = Mh(x) * Mv(y)
10612 : //
10613 : // These can then be used to efficiently approximate the error for any
10614 : // predictor P in the context of the provided neighbouring predictors by
10615 : // computing:
10616 : //
10617 : // error(x, y) =
10618 : // (wsrc(x, y) - mask(x, y) * P(x, y)) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
10619 : //
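      : // As a quick sanity check (AOM_BLEND_A64_MAX_ALPHA is 64): an interior
      : // pixel with no overlap has Mh(x) == Mv(y) == 64 and Cv(y) == Ch(x) == 0,
      : // so wsrc == 64 * 64 * src(x, y), mask == 64 * 64, and the error above
      : // reduces to the plain residual src(x, y) - P(x, y).
      : //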
10620 0 : static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
10621 : const MACROBLOCKD *xd, int mi_row,
10622 : int mi_col, const uint8_t *above,
10623 : int above_stride, const uint8_t *left,
10624 : int left_stride) {
10625 0 : const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
10626 : int row, col, i;
10627 0 : const int bw = xd->n8_w << MI_SIZE_LOG2;
10628 0 : const int bh = xd->n8_h << MI_SIZE_LOG2;
10629 0 : int32_t *mask_buf = x->mask_buf;
10630 0 : int32_t *wsrc_buf = x->wsrc_buf;
10631 0 : const int wsrc_stride = bw;
10632 0 : const int mask_stride = bw;
10633 0 : const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
10634 : #if CONFIG_HIGHBITDEPTH
10635 0 : const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
10636 : #else
10637 : const int is_hbd = 0;
10638 : #endif // CONFIG_HIGHBITDEPTH
10639 :
10640 : // plane 0 should not be subsampled
10641 0 : assert(xd->plane[0].subsampling_x == 0);
10642 0 : assert(xd->plane[0].subsampling_y == 0);
10643 :
10644 0 : av1_zero_array(wsrc_buf, bw * bh);
10645 0 : for (i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
10646 :
10647 : // handle above row
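      : // (accumulates Cv(y) * PAbove into wsrc and Mv(y) into mask over the
      : // overlap rows; both are scaled up by AOM_BLEND_A64_MAX_ALPHA below)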
10648 0 : if (xd->up_available) {
10649 0 : const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
10650 0 : const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
10651 0 : const int mi_row_offset = -1;
10652 0 : const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
10653 0 : const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
10654 0 : int neighbor_count = 0;
10655 :
10656 0 : assert(miw > 0);
10657 :
10658 0 : i = 0;
10659 : do { // for each mi in the above row
10660 0 : const int mi_col_offset = i;
10661 0 : const MB_MODE_INFO *above_mbmi =
10662 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
10663 : #if CONFIG_CHROMA_SUB8X8
10664 0 : if (above_mbmi->sb_type < BLOCK_8X8)
10665 0 : above_mbmi =
10666 0 : &xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi;
10667 : #endif // CONFIG_CHROMA_SUB8X8
10668 0 : const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
10669 0 : const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
10670 0 : const int neighbor_bw = mi_step * MI_SIZE;
10671 :
10672 0 : if (is_neighbor_overlappable(above_mbmi)) {
10673 : if (!CONFIG_CB4X4 && (a_bsize == BLOCK_4X4 || a_bsize == BLOCK_4X8))
10674 : neighbor_count += 2;
10675 : else
10676 0 : neighbor_count++;
10677 0 : if (neighbor_count > neighbor_limit) break;
10678 :
10679 0 : const int tmp_stride = above_stride;
10680 0 : int32_t *wsrc = wsrc_buf + (i * MI_SIZE);
10681 0 : int32_t *mask = mask_buf + (i * MI_SIZE);
10682 :
10683 0 : if (!is_hbd) {
10684 0 : const uint8_t *tmp = above;
10685 :
10686 0 : for (row = 0; row < overlap; ++row) {
10687 0 : const uint8_t m0 = mask1d[row];
10688 0 : const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10689 0 : for (col = 0; col < neighbor_bw; ++col) {
10690 0 : wsrc[col] = m1 * tmp[col];
10691 0 : mask[col] = m0;
10692 : }
10693 0 : wsrc += wsrc_stride;
10694 0 : mask += mask_stride;
10695 0 : tmp += tmp_stride;
10696 : }
10697 : #if CONFIG_HIGHBITDEPTH
10698 : } else {
10699 0 : const uint16_t *tmp = CONVERT_TO_SHORTPTR(above);
10700 :
10701 0 : for (row = 0; row < overlap; ++row) {
10702 0 : const uint8_t m0 = mask1d[row];
10703 0 : const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10704 0 : for (col = 0; col < neighbor_bw; ++col) {
10705 0 : wsrc[col] = m1 * tmp[col];
10706 0 : mask[col] = m0;
10707 : }
10708 0 : wsrc += wsrc_stride;
10709 0 : mask += mask_stride;
10710 0 : tmp += tmp_stride;
10711 : }
10712 : #endif // CONFIG_HIGHBITDEPTH
10713 : }
10714 : }
10715 :
10716 0 : above += neighbor_bw;
10717 0 : i += mi_step;
10718 0 : } while (i < miw);
10719 : }
10720 :
10721 0 : for (i = 0; i < bw * bh; ++i) {
10722 0 : wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
10723 0 : mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
10724 : }
10725 :
10726 : // handle left column
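      : // (folds Mh(x) into the terms accumulated so far and adds
      : // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft over the overlap columns)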
10727 0 : if (xd->left_available) {
10728 0 : const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
10729 0 : const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
10730 0 : const int mi_col_offset = -1;
10731 0 : const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
10732 0 : const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
10733 0 : int neighbor_count = 0;
10734 :
10735 0 : assert(mih > 0);
10736 :
10737 0 : i = 0;
10738 : do { // for each mi in the left column
10739 0 : const int mi_row_offset = i;
10740 0 : MB_MODE_INFO *left_mbmi =
10741 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
10742 :
10743 : #if CONFIG_CHROMA_SUB8X8
10744 0 : if (left_mbmi->sb_type < BLOCK_8X8)
10745 0 : left_mbmi =
10746 0 : &xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi;
10747 : #endif // CONFIG_CHROMA_SUB8X8
10748 0 : const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
10749 0 : const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
10750 0 : const int neighbor_bh = mi_step * MI_SIZE;
10751 :
10752 0 : if (is_neighbor_overlappable(left_mbmi)) {
10753 : if (!CONFIG_CB4X4 && (l_bsize == BLOCK_4X4 || l_bsize == BLOCK_8X4))
10754 : neighbor_count += 2;
10755 : else
10756 0 : neighbor_count++;
10757 0 : if (neighbor_count > neighbor_limit) break;
10758 :
10759 0 : const int tmp_stride = left_stride;
10760 0 : int32_t *wsrc = wsrc_buf + (i * MI_SIZE * wsrc_stride);
10761 0 : int32_t *mask = mask_buf + (i * MI_SIZE * mask_stride);
10762 :
10763 0 : if (!is_hbd) {
10764 0 : const uint8_t *tmp = left;
10765 :
10766 0 : for (row = 0; row < neighbor_bh; ++row) {
10767 0 : for (col = 0; col < overlap; ++col) {
10768 0 : const uint8_t m0 = mask1d[col];
10769 0 : const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10770 0 : wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
10771 0 : (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
10772 0 : mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
10773 : }
10774 0 : wsrc += wsrc_stride;
10775 0 : mask += mask_stride;
10776 0 : tmp += tmp_stride;
10777 : }
10778 : #if CONFIG_HIGHBITDEPTH
10779 : } else {
10780 0 : const uint16_t *tmp = CONVERT_TO_SHORTPTR(left);
10781 :
10782 0 : for (row = 0; row < neighbor_bh; ++row) {
10783 0 : for (col = 0; col < overlap; ++col) {
10784 0 : const uint8_t m0 = mask1d[col];
10785 0 : const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10786 0 : wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
10787 0 : (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
10788 0 : mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
10789 : }
10790 0 : wsrc += wsrc_stride;
10791 0 : mask += mask_stride;
10792 0 : tmp += tmp_stride;
10793 : }
10794 : #endif // CONFIG_HIGHBITDEPTH
10795 : }
10796 : }
10797 :
10798 0 : left += neighbor_bh * left_stride;
10799 0 : i += mi_step;
10800 0 : } while (i < mih);
10801 : }
10802 :
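      : // Finally fold in the source so that wsrc becomes
      : // AOM_BLEND_A64_MAX_ALPHA^2 * src minus the accumulated neighbour terms.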
10803 0 : if (!is_hbd) {
10804 0 : const uint8_t *src = x->plane[0].src.buf;
10805 :
10806 0 : for (row = 0; row < bh; ++row) {
10807 0 : for (col = 0; col < bw; ++col) {
10808 0 : wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
10809 : }
10810 0 : wsrc_buf += wsrc_stride;
10811 0 : src += x->plane[0].src.stride;
10812 : }
10813 : #if CONFIG_HIGHBITDEPTH
10814 : } else {
10815 0 : const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
10816 :
10817 0 : for (row = 0; row < bh; ++row) {
10818 0 : for (col = 0; col < bw; ++col) {
10819 0 : wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
10820 : }
10821 0 : wsrc_buf += wsrc_stride;
10822 0 : src += x->plane[0].src.stride;
10823 : }
10824 : #endif // CONFIG_HIGHBITDEPTH
10825 : }
10826 0 : }
10827 :
10828 : #if CONFIG_NCOBMC
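      : // Re-evaluates the block with its causal (SIMPLE_TRANSLATION) predictor
      : // and with the OBMC predictor, then keeps whichever has the lower RD
      : // cost, updating the motion mode and the skip flag accordingly.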
10829 : void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
10830 : int mi_row, int mi_col) {
10831 : const AV1_COMMON *const cm = &cpi->common;
10832 : MACROBLOCKD *const xd = &x->e_mbd;
10833 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
10834 : MB_MODE_INFO backup_mbmi;
10835 : BLOCK_SIZE bsize = mbmi->sb_type;
10836 : int ref, skip_blk, backup_skip = x->skip;
10837 : int64_t rd_causal;
10838 : RD_STATS rd_stats_y, rd_stats_uv;
10839 : int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
10840 : int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
10841 :
10842 : // Recompute the best causal predictor and rd
10843 : mbmi->motion_mode = SIMPLE_TRANSLATION;
10844 : set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
10845 : for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
10846 : YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
10847 : assert(cfg != NULL);
10848 : av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
10849 : &xd->block_refs[ref]->sf);
10850 : }
10851 : av1_setup_dst_planes(x->e_mbd.plane, bsize,
10852 : get_frame_new_buffer(&cpi->common), mi_row, mi_col);
10853 :
10854 : av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
10855 :
10856 : av1_subtract_plane(x, bsize, 0);
10857 : super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
10858 : super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
10859 : assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
10860 : if (rd_stats_y.skip && rd_stats_uv.skip) {
10861 : rd_stats_y.rate = rate_skip1;
10862 : rd_stats_uv.rate = 0;
10863 : rd_stats_y.dist = rd_stats_y.sse;
10864 : rd_stats_uv.dist = rd_stats_uv.sse;
10865 : skip_blk = 1;  // both planes are skippable, so signal skip
10866 : } else if (RDCOST(x->rdmult, x->rddiv,
10867 : (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
10868 : (rd_stats_y.dist + rd_stats_uv.dist)) >
10869 : RDCOST(x->rdmult, x->rddiv, rate_skip1,
10870 : (rd_stats_y.sse + rd_stats_uv.sse))) {
10871 : rd_stats_y.rate = rate_skip1;
10872 : rd_stats_uv.rate = 0;
10873 : rd_stats_y.dist = rd_stats_y.sse;
10874 : rd_stats_uv.dist = rd_stats_uv.sse;
10875 : skip_blk = 1;
10876 : } else {
10877 : rd_stats_y.rate += rate_skip0;
10878 : skip_blk = 0;
10879 : }
10880 : backup_skip = skip_blk;
10881 : backup_mbmi = *mbmi;
10882 : rd_causal = RDCOST(x->rdmult, x->rddiv, (rd_stats_y.rate + rd_stats_uv.rate),
10883 : (rd_stats_y.dist + rd_stats_uv.dist));
10884 : rd_causal += RDCOST(x->rdmult, x->rddiv,
10885 : av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0);
10886 :
10887 : // Check non-causal mode
10888 : mbmi->motion_mode = OBMC_CAUSAL;
10889 : av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
10890 :
10891 : av1_subtract_plane(x, bsize, 0);
10892 : super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
10893 : super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
10894 : assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
10895 : if (rd_stats_y.skip && rd_stats_uv.skip) {
10896 : rd_stats_y.rate = rate_skip1;
10897 : rd_stats_uv.rate = 0;
10898 : rd_stats_y.dist = rd_stats_y.sse;
10899 : rd_stats_uv.dist = rd_stats_uv.sse;
10900 : skip_blk = 1;  // both planes are skippable, so signal skip
10901 : } else if (RDCOST(x->rdmult, x->rddiv,
10902 : (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
10903 : (rd_stats_y.dist + rd_stats_uv.dist)) >
10904 : RDCOST(x->rdmult, x->rddiv, rate_skip1,
10905 : (rd_stats_y.sse + rd_stats_uv.sse))) {
10906 : rd_stats_y.rate = rate_skip1;
10907 : rd_stats_uv.rate = 0;
10908 : rd_stats_y.dist = rd_stats_y.sse;
10909 : rd_stats_uv.dist = rd_stats_uv.sse;
10910 : skip_blk = 1;
10911 : } else {
10912 : rd_stats_y.rate += rate_skip0;
10913 : skip_blk = 0;
10914 : }
10915 :
10916 : if (rd_causal >
10917 : RDCOST(x->rdmult, x->rddiv,
10918 : rd_stats_y.rate + rd_stats_uv.rate +
10919 : av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
10920 : (rd_stats_y.dist + rd_stats_uv.dist))) {
10921 : x->skip = skip_blk;
10922 : } else {
10923 : *mbmi = backup_mbmi;
10924 : x->skip = backup_skip;
10925 : }
10926 : }
10927 : #endif // CONFIG_NCOBMC
10928 : #endif // CONFIG_MOTION_VAR