Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <limits.h>
13 : #include <math.h>
14 : #include <stdio.h>
15 :
16 : #include "./aom_config.h"
17 : #include "./aom_dsp_rtcd.h"
18 :
19 : #include "aom_dsp/aom_dsp_common.h"
20 : #include "aom_mem/aom_mem.h"
21 : #include "aom_ports/mem.h"
22 :
23 : #include "av1/common/common.h"
24 : #include "av1/common/mvref_common.h"
25 : #include "av1/common/reconinter.h"
26 :
27 : #include "av1/encoder/encoder.h"
28 : #include "av1/encoder/mcomp.h"
29 : #include "av1/encoder/rdopt.h"
30 :
31 : // #define NEW_DIAMOND_SEARCH
32 :
33 0 : static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
34 : const MV *mv) {
35 0 : return &buf->buf[mv->row * buf->stride + mv->col];
36 : }
37 :
38 0 : void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
39 0 : int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
40 0 : int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
41 0 : int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
42 0 : int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
43 :
44 0 : col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1);
45 0 : row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1);
46 0 : col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1);
47 0 : row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1);
48 :
49 : // Get intersection of UMV window and valid MV window to reduce # of checks
50 : // in diamond search.
51 0 : if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
52 0 : if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
53 0 : if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
54 0 : if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
55 0 : }
56 :
57 0 : static void av1_set_subpel_mv_search_range(const MvLimits *mv_limits,
58 : int *col_min, int *col_max,
59 : int *row_min, int *row_max,
60 : const MV *ref_mv) {
61 0 : const int max_mv = MAX_FULL_PEL_VAL * 8;
62 0 : const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv);
63 0 : const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv);
64 0 : const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv);
65 0 : const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv);
66 :
67 0 : *col_min = AOMMAX(MV_LOW + 1, minc);
68 0 : *col_max = AOMMIN(MV_UPP - 1, maxc);
69 0 : *row_min = AOMMAX(MV_LOW + 1, minr);
70 0 : *row_max = AOMMIN(MV_UPP - 1, maxr);
71 0 : }
72 :
73 0 : int av1_init_search_range(int size) {
74 0 : int sr = 0;
75 : // Minimum search size no matter what the passed in value.
76 0 : size = AOMMAX(16, size);
77 :
78 0 : while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
79 :
80 0 : sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2);
81 0 : return sr;
82 : }
83 :
84 0 : static INLINE int mv_cost(const MV *mv, const int *joint_cost,
85 : int *const comp_cost[2]) {
86 0 : return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
87 0 : comp_cost[1][mv->col];
88 : }
89 :
90 0 : int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
91 : int *mvcost[2], int weight) {
92 0 : const MV diff = { mv->row - ref->row, mv->col - ref->col };
93 0 : return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
94 : }
95 :
96 : #define PIXEL_TRANSFORM_ERROR_SCALE 4
97 0 : static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
98 : int *mvcost[2], int error_per_bit) {
99 0 : if (mvcost) {
100 0 : const MV diff = { mv->row - ref->row, mv->col - ref->col };
101 0 : return (int)ROUND_POWER_OF_TWO_64(
102 : (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
103 : RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT +
104 : PIXEL_TRANSFORM_ERROR_SCALE);
105 : }
106 0 : return 0;
107 : }
108 :
109 0 : static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
110 : int sad_per_bit) {
111 0 : const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 };
112 0 : return ROUND_POWER_OF_TWO(
113 : (unsigned)mv_cost(&diff, x->nmvjointcost, x->mvcost) * sad_per_bit,
114 : AV1_PROB_COST_SHIFT);
115 : }
116 :
117 0 : void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) {
118 0 : int len, ss_count = 1;
119 :
120 0 : cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
121 0 : cfg->ss[0].offset = 0;
122 :
123 0 : for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
124 : // Generate offsets for 4 search sites per step.
125 0 : const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
126 : int i;
127 0 : for (i = 0; i < 4; ++i) {
128 0 : search_site *const ss = &cfg->ss[ss_count++];
129 0 : ss->mv = ss_mvs[i];
130 0 : ss->offset = ss->mv.row * stride + ss->mv.col;
131 : }
132 : }
133 :
134 0 : cfg->ss_count = ss_count;
135 0 : cfg->searches_per_step = 4;
136 0 : }
137 :
138 0 : void av1_init3smotion_compensation(search_site_config *cfg, int stride) {
139 0 : int len, ss_count = 1;
140 :
141 0 : cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
142 0 : cfg->ss[0].offset = 0;
143 :
144 0 : for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
145 : // Generate offsets for 8 search sites per step.
146 0 : const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len },
147 : { 0, len }, { -len, -len }, { -len, len },
148 : { len, -len }, { len, len } };
149 : int i;
150 0 : for (i = 0; i < 8; ++i) {
151 0 : search_site *const ss = &cfg->ss[ss_count++];
152 0 : ss->mv = ss_mvs[i];
153 0 : ss->offset = ss->mv.row * stride + ss->mv.col;
154 : }
155 : }
156 :
157 0 : cfg->ss_count = ss_count;
158 0 : cfg->searches_per_step = 8;
159 0 : }
160 :
161 : /*
162 : * To avoid the penalty for crossing cache-line read, preload the reference
163 : * area in a small buffer, which is aligned to make sure there won't be crossing
164 : * cache-line read while reading from this buffer. This reduced the cpu
165 : * cycles spent on reading ref data in sub-pixel filter functions.
166 : * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
167 : * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
168 : * could reduce the area.
169 : */
170 :
171 : // convert motion vector component to offset for sv[a]f calc
// Extracts the low three bits of a motion vector component; the result is
// passed as the x/y offset argument of the sv[a]f variance functions.
static INLINE int sp(int x) {
  const int low_bits_mask = 7;
  return x & low_bits_mask;
}
173 :
// Maps a position given with three extra low bits per component onto |buf|:
// both |r| and |c| are shifted right by 3 before being used as row and column
// indices with the given |stride|.
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  const int whole_row = r >> 3;
  const int whole_col = c >> 3;
  const int index = whole_row * stride + whole_col;
  return buf + index;
}
177 :
178 : /* checks if (r, c) has better score than previous best */
// CHECK_BETTER(v, r, c): evaluate the candidate position (r, c) and keep it if
// it beats the current best.
//
// The macro is not self-contained: it reads and writes locals of the
// expanding function (minc, maxc, minr, maxr, ref_mv, mvjcost, mvcost,
// error_per_bit, second_pred, vfp, y, y_stride, src_address, src_stride, sse,
// thismse, besterr, br, bc, distortion, sse1, plus mask, mask_stride and
// invert_mask when CONFIG_EXT_INTER is set).
//
// In range:  v = mv_err_cost(...) + sub-pixel variance at (r, c); when
//            v < besterr the best cost, best position, *distortion and *sse1
//            are updated.
// Out of range: v = INT_MAX.
//
// The variance function is svf without a second predictor, msvf with a second
// predictor and a mask (CONFIG_EXT_INTER only), and svaf otherwise.
//
// The expansion is a bare if/else statement (no do { } while (0) wrapper) and
// r and c are substituted textually several times, so callers should brace the
// call site and pass side-effect-free arguments.
179 : #if CONFIG_EXT_INTER
180 : #define CHECK_BETTER(v, r, c) \
181 : if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
182 : MV this_mv = { r, c }; \
183 : v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
184 : if (second_pred == NULL) \
185 : thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
186 : src_address, src_stride, &sse); \
187 : else if (mask) \
188 : thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
189 : src_address, src_stride, second_pred, mask, \
190 : mask_stride, invert_mask, &sse); \
191 : else \
192 : thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
193 : src_address, src_stride, &sse, second_pred); \
194 : v += thismse; \
195 : if (v < besterr) { \
196 : besterr = v; \
197 : br = r; \
198 : bc = c; \
199 : *distortion = thismse; \
200 : *sse1 = sse; \
201 : } \
202 : } else { \
203 : v = INT_MAX; \
204 : }
205 : #else
206 : #define CHECK_BETTER(v, r, c) \
207 : if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
208 : MV this_mv = { r, c }; \
209 : v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
210 : if (second_pred == NULL) \
211 : thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
212 : src_address, src_stride, &sse); \
213 : else \
214 : thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
215 : src_address, src_stride, &sse, second_pred); \
216 : v += thismse; \
217 : if (v < besterr) { \
218 : besterr = v; \
219 : br = r; \
220 : bc = c; \
221 : *distortion = thismse; \
222 : *sse1 = sse; \
223 : } \
224 : } else { \
225 : v = INT_MAX; \
226 : }
227 : #endif // CONFIG_EXT_INTER
228 :
// Alias so that SECOND_LEVEL_CHECKS_BEST(0) can select this variant by token
// pasting (CHECK_BETTER##k).
229 : #define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
230 :
// Addresses |buf| directly with row |r| and column |c| (no down-shift of the
// coordinates, unlike pre()).
static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r,
                                  int c) {
  const int index = r * stride + c;
  return buf + index;
}
235 :
236 : /* checks if (r, c) has better score than previous best */
// CHECK_BETTER1(v, r, c): same contract as CHECK_BETTER, but the error is
// measured with upsampled_pref_error() on the address returned by upre(), i.e.
// (r, c) index the reference buffer directly instead of being split into a
// whole part and a 3-bit fraction.
//
// In addition to the caller locals used by CHECK_BETTER this variant needs
// xd, w and h in scope (and mask, mask_stride, invert_mask under
// CONFIG_EXT_INTER). Like CHECK_BETTER it expands to a bare if/else and
// substitutes r and c textually more than once.
237 : #if CONFIG_EXT_INTER
238 : #define CHECK_BETTER1(v, r, c) \
239 : if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
240 : MV this_mv = { r, c }; \
241 : thismse = upsampled_pref_error( \
242 : xd, vfp, src_address, src_stride, upre(y, y_stride, r, c), y_stride, \
243 : second_pred, mask, mask_stride, invert_mask, w, h, &sse); \
244 : v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
245 : v += thismse; \
246 : if (v < besterr) { \
247 : besterr = v; \
248 : br = r; \
249 : bc = c; \
250 : *distortion = thismse; \
251 : *sse1 = sse; \
252 : } \
253 : } else { \
254 : v = INT_MAX; \
255 : }
256 : #else
257 : #define CHECK_BETTER1(v, r, c) \
258 : if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
259 : MV this_mv = { r, c }; \
260 : thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, \
261 : upre(y, y_stride, r, c), y_stride, \
262 : second_pred, w, h, &sse); \
263 : v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
264 : v += thismse; \
265 : if (v < besterr) { \
266 : besterr = v; \
267 : br = r; \
268 : bc = c; \
269 : *distortion = thismse; \
270 : *sse1 = sse; \
271 : } \
272 : } else { \
273 : v = INT_MAX; \
274 : }
275 : #endif // CONFIG_EXT_INTER
276 :
// FIRST_LEVEL_CHECKS: probe the four axial neighbours of (tr, tc) at distance
// hstep with CHECK_BETTER, then one diagonal.
//
// whichdir encodes the cheaper side on each axis: bit 0 is 0 when left < right
// (else 1), bit 1 is 0 when up < down (else 2). The single diagonal probed is
// the one in that quadrant. Requires tr, tc, hstep and whichdir in the
// expanding scope, plus everything CHECK_BETTER needs.
277 : #define FIRST_LEVEL_CHECKS \
278 : { \
279 : unsigned int left, right, up, down, diag; \
280 : CHECK_BETTER(left, tr, tc - hstep); \
281 : CHECK_BETTER(right, tr, tc + hstep); \
282 : CHECK_BETTER(up, tr - hstep, tc); \
283 : CHECK_BETTER(down, tr + hstep, tc); \
284 : whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
285 : switch (whichdir) { \
286 : case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
287 : case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
288 : case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
289 : case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
290 : } \
291 : }
292 :
// SECOND_LEVEL_CHECKS: extra probes chosen from how the best point (br, bc)
// moved relative to the search centre (tr, tc) during FIRST_LEVEL_CHECKS.
//
//  - moved on both axes (diagonal won): probe the two points one further step
//    along each axis from the diagonal.
//  - moved on columns only: probe (tr +/- hstep, tc + 2 * kc) and one point at
//    tc + kc whose row side depends on whichdir.
//  - moved on rows only: the mirrored set with rows and columns swapped.
//  - did not move: nothing is probed.
//
// kr / kc are the signed row / column displacement of the best point from the
// centre. Requires tr, tc, br, bc, hstep and whichdir in the expanding scope,
// plus everything CHECK_BETTER needs.
293 : #define SECOND_LEVEL_CHECKS \
294 : { \
295 : int kr, kc; \
296 : unsigned int second; \
297 : if (tr != br && tc != bc) { \
298 : kr = br - tr; \
299 : kc = bc - tc; \
300 : CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
301 : CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
302 : } else if (tr == br && tc != bc) { \
303 : kc = bc - tc; \
304 : CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
305 : CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
306 : switch (whichdir) { \
307 : case 0: \
308 : case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
309 : case 2: \
310 : case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
311 : } \
312 : } else if (tr != br && tc == bc) { \
313 : kr = br - tr; \
314 : CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
315 : CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
316 : switch (whichdir) { \
317 : case 0: \
318 : case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
319 : case 1: \
320 : case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
321 : } \
322 : } \
323 : }
324 :
325 : // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST is a rewrite of
326 : // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
327 : // later in the same way.
//
// SECOND_LEVEL_CHECKS_BEST(k): k selects the probe macro by token pasting
// (CHECK_BETTER0 or CHECK_BETTER1).
//
// Unlike SECOND_LEVEL_CHECKS, kr and kc are NOT declared here: they must
// already exist in the expanding scope, and only the component along which the
// best point differs from (tr, tc) is recomputed; the other keeps the value
// the caller left in it. The macro probes one step along each axis from the
// snapshot (br0, bc0) and, if either probe improved the best point, the
// diagonal as well.
//
// The assert requires that the best point shares a row or a column with
// (tr, tc) on entry.
328 : #define SECOND_LEVEL_CHECKS_BEST(k) \
329 : { \
330 : unsigned int second; \
331 : int br0 = br; \
332 : int bc0 = bc; \
333 : assert(tr == br || tc == bc); \
334 : if (tr == br && tc != bc) { \
335 : kc = bc - tc; \
336 : } else if (tr != br && tc == bc) { \
337 : kr = br - tr; \
338 : } \
339 : CHECK_BETTER##k(second, br0 + kr, bc0); \
340 : CHECK_BETTER##k(second, br0, bc0 + kc); \
341 : if (br0 != br || bc0 != bc) { \
342 : CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \
343 : } \
344 : }
345 :
// SETUP_SUBPEL_SEARCH: common prologue of the pruned sub-pel search functions.
//
// Declares the locals that CHECK_BETTER / FIRST_LEVEL_CHECKS /
// SECOND_LEVEL_CHECKS rely on and initialises them from x, ref_mv and
// iters_per_step, which must be in scope at the expansion site:
//  - src_address / src_stride come from x->plane[0].src,
//    y / y_stride from x->e_mbd.plane[0].pre[0];
//  - offset is computed from the incoming x->best_mv BEFORE it is rescaled;
//  - br/bc (best) and tr/tc (centre) start at best_mv * 8, hstep at 4;
//  - halfiters, quarteriters and eighthiters are all equal to iters_per_step;
//  - minc/maxc/minr/maxr are filled by av1_set_subpel_mv_search_range().
//
// Side effect: x->best_mv.as_mv is multiplied by 8 in place by the last two
// statements.
346 : #define SETUP_SUBPEL_SEARCH \
347 : const uint8_t *const src_address = x->plane[0].src.buf; \
348 : const int src_stride = x->plane[0].src.stride; \
349 : const MACROBLOCKD *xd = &x->e_mbd; \
350 : unsigned int besterr = INT_MAX; \
351 : unsigned int sse; \
352 : unsigned int whichdir; \
353 : int thismse; \
354 : MV *bestmv = &x->best_mv.as_mv; \
355 : const unsigned int halfiters = iters_per_step; \
356 : const unsigned int quarteriters = iters_per_step; \
357 : const unsigned int eighthiters = iters_per_step; \
358 : const int y_stride = xd->plane[0].pre[0].stride; \
359 : const int offset = bestmv->row * y_stride + bestmv->col; \
360 : const uint8_t *const y = xd->plane[0].pre[0].buf; \
361 : \
362 : int br = bestmv->row * 8; \
363 : int bc = bestmv->col * 8; \
364 : int hstep = 4; \
365 : int minc, maxc, minr, maxr; \
366 : int tr = br; \
367 : int tc = bc; \
368 : \
369 : av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, \
370 : ref_mv); \
371 : \
372 : bestmv->row *= 8; \
373 : bestmv->col *= 8;
374 :
// Computes the starting cost of a sub-pel search: the variance between the
// source block and the prediction at (y + offset), plus the motion vector rate
// term for |bestmv| relative to |ref_mv|.
//
// When |second_pred| is non-NULL the prediction is first combined with it into
// a local MAX_SB_SQUARE buffer: by aom_*_comp_mask_pred() when
// CONFIG_EXT_INTER is set and |mask| is non-NULL, otherwise by
// aom_*_comp_avg_pred(). With CONFIG_HIGHBITDEPTH the highbd variants and a
// uint16_t buffer are used when xd->cur_buf->flags has YV12_FLAG_HIGHBITDEPTH.
//
// Outputs: *sse1 receives the SSE reported by vfp->vf, *distortion the
// variance alone (without the MV cost). Returns variance + MV cost.
375 0 : static unsigned int setup_center_error(
376 : const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
377 : int error_per_bit, const aom_variance_fn_ptr_t *vfp,
378 : const uint8_t *const src, const int src_stride, const uint8_t *const y,
379 : int y_stride, const uint8_t *second_pred,
380 : #if CONFIG_EXT_INTER
381 : const uint8_t *mask, int mask_stride, int invert_mask,
382 : #endif
383 : int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
384 : int *distortion) {
385 : unsigned int besterr;
386 : #if CONFIG_HIGHBITDEPTH
387 0 : if (second_pred != NULL) {
388 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
389 : DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]);
// Note the dangling `else` below: when CONFIG_EXT_INTER is off, the avg_pred
// call is executed unconditionally.
390 : #if CONFIG_EXT_INTER
391 0 : if (mask)
392 0 : aom_highbd_comp_mask_pred(comp_pred16, second_pred, w, h, y + offset,
393 : y_stride, mask, mask_stride, invert_mask);
394 : else
395 : #endif
396 0 : aom_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
397 : y_stride);
// The combined prediction is stored with a row pitch of w, hence w as stride.
398 0 : besterr =
399 0 : vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
400 : } else {
401 : DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
402 : #if CONFIG_EXT_INTER
403 0 : if (mask)
404 0 : aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
405 : mask, mask_stride, invert_mask);
406 : else
407 : #endif
408 0 : aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
409 0 : besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
410 : }
411 : } else {
412 0 : besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
413 : }
// Report the distortion before the rate term is folded into besterr.
414 0 : *distortion = besterr;
415 0 : besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
416 : #else
// Low-bitdepth-only build: xd is not needed.
417 : (void)xd;
418 : if (second_pred != NULL) {
419 : DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]);
420 : #if CONFIG_EXT_INTER
421 : if (mask)
422 : aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride,
423 : mask, mask_stride, invert_mask);
424 : else
425 : #endif
426 : aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
427 : besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
428 : } else {
429 : besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
430 : }
431 : *distortion = besterr;
432 : besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
433 : #endif // CONFIG_HIGHBITDEPTH
434 0 : return besterr;
435 : }
436 :
// Integer division of |n| by |d| rounded to the nearest integer instead of
// truncated: half of |d| is added to |n| when both have the same sign and
// subtracted when the signs differ, before the truncating division.
// |d| must be non-zero.
static INLINE int divide_and_round(int n, int d) {
  const int half = d / 2;
  const int signs_differ = (n < 0) != (d < 0);
  if (signs_differ) {
    return (n - half) / d;
  }
  return (n + half) / d;
}
440 :
// Returns 1 when cost_list[0] is strictly smaller than each of
// cost_list[1..4], and 0 otherwise.
static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  const int center = cost_list[0];
  int i;
  for (i = 1; i <= 4; ++i) {
    if (!(center < cost_list[i])) return 0;
  }
  return 1;
}
445 :
// Estimates where the minimum of the cost surface lies, expressed in units of
// 1/2^bits.
// The surface is modelled as S = A(x - x0)^2 + B(y - y0)^2 + C. Given the
// costs S0..S4 sampled at (y, x) = (0, 0), (0, -1), (1, 0), (0, 1), (-1, 0),
// the minimum (x0, y0) of that model is
//   x0 = 1/2 (S1 - S3) / (S1 + S3 - 2 * S0),
//   y0 = 1/2 (S4 - S2) / (S4 + S2 - 2 * S0).
// This function evaluates an integer version of those formulas with
// divide_and_round(); the column result goes to *ic, the row result to *ir.
static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  // Multiplying by 2^(bits - 1) folds the 1/2 factor into the requested scale.
  const int scale = 1 << (bits - 1);

  {
    const int col_num = (cost_list[1] - cost_list[3]) * scale;
    const int col_den = cost_list[1] - 2 * cost_list[0] + cost_list[3];
    *ic = divide_and_round(col_num, col_den);
  }
  {
    const int row_num = (cost_list[4] - cost_list[2]) * scale;
    const int row_den = cost_list[4] - 2 * cost_list[0] + cost_list[2];
    *ir = divide_and_round(row_num, row_den);
  }
}
460 :
// Sub-pel refinement of x->best_mv, most aggressively pruned variant.
//
// On entry x->best_mv holds the position to refine; SETUP_SUBPEL_SEARCH
// multiplies it by 8 and declares the working locals (br/bc, tr/tc, hstep = 4,
// search bounds, ...). On exit x->best_mv holds the best position found (in
// the same x8 scale), *distortion and *sse1 describe that position, and the
// return value is its cost (besterr).
//
// First stage:
//  - if |cost_list| is non-NULL, has no INT_MAX entry and its centre is the
//    strict minimum, a single candidate at the estimated surface minimum
//    (get_cost_surf_min with bits = 2, offsets 2 * ir and 2 * ic) is tested;
//  - otherwise FIRST_LEVEL_CHECKS (+ SECOND_LEVEL_CHECKS when
//    iters_per_step > 1) run at hstep = 4 and, unless forced_stop == 2, again
//    at hstep = 2.
// Final stage (only when allow_hp && forced_stop == 0): hstep is halved once
// more and the first/second level checks run again.
//
// use_upsampled_ref is ignored by this function.
461 0 : int av1_find_best_sub_pixel_tree_pruned_evenmore(
462 : MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
463 : const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
464 : int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
465 : unsigned int *sse1, const uint8_t *second_pred,
466 : #if CONFIG_EXT_INTER
467 : const uint8_t *mask, int mask_stride, int invert_mask,
468 : #endif
469 : int w, int h, int use_upsampled_ref) {
470 0 : SETUP_SUBPEL_SEARCH;
471 0 : besterr =
472 : setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
473 : src_stride, y, y_stride, second_pred,
474 : #if CONFIG_EXT_INTER
475 : mask, mask_stride, invert_mask,
476 : #endif
477 : w, h, offset, mvjcost, mvcost, sse1, distortion);
// Most of these names ARE used further down; the casts only silence
// unused-variable warnings and have no effect.
478 : (void)halfiters;
479 : (void)quarteriters;
480 : (void)eighthiters;
481 : (void)whichdir;
482 : (void)allow_hp;
483 : (void)forced_stop;
484 : (void)hstep;
485 : (void)use_upsampled_ref;
486 :
487 0 : if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
488 0 : cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
489 0 : cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
490 : int ir, ic;
491 : unsigned int minpt;
492 0 : get_cost_surf_min(cost_list, &ir, &ic, 2);
493 0 : if (ir != 0 || ic != 0) {
494 0 : CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
495 : }
496 : } else {
497 0 : FIRST_LEVEL_CHECKS;
498 0 : if (halfiters > 1) {
499 0 : SECOND_LEVEL_CHECKS;
500 : }
501 :
// Re-centre on the best point before refining at the next finer step.
502 0 : tr = br;
503 0 : tc = bc;
504 :
505 : // Each subsequent iteration checks at least one point in common with
506 : // the last iteration could be 2 ( if diag selected) 1/4 pel
507 : // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
508 0 : if (forced_stop != 2) {
509 0 : hstep >>= 1;
510 0 : FIRST_LEVEL_CHECKS;
511 0 : if (quarteriters > 1) {
512 0 : SECOND_LEVEL_CHECKS;
513 : }
514 : }
515 : }
516 :
517 0 : tr = br;
518 0 : tc = bc;
519 :
// NOTE(review): when the cost_list branch above was taken, hstep is still 4
// here, so this stage probes at hstep = 2 rather than 1 — confirm intended.
520 0 : if (allow_hp && forced_stop == 0) {
521 0 : hstep >>= 1;
522 0 : FIRST_LEVEL_CHECKS;
523 0 : if (eighthiters > 1) {
524 0 : SECOND_LEVEL_CHECKS;
525 : }
526 : }
527 :
528 0 : bestmv->row = br;
529 0 : bestmv->col = bc;
530 :
531 0 : return besterr;
532 : }
533 :
// Sub-pel refinement of x->best_mv, "pruned more" variant.
//
// Same contract as the other av1_find_best_sub_pixel_tree_pruned* functions:
// x->best_mv is multiplied by 8 by SETUP_SUBPEL_SEARCH, refined, and written
// back; *distortion and *sse1 describe the winner; the return value is its
// cost.
//
// Stage at hstep = 4: if |cost_list| is usable (non-NULL, no INT_MAX entry,
// centre strictly lowest) only the estimated surface minimum
// (get_cost_surf_min with bits = 1, scaled by hstep) is tested; otherwise the
// first/second level checks run.
// Stage at hstep = 2: first/second level checks, skipped when
// forced_stop == 2.
// Stage at the next halving of hstep: first/second level checks, only when
// allow_hp && forced_stop == 0.
//
// use_upsampled_ref is ignored by this function.
534 0 : int av1_find_best_sub_pixel_tree_pruned_more(
535 : MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
536 : const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
537 : int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
538 : unsigned int *sse1, const uint8_t *second_pred,
539 : #if CONFIG_EXT_INTER
540 : const uint8_t *mask, int mask_stride, int invert_mask,
541 : #endif
542 : int w, int h, int use_upsampled_ref) {
543 0 : SETUP_SUBPEL_SEARCH;
544 : (void)use_upsampled_ref;
545 :
546 0 : besterr =
547 : setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
548 : src_stride, y, y_stride, second_pred,
549 : #if CONFIG_EXT_INTER
550 : mask, mask_stride, invert_mask,
551 : #endif
552 : w, h, offset, mvjcost, mvcost, sse1, distortion);
553 0 : if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
554 0 : cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
555 0 : cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
556 : unsigned int minpt;
557 : int ir, ic;
558 0 : get_cost_surf_min(cost_list, &ir, &ic, 1);
559 0 : if (ir != 0 || ic != 0) {
560 0 : CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
561 : }
562 : } else {
563 0 : FIRST_LEVEL_CHECKS;
564 0 : if (halfiters > 1) {
565 0 : SECOND_LEVEL_CHECKS;
566 : }
567 : }
568 :
569 : // Each subsequent iteration checks at least one point in common with
570 : // the last iteration could be 2 ( if diag selected) 1/4 pel
571 :
572 : // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
573 0 : if (forced_stop != 2) {
// Re-centre on the current best before searching at the finer step.
574 0 : tr = br;
575 0 : tc = bc;
576 0 : hstep >>= 1;
577 0 : FIRST_LEVEL_CHECKS;
578 0 : if (quarteriters > 1) {
579 0 : SECOND_LEVEL_CHECKS;
580 : }
581 : }
582 :
583 0 : if (allow_hp && forced_stop == 0) {
584 0 : tr = br;
585 0 : tc = bc;
586 0 : hstep >>= 1;
587 0 : FIRST_LEVEL_CHECKS;
588 0 : if (eighthiters > 1) {
589 0 : SECOND_LEVEL_CHECKS;
590 : }
591 : }
592 : // These lines insure static analysis doesn't warn that
593 : // tr and tc aren't used after the above point.
594 : (void)tr;
595 : (void)tc;
596 :
597 0 : bestmv->row = br;
598 0 : bestmv->col = bc;
599 :
600 0 : return besterr;
601 : }
602 :
// Sub-pel refinement of x->best_mv, "pruned" variant.
//
// Same contract as the other av1_find_best_sub_pixel_tree_pruned* functions:
// x->best_mv is multiplied by 8 by SETUP_SUBPEL_SEARCH, refined, and written
// back; *distortion and *sse1 describe the winner; the return value is its
// cost.
//
// Stage at hstep = 4: if |cost_list| is non-NULL and has no INT_MAX entry, the
// neighbour costs in it choose one quadrant (whichdir) and only the three
// points of that quadrant are tested; otherwise the first/second level checks
// run. Unlike the other pruned variants, the centre is not required to be the
// strict minimum of |cost_list|.
// Later stages: first/second level checks at each further halving of hstep,
// gated by forced_stop != 2 and by allow_hp && forced_stop == 0 respectively.
//
// use_upsampled_ref is ignored by this function.
603 0 : int av1_find_best_sub_pixel_tree_pruned(
604 : MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
605 : const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
606 : int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
607 : unsigned int *sse1, const uint8_t *second_pred,
608 : #if CONFIG_EXT_INTER
609 : const uint8_t *mask, int mask_stride, int invert_mask,
610 : #endif
611 : int w, int h, int use_upsampled_ref) {
612 0 : SETUP_SUBPEL_SEARCH;
613 : (void)use_upsampled_ref;
614 :
615 0 : besterr =
616 : setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
617 : src_stride, y, y_stride, second_pred,
618 : #if CONFIG_EXT_INTER
619 : mask, mask_stride, invert_mask,
620 : #endif
621 : w, h, offset, mvjcost, mvcost, sse1, distortion);
622 0 : if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
623 0 : cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
624 0 : cost_list[4] != INT_MAX) {
625 : unsigned int left, right, up, down, diag;
// Bit 0: 0 if cost_list[1] < cost_list[3], else 1.
// Bit 1: 0 if cost_list[2] < cost_list[4], else 2.
626 0 : whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
627 0 : (cost_list[2] < cost_list[4] ? 0 : 2);
628 0 : switch (whichdir) {
629 : case 0:
630 0 : CHECK_BETTER(left, tr, tc - hstep);
631 0 : CHECK_BETTER(down, tr + hstep, tc);
632 0 : CHECK_BETTER(diag, tr + hstep, tc - hstep);
633 0 : break;
634 : case 1:
635 0 : CHECK_BETTER(right, tr, tc + hstep);
636 0 : CHECK_BETTER(down, tr + hstep, tc);
637 0 : CHECK_BETTER(diag, tr + hstep, tc + hstep);
638 0 : break;
639 : case 2:
640 0 : CHECK_BETTER(left, tr, tc - hstep);
641 0 : CHECK_BETTER(up, tr - hstep, tc);
642 0 : CHECK_BETTER(diag, tr - hstep, tc - hstep);
643 0 : break;
644 : case 3:
645 0 : CHECK_BETTER(right, tr, tc + hstep);
646 0 : CHECK_BETTER(up, tr - hstep, tc);
647 0 : CHECK_BETTER(diag, tr - hstep, tc + hstep);
648 0 : break;
649 : }
650 : } else {
651 0 : FIRST_LEVEL_CHECKS;
652 0 : if (halfiters > 1) {
653 0 : SECOND_LEVEL_CHECKS;
654 : }
655 : }
656 :
// Re-centre on the best point found so far.
657 0 : tr = br;
658 0 : tc = bc;
659 :
660 : // Each subsequent iteration checks at least one point in common with
661 : // the last iteration could be 2 ( if diag selected) 1/4 pel
662 :
663 : // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
664 0 : if (forced_stop != 2) {
665 0 : hstep >>= 1;
666 0 : FIRST_LEVEL_CHECKS;
667 0 : if (quarteriters > 1) {
668 0 : SECOND_LEVEL_CHECKS;
669 : }
670 0 : tr = br;
671 0 : tc = bc;
672 : }
673 :
674 0 : if (allow_hp && forced_stop == 0) {
675 0 : hstep >>= 1;
676 0 : FIRST_LEVEL_CHECKS;
677 0 : if (eighthiters > 1) {
678 0 : SECOND_LEVEL_CHECKS;
679 : }
680 0 : tr = br;
681 0 : tc = bc;
682 : }
683 : // These lines insure static analysis doesn't warn that
684 : // tr and tc aren't used after the above point.
685 : (void)tr;
686 : (void)tc;
687 :
688 0 : bestmv->row = br;
689 0 : bestmv->col = bc;
690 :
691 0 : return besterr;
692 : }
693 :
694 : /* clang-format off */
695 : static const MV search_step_table[12] = {
696 : // left, right, up, down
697 : { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
698 : { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
699 : { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
700 : };
701 : /* clang-format on */
702 :
// Builds a w x h prediction from |y| with the aom_*_upsampled_pred() family
// into a local MAX_SB_SQUARE buffer and returns vfp->vf() of that prediction
// against |src|; the SSE from vfp->vf is stored in *sse.
//
// Predictor selection:
//  - second_pred == NULL:                      aom_[highbd_]upsampled_pred
//  - second_pred and mask (CONFIG_EXT_INTER):  ..._comp_mask_upsampled_pred
//  - second_pred otherwise:                    ..._comp_avg_upsampled_pred
// With CONFIG_HIGHBITDEPTH the highbd variants and a uint16_t buffer are used
// when xd->cur_buf->flags has YV12_FLAG_HIGHBITDEPTH set.
//
// Note on structure: the `} else {` opened inside the first
// #if CONFIG_HIGHBITDEPTH is closed by the `}` inside the second one near the
// end of the function, so the low-bitdepth code below is shared by both build
// configurations.
703 0 : static int upsampled_pref_error(const MACROBLOCKD *xd,
704 : const aom_variance_fn_ptr_t *vfp,
705 : const uint8_t *const src, const int src_stride,
706 : const uint8_t *const y, int y_stride,
707 : const uint8_t *second_pred,
708 : #if CONFIG_EXT_INTER
709 : const uint8_t *mask, int mask_stride,
710 : int invert_mask,
711 : #endif
712 : int w, int h, unsigned int *sse) {
713 : unsigned int besterr;
714 : #if CONFIG_HIGHBITDEPTH
715 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
716 : DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
717 0 : if (second_pred != NULL) {
718 : #if CONFIG_EXT_INTER
719 0 : if (mask)
720 0 : aom_highbd_comp_mask_upsampled_pred(pred16, second_pred, w, h, y,
721 : y_stride, mask, mask_stride,
722 : invert_mask);
723 : else
724 : #endif
725 0 : aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y,
726 : y_stride);
727 : } else {
728 0 : aom_highbd_upsampled_pred(pred16, w, h, y, y_stride);
729 : }
730 :
// The local prediction is passed to vf with a row pitch of w.
731 0 : besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse);
732 : } else {
733 : DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
734 : #else
735 : DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
736 : (void)xd;
737 : #endif // CONFIG_HIGHBITDEPTH
738 0 : if (second_pred != NULL) {
739 : #if CONFIG_EXT_INTER
740 0 : if (mask)
741 0 : aom_comp_mask_upsampled_pred(pred, second_pred, w, h, y, y_stride, mask,
742 : mask_stride, invert_mask);
743 : else
744 : #endif
745 0 : aom_comp_avg_upsampled_pred(pred, second_pred, w, h, y, y_stride);
746 : } else {
747 0 : aom_upsampled_pred(pred, w, h, y, y_stride);
748 : }
749 :
750 0 : besterr = vfp->vf(pred, w, src, src_stride, sse);
751 : #if CONFIG_HIGHBITDEPTH
752 : }
753 : #endif
754 0 : return besterr;
755 : }
756 :
757 0 : static unsigned int upsampled_setup_center_error(
758 : const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
759 : int error_per_bit, const aom_variance_fn_ptr_t *vfp,
760 : const uint8_t *const src, const int src_stride, const uint8_t *const y,
761 : int y_stride, const uint8_t *second_pred,
762 : #if CONFIG_EXT_INTER
763 : const uint8_t *mask, int mask_stride, int invert_mask,
764 : #endif
765 : int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
766 : int *distortion) {
767 0 : unsigned int besterr = upsampled_pref_error(xd, vfp, src, src_stride,
768 : y + offset, y_stride, second_pred,
769 : #if CONFIG_EXT_INTER
770 : mask, mask_stride, invert_mask,
771 : #endif
772 : w, h, sse1);
773 0 : *distortion = besterr;
774 0 : besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
775 0 : return besterr;
776 : }
777 :
778 0 : int av1_find_best_sub_pixel_tree(
779 : MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
780 : const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
781 : int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
782 : unsigned int *sse1, const uint8_t *second_pred,
783 : #if CONFIG_EXT_INTER
784 : const uint8_t *mask, int mask_stride, int invert_mask,
785 : #endif
786 : int w, int h, int use_upsampled_ref) {
787 0 : const uint8_t *const src_address = x->plane[0].src.buf;
788 0 : const int src_stride = x->plane[0].src.stride;
789 0 : const MACROBLOCKD *xd = &x->e_mbd;
790 0 : unsigned int besterr = INT_MAX;
791 : unsigned int sse;
792 : unsigned int thismse;
793 0 : const int y_stride = xd->plane[0].pre[0].stride;
794 0 : MV *bestmv = &x->best_mv.as_mv;
795 0 : const int offset = bestmv->row * y_stride + bestmv->col;
796 0 : const uint8_t *const y = xd->plane[0].pre[0].buf;
797 :
798 0 : int br = bestmv->row * 8;
799 0 : int bc = bestmv->col * 8;
800 0 : int hstep = 4;
801 0 : int iter, round = 3 - forced_stop;
802 0 : int tr = br;
803 0 : int tc = bc;
804 0 : const MV *search_step = search_step_table;
805 0 : int idx, best_idx = -1;
806 : unsigned int cost_array[5];
807 : int kr, kc;
808 : int minc, maxc, minr, maxr;
809 :
810 0 : av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
811 : ref_mv);
812 :
813 0 : if (!allow_hp)
814 0 : if (round == 3) round = 2;
815 :
816 0 : bestmv->row *= 8;
817 0 : bestmv->col *= 8;
818 :
819 : // use_upsampled_ref can be 0 or 1
820 0 : if (use_upsampled_ref)
821 0 : besterr = upsampled_setup_center_error(
822 : xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y,
823 : y_stride, second_pred,
824 : #if CONFIG_EXT_INTER
825 : mask, mask_stride, invert_mask,
826 : #endif
827 : w, h, (offset * 8), mvjcost, mvcost, sse1, distortion);
828 : else
829 0 : besterr =
830 : setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address,
831 : src_stride, y, y_stride, second_pred,
832 : #if CONFIG_EXT_INTER
833 : mask, mask_stride, invert_mask,
834 : #endif
835 : w, h, offset, mvjcost, mvcost, sse1, distortion);
836 :
837 : (void)cost_list; // to silence compiler warning
838 :
839 0 : for (iter = 0; iter < round; ++iter) {
840 : // Check vertical and horizontal sub-pixel positions.
841 0 : for (idx = 0; idx < 4; ++idx) {
842 0 : tr = br + search_step[idx].row;
843 0 : tc = bc + search_step[idx].col;
844 0 : if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
845 0 : MV this_mv = { tr, tc };
846 :
847 0 : if (use_upsampled_ref) {
848 0 : const uint8_t *const pre_address = y + tr * y_stride + tc;
849 :
850 0 : thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
851 : pre_address, y_stride, second_pred,
852 : #if CONFIG_EXT_INTER
853 : mask, mask_stride, invert_mask,
854 : #endif
855 : w, h, &sse);
856 : } else {
857 0 : const uint8_t *const pre_address =
858 0 : y + (tr >> 3) * y_stride + (tc >> 3);
859 0 : if (second_pred == NULL)
860 0 : thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
861 : src_address, src_stride, &sse);
862 : #if CONFIG_EXT_INTER
863 0 : else if (mask)
864 0 : thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
865 : src_address, src_stride, second_pred, mask,
866 : mask_stride, invert_mask, &sse);
867 : #endif
868 : else
869 0 : thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
870 : src_address, src_stride, &sse, second_pred);
871 : }
872 :
873 0 : cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
874 : mvcost, error_per_bit);
875 :
876 0 : if (cost_array[idx] < besterr) {
877 0 : best_idx = idx;
878 0 : besterr = cost_array[idx];
879 0 : *distortion = thismse;
880 0 : *sse1 = sse;
881 : }
882 : } else {
883 0 : cost_array[idx] = INT_MAX;
884 : }
885 : }
886 :
887 : // Check diagonal sub-pixel position
888 0 : kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
889 0 : kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
890 :
891 0 : tc = bc + kc;
892 0 : tr = br + kr;
893 0 : if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
894 0 : MV this_mv = { tr, tc };
895 :
896 0 : if (use_upsampled_ref) {
897 0 : const uint8_t *const pre_address = y + tr * y_stride + tc;
898 :
899 0 : thismse = upsampled_pref_error(xd, vfp, src_address, src_stride,
900 : pre_address, y_stride, second_pred,
901 : #if CONFIG_EXT_INTER
902 : mask, mask_stride, invert_mask,
903 : #endif
904 : w, h, &sse);
905 : } else {
906 0 : const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
907 :
908 0 : if (second_pred == NULL)
909 0 : thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
910 : src_stride, &sse);
911 : #if CONFIG_EXT_INTER
912 0 : else if (mask)
913 0 : thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr),
914 : src_address, src_stride, second_pred, mask,
915 : mask_stride, invert_mask, &sse);
916 : #endif
917 : else
918 0 : thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
919 : src_address, src_stride, &sse, second_pred);
920 : }
921 :
922 0 : cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
923 : error_per_bit);
924 :
925 0 : if (cost_array[4] < besterr) {
926 0 : best_idx = 4;
927 0 : besterr = cost_array[4];
928 0 : *distortion = thismse;
929 0 : *sse1 = sse;
930 : }
931 : } else {
932 0 : cost_array[idx] = INT_MAX;
933 : }
934 :
935 0 : if (best_idx < 4 && best_idx >= 0) {
936 0 : br += search_step[best_idx].row;
937 0 : bc += search_step[best_idx].col;
938 0 : } else if (best_idx == 4) {
939 0 : br = tr;
940 0 : bc = tc;
941 : }
942 :
943 0 : if (iters_per_step > 1 && best_idx != -1) {
944 0 : if (use_upsampled_ref) {
945 0 : SECOND_LEVEL_CHECKS_BEST(1);
946 : } else {
947 0 : SECOND_LEVEL_CHECKS_BEST(0);
948 : }
949 : }
950 :
951 0 : search_step += 4;
952 0 : hstep >>= 1;
953 0 : best_idx = -1;
954 : }
955 :
956 : // These lines ensure static analysis doesn't warn that
957 : // tr and tc aren't used after the above point.
958 : (void)tr;
959 : (void)tc;
960 :
961 0 : bestmv->row = br;
962 0 : bestmv->col = bc;
963 :
964 0 : return besterr;
965 : }
966 :
967 : #undef PRE
968 : #undef CHECK_BETTER
969 :
970 : #if CONFIG_WARPED_MOTION
971 0 : unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
972 : BLOCK_SIZE bsize, int mi_row, int mi_col,
973 : const MV *this_mv) {
974 0 : const AV1_COMMON *const cm = &cpi->common;
975 0 : MACROBLOCKD *xd = &x->e_mbd;
976 0 : MODE_INFO *mi = xd->mi[0];
977 0 : MB_MODE_INFO *mbmi = &mi->mbmi;
978 0 : const uint8_t *const src = x->plane[0].src.buf;
979 0 : const int src_stride = x->plane[0].src.stride;
980 0 : uint8_t *const dst = xd->plane[0].dst.buf;
981 0 : const int dst_stride = xd->plane[0].dst.stride;
982 0 : const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize];
983 0 : const MV ref_mv = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
984 : unsigned int mse;
985 : unsigned int sse;
986 :
987 0 : av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
988 0 : mse = vfp->vf(dst, dst_stride, src, src_stride, &sse);
989 0 : mse +=
990 0 : mv_err_cost(this_mv, &ref_mv, x->nmvjointcost, x->mvcost, x->errorperbit);
991 0 : return mse;
992 : }
993 :
994 : // Refine MV in a small range
995 0 : unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
996 : BLOCK_SIZE bsize, int mi_row, int mi_col,
997 : int *pts, int *pts_inref) {
998 0 : const AV1_COMMON *const cm = &cpi->common;
999 0 : MACROBLOCKD *xd = &x->e_mbd;
1000 0 : MODE_INFO *mi = xd->mi[0];
1001 0 : MB_MODE_INFO *mbmi = &mi->mbmi;
1002 0 : const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 },
1003 : { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } };
1004 0 : const MV ref_mv = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv;
1005 0 : int16_t br = mbmi->mv[0].as_mv.row;
1006 0 : int16_t bc = mbmi->mv[0].as_mv.col;
1007 0 : int16_t *tr = &mbmi->mv[0].as_mv.row;
1008 0 : int16_t *tc = &mbmi->mv[0].as_mv.col;
1009 0 : WarpedMotionParams best_wm_params = mbmi->wm_params[0];
1010 : unsigned int bestmse;
1011 : int minc, maxc, minr, maxr;
1012 0 : const int start = cm->allow_high_precision_mv ? 0 : 4;
1013 : int ite;
1014 :
1015 0 : av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
1016 : &ref_mv);
1017 :
1018 : // Calculate the center position's error
1019 0 : assert(bc >= minc && bc <= maxc && br >= minr && br <= maxr);
1020 0 : bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col,
1021 0 : &mbmi->mv[0].as_mv);
1022 :
1023 : // MV search
1024 0 : for (ite = 0; ite < 2; ++ite) {
1025 0 : int best_idx = -1;
1026 : int idx;
1027 :
1028 0 : for (idx = start; idx < start + 4; ++idx) {
1029 : unsigned int thismse;
1030 :
1031 0 : *tr = br + neighbors[idx].row;
1032 0 : *tc = bc + neighbors[idx].col;
1033 :
1034 0 : if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) {
1035 0 : MV this_mv = { *tr, *tc };
1036 0 : if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, *tr,
1037 0 : *tc, &mbmi->wm_params[0], mi_row, mi_col)) {
1038 0 : thismse =
1039 0 : av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv);
1040 :
1041 0 : if (thismse < bestmse) {
1042 0 : best_idx = idx;
1043 0 : best_wm_params = mbmi->wm_params[0];
1044 0 : bestmse = thismse;
1045 : }
1046 : }
1047 : }
1048 : }
1049 :
1050 0 : if (best_idx == -1) break;
1051 :
1052 0 : if (best_idx >= 0) {
1053 0 : br += neighbors[best_idx].row;
1054 0 : bc += neighbors[best_idx].col;
1055 : }
1056 : }
1057 :
1058 0 : *tr = br;
1059 0 : *tc = bc;
1060 0 : mbmi->wm_params[0] = best_wm_params;
1061 :
1062 0 : return bestmse;
1063 : }
1064 : #endif // CONFIG_WARPED_MOTION
1065 :
1066 0 : static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
1067 : int range) {
1068 0 : return ((row - range) >= mv_limits->row_min) &
1069 0 : ((row + range) <= mv_limits->row_max) &
1070 0 : ((col - range) >= mv_limits->col_min) &
1071 0 : ((col + range) <= mv_limits->col_max);
1072 : }
1073 :
1074 0 : static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
1075 0 : return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
1076 0 : (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
1077 : }
1078 :
// CHECK_BETTER: records candidate `i` as the new best when its SAD wins.
//
// The raw `thissad` is compared against `bestsad` first; only if it is lower
// is the MV cost from mvsad_err_cost() added (when `use_mvcost` is non-zero)
// and the comparison repeated. On success `bestsad` and `best_site` are
// updated. Note that `thissad` itself is modified when the cost is added.
//
// The expansion site must have these names in scope: thissad, bestsad,
// best_site, i, use_mvcost, x, this_mv, fcenter_mv and sad_per_bit.
// The macro expands to a braced block, so it is written without a trailing
// semicolon.
1079 : #define CHECK_BETTER \
1080 : { \
1081 : if (thissad < bestsad) { \
1082 : if (use_mvcost) \
1083 : thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
1084 : if (thissad < bestsad) { \
1085 : bestsad = thissad; \
1086 : best_site = i; \
1087 : } \
1088 : } \
1089 : }
1090 :
// Dimensions of the per-scale candidate tables handed to pattern_search().
// MAX_PATTERN_SCALES is the number of scales (first table dimension).
1091 : #define MAX_PATTERN_SCALES 11
1092 : #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale
1093 : #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
1094 :
1095 : // Calculate and return a sad+mvcost list around an integer best pel.
1096 0 : static INLINE void calc_int_cost_list(const MACROBLOCK *x,
1097 : const MV *const ref_mv, int sadpb,
1098 : const aom_variance_fn_ptr_t *fn_ptr,
1099 : const MV *best_mv, int *cost_list) {
1100 : static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
1101 0 : const struct buf_2d *const what = &x->plane[0].src;
1102 0 : const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
1103 0 : const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
1104 0 : const int br = best_mv->row;
1105 0 : const int bc = best_mv->col;
1106 : int i;
1107 : unsigned int sse;
1108 0 : const MV this_mv = { br, bc };
1109 :
1110 0 : cost_list[0] =
1111 0 : fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
1112 0 : in_what->stride, &sse) +
1113 0 : mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
1114 0 : if (check_bounds(&x->mv_limits, br, bc, 1)) {
1115 0 : for (i = 0; i < 4; i++) {
1116 0 : const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1117 0 : cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
1118 : get_buf_from_mv(in_what, &neighbor_mv),
1119 0 : in_what->stride, &sse) +
1120 0 : mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost,
1121 : x->mvcost, x->errorperbit);
1122 : }
1123 : } else {
1124 0 : for (i = 0; i < 4; i++) {
1125 0 : const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1126 0 : if (!is_mv_in(&x->mv_limits, &neighbor_mv))
1127 0 : cost_list[i + 1] = INT_MAX;
1128 : else
1129 0 : cost_list[i + 1] =
1130 0 : fn_ptr->vf(what->buf, what->stride,
1131 : get_buf_from_mv(in_what, &neighbor_mv), in_what->stride,
1132 0 : &sse) +
1133 0 : mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
1134 : x->errorperbit);
1135 : }
1136 : }
1137 0 : }
1138 :
1139 0 : static INLINE void calc_int_sad_list(const MACROBLOCK *x,
1140 : const MV *const ref_mv, int sadpb,
1141 : const aom_variance_fn_ptr_t *fn_ptr,
1142 : const MV *best_mv, int *cost_list,
1143 : const int use_mvcost, const int bestsad) {
1144 : static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
1145 0 : const struct buf_2d *const what = &x->plane[0].src;
1146 0 : const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
1147 0 : const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
1148 : int i;
1149 0 : const int br = best_mv->row;
1150 0 : const int bc = best_mv->col;
1151 :
1152 0 : if (cost_list[0] == INT_MAX) {
1153 0 : cost_list[0] = bestsad;
1154 0 : if (check_bounds(&x->mv_limits, br, bc, 1)) {
1155 0 : for (i = 0; i < 4; i++) {
1156 0 : const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1157 0 : cost_list[i + 1] =
1158 0 : fn_ptr->sdf(what->buf, what->stride,
1159 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1160 : }
1161 : } else {
1162 0 : for (i = 0; i < 4; i++) {
1163 0 : const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1164 0 : if (!is_mv_in(&x->mv_limits, &this_mv))
1165 0 : cost_list[i + 1] = INT_MAX;
1166 : else
1167 0 : cost_list[i + 1] =
1168 0 : fn_ptr->sdf(what->buf, what->stride,
1169 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1170 : }
1171 : }
1172 : } else {
1173 0 : if (use_mvcost) {
1174 0 : for (i = 0; i < 4; i++) {
1175 0 : const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1176 0 : if (cost_list[i + 1] != INT_MAX) {
1177 0 : cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
1178 : }
1179 : }
1180 : }
1181 : }
1182 0 : }
1183 :
1184 : // Generic pattern search function that searches over multiple scales.
1185 : // Each scale can have a different number of candidates and shape of
1186 : // candidates as indicated in the num_candidates and candidates arrays
1187 : // passed into this function
1188 : //
// Parameters (as used by the body):
//   x              - macroblock state; the result is stored in x->best_mv.
//   start_mv       - search start; clamped IN PLACE to x->mv_limits.
//   search_param   - index into search_param_to_steps; selects the largest
//                    scale that is searched (0 -> scale 10, 10 -> scale 0).
//   sad_per_bit    - weight passed to mvsad_err_cost().
//   do_init_search - when non-zero, every scale from 0 up to the selected
//                    one is probed around the start point first and the
//                    scale of the overall best point becomes the start scale.
//   cost_list      - optional (may be NULL) array of 5 ints receiving the
//                    cost at the final position and its four neighbours.
//   vfp            - function table; sdf() supplies the SAD values.
//   use_mvcost     - when non-zero, CHECK_BETTER adds the MV cost to
//                    candidate SADs.
//   center_mv      - MV the cost is measured against; both components are
//                    shifted right by 3 before use.
//   num_candidates - number of valid entries of candidates[] per scale.
//   candidates     - per-scale offset tables.
//
// Returns the best SAD-based cost found (bestsad). Note that the cost of the
// start point always includes mvsad_err_cost(), regardless of use_mvcost.
1189 0 : static int pattern_search(
1190 : MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit,
1191 : int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp,
1192 : int use_mvcost, const MV *center_mv,
1193 : const int num_candidates[MAX_PATTERN_SCALES],
1194 : const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
1195 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1196 : static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
1197 : 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
1198 : };
1199 : int i, s, t;
1200 0 : const struct buf_2d *const what = &x->plane[0].src;
1201 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
// True when scale 0 of the supplied pattern has exactly 4 points; this
// selects the SAD-list path for cost_list below.
1202 0 : const int last_is_4 = num_candidates[0] == 4;
1203 : int br, bc;
1204 0 : int bestsad = INT_MAX;
1205 : int thissad;
// k: index (within the current scale's table) of the candidate most recently
// moved to; it seeds the 3-point refinement.
1206 0 : int k = -1;
1207 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
1208 0 : int best_init_s = search_param_to_steps[search_param];
1209 : // adjust ref_mv to make sure it is within MV range
1210 0 : clamp_mv(start_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1211 : x->mv_limits.row_min, x->mv_limits.row_max);
1212 0 : br = start_mv->row;
1213 0 : bc = start_mv->col;
// INT_MAX in cost_list[0] marks the list as "not filled yet" for
// calc_int_sad_list().
1214 0 : if (cost_list != NULL) {
1215 0 : cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
1216 : INT_MAX;
1217 : }
1218 :
1219 : // Work out the start point for the search
1220 0 : bestsad = vfp->sdf(what->buf, what->stride,
1221 0 : get_buf_from_mv(in_what, start_mv), in_what->stride) +
1222 0 : mvsad_err_cost(x, start_mv, &fcenter_mv, sad_per_bit);
1223 :
1224 : // Search all possible scales up to the search param around the center point
1225 : // pick the scale of the point that is best as the starting scale of
1226 : // further steps around it.
1227 0 : if (do_init_search) {
1228 0 : s = best_init_s;
1229 0 : best_init_s = -1;
1230 0 : for (t = 0; t <= s; ++t) {
1231 0 : int best_site = -1;
// Fast path: if the whole reach of this scale is inside the limits, the
// per-candidate range test is skipped.
1232 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
1233 0 : for (i = 0; i < num_candidates[t]; i++) {
1234 0 : const MV this_mv = { br + candidates[t][i].row,
1235 0 : bc + candidates[t][i].col };
1236 0 : thissad =
1237 0 : vfp->sdf(what->buf, what->stride,
1238 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1239 0 : CHECK_BETTER
1240 : }
1241 : } else {
1242 0 : for (i = 0; i < num_candidates[t]; i++) {
1243 0 : const MV this_mv = { br + candidates[t][i].row,
1244 0 : bc + candidates[t][i].col };
1245 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1246 0 : thissad =
1247 0 : vfp->sdf(what->buf, what->stride,
1248 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1249 0 : CHECK_BETTER
1250 : }
1251 : }
// bestsad carries over between scales, so a later scale only wins if it
// beats every earlier one.
1252 0 : if (best_site == -1) {
1253 0 : continue;
1254 : } else {
1255 0 : best_init_s = t;
1256 0 : k = best_site;
1257 : }
1258 : }
1259 0 : if (best_init_s != -1) {
1260 0 : br += candidates[best_init_s][k].row;
1261 0 : bc += candidates[best_init_s][k].col;
1262 : }
1263 : }
1264 :
1265 : // If the center point is still the best, just skip this and move to
1266 : // the refinement step.
1267 0 : if (best_init_s != -1) {
// When the 4-point SAD list is wanted, scale 0 is left to the dedicated
// `s == 0` block below, so the loop stops at scale 1.
1268 0 : const int last_s = (last_is_4 && cost_list != NULL);
1269 0 : int best_site = -1;
1270 0 : s = best_init_s;
1271 :
1272 0 : for (; s >= last_s; s--) {
1273 : // No need to search all points the 1st time if initial search was used
1274 0 : if (!do_init_search || s != best_init_s) {
1275 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1276 0 : for (i = 0; i < num_candidates[s]; i++) {
1277 0 : const MV this_mv = { br + candidates[s][i].row,
1278 0 : bc + candidates[s][i].col };
1279 0 : thissad =
1280 0 : vfp->sdf(what->buf, what->stride,
1281 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1282 0 : CHECK_BETTER
1283 : }
1284 : } else {
1285 0 : for (i = 0; i < num_candidates[s]; i++) {
1286 0 : const MV this_mv = { br + candidates[s][i].row,
1287 0 : bc + candidates[s][i].col };
1288 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1289 0 : thissad =
1290 0 : vfp->sdf(what->buf, what->stride,
1291 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1292 0 : CHECK_BETTER
1293 : }
1294 : }
1295 :
// No improvement at this scale: drop to the next smaller scale without
// running the refinement loop.
1296 0 : if (best_site == -1) {
1297 0 : continue;
1298 : } else {
1299 0 : br += candidates[s][best_site].row;
1300 0 : bc += candidates[s][best_site].col;
1301 0 : k = best_site;
1302 : }
1303 : }
1304 :
// Refinement at the current scale: after moving in direction k, only k and
// its two neighbours in the table (with wrap-around) are tested, and this
// repeats for as long as one of them improves the cost.
1305 : do {
1306 : int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1307 0 : best_site = -1;
1308 0 : next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1309 0 : next_chkpts_indices[1] = k;
1310 0 : next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1311 :
1312 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1313 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1314 0 : const MV this_mv = {
1315 0 : br + candidates[s][next_chkpts_indices[i]].row,
1316 0 : bc + candidates[s][next_chkpts_indices[i]].col
1317 : };
1318 0 : thissad =
1319 0 : vfp->sdf(what->buf, what->stride,
1320 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1321 0 : CHECK_BETTER
1322 : }
1323 : } else {
1324 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1325 0 : const MV this_mv = {
1326 0 : br + candidates[s][next_chkpts_indices[i]].row,
1327 0 : bc + candidates[s][next_chkpts_indices[i]].col
1328 : };
1329 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1330 0 : thissad =
1331 0 : vfp->sdf(what->buf, what->stride,
1332 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1333 0 : CHECK_BETTER
1334 : }
1335 : }
1336 :
// Here best_site indexes next_chkpts_indices, not the candidate table.
1337 0 : if (best_site != -1) {
1338 0 : k = next_chkpts_indices[best_site];
1339 0 : br += candidates[s][k].row;
1340 0 : bc += candidates[s][k].col;
1341 : }
1342 0 : } while (best_site != -1);
1343 : }
1344 :
// The loop above exits with s == last_s - 1, so s == 0 here only when
// last_s was 1 (4-point scale 0 and cost_list supplied). This block repeats
// the scale-0 search while recording the raw SADs into cost_list.
1345 : // Note: If we enter the if below, then cost_list must be non-NULL.
1346 0 : if (s == 0) {
1347 0 : cost_list[0] = bestsad;
1348 0 : if (!do_init_search || s != best_init_s) {
1349 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1350 0 : for (i = 0; i < num_candidates[s]; i++) {
1351 0 : const MV this_mv = { br + candidates[s][i].row,
1352 0 : bc + candidates[s][i].col };
1353 0 : cost_list[i + 1] = thissad =
1354 0 : vfp->sdf(what->buf, what->stride,
1355 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1356 0 : CHECK_BETTER
1357 : }
1358 : } else {
1359 0 : for (i = 0; i < num_candidates[s]; i++) {
1360 0 : const MV this_mv = { br + candidates[s][i].row,
1361 0 : bc + candidates[s][i].col };
1362 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1363 0 : cost_list[i + 1] = thissad =
1364 0 : vfp->sdf(what->buf, what->stride,
1365 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1366 0 : CHECK_BETTER
1367 : }
1368 : }
1369 :
1370 0 : if (best_site != -1) {
1371 0 : br += candidates[s][best_site].row;
1372 0 : bc += candidates[s][best_site].col;
1373 0 : k = best_site;
1374 : }
1375 : }
1376 0 : while (best_site != -1) {
1377 : int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1378 0 : best_site = -1;
1379 0 : next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1380 0 : next_chkpts_indices[1] = k;
1381 0 : next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
// The centre just moved in direction k, so the previous centre's cost is
// stored in slot (k + 2) % 4.
// NOTE(review): this presumably relies on the 4-point table listing opposite
// directions two entries apart (see the ordering documented below) - confirm
// against the caller's table.
1382 0 : cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
1383 0 : cost_list[((k + 2) % 4) + 1] = cost_list[0];
1384 0 : cost_list[0] = bestsad;
1385 :
1386 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1387 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1388 0 : const MV this_mv = {
1389 0 : br + candidates[s][next_chkpts_indices[i]].row,
1390 0 : bc + candidates[s][next_chkpts_indices[i]].col
1391 : };
1392 0 : cost_list[next_chkpts_indices[i] + 1] = thissad =
1393 0 : vfp->sdf(what->buf, what->stride,
1394 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1395 0 : CHECK_BETTER
1396 : }
1397 : } else {
1398 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1399 0 : const MV this_mv = {
1400 0 : br + candidates[s][next_chkpts_indices[i]].row,
1401 0 : bc + candidates[s][next_chkpts_indices[i]].col
1402 : };
1403 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) {
1404 0 : cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
1405 0 : continue;
1406 : }
1407 0 : cost_list[next_chkpts_indices[i] + 1] = thissad =
1408 0 : vfp->sdf(what->buf, what->stride,
1409 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1410 0 : CHECK_BETTER
1411 : }
1412 : }
1413 :
1414 0 : if (best_site != -1) {
1415 0 : k = next_chkpts_indices[best_site];
1416 0 : br += candidates[s][k].row;
1417 0 : bc += candidates[s][k].col;
1418 : }
1419 : }
1420 : }
1421 : }
1422 :
1423 : // Returns the one-away integer pel cost/sad around the best as follows:
1424 : // cost_list[0]: cost/sad at the best integer pel
1425 : // cost_list[1]: cost/sad at delta {0, -1} (left) from the best integer pel
1426 : // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel
1427 : // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel
1428 : // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel
1429 0 : if (cost_list) {
1430 0 : const MV best_int_mv = { br, bc };
1431 0 : if (last_is_4) {
1432 0 : calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list,
1433 : use_mvcost, bestsad);
1434 : } else {
1435 0 : calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv,
1436 : cost_list);
1437 : }
1438 : }
// Publish the final position through x->best_mv; the return value is its
// cost.
1439 0 : x->best_mv.as_mv.row = br;
1440 0 : x->best_mv.as_mv.col = bc;
1441 0 : return bestsad;
1442 : }
1443 :
1444 0 : int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
1445 : const MV *center_mv, const aom_variance_fn_ptr_t *vfp,
1446 : int use_mvcost) {
1447 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1448 0 : const struct buf_2d *const what = &x->plane[0].src;
1449 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1450 0 : const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1451 : unsigned int unused;
1452 :
1453 0 : return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
1454 0 : in_what->stride, &unused) +
1455 0 : (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1456 : x->errorperbit)
1457 0 : : 0);
1458 : }
1459 :
1460 0 : int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
1461 : const MV *center_mv, const uint8_t *second_pred,
1462 : const aom_variance_fn_ptr_t *vfp, int use_mvcost) {
1463 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1464 0 : const struct buf_2d *const what = &x->plane[0].src;
1465 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1466 0 : const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1467 : unsigned int unused;
1468 :
1469 0 : return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
1470 0 : what->buf, what->stride, &unused, second_pred) +
1471 0 : (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1472 : x->errorperbit)
1473 0 : : 0);
1474 : }
1475 :
1476 : #if CONFIG_EXT_INTER
1477 0 : int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv,
1478 : const MV *center_mv, const uint8_t *second_pred,
1479 : const uint8_t *mask, int mask_stride,
1480 : int invert_mask, const aom_variance_fn_ptr_t *vfp,
1481 : int use_mvcost) {
1482 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1483 0 : const struct buf_2d *const what = &x->plane[0].src;
1484 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1485 0 : const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1486 : unsigned int unused;
1487 :
1488 0 : return vfp->msvf(what->buf, what->stride, 0, 0,
1489 : get_buf_from_mv(in_what, best_mv), in_what->stride,
1490 0 : second_pred, mask, mask_stride, invert_mask, &unused) +
1491 0 : (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1492 : x->errorperbit)
1493 0 : : 0);
1494 : }
1495 : #endif
1496 :
1497 0 : int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param,
1498 : int sad_per_bit, int do_init_search, int *cost_list,
1499 : const aom_variance_fn_ptr_t *vfp, int use_mvcost,
1500 : const MV *center_mv) {
1501 : // First scale has 8-closest points, the rest have 6 points in hex shape
1502 : // at increasing scales
1503 : static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
1504 : 6, 6, 6, 6, 6 };
1505 : // Note that the largest candidate step at each scale is 2^scale
1506 : /* clang-format off */
1507 : static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1508 : { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
1509 : { -1, 0 } },
1510 : { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
1511 : { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
1512 : { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
1513 : { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
1514 : { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
1515 : { -32, 0 } },
1516 : { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
1517 : { -64, 0 } },
1518 : { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
1519 : { -128, 0 } },
1520 : { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
1521 : { -256, 0 } },
1522 : { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
1523 : { -512, 0 } },
1524 : { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
1525 : { -512, 1024 }, { -1024, 0 } },
1526 : };
1527 : /* clang-format on */
1528 0 : return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
1529 : cost_list, vfp, use_mvcost, center_mv,
1530 : hex_num_candidates, hex_candidates);
1531 : }
1532 :
1533 0 : static int bigdia_search(MACROBLOCK *x, MV *start_mv, int search_param,
1534 : int sad_per_bit, int do_init_search, int *cost_list,
1535 : const aom_variance_fn_ptr_t *vfp, int use_mvcost,
1536 : const MV *center_mv) {
1537 : // First scale has 4-closest points, the rest have 8 points in diamond
1538 : // shape at increasing scales
1539 : static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
1540 : 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1541 : };
1542 : // Note that the largest candidate step at each scale is 2^scale
1543 : /* clang-format off */
1544 : static const MV
1545 : bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1546 : { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
1547 : { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
1548 : { -1, 1 }, { -2, 0 } },
1549 : { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
1550 : { -2, 2 }, { -4, 0 } },
1551 : { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
1552 : { -4, 4 }, { -8, 0 } },
1553 : { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
1554 : { -8, 8 }, { -16, 0 } },
1555 : { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
1556 : { 0, 32 }, { -16, 16 }, { -32, 0 } },
1557 : { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
1558 : { 0, 64 }, { -32, 32 }, { -64, 0 } },
1559 : { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
1560 : { 0, 128 }, { -64, 64 }, { -128, 0 } },
1561 : { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
1562 : { 0, 256 }, { -128, 128 }, { -256, 0 } },
1563 : { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
1564 : { 0, 512 }, { -256, 256 }, { -512, 0 } },
1565 : { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
1566 : { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } },
1567 : };
1568 : /* clang-format on */
1569 0 : return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
1570 : cost_list, vfp, use_mvcost, center_mv,
1571 : bigdia_num_candidates, bigdia_candidates);
1572 : }
1573 :
1574 0 : static int square_search(MACROBLOCK *x, MV *start_mv, int search_param,
1575 : int sad_per_bit, int do_init_search, int *cost_list,
1576 : const aom_variance_fn_ptr_t *vfp, int use_mvcost,
1577 : const MV *center_mv) {
1578 : // All scales have 8 closest points in square shape
1579 : static const int square_num_candidates[MAX_PATTERN_SCALES] = {
1580 : 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1581 : };
1582 : // Note that the largest candidate step at each scale is 2^scale
1583 : /* clang-format off */
1584 : static const MV
1585 : square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1586 : { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
1587 : { -1, 1 }, { -1, 0 } },
1588 : { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
1589 : { -2, 2 }, { -2, 0 } },
1590 : { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
1591 : { -4, 4 }, { -4, 0 } },
1592 : { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
1593 : { -8, 8 }, { -8, 0 } },
1594 : { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
1595 : { 0, 16 }, { -16, 16 }, { -16, 0 } },
1596 : { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
1597 : { 0, 32 }, { -32, 32 }, { -32, 0 } },
1598 : { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
1599 : { 0, 64 }, { -64, 64 }, { -64, 0 } },
1600 : { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
1601 : { 0, 128 }, { -128, 128 }, { -128, 0 } },
1602 : { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
1603 : { 0, 256 }, { -256, 256 }, { -256, 0 } },
1604 : { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
1605 : { 0, 512 }, { -512, 512 }, { -512, 0 } },
1606 : { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
1607 : { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } },
1608 : };
1609 : /* clang-format on */
1610 0 : return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search,
1611 : cost_list, vfp, use_mvcost, center_mv,
1612 : square_num_candidates, square_candidates);
1613 : }
1614 :
1615 0 : static int fast_hex_search(MACROBLOCK *x, MV *ref_mv, int search_param,
1616 : int sad_per_bit,
1617 : int do_init_search, // must be zero for fast_hex
1618 : int *cost_list, const aom_variance_fn_ptr_t *vfp,
1619 : int use_mvcost, const MV *center_mv) {
1620 0 : return av1_hex_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
1621 : sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
1622 : center_mv);
1623 : }
1624 :
1625 0 : static int fast_dia_search(MACROBLOCK *x, MV *ref_mv, int search_param,
1626 : int sad_per_bit, int do_init_search, int *cost_list,
1627 : const aom_variance_fn_ptr_t *vfp, int use_mvcost,
1628 : const MV *center_mv) {
1629 0 : return bigdia_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param),
1630 : sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
1631 : center_mv);
1632 : }
1633 :
1634 : #undef CHECK_BETTER
1635 :
1636 : // Exhuastive motion search around a given centre position with a given
1637 : // step size.
1638 0 : static int exhuastive_mesh_search(MACROBLOCK *x, MV *ref_mv, MV *best_mv,
1639 : int range, int step, int sad_per_bit,
1640 : const aom_variance_fn_ptr_t *fn_ptr,
1641 : const MV *center_mv) {
1642 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1643 0 : const struct buf_2d *const what = &x->plane[0].src;
1644 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1645 0 : MV fcenter_mv = { center_mv->row, center_mv->col };
1646 0 : unsigned int best_sad = INT_MAX;
1647 : int r, c, i;
1648 : int start_col, end_col, start_row, end_row;
1649 0 : int col_step = (step > 1) ? step : 4;
1650 :
1651 0 : assert(step >= 1);
1652 :
1653 0 : clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1654 : x->mv_limits.row_min, x->mv_limits.row_max);
1655 0 : *best_mv = fcenter_mv;
1656 0 : best_sad =
1657 0 : fn_ptr->sdf(what->buf, what->stride,
1658 : get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
1659 0 : mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
1660 0 : start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
1661 0 : start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
1662 0 : end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.row);
1663 0 : end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.col);
1664 :
1665 0 : for (r = start_row; r <= end_row; r += step) {
1666 0 : for (c = start_col; c <= end_col; c += col_step) {
1667 : // Step > 1 means we are not checking every location in this pass.
1668 0 : if (step > 1) {
1669 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
1670 0 : unsigned int sad =
1671 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
1672 : in_what->stride);
1673 0 : if (sad < best_sad) {
1674 0 : sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1675 0 : if (sad < best_sad) {
1676 0 : best_sad = sad;
1677 0 : x->second_best_mv.as_mv = *best_mv;
1678 0 : *best_mv = mv;
1679 : }
1680 : }
1681 : } else {
1682 : // 4 sads in a single call if we are checking every location
1683 0 : if (c + 3 <= end_col) {
1684 : unsigned int sads[4];
1685 : const uint8_t *addrs[4];
1686 0 : for (i = 0; i < 4; ++i) {
1687 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1688 0 : addrs[i] = get_buf_from_mv(in_what, &mv);
1689 : }
1690 0 : fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
1691 :
1692 0 : for (i = 0; i < 4; ++i) {
1693 0 : if (sads[i] < best_sad) {
1694 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1695 0 : const unsigned int sad =
1696 0 : sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1697 0 : if (sad < best_sad) {
1698 0 : best_sad = sad;
1699 0 : x->second_best_mv.as_mv = *best_mv;
1700 0 : *best_mv = mv;
1701 : }
1702 : }
1703 : }
1704 : } else {
1705 0 : for (i = 0; i < end_col - c; ++i) {
1706 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1707 0 : unsigned int sad =
1708 0 : fn_ptr->sdf(what->buf, what->stride,
1709 : get_buf_from_mv(in_what, &mv), in_what->stride);
1710 0 : if (sad < best_sad) {
1711 0 : sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1712 0 : if (sad < best_sad) {
1713 0 : best_sad = sad;
1714 0 : x->second_best_mv.as_mv = *best_mv;
1715 0 : *best_mv = mv;
1716 : }
1717 : }
1718 : }
1719 : }
1720 : }
1721 : }
1722 : }
1723 :
1724 0 : return best_sad;
1725 : }
1726 :
1727 0 : int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg,
1728 : MV *ref_mv, MV *best_mv, int search_param,
1729 : int sad_per_bit, int *num00,
1730 : const aom_variance_fn_ptr_t *fn_ptr,
1731 : const MV *center_mv) {
1732 : int i, j, step;
1733 :
1734 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1735 0 : uint8_t *what = x->plane[0].src.buf;
1736 0 : const int what_stride = x->plane[0].src.stride;
1737 : const uint8_t *in_what;
1738 0 : const int in_what_stride = xd->plane[0].pre[0].stride;
1739 : const uint8_t *best_address;
1740 :
1741 0 : unsigned int bestsad = INT_MAX;
1742 0 : int best_site = 0;
1743 0 : int last_site = 0;
1744 :
1745 : int ref_row;
1746 : int ref_col;
1747 :
1748 : // search_param determines the length of the initial step and hence the number
1749 : // of iterations.
1750 : // 0 = initial step (MAX_FIRST_STEP) pel
1751 : // 1 = (MAX_FIRST_STEP/2) pel,
1752 : // 2 = (MAX_FIRST_STEP/4) pel...
1753 0 : const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
1754 0 : const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
1755 :
1756 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
1757 0 : clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1758 : x->mv_limits.row_min, x->mv_limits.row_max);
1759 0 : ref_row = ref_mv->row;
1760 0 : ref_col = ref_mv->col;
1761 0 : *num00 = 0;
1762 0 : best_mv->row = ref_row;
1763 0 : best_mv->col = ref_col;
1764 :
1765 : // Work out the start point for the search
1766 0 : in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
1767 0 : best_address = in_what;
1768 :
1769 : // Check the starting position
1770 0 : bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1771 0 : mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
1772 :
1773 0 : i = 1;
1774 :
1775 0 : for (step = 0; step < tot_steps; step++) {
1776 0 : int all_in = 1, t;
1777 :
1778 : // All_in is true if every one of the points we are checking are within
1779 : // the bounds of the image.
1780 0 : all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_limits.row_min);
1781 0 : all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_limits.row_max);
1782 0 : all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_limits.col_min);
1783 0 : all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_limits.col_max);
1784 :
1785 : // If all the pixels are within the bounds we don't check whether the
1786 : // search point is valid in this loop, otherwise we check each point
1787 : // for validity..
1788 0 : if (all_in) {
1789 : unsigned int sad_array[4];
1790 :
1791 0 : for (j = 0; j < cfg->searches_per_step; j += 4) {
1792 : unsigned char const *block_offset[4];
1793 :
1794 0 : for (t = 0; t < 4; t++)
1795 0 : block_offset[t] = ss[i + t].offset + best_address;
1796 :
1797 0 : fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1798 : sad_array);
1799 :
1800 0 : for (t = 0; t < 4; t++, i++) {
1801 0 : if (sad_array[t] < bestsad) {
1802 0 : const MV this_mv = { best_mv->row + ss[i].mv.row,
1803 0 : best_mv->col + ss[i].mv.col };
1804 0 : sad_array[t] +=
1805 0 : mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1806 0 : if (sad_array[t] < bestsad) {
1807 0 : bestsad = sad_array[t];
1808 0 : best_site = i;
1809 : }
1810 : }
1811 : }
1812 : }
1813 : } else {
1814 0 : for (j = 0; j < cfg->searches_per_step; j++) {
1815 : // Trap illegal vectors
1816 0 : const MV this_mv = { best_mv->row + ss[i].mv.row,
1817 0 : best_mv->col + ss[i].mv.col };
1818 :
1819 0 : if (is_mv_in(&x->mv_limits, &this_mv)) {
1820 0 : const uint8_t *const check_here = ss[i].offset + best_address;
1821 0 : unsigned int thissad =
1822 0 : fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1823 :
1824 0 : if (thissad < bestsad) {
1825 0 : thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1826 0 : if (thissad < bestsad) {
1827 0 : bestsad = thissad;
1828 0 : best_site = i;
1829 : }
1830 : }
1831 : }
1832 0 : i++;
1833 : }
1834 : }
1835 0 : if (best_site != last_site) {
1836 0 : x->second_best_mv.as_mv = *best_mv;
1837 0 : best_mv->row += ss[best_site].mv.row;
1838 0 : best_mv->col += ss[best_site].mv.col;
1839 0 : best_address += ss[best_site].offset;
1840 0 : last_site = best_site;
1841 : #if defined(NEW_DIAMOND_SEARCH)
1842 : while (1) {
1843 : const MV this_mv = { best_mv->row + ss[best_site].mv.row,
1844 : best_mv->col + ss[best_site].mv.col };
1845 : if (is_mv_in(&x->mv_limits, &this_mv)) {
1846 : const uint8_t *const check_here = ss[best_site].offset + best_address;
1847 : unsigned int thissad =
1848 : fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1849 : if (thissad < bestsad) {
1850 : thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1851 : if (thissad < bestsad) {
1852 : bestsad = thissad;
1853 : best_mv->row += ss[best_site].mv.row;
1854 : best_mv->col += ss[best_site].mv.col;
1855 : best_address += ss[best_site].offset;
1856 : continue;
1857 : }
1858 : }
1859 : }
1860 : break;
1861 : }
1862 : #endif
1863 0 : } else if (best_address == in_what) {
1864 0 : (*num00)++;
1865 : }
1866 : }
1867 0 : return bestsad;
1868 : }
1869 :
// Finds the offset into ref, in the range [0, bw] with bw = 4 << bwl, at
// which aom_vector_var(&ref[offset], src, bwl) is smallest, using a coarse
// scan followed by successively finer two-sided refinements.
//
// Returns the chosen offset relative to the middle of the range, i.e.
// offset - bw / 2.
static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  const int bw = 4 << bwl;  // redundant variable, to be changed in the
                            // experiments.
  int best_sad = INT_MAX;
  int offset = 0;
  int center;
  int pos, step, side;

  // Coarse scan: every 16th position across the whole range.
  for (pos = 0; pos <= bw; pos += 16) {
    const int this_sad = aom_vector_var(&ref[pos], src, bwl);
    if (this_sad < best_sad) {
      best_sad = this_sad;
      offset = pos;
    }
  }
  center = offset;

  // Refinement: probe offset - step, then offset + step, for step = 8, 4, 2,
  // 1. The probe origin (offset) only moves between passes, never within one.
  for (step = 8; step >= 1; step >>= 1) {
    for (side = -1; side <= 1; side += 2) {
      const int this_pos = offset + side * step;
      int this_sad;
      // check limit
      if (this_pos < 0 || this_pos > bw) continue;
      this_sad = aom_vector_var(&ref[this_pos], src, bwl);
      if (this_sad < best_sad) {
        best_sad = this_sad;
        center = this_pos;
      }
    }
    offset = center;
  }

  return (center - (bw >> 1));
}
1934 :
1935 : static const MV search_pos[4] = {
1936 : { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
1937 : };
1938 :
1939 0 : unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
1940 : BLOCK_SIZE bsize, int mi_row,
1941 : int mi_col) {
1942 0 : MACROBLOCKD *xd = &x->e_mbd;
1943 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1944 0 : struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
1945 : DECLARE_ALIGNED(16, int16_t, hbuf[2 * MAX_SB_SIZE]);
1946 : DECLARE_ALIGNED(16, int16_t, vbuf[2 * MAX_SB_SIZE]);
1947 : DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]);
1948 : DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]);
1949 : int idx;
1950 0 : const int src_stride = x->plane[0].src.stride;
1951 0 : const int ref_stride = xd->plane[0].pre[0].stride;
1952 : uint8_t const *ref_buf, *src_buf;
1953 0 : MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
1954 : unsigned int best_sad, tmp_sad, sad_arr[4];
1955 : MV this_mv;
1956 0 : const YV12_BUFFER_CONFIG *scaled_ref_frame =
1957 0 : av1_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
1958 :
1959 0 : if (scaled_ref_frame) {
1960 : int i;
1961 : // Swap out the reference frame for a version that's been scaled to
1962 : // match the resolution of the current frame, allowing the existing
1963 : // motion search code to be used without additional modifications.
1964 0 : for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
1965 0 : av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
1966 : }
1967 :
1968 : #if CONFIG_HIGHBITDEPTH
1969 : {
1970 : unsigned int this_sad;
1971 0 : tmp_mv->row = 0;
1972 0 : tmp_mv->col = 0;
1973 0 : this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
1974 0 : xd->plane[0].pre[0].buf, ref_stride);
1975 :
1976 0 : if (scaled_ref_frame) {
1977 : int i;
1978 0 : for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
1979 : }
1980 0 : return this_sad;
1981 : }
1982 : #endif
1983 :
1984 : const int bw = 4 << b_width_log2_lookup[bsize];
1985 : const int bh = 4 << b_height_log2_lookup[bsize];
1986 : const int search_width = bw << 1;
1987 : const int search_height = bh << 1;
1988 : const int norm_factor = 3 + (bw >> 5);
1989 :
1990 : // Set up prediction 1-D reference set
1991 : ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
1992 : for (idx = 0; idx < search_width; idx += 16) {
1993 : aom_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
1994 : ref_buf += 16;
1995 : }
1996 :
1997 : ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
1998 : for (idx = 0; idx < search_height; ++idx) {
1999 : vbuf[idx] = aom_int_pro_col(ref_buf, bw) >> norm_factor;
2000 : ref_buf += ref_stride;
2001 : }
2002 :
2003 : // Set up src 1-D reference set
2004 : for (idx = 0; idx < bw; idx += 16) {
2005 : src_buf = x->plane[0].src.buf + idx;
2006 : aom_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
2007 : }
2008 :
2009 : src_buf = x->plane[0].src.buf;
2010 : for (idx = 0; idx < bh; ++idx) {
2011 : src_vbuf[idx] = aom_int_pro_col(src_buf, bw) >> norm_factor;
2012 : src_buf += src_stride;
2013 : }
2014 :
2015 : // Find the best match per 1-D search
2016 : tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
2017 : tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
2018 :
2019 : this_mv = *tmp_mv;
2020 : src_buf = x->plane[0].src.buf;
2021 : ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
2022 : best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
2023 :
2024 : {
2025 : const uint8_t *const pos[4] = {
2026 : ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
2027 : };
2028 :
2029 : cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, sad_arr);
2030 : }
2031 :
2032 : for (idx = 0; idx < 4; ++idx) {
2033 : if (sad_arr[idx] < best_sad) {
2034 : best_sad = sad_arr[idx];
2035 : tmp_mv->row = search_pos[idx].row + this_mv.row;
2036 : tmp_mv->col = search_pos[idx].col + this_mv.col;
2037 : }
2038 : }
2039 :
2040 : if (sad_arr[0] < sad_arr[3])
2041 : this_mv.row -= 1;
2042 : else
2043 : this_mv.row += 1;
2044 :
2045 : if (sad_arr[1] < sad_arr[2])
2046 : this_mv.col -= 1;
2047 : else
2048 : this_mv.col += 1;
2049 :
2050 : ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
2051 :
2052 : tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
2053 : if (best_sad > tmp_sad) {
2054 : *tmp_mv = this_mv;
2055 : best_sad = tmp_sad;
2056 : }
2057 :
2058 : tmp_mv->row *= 8;
2059 : tmp_mv->col *= 8;
2060 :
2061 : if (scaled_ref_frame) {
2062 : int i;
2063 : for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
2064 : }
2065 :
2066 : return best_sad;
2067 : }
2068 :
2069 : /* do_refine: If last step (1-away) of n-step search doesn't pick the center
2070 : point as the best match, we will do a final 1-away diamond
2071 : refining search */
2072 0 : static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x,
2073 : MV *mvp_full, int step_param, int sadpb,
2074 : int further_steps, int do_refine, int *cost_list,
2075 : const aom_variance_fn_ptr_t *fn_ptr,
2076 : const MV *ref_mv) {
2077 : MV temp_mv;
2078 0 : int thissme, n, num00 = 0;
2079 0 : int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
2080 : step_param, sadpb, &n, fn_ptr, ref_mv);
2081 0 : if (bestsme < INT_MAX)
2082 0 : bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2083 0 : x->best_mv.as_mv = temp_mv;
2084 :
2085 : // If there won't be more n-step search, check to see if refining search is
2086 : // needed.
2087 0 : if (n > further_steps) do_refine = 0;
2088 :
2089 0 : while (n < further_steps) {
2090 0 : ++n;
2091 :
2092 0 : if (num00) {
2093 0 : num00--;
2094 : } else {
2095 0 : thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
2096 : step_param + n, sadpb, &num00, fn_ptr,
2097 : ref_mv);
2098 0 : if (thissme < INT_MAX)
2099 0 : thissme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2100 :
2101 : // check to see if refining search is needed.
2102 0 : if (num00 > further_steps - n) do_refine = 0;
2103 :
2104 0 : if (thissme < bestsme) {
2105 0 : bestsme = thissme;
2106 0 : x->best_mv.as_mv = temp_mv;
2107 : }
2108 : }
2109 : }
2110 :
2111 : // final 1-away diamond refining search
2112 0 : if (do_refine) {
2113 0 : const int search_range = 8;
2114 0 : MV best_mv = x->best_mv.as_mv;
2115 0 : thissme = av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
2116 : ref_mv);
2117 0 : if (thissme < INT_MAX)
2118 0 : thissme = av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
2119 0 : if (thissme < bestsme) {
2120 0 : bestsme = thissme;
2121 0 : x->best_mv.as_mv = best_mv;
2122 : }
2123 : }
2124 :
2125 : // Return cost list.
2126 0 : if (cost_list) {
2127 0 : calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
2128 : }
2129 0 : return bestsme;
2130 : }
2131 :
2132 : #define MIN_RANGE 7
2133 : #define MAX_RANGE 256
2134 : #define MIN_INTERVAL 1
2135 : // Runs an limited range exhaustive mesh search using a pattern set
2136 : // according to the encode speed profile.
2137 0 : static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x,
2138 : const MV *centre_mv_full, int sadpb,
2139 : int *cost_list,
2140 : const aom_variance_fn_ptr_t *fn_ptr,
2141 : const MV *ref_mv, MV *dst_mv) {
2142 0 : const SPEED_FEATURES *const sf = &cpi->sf;
2143 0 : MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
2144 0 : MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
2145 : int bestsme;
2146 : int i;
2147 0 : int interval = sf->mesh_patterns[0].interval;
2148 0 : int range = sf->mesh_patterns[0].range;
2149 : int baseline_interval_divisor;
2150 :
2151 : // Keep track of number of exhaustive calls (this frame in this thread).
2152 0 : ++(*x->ex_search_count_ptr);
2153 :
2154 : // Trap illegal values for interval and range for this function.
2155 0 : if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
2156 : (interval > range))
2157 0 : return INT_MAX;
2158 :
2159 0 : baseline_interval_divisor = range / interval;
2160 :
2161 : // Check size of proposed first range against magnitude of the centre
2162 : // value used as a starting point.
2163 0 : range = AOMMAX(range, (5 * AOMMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
2164 0 : range = AOMMIN(range, MAX_RANGE);
2165 0 : interval = AOMMAX(interval, range / baseline_interval_divisor);
2166 :
2167 : // initial search
2168 0 : bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
2169 : sadpb, fn_ptr, &temp_mv);
2170 :
2171 0 : if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
2172 : // Progressive searches with range and step size decreasing each time
2173 : // till we reach a step size of 1. Then break out.
2174 0 : for (i = 1; i < MAX_MESH_STEP; ++i) {
2175 : // First pass with coarser step and longer range
2176 0 : bestsme = exhuastive_mesh_search(
2177 : x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
2178 : sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
2179 :
2180 0 : if (sf->mesh_patterns[i].interval == 1) break;
2181 : }
2182 : }
2183 :
2184 0 : if (bestsme < INT_MAX)
2185 0 : bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2186 0 : *dst_mv = temp_mv;
2187 :
2188 : // Return cost list.
2189 0 : if (cost_list) {
2190 0 : calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
2191 : }
2192 0 : return bestsme;
2193 : }
2194 :
2195 0 : int av1_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
2196 : int sad_per_bit, int distance,
2197 : const aom_variance_fn_ptr_t *fn_ptr,
2198 : const MV *center_mv, MV *best_mv) {
2199 : int r, c;
2200 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2201 0 : const struct buf_2d *const what = &x->plane[0].src;
2202 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2203 0 : const int row_min = AOMMAX(ref_mv->row - distance, x->mv_limits.row_min);
2204 0 : const int row_max = AOMMIN(ref_mv->row + distance, x->mv_limits.row_max);
2205 0 : const int col_min = AOMMAX(ref_mv->col - distance, x->mv_limits.col_min);
2206 0 : const int col_max = AOMMIN(ref_mv->col + distance, x->mv_limits.col_max);
2207 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2208 0 : int best_sad =
2209 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2210 0 : in_what->stride) +
2211 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2212 0 : *best_mv = *ref_mv;
2213 :
2214 0 : for (r = row_min; r < row_max; ++r) {
2215 0 : for (c = col_min; c < col_max; ++c) {
2216 0 : const MV mv = { r, c };
2217 0 : const int sad =
2218 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
2219 0 : in_what->stride) +
2220 0 : mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2221 0 : if (sad < best_sad) {
2222 0 : best_sad = sad;
2223 0 : *best_mv = mv;
2224 : }
2225 : }
2226 : }
2227 0 : return best_sad;
2228 : }
2229 :
2230 0 : int av1_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
2231 : int sad_per_bit, int distance,
2232 : const aom_variance_fn_ptr_t *fn_ptr,
2233 : const MV *center_mv, MV *best_mv) {
2234 : int r;
2235 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2236 0 : const struct buf_2d *const what = &x->plane[0].src;
2237 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2238 0 : const int row_min = AOMMAX(ref_mv->row - distance, x->mv_limits.row_min);
2239 0 : const int row_max = AOMMIN(ref_mv->row + distance, x->mv_limits.row_max);
2240 0 : const int col_min = AOMMAX(ref_mv->col - distance, x->mv_limits.col_min);
2241 0 : const int col_max = AOMMIN(ref_mv->col + distance, x->mv_limits.col_max);
2242 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2243 0 : unsigned int best_sad =
2244 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2245 : in_what->stride) +
2246 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2247 0 : *best_mv = *ref_mv;
2248 :
2249 0 : for (r = row_min; r < row_max; ++r) {
2250 0 : int c = col_min;
2251 0 : const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
2252 :
2253 0 : if (fn_ptr->sdx3f != NULL) {
2254 0 : while ((c + 2) < col_max) {
2255 : int i;
2256 : DECLARE_ALIGNED(16, uint32_t, sads[3]);
2257 :
2258 0 : fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
2259 : sads);
2260 :
2261 0 : for (i = 0; i < 3; ++i) {
2262 0 : unsigned int sad = sads[i];
2263 0 : if (sad < best_sad) {
2264 0 : const MV mv = { r, c };
2265 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2266 0 : if (sad < best_sad) {
2267 0 : best_sad = sad;
2268 0 : *best_mv = mv;
2269 : }
2270 : }
2271 0 : ++check_here;
2272 0 : ++c;
2273 : }
2274 : }
2275 : }
2276 :
2277 0 : while (c < col_max) {
2278 0 : unsigned int sad =
2279 0 : fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
2280 0 : if (sad < best_sad) {
2281 0 : const MV mv = { r, c };
2282 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2283 0 : if (sad < best_sad) {
2284 0 : best_sad = sad;
2285 0 : *best_mv = mv;
2286 : }
2287 : }
2288 0 : ++check_here;
2289 0 : ++c;
2290 : }
2291 : }
2292 :
2293 0 : return best_sad;
2294 : }
2295 :
2296 0 : int av1_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
2297 : int sad_per_bit, int distance,
2298 : const aom_variance_fn_ptr_t *fn_ptr,
2299 : const MV *center_mv, MV *best_mv) {
2300 : int r;
2301 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2302 0 : const struct buf_2d *const what = &x->plane[0].src;
2303 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2304 0 : const int row_min = AOMMAX(ref_mv->row - distance, x->mv_limits.row_min);
2305 0 : const int row_max = AOMMIN(ref_mv->row + distance, x->mv_limits.row_max);
2306 0 : const int col_min = AOMMAX(ref_mv->col - distance, x->mv_limits.col_min);
2307 0 : const int col_max = AOMMIN(ref_mv->col + distance, x->mv_limits.col_max);
2308 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2309 0 : unsigned int best_sad =
2310 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2311 : in_what->stride) +
2312 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2313 0 : *best_mv = *ref_mv;
2314 :
2315 0 : for (r = row_min; r < row_max; ++r) {
2316 0 : int c = col_min;
2317 0 : const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
2318 :
2319 0 : if (fn_ptr->sdx8f != NULL) {
2320 0 : while ((c + 7) < col_max) {
2321 : int i;
2322 : DECLARE_ALIGNED(16, uint32_t, sads[8]);
2323 :
2324 0 : fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
2325 : sads);
2326 :
2327 0 : for (i = 0; i < 8; ++i) {
2328 0 : unsigned int sad = sads[i];
2329 0 : if (sad < best_sad) {
2330 0 : const MV mv = { r, c };
2331 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2332 0 : if (sad < best_sad) {
2333 0 : best_sad = sad;
2334 0 : *best_mv = mv;
2335 : }
2336 : }
2337 0 : ++check_here;
2338 0 : ++c;
2339 : }
2340 : }
2341 : }
2342 :
2343 0 : if (fn_ptr->sdx3f != NULL) {
2344 0 : while ((c + 2) < col_max) {
2345 : int i;
2346 : DECLARE_ALIGNED(16, uint32_t, sads[3]);
2347 :
2348 0 : fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
2349 : sads);
2350 :
2351 0 : for (i = 0; i < 3; ++i) {
2352 0 : unsigned int sad = sads[i];
2353 0 : if (sad < best_sad) {
2354 0 : const MV mv = { r, c };
2355 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2356 0 : if (sad < best_sad) {
2357 0 : best_sad = sad;
2358 0 : *best_mv = mv;
2359 : }
2360 : }
2361 0 : ++check_here;
2362 0 : ++c;
2363 : }
2364 : }
2365 : }
2366 :
2367 0 : while (c < col_max) {
2368 0 : unsigned int sad =
2369 0 : fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
2370 0 : if (sad < best_sad) {
2371 0 : const MV mv = { r, c };
2372 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2373 0 : if (sad < best_sad) {
2374 0 : best_sad = sad;
2375 0 : *best_mv = mv;
2376 : }
2377 : }
2378 0 : ++check_here;
2379 0 : ++c;
2380 : }
2381 : }
2382 :
2383 0 : return best_sad;
2384 : }
2385 :
2386 0 : int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit,
2387 : int search_range,
2388 : const aom_variance_fn_ptr_t *fn_ptr,
2389 : const MV *center_mv) {
2390 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2391 0 : const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2392 0 : const struct buf_2d *const what = &x->plane[0].src;
2393 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2394 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2395 0 : const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
2396 0 : unsigned int best_sad =
2397 0 : fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
2398 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2399 : int i, j;
2400 :
2401 0 : for (i = 0; i < search_range; i++) {
2402 0 : int best_site = -1;
2403 0 : const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
2404 0 : ((ref_mv->row + 1) < x->mv_limits.row_max) &
2405 0 : ((ref_mv->col - 1) > x->mv_limits.col_min) &
2406 0 : ((ref_mv->col + 1) < x->mv_limits.col_max);
2407 :
2408 0 : if (all_in) {
2409 : unsigned int sads[4];
2410 0 : const uint8_t *const positions[4] = { best_address - in_what->stride,
2411 0 : best_address - 1, best_address + 1,
2412 0 : best_address + in_what->stride };
2413 :
2414 0 : fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
2415 :
2416 0 : for (j = 0; j < 4; ++j) {
2417 0 : if (sads[j] < best_sad) {
2418 0 : const MV mv = { ref_mv->row + neighbors[j].row,
2419 0 : ref_mv->col + neighbors[j].col };
2420 0 : sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2421 0 : if (sads[j] < best_sad) {
2422 0 : best_sad = sads[j];
2423 0 : best_site = j;
2424 : }
2425 : }
2426 : }
2427 : } else {
2428 0 : for (j = 0; j < 4; ++j) {
2429 0 : const MV mv = { ref_mv->row + neighbors[j].row,
2430 0 : ref_mv->col + neighbors[j].col };
2431 :
2432 0 : if (is_mv_in(&x->mv_limits, &mv)) {
2433 0 : unsigned int sad =
2434 0 : fn_ptr->sdf(what->buf, what->stride,
2435 : get_buf_from_mv(in_what, &mv), in_what->stride);
2436 0 : if (sad < best_sad) {
2437 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2438 0 : if (sad < best_sad) {
2439 0 : best_sad = sad;
2440 0 : best_site = j;
2441 : }
2442 : }
2443 : }
2444 : }
2445 : }
2446 :
2447 0 : if (best_site == -1) {
2448 0 : break;
2449 : } else {
2450 0 : x->second_best_mv.as_mv = *ref_mv;
2451 0 : ref_mv->row += neighbors[best_site].row;
2452 0 : ref_mv->col += neighbors[best_site].col;
2453 0 : best_address = get_buf_from_mv(in_what, ref_mv);
2454 : }
2455 : }
2456 :
2457 0 : return best_sad;
2458 : }
2459 :
2460 : // This function is called when we do joint motion search in comp_inter_inter
2461 : // mode, or when searching for one component of an ext-inter compound mode.
2462 0 : int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range,
2463 : const aom_variance_fn_ptr_t *fn_ptr,
2464 : #if CONFIG_EXT_INTER
2465 : const uint8_t *mask, int mask_stride,
2466 : int invert_mask,
2467 : #endif
2468 : const MV *center_mv, const uint8_t *second_pred) {
2469 0 : const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
2470 : { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
2471 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2472 0 : const struct buf_2d *const what = &x->plane[0].src;
2473 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2474 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2475 0 : MV *best_mv = &x->best_mv.as_mv;
2476 0 : unsigned int best_sad = INT_MAX;
2477 : int i, j;
2478 :
2479 0 : clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max,
2480 : x->mv_limits.row_min, x->mv_limits.row_max);
2481 : #if CONFIG_EXT_INTER
2482 0 : if (mask)
2483 0 : best_sad = fn_ptr->msdf(what->buf, what->stride,
2484 : get_buf_from_mv(in_what, best_mv), in_what->stride,
2485 : second_pred, mask, mask_stride, invert_mask) +
2486 0 : mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
2487 : else
2488 : #endif
2489 0 : best_sad =
2490 0 : fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
2491 : in_what->stride, second_pred) +
2492 0 : mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit);
2493 :
2494 0 : for (i = 0; i < search_range; ++i) {
2495 0 : int best_site = -1;
2496 :
2497 0 : for (j = 0; j < 8; ++j) {
2498 0 : const MV mv = { best_mv->row + neighbors[j].row,
2499 0 : best_mv->col + neighbors[j].col };
2500 :
2501 0 : if (is_mv_in(&x->mv_limits, &mv)) {
2502 : unsigned int sad;
2503 : #if CONFIG_EXT_INTER
2504 0 : if (mask)
2505 0 : sad = fn_ptr->msdf(what->buf, what->stride,
2506 : get_buf_from_mv(in_what, &mv), in_what->stride,
2507 : second_pred, mask, mask_stride, invert_mask);
2508 : else
2509 : #endif
2510 0 : sad = fn_ptr->sdaf(what->buf, what->stride,
2511 : get_buf_from_mv(in_what, &mv), in_what->stride,
2512 : second_pred);
2513 0 : if (sad < best_sad) {
2514 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2515 0 : if (sad < best_sad) {
2516 0 : best_sad = sad;
2517 0 : best_site = j;
2518 : }
2519 : }
2520 : }
2521 : }
2522 :
2523 0 : if (best_site == -1) {
2524 0 : break;
2525 : } else {
2526 0 : best_mv->row += neighbors[best_site].row;
2527 0 : best_mv->col += neighbors[best_site].col;
2528 : }
2529 : }
2530 0 : return best_sad;
2531 : }
2532 :
2533 : #define MIN_EX_SEARCH_LIMIT 128
2534 0 : static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) {
2535 0 : const SPEED_FEATURES *const sf = &cpi->sf;
2536 0 : const int max_ex =
2537 0 : AOMMAX(MIN_EX_SEARCH_LIMIT,
2538 : (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
2539 :
2540 0 : return sf->allow_exhaustive_searches &&
2541 0 : (sf->exhaustive_searches_thresh < INT_MAX) &&
2542 0 : (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
2543 : }
2544 :
2545 0 : int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
2546 : MV *mvp_full, int step_param, int error_per_bit,
2547 : int *cost_list, const MV *ref_mv, int var_max,
2548 : int rd) {
2549 0 : const SPEED_FEATURES *const sf = &cpi->sf;
2550 0 : const SEARCH_METHODS method = sf->mv.search_method;
2551 0 : const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
2552 0 : int var = 0;
2553 :
2554 0 : if (cost_list) {
2555 0 : cost_list[0] = INT_MAX;
2556 0 : cost_list[1] = INT_MAX;
2557 0 : cost_list[2] = INT_MAX;
2558 0 : cost_list[3] = INT_MAX;
2559 0 : cost_list[4] = INT_MAX;
2560 : }
2561 :
2562 : // Keep track of number of searches (this frame in this thread).
2563 0 : ++(*x->m_search_count_ptr);
2564 :
2565 0 : switch (method) {
2566 : case FAST_DIAMOND:
2567 0 : var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
2568 : cost_list, fn_ptr, 1, ref_mv);
2569 0 : break;
2570 : case FAST_HEX:
2571 0 : var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
2572 : cost_list, fn_ptr, 1, ref_mv);
2573 0 : break;
2574 : case HEX:
2575 0 : var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2576 : fn_ptr, 1, ref_mv);
2577 0 : break;
2578 : case SQUARE:
2579 0 : var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2580 : fn_ptr, 1, ref_mv);
2581 0 : break;
2582 : case BIGDIA:
2583 0 : var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2584 : fn_ptr, 1, ref_mv);
2585 0 : break;
2586 : case NSTEP:
2587 0 : var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
2588 : MAX_MVSEARCH_STEPS - 1 - step_param, 1,
2589 : cost_list, fn_ptr, ref_mv);
2590 :
2591 : // Should we allow a follow on exhaustive search?
2592 0 : if (is_exhaustive_allowed(cpi, x)) {
2593 0 : int exhuastive_thr = sf->exhaustive_searches_thresh;
2594 0 : exhuastive_thr >>=
2595 0 : 10 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
2596 :
2597 : // Threshold variance for an exhaustive full search.
2598 0 : if (var > exhuastive_thr) {
2599 : int var_ex;
2600 : MV tmp_mv_ex;
2601 0 : var_ex =
2602 0 : full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit,
2603 : cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
2604 :
2605 0 : if (var_ex < var) {
2606 0 : var = var_ex;
2607 0 : x->best_mv.as_mv = tmp_mv_ex;
2608 : }
2609 : }
2610 : }
2611 0 : break;
2612 :
2613 : break;
2614 0 : default: assert(0 && "Invalid search method.");
2615 : }
2616 :
2617 0 : if (method != NSTEP && rd && var < var_max)
2618 0 : var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1);
2619 :
2620 0 : return var;
2621 : }
2622 :
2623 : #if CONFIG_MOTION_VAR
2624 : /* DIST(r, c): result of vfp->osvf for the reference position pre(y, y_stride, r, c) with sp(c), sp(r), measured against z and mask; the SSE is written to the local sse. */
2625 : #define DIST(r, c) \
2626 : vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
2627 :
2628 : /* MVC(r, c): cost of (r, c) relative to (rr, rc): joint + row + col table entries, times error_per_bit, rounded via (+ 4096) >> 13; evaluates to 0 when mvcost is NULL. */
2629 : #define MVC(r, c) \
2630 : (unsigned int)(mvcost \
2631 : ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
2632 : mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \
2633 : error_per_bit + \
2634 : 4096) >> \
2635 : 13 \
2636 : : 0)
2637 : /* CHECK_BETTER(v, r, c): if (r, c) is inside [minr, maxr] x [minc, maxc], sets v = MVC + DIST and, when v < besterr, updates besterr, br, bc, *distortion and *sse1; otherwise sets v = INT_MAX. Expands to a bare if/else that uses the caller's locals. */
2638 : #define CHECK_BETTER(v, r, c) \
2639 : if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
2640 : thismse = (DIST(r, c)); \
2641 : if ((v = MVC(r, c) + thismse) < besterr) { \
2642 : besterr = v; \
2643 : br = r; \
2644 : bc = c; \
2645 : *distortion = thismse; \
2646 : *sse1 = sse; \
2647 : } \
2648 : } else { \
2649 : v = INT_MAX; \
2650 : }
2651 : /* CHECK_BETTER0 forwards to CHECK_BETTER unchanged; any earlier definition is dropped first. */
2652 : #undef CHECK_BETTER0
2653 : #define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c)
2654 : /* CHECK_BETTER1: same bookkeeping as CHECK_BETTER, but the error comes from upsampled_obmc_pref_error() on upre(y, y_stride, r, c) with block size w x h. */
2655 : #undef CHECK_BETTER1
2656 : #define CHECK_BETTER1(v, r, c) \
2657 : if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
2658 : thismse = upsampled_obmc_pref_error( \
2659 : xd, mask, vfp, z, upre(y, y_stride, r, c), y_stride, w, h, &sse); \
2660 : if ((v = MVC(r, c) + thismse) < besterr) { \
2661 : besterr = v; \
2662 : br = r; \
2663 : bc = c; \
2664 : *distortion = thismse; \
2665 : *sse1 = sse; \
2666 : } \
2667 : } else { \
2668 : v = INT_MAX; \
2669 : }
2670 :
2671 0 : static unsigned int setup_obmc_center_error(
2672 : const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit,
2673 : const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc,
2674 : const uint8_t *const y, int y_stride, int offset, int *mvjcost,
2675 : int *mvcost[2], unsigned int *sse1, int *distortion) {
2676 : unsigned int besterr;
2677 0 : besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
2678 0 : *distortion = besterr;
2679 0 : besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
2680 0 : return besterr;
2681 : }
2682 :
2683 0 : static int upsampled_obmc_pref_error(const MACROBLOCKD *xd, const int32_t *mask,
2684 : const aom_variance_fn_ptr_t *vfp,
2685 : const int32_t *const wsrc,
2686 : const uint8_t *const y, int y_stride,
2687 : int w, int h, unsigned int *sse) {
2688 : unsigned int besterr;
2689 : #if CONFIG_HIGHBITDEPTH
2690 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2691 : DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
2692 0 : aom_highbd_upsampled_pred(pred16, w, h, y, y_stride);
2693 :
2694 0 : besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse);
2695 : } else {
2696 : DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
2697 : #else
2698 : DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
2699 : (void)xd;
2700 : #endif // CONFIG_HIGHBITDEPTH
2701 0 : aom_upsampled_pred(pred, w, h, y, y_stride);
2702 :
2703 0 : besterr = vfp->ovf(pred, w, wsrc, mask, sse);
2704 : #if CONFIG_HIGHBITDEPTH
2705 : }
2706 : #endif
2707 0 : return besterr;
2708 : }
2709 :
2710 0 : static unsigned int upsampled_setup_obmc_center_error(
2711 : const MACROBLOCKD *xd, const int32_t *mask, const MV *bestmv,
2712 : const MV *ref_mv, int error_per_bit, const aom_variance_fn_ptr_t *vfp,
2713 : const int32_t *const wsrc, const uint8_t *const y, int y_stride, int w,
2714 : int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1,
2715 : int *distortion) {
2716 0 : unsigned int besterr = upsampled_obmc_pref_error(
2717 : xd, mask, vfp, wsrc, y + offset, y_stride, w, h, sse1);
2718 0 : *distortion = besterr;
2719 0 : besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
2720 0 : return besterr;
2721 : }
2722 :
2723 0 : int av1_find_best_obmc_sub_pixel_tree_up(
2724 : const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, MV *bestmv,
2725 : const MV *ref_mv, int allow_hp, int error_per_bit,
2726 : const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
2727 : int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
2728 : int is_second, int use_upsampled_ref) {
2729 0 : const int32_t *wsrc = x->wsrc_buf;
2730 0 : const int32_t *mask = x->mask_buf;
2731 0 : const int *const z = wsrc;
2732 0 : const int *const src_address = z;
2733 0 : MACROBLOCKD *xd = &x->e_mbd;
2734 0 : struct macroblockd_plane *const pd = &xd->plane[0];
2735 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2736 0 : unsigned int besterr = INT_MAX;
2737 : unsigned int sse;
2738 : unsigned int thismse;
2739 :
2740 0 : int rr = ref_mv->row;
2741 0 : int rc = ref_mv->col;
2742 0 : int br = bestmv->row * 8;
2743 0 : int bc = bestmv->col * 8;
2744 0 : int hstep = 4;
2745 : int iter;
2746 0 : int round = 3 - forced_stop;
2747 0 : int tr = br;
2748 0 : int tc = bc;
2749 0 : const MV *search_step = search_step_table;
2750 0 : int idx, best_idx = -1;
2751 : unsigned int cost_array[5];
2752 : int kr, kc;
2753 0 : const int w = block_size_wide[mbmi->sb_type];
2754 0 : const int h = block_size_high[mbmi->sb_type];
2755 : int offset;
2756 : int y_stride;
2757 : const uint8_t *y;
2758 :
2759 0 : const struct buf_2d backup_pred = pd->pre[is_second];
2760 : int minc, maxc, minr, maxr;
2761 :
2762 0 : av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr,
2763 : ref_mv);
2764 :
2765 0 : if (use_upsampled_ref) {
2766 0 : int ref = xd->mi[0]->mbmi.ref_frame[is_second];
2767 0 : const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
2768 0 : setup_pred_plane(&pd->pre[is_second], mbmi->sb_type,
2769 : upsampled_ref->y_buffer, upsampled_ref->y_crop_width,
2770 : upsampled_ref->y_crop_height, upsampled_ref->y_stride,
2771 : (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
2772 : pd->subsampling_y);
2773 : }
2774 0 : y = pd->pre[is_second].buf;
2775 0 : y_stride = pd->pre[is_second].stride;
2776 0 : offset = bestmv->row * y_stride + bestmv->col;
2777 :
2778 0 : if (!allow_hp)
2779 0 : if (round == 3) round = 2;
2780 :
2781 0 : bestmv->row *= 8;
2782 0 : bestmv->col *= 8;
2783 : // use_upsampled_ref can be 0 or 1
2784 0 : if (use_upsampled_ref)
2785 0 : besterr = upsampled_setup_obmc_center_error(
2786 : xd, mask, bestmv, ref_mv, error_per_bit, vfp, z, y, y_stride, w, h,
2787 : (offset * 8), mvjcost, mvcost, sse1, distortion);
2788 : else
2789 0 : besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp,
2790 : z, y, y_stride, offset, mvjcost, mvcost,
2791 : sse1, distortion);
2792 :
2793 0 : for (iter = 0; iter < round; ++iter) {
2794 : // Check vertical and horizontal sub-pixel positions.
2795 0 : for (idx = 0; idx < 4; ++idx) {
2796 0 : tr = br + search_step[idx].row;
2797 0 : tc = bc + search_step[idx].col;
2798 0 : if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
2799 0 : MV this_mv = { tr, tc };
2800 :
2801 0 : if (use_upsampled_ref) {
2802 0 : const uint8_t *const pre_address = y + tr * y_stride + tc;
2803 :
2804 0 : thismse = upsampled_obmc_pref_error(
2805 : xd, mask, vfp, src_address, pre_address, y_stride, w, h, &sse);
2806 : } else {
2807 0 : const uint8_t *const pre_address =
2808 0 : y + (tr >> 3) * y_stride + (tc >> 3);
2809 0 : thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
2810 : src_address, mask, &sse);
2811 : }
2812 :
2813 0 : cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
2814 : mvcost, error_per_bit);
2815 0 : if (cost_array[idx] < besterr) {
2816 0 : best_idx = idx;
2817 0 : besterr = cost_array[idx];
2818 0 : *distortion = thismse;
2819 0 : *sse1 = sse;
2820 : }
2821 : } else {
2822 0 : cost_array[idx] = INT_MAX;
2823 : }
2824 : }
2825 :
2826 : // Check diagonal sub-pixel position
2827 0 : kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
2828 0 : kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
2829 :
2830 0 : tc = bc + kc;
2831 0 : tr = br + kr;
2832 0 : if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
2833 0 : MV this_mv = { tr, tc };
2834 :
2835 0 : if (use_upsampled_ref) {
2836 0 : const uint8_t *const pre_address = y + tr * y_stride + tc;
2837 :
2838 0 : thismse = upsampled_obmc_pref_error(xd, mask, vfp, src_address,
2839 : pre_address, y_stride, w, h, &sse);
2840 : } else {
2841 0 : const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
2842 :
2843 0 : thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr), src_address,
2844 : mask, &sse);
2845 : }
2846 :
2847 0 : cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
2848 : error_per_bit);
2849 :
2850 0 : if (cost_array[4] < besterr) {
2851 0 : best_idx = 4;
2852 0 : besterr = cost_array[4];
2853 0 : *distortion = thismse;
2854 0 : *sse1 = sse;
2855 : }
2856 : } else {
2857 0 : cost_array[idx] = INT_MAX;
2858 : }
2859 :
2860 0 : if (best_idx < 4 && best_idx >= 0) {
2861 0 : br += search_step[best_idx].row;
2862 0 : bc += search_step[best_idx].col;
2863 0 : } else if (best_idx == 4) {
2864 0 : br = tr;
2865 0 : bc = tc;
2866 : }
2867 :
2868 0 : if (iters_per_step > 1 && best_idx != -1) {
2869 0 : if (use_upsampled_ref) {
2870 0 : SECOND_LEVEL_CHECKS_BEST(1);
2871 : } else {
2872 0 : SECOND_LEVEL_CHECKS_BEST(0);
2873 : }
2874 : }
2875 :
2876 0 : tr = br;
2877 0 : tc = bc;
2878 :
2879 0 : search_step += 4;
2880 0 : hstep >>= 1;
2881 0 : best_idx = -1;
2882 : }
2883 :
2884 : // These lines insure static analysis doesn't warn that
2885 : // tr and tc aren't used after the above point.
2886 : (void)tr;
2887 : (void)tc;
2888 :
2889 0 : bestmv->row = br;
2890 0 : bestmv->col = bc;
2891 :
2892 0 : if (use_upsampled_ref) {
2893 0 : pd->pre[is_second] = backup_pred;
2894 : }
2895 :
2896 0 : return besterr;
2897 : }
2898 : /* Drop the OBMC-specific helper macros defined before setup_obmc_center_error() so they do not leak into the code that follows. */
2899 : #undef DIST
2900 : #undef MVC
2901 : #undef CHECK_BETTER
2902 :
2903 0 : static int get_obmc_mvpred_var(const MACROBLOCK *x, const int32_t *wsrc,
2904 : const int32_t *mask, const MV *best_mv,
2905 : const MV *center_mv,
2906 : const aom_variance_fn_ptr_t *vfp, int use_mvcost,
2907 : int is_second) {
2908 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2909 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
2910 0 : const MV mv = { best_mv->row * 8, best_mv->col * 8 };
2911 : unsigned int unused;
2912 :
2913 0 : return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride, wsrc,
2914 0 : mask, &unused) +
2915 0 : (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
2916 : x->errorperbit)
2917 0 : : 0);
2918 : }
2919 :
2920 0 : int obmc_refining_search_sad(const MACROBLOCK *x, const int32_t *wsrc,
2921 : const int32_t *mask, MV *ref_mv, int error_per_bit,
2922 : int search_range,
2923 : const aom_variance_fn_ptr_t *fn_ptr,
2924 : const MV *center_mv, int is_second) {
2925 0 : const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2926 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2927 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
2928 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2929 0 : unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
2930 : in_what->stride, wsrc, mask) +
2931 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2932 : int i, j;
2933 :
2934 0 : for (i = 0; i < search_range; i++) {
2935 0 : int best_site = -1;
2936 :
2937 0 : for (j = 0; j < 4; j++) {
2938 0 : const MV mv = { ref_mv->row + neighbors[j].row,
2939 0 : ref_mv->col + neighbors[j].col };
2940 0 : if (is_mv_in(&x->mv_limits, &mv)) {
2941 0 : unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
2942 : in_what->stride, wsrc, mask);
2943 0 : if (sad < best_sad) {
2944 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2945 0 : if (sad < best_sad) {
2946 0 : best_sad = sad;
2947 0 : best_site = j;
2948 : }
2949 : }
2950 : }
2951 : }
2952 :
2953 0 : if (best_site == -1) {
2954 0 : break;
2955 : } else {
2956 0 : ref_mv->row += neighbors[best_site].row;
2957 0 : ref_mv->col += neighbors[best_site].col;
2958 : }
2959 : }
2960 0 : return best_sad;
2961 : }
2962 :
2963 0 : int obmc_diamond_search_sad(const MACROBLOCK *x, const search_site_config *cfg,
2964 : const int32_t *wsrc, const int32_t *mask,
2965 : MV *ref_mv, MV *best_mv, int search_param,
2966 : int sad_per_bit, int *num00,
2967 : const aom_variance_fn_ptr_t *fn_ptr,
2968 : const MV *center_mv, int is_second) {
2969 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2970 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
2971 : // search_param determines the length of the initial step and hence the number
2972 : // of iterations
2973 : // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
2974 : // (MAX_FIRST_STEP/4) pel... etc.
2975 0 : const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
2976 0 : const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
2977 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2978 : const uint8_t *best_address, *in_what_ref;
2979 0 : int best_sad = INT_MAX;
2980 0 : int best_site = 0;
2981 0 : int last_site = 0;
2982 : int i, j, step;
2983 :
2984 0 : clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
2985 : x->mv_limits.row_min, x->mv_limits.row_max);
2986 0 : in_what_ref = in_what->buf + ref_mv->row * in_what->stride + ref_mv->col;
2987 0 : best_address = in_what_ref;
2988 0 : *num00 = 0;
2989 0 : *best_mv = *ref_mv;
2990 :
2991 : // Check the starting position
2992 0 : best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) +
2993 0 : mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
2994 :
2995 0 : i = 1;
2996 :
2997 0 : for (step = 0; step < tot_steps; step++) {
2998 0 : for (j = 0; j < cfg->searches_per_step; j++) {
2999 0 : const MV mv = { best_mv->row + ss[i].mv.row,
3000 0 : best_mv->col + ss[i].mv.col };
3001 0 : if (is_mv_in(&x->mv_limits, &mv)) {
3002 0 : int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
3003 : wsrc, mask);
3004 0 : if (sad < best_sad) {
3005 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
3006 0 : if (sad < best_sad) {
3007 0 : best_sad = sad;
3008 0 : best_site = i;
3009 : }
3010 : }
3011 : }
3012 :
3013 0 : i++;
3014 : }
3015 :
3016 0 : if (best_site != last_site) {
3017 0 : best_mv->row += ss[best_site].mv.row;
3018 0 : best_mv->col += ss[best_site].mv.col;
3019 0 : best_address += ss[best_site].offset;
3020 0 : last_site = best_site;
3021 : #if defined(NEW_DIAMOND_SEARCH)
3022 : while (1) {
3023 : const MV this_mv = { best_mv->row + ss[best_site].mv.row,
3024 : best_mv->col + ss[best_site].mv.col };
3025 : if (is_mv_in(&x->mv_limits, &this_mv)) {
3026 : int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
3027 : in_what->stride, wsrc, mask);
3028 : if (sad < best_sad) {
3029 : sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
3030 : if (sad < best_sad) {
3031 : best_sad = sad;
3032 : best_mv->row += ss[best_site].mv.row;
3033 : best_mv->col += ss[best_site].mv.col;
3034 : best_address += ss[best_site].offset;
3035 : continue;
3036 : }
3037 : }
3038 : }
3039 : break;
3040 : }
3041 : #endif
3042 0 : } else if (best_address == in_what_ref) {
3043 0 : (*num00)++;
3044 : }
3045 : }
3046 0 : return best_sad;
3047 : }
3048 :
3049 0 : int av1_obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x,
3050 : MV *mvp_full, int step_param, int sadpb,
3051 : int further_steps, int do_refine,
3052 : const aom_variance_fn_ptr_t *fn_ptr,
3053 : const MV *ref_mv, MV *dst_mv, int is_second) {
3054 0 : const int32_t *wsrc = x->wsrc_buf;
3055 0 : const int32_t *mask = x->mask_buf;
3056 : MV temp_mv;
3057 0 : int thissme, n, num00 = 0;
3058 0 : int bestsme =
3059 0 : obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full, &temp_mv,
3060 : step_param, sadpb, &n, fn_ptr, ref_mv, is_second);
3061 0 : if (bestsme < INT_MAX)
3062 0 : bestsme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1,
3063 : is_second);
3064 0 : *dst_mv = temp_mv;
3065 :
3066 : // If there won't be more n-step search, check to see if refining search is
3067 : // needed.
3068 0 : if (n > further_steps) do_refine = 0;
3069 :
3070 0 : while (n < further_steps) {
3071 0 : ++n;
3072 :
3073 0 : if (num00) {
3074 0 : num00--;
3075 : } else {
3076 0 : thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full,
3077 : &temp_mv, step_param + n, sadpb, &num00,
3078 : fn_ptr, ref_mv, is_second);
3079 0 : if (thissme < INT_MAX)
3080 0 : thissme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr,
3081 : 1, is_second);
3082 :
3083 : // check to see if refining search is needed.
3084 0 : if (num00 > further_steps - n) do_refine = 0;
3085 :
3086 0 : if (thissme < bestsme) {
3087 0 : bestsme = thissme;
3088 0 : *dst_mv = temp_mv;
3089 : }
3090 : }
3091 : }
3092 :
3093 : // final 1-away diamond refining search
3094 0 : if (do_refine) {
3095 0 : const int search_range = 8;
3096 0 : MV best_mv = *dst_mv;
3097 0 : thissme = obmc_refining_search_sad(x, wsrc, mask, &best_mv, sadpb,
3098 : search_range, fn_ptr, ref_mv, is_second);
3099 0 : if (thissme < INT_MAX)
3100 0 : thissme = get_obmc_mvpred_var(x, wsrc, mask, &best_mv, ref_mv, fn_ptr, 1,
3101 : is_second);
3102 0 : if (thissme < bestsme) {
3103 0 : bestsme = thissme;
3104 0 : *dst_mv = best_mv;
3105 : }
3106 : }
3107 0 : return bestsme;
3108 : }
3109 : #endif // CONFIG_MOTION_VAR
3110 :
3111 : // Note(yunqingwang): The following 2 functions are only used in the motion
3112 : // vector unit test; they return the extreme motion vectors allowed by the MV
3113 : // limits. COMMON_MV_TEST expands SETUP_SUBPEL_SEARCH and then casts every name listed below to void (presumably to silence unused-variable warnings, since neither function performs a search).
3114 : #define COMMON_MV_TEST \
3115 : SETUP_SUBPEL_SEARCH; \
3116 : \
3117 : (void)error_per_bit; \
3118 : (void)vfp; \
3119 : (void)src_address; \
3120 : (void)src_stride; \
3121 : (void)y; \
3122 : (void)y_stride; \
3123 : (void)second_pred; \
3124 : (void)w; \
3125 : (void)h; \
3126 : (void)use_upsampled_ref; \
3127 : (void)offset; \
3128 : (void)mvjcost; \
3129 : (void)mvcost; \
3130 : (void)sse1; \
3131 : (void)distortion; \
3132 : \
3133 : (void)halfiters; \
3134 : (void)quarteriters; \
3135 : (void)eighthiters; \
3136 : (void)whichdir; \
3137 : (void)forced_stop; \
3138 : (void)hstep; \
3139 : \
3140 : (void)tr; \
3141 : (void)tc; \
3142 : (void)sse; \
3143 : (void)thismse; \
3144 : (void)cost_list;
3145 : // Return the maximum MV.
3146 0 : int av1_return_max_sub_pixel_mv(
3147 : MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
3148 : const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
3149 : int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
3150 : unsigned int *sse1, const uint8_t *second_pred,
3151 : #if CONFIG_EXT_INTER
3152 : const uint8_t *mask, int mask_stride, int invert_mask,
3153 : #endif
3154 : int w, int h, int use_upsampled_ref) {
3155 0 : COMMON_MV_TEST;
3156 : #if CONFIG_EXT_INTER
3157 : (void)mask;
3158 : (void)mask_stride;
3159 : (void)invert_mask;
3160 : #endif
3161 : (void)minr;
3162 : (void)minc;
3163 0 : bestmv->row = maxr;
3164 0 : bestmv->col = maxc;
3165 0 : besterr = 0;
3166 : // In the sub-pel motion search, if hp is not used, then the last bit of mv
3167 : // has to be 0.
3168 0 : lower_mv_precision(bestmv, allow_hp);
3169 0 : return besterr;
3170 : }
3171 : // Return the minimum MV.
3172 0 : int av1_return_min_sub_pixel_mv(
3173 : MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit,
3174 : const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
3175 : int *cost_list, int *mvjcost, int *mvcost[2], int *distortion,
3176 : unsigned int *sse1, const uint8_t *second_pred,
3177 : #if CONFIG_EXT_INTER
3178 : const uint8_t *mask, int mask_stride, int invert_mask,
3179 : #endif
3180 : int w, int h, int use_upsampled_ref) {
3181 0 : COMMON_MV_TEST;
3182 : (void)maxr;
3183 : (void)maxc;
3184 : #if CONFIG_EXT_INTER
3185 : (void)mask;
3186 : (void)mask_stride;
3187 : (void)invert_mask;
3188 : #endif
3189 0 : bestmv->row = minr;
3190 0 : bestmv->col = minc;
3191 0 : besterr = 0;
3192 : // In the sub-pel motion search, if hp is not used, then the last bit of mv
3193 : // has to be 0.
3194 0 : lower_mv_precision(bestmv, allow_hp);
3195 0 : return besterr;
3196 : }
|