Line data Source code
1 : /*
2 : * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include <assert.h>
12 : #include <limits.h>
13 : #include <math.h>
14 : #include <stdio.h>
15 :
16 : #include "./vpx_config.h"
17 : #include "./vpx_dsp_rtcd.h"
18 :
19 : #include "vpx_dsp/vpx_dsp_common.h"
20 : #include "vpx_mem/vpx_mem.h"
21 : #include "vpx_ports/mem.h"
22 :
23 : #include "vp9/common/vp9_common.h"
24 : #include "vp9/common/vp9_reconinter.h"
25 :
26 : #include "vp9/encoder/vp9_encoder.h"
27 : #include "vp9/encoder/vp9_mcomp.h"
28 :
29 : // #define NEW_DIAMOND_SEARCH
30 :
// Returns the address inside buf->buf that lies mv->row rows (each
// buf->stride elements wide) and mv->col columns away from the buffer origin.
// No bounds checking is performed here.
31 0 : static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
32 : const MV *mv) {
33 0 : return &buf->buf[mv->row * buf->stride + mv->col];
34 : }
35 :
// Narrows *mv_limits so that it never extends beyond a window of
// +/- MAX_FULL_PEL_VAL around mv (after mv is shifted right by 3), further
// clamped to the (MV_LOW >> 3) + 1 .. (MV_UPP >> 3) - 1 range.
// Existing limits are only ever tightened, never widened.
36 0 : void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
// The minimum is bumped by one when mv has any of its low three bits set,
// i.e. the lower bound is rounded up rather than down.
37 0 : int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
38 0 : int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
39 0 : int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
40 0 : int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
41 :
// Keep the window strictly inside the MV_LOW / MV_UPP bounds.
42 0 : col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
43 0 : row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
44 0 : col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
45 0 : row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
46 :
47 : // Get intersection of UMV window and valid MV window to reduce # of checks
48 : // in diamond search.
49 0 : if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
50 0 : if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
51 0 : if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
52 0 : if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
53 0 : }
54 :
// Returns the smallest shift sr for which (VPXMAX(16, size) << sr) is at
// least MAX_FULL_PEL_VAL, capped at MAX_MVSEARCH_STEPS - 2.
55 0 : int vp9_init_search_range(int size) {
56 0 : int sr = 0;
57 : // Minimum search size no matter what the passed in value.
58 0 : size = VPXMAX(16, size);
59 :
60 0 : while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
61 :
62 0 : sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
63 0 : return sr;
64 : }
65 :
// Sums the joint cost selected by vp9_get_mv_joint(mv) with the row cost
// comp_cost[0][mv->row] and the column cost comp_cost[1][mv->col].
// The component tables are indexed directly with the signed row/col values;
// NOTE(review): this presumably relies on the pointers addressing the middle
// of larger tables -- confirm where the tables are set up.
66 0 : static INLINE int mv_cost(const MV *mv, const int *joint_cost,
67 : int *const comp_cost[2]) {
// Debug-only guard on the index range used for the table lookups below.
68 0 : assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
69 0 : assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
70 0 : return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] +
71 0 : comp_cost[1][mv->col];
72 : }
73 :
// Returns the table cost of the difference (mv - ref), multiplied by weight
// and then reduced with ROUND_POWER_OF_TWO(..., 7).
74 0 : int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
75 : int *mvcost[2], int weight) {
76 0 : const MV diff = { mv->row - ref->row, mv->col - ref->col };
77 0 : return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
78 : }
79 :
// Extra shift applied in mv_err_cost() on top of the RD-related shifts.
80 : #define PIXEL_TRANSFORM_ERROR_SCALE 4
// Returns the cost of (mv - ref) scaled by error_per_bit, or 0 when mvcost is
// NULL. The product is formed in unsigned arithmetic and then reduced with
// ROUND_POWER_OF_TWO by
//   RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
//   PIXEL_TRANSFORM_ERROR_SCALE
// bits.
81 0 : static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
82 : int *mvcost[2], int error_per_bit) {
83 0 : if (mvcost) {
84 0 : const MV diff = { mv->row - ref->row, mv->col - ref->col };
85 : // This product sits at a 32-bit ceiling right now and any additional
86 : // accuracy in either bit cost or error cost will cause it to overflow.
87 0 : return ROUND_POWER_OF_TWO(
88 : (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
89 : RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
90 : PIXEL_TRANSFORM_ERROR_SCALE);
91 : }
// No cost tables supplied: the motion vector contributes no cost.
92 0 : return 0;
93 : }
94 :
// SAD-domain counterpart of mv_err_cost(): looks up the cost of (mv - ref) in
// x->nmvjointsadcost / x->nmvsadcost, multiplies it by sad_per_bit in unsigned
// arithmetic and reduces it with ROUND_POWER_OF_TWO by VP9_PROB_COST_SHIFT.
// Unlike mv_err_cost() there is no NULL check on the cost tables.
95 0 : static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
96 : int sad_per_bit) {
97 0 : const MV diff = { mv->row - ref->row, mv->col - ref->col };
98 0 : return ROUND_POWER_OF_TWO(
99 : (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
100 : VP9_PROB_COST_SHIFT);
101 : }
102 :
// Fills cfg with a 4-point (up, down, left, right) search pattern.
// For every step length, starting at MAX_FIRST_STEP and halving until it
// reaches 1, four entries are appended to cfg->ss_mv (the {row, col} offset)
// and cfg->ss_os (the same offset as a linear buffer offset for the given
// stride). cfg->searches_per_step and cfg->total_steps are set at the end.
103 0 : void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
104 : int len;
105 0 : int ss_count = 0;
106 :
107 0 : for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
108 : // Generate offsets for 4 search sites per step.
109 0 : const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
110 : int i;
111 0 : for (i = 0; i < 4; ++i, ++ss_count) {
112 0 : cfg->ss_mv[ss_count] = ss_mvs[i];
113 0 : cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
114 : }
115 : }
116 :
117 0 : cfg->searches_per_step = 4;
// ss_count is a multiple of 4 here, so this is the number of step lengths.
118 0 : cfg->total_steps = ss_count / cfg->searches_per_step;
119 0 : }
120 :
// Fills cfg with an 8-point search pattern: the four axis-aligned offsets
// plus the four diagonals. For every step length, starting at MAX_FIRST_STEP
// and halving until it reaches 1, eight entries are appended to cfg->ss_mv
// (the {row, col} offset) and cfg->ss_os (the linear buffer offset for the
// given stride). cfg->searches_per_step and cfg->total_steps are set at the
// end.
121 0 : void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
122 : int len;
123 0 : int ss_count = 0;
124 :
125 0 : for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
126 : // Generate offsets for 8 search sites per step.
127 0 : const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len },
128 : { 0, len }, { -len, -len }, { -len, len },
129 : { len, -len }, { len, len } };
130 : int i;
131 0 : for (i = 0; i < 8; ++i, ++ss_count) {
132 0 : cfg->ss_mv[ss_count] = ss_mvs[i];
133 0 : cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
134 : }
135 : }
136 :
137 0 : cfg->searches_per_step = 8;
// ss_count is a multiple of 8 here, so this is the number of step lengths.
138 0 : cfg->total_steps = ss_count / cfg->searches_per_step;
139 0 : }
140 :
// MVC(r, c) is not self-contained: it reads mvcost, mvjcost, rr, rc and
// error_per_bit from the scope in which it is expanded.
// It evaluates to 0 when mvcost is NULL. Otherwise the joint-cost index is
// built as ((r) != rr) * 2 + ((c) != rc), the sum of the three table costs
// is multiplied by error_per_bit in unsigned arithmetic, and the result is
// rounded (+ 8192) and shifted right by 14.
// r and c are each evaluated more than once, so avoid side effects in them.
141 : /* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
142 : * from the same math as in mv_err_cost(). */
143 : #define MVC(r, c) \
144 : (mvcost \
145 : ? ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
146 : mvcost[0][((r)-rr)] + mvcost[1][((c)-rc)]) * \
147 : error_per_bit + \
148 : 8192) >> \
149 : 14 \
150 : : 0)
151 :
152 : // convert motion vector component to offset for sv[a]f calc
// Keeps only the low three bits of x (the part that pre() discards with
// its >> 3).
153 0 : static INLINE int sp(int x) { return x & 7; }
154 :
// Returns the address in buf of the position (r >> 3, c >> 3), i.e. r and c
// with their low three bits dropped, using stride as the row pitch.
155 0 : static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
156 0 : return &buf[(r >> 3) * stride + (c >> 3)];
157 : }
158 :
// CHECK_BETTER(v, r, c) evaluates the candidate position (r, c) and stores
// its total cost in v.
//
// The macro is not self-contained. It reads minc, maxc, minr, maxr,
// second_pred, vfp, y, y_stride, z and src_stride from the expanding scope
// (plus everything MVC() needs), and writes thismse, sse, besterr, br, bc,
// *distortion and *sse1.
//
// Behaviour:
//   - (r, c) outside [minr, maxr] x [minc, maxc]: v = INT_MAX, nothing else
//     is touched.
//   - otherwise the error comes from vfp->svf (no second_pred) or vfp->svaf
//     (with second_pred); MVC(r, c) is added; if the sum is below besterr the
//     best position, besterr, *distortion and *sse1 are updated.
//
// The high-bit-depth build forms the sum in an int64_t and saturates v to
// INT_MAX; the other build adds directly in the type of v.
// r and c are evaluated several times, so avoid side effects in them.
159 : #if CONFIG_VP9_HIGHBITDEPTH
160 : /* checks if (r, c) has better score than previous best */
161 : #define CHECK_BETTER(v, r, c) \
162 : if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
163 : int64_t tmpmse; \
164 : if (second_pred == NULL) { \
165 : thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
166 : src_stride, &sse); \
167 : } else { \
168 : thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
169 : src_stride, &sse, second_pred); \
170 : } \
171 : tmpmse = thismse; \
172 : tmpmse += MVC(r, c); \
173 : if (tmpmse >= INT_MAX) { \
174 : v = INT_MAX; \
175 : } else if ((v = (uint32_t)tmpmse) < besterr) { \
176 : besterr = v; \
177 : br = r; \
178 : bc = c; \
179 : *distortion = thismse; \
180 : *sse1 = sse; \
181 : } \
182 : } else { \
183 : v = INT_MAX; \
184 : }
185 : #else
186 : /* checks if (r, c) has better score than previous best */
187 : #define CHECK_BETTER(v, r, c) \
188 : if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
189 : if (second_pred == NULL) \
190 : thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
191 : src_stride, &sse); \
192 : else \
193 : thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
194 : src_stride, &sse, second_pred); \
195 : if ((v = MVC(r, c) + thismse) < besterr) { \
196 : besterr = v; \
197 : br = r; \
198 : bc = c; \
199 : *distortion = thismse; \
200 : *sse1 = sse; \
201 : } \
202 : } else { \
203 : v = INT_MAX; \
204 : }
205 :
206 : #endif
// FIRST_LEVEL_CHECKS probes the four axis neighbours of (tr, tc) at distance
// hstep with CHECK_BETTER, then exactly one diagonal neighbour.
// whichdir encodes the cheaper side per axis: bit 0 is 0 when left < right
// (else 1), bit 1 is 0 when up < down (else 2); the switch maps that to the
// diagonal lying between the two cheaper axis neighbours.
// Uses tr, tc, hstep and whichdir from the expanding scope, plus everything
// CHECK_BETTER needs.
207 : #define FIRST_LEVEL_CHECKS \
208 : { \
209 : unsigned int left, right, up, down, diag; \
210 : CHECK_BETTER(left, tr, tc - hstep); \
211 : CHECK_BETTER(right, tr, tc + hstep); \
212 : CHECK_BETTER(up, tr - hstep, tc); \
213 : CHECK_BETTER(down, tr + hstep, tc); \
214 : whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
215 : switch (whichdir) { \
216 : case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
217 : case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
218 : case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
219 : case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
220 : } \
221 : }
222 :
// SECOND_LEVEL_CHECKS runs extra CHECK_BETTER probes chosen by how the best
// position (br, bc) moved relative to the centre (tr, tc) of the preceding
// first-level pass:
//   - moved on both axes: two probes that extend the move by one more step
//     along a single axis each;
//   - moved on the column axis only: two probes at twice the column move,
//     one row step up and down, plus one probe selected by whichdir;
//   - moved on the row axis only: the mirrored set of probes;
//   - did not move: nothing is probed.
// Because CHECK_BETTER may update br/bc, later probes in the same branch use
// the kr/kc captured at the start of the branch, not the live best position.
// Uses tr, tc, br, bc, hstep and whichdir from the expanding scope.
223 : #define SECOND_LEVEL_CHECKS \
224 : { \
225 : int kr, kc; \
226 : unsigned int second; \
227 : if (tr != br && tc != bc) { \
228 : kr = br - tr; \
229 : kc = bc - tc; \
230 : CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
231 : CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
232 : } else if (tr == br && tc != bc) { \
233 : kc = bc - tc; \
234 : CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
235 : CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
236 : switch (whichdir) { \
237 : case 0: \
238 : case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
239 : case 2: \
240 : case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
241 : } \
242 : } else if (tr != br && tc == bc) { \
243 : kr = br - tr; \
244 : CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
245 : CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
246 : switch (whichdir) { \
247 : case 0: \
248 : case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
249 : case 1: \
250 : case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
251 : } \
252 : } \
253 : }
254 :
255 : // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST is a rewrite of
256 : // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
257 : // later in the same way.
// Unlike SECOND_LEVEL_CHECKS, this macro does not declare kr and kc: they
// must already exist (and hold a value) in the expanding scope. Only the
// component matching the axis along which (br, bc) differs from (tr, tc) is
// overwritten here; the other keeps its incoming value.
// The best position is snapshotted in br0/bc0, one probe is made along each
// axis from that snapshot, and the combined (diagonal) probe is made only if
// one of the two axis probes improved the best position.
// The assert requires (tr, tc) and (br, bc) to share a row or a column.
258 : #define SECOND_LEVEL_CHECKS_BEST \
259 : { \
260 : unsigned int second; \
261 : int br0 = br; \
262 : int bc0 = bc; \
263 : assert(tr == br || tc == bc); \
264 : if (tr == br && tc != bc) { \
265 : kc = bc - tc; \
266 : } else if (tr != br && tc == bc) { \
267 : kr = br - tr; \
268 : } \
269 : CHECK_BETTER(second, br0 + kr, bc0); \
270 : CHECK_BETTER(second, br0, bc0 + kc); \
271 : if (br0 != br || bc0 != bc) { \
272 : CHECK_BETTER(second, br0 + kr, bc0 + kc); \
273 : } \
274 : }
275 :
// SETUP_SUBPEL_SEARCH declares the locals shared by the sub-pixel search
// functions. It expects x, bestmv, ref_mv and iters_per_step to be in scope.
//
// Points worth knowing when reading the callers:
//   - offset is computed from bestmv BEFORE bestmv is scaled, so it addresses
//     the incoming (unscaled) position in the prediction buffer;
//   - br/bc/tr/tc start at bestmv * 8, and the search window
//     [minr, maxr] x [minc, maxc] is x->mv_limits * 8 intersected with
//     ref_mv +/- MV_MAX;
//   - halfiters, quarteriters and eighthiters are all copies of
//     iters_per_step;
//   - side effect: the last two statements multiply bestmv->row and
//     bestmv->col by 8 in place.
// Because it ends with statements, it must come after all other declarations
// in a C89-style block.
276 : #define SETUP_SUBPEL_SEARCH \
277 : const uint8_t *const z = x->plane[0].src.buf; \
278 : const int src_stride = x->plane[0].src.stride; \
279 : const MACROBLOCKD *xd = &x->e_mbd; \
280 : unsigned int besterr = UINT_MAX; \
281 : unsigned int sse; \
282 : unsigned int whichdir; \
283 : int thismse; \
284 : const unsigned int halfiters = iters_per_step; \
285 : const unsigned int quarteriters = iters_per_step; \
286 : const unsigned int eighthiters = iters_per_step; \
287 : const int y_stride = xd->plane[0].pre[0].stride; \
288 : const int offset = bestmv->row * y_stride + bestmv->col; \
289 : const uint8_t *const y = xd->plane[0].pre[0].buf; \
290 : \
291 : int rr = ref_mv->row; \
292 : int rc = ref_mv->col; \
293 : int br = bestmv->row * 8; \
294 : int bc = bestmv->col * 8; \
295 : int hstep = 4; \
296 : const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX); \
297 : const int maxc = VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX); \
298 : const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX); \
299 : const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX); \
300 : int tr = br; \
301 : int tc = bc; \
302 : \
303 : bestmv->row *= 8; \
304 : bestmv->col *= 8;
305 :
// Computes the starting error for a sub-pixel search at the position
// y + offset in the prediction buffer.
//
// The distortion comes from vfp->vf() against src. When second_pred is not
// NULL, the prediction is first averaged with second_pred into a local
// 64x64 buffer (w x h region, row pitch w) and that buffer is compared
// instead. The raw vf() result is written to *distortion (and vf() itself
// fills *sse1); the return value is that result plus
// mv_err_cost(bestmv, ref_mv, ...).
//
// The high-bit-depth build accumulates in 64 bits and saturates the return
// value at UINT_MAX; the other build adds in 32 bits with no saturation.
306 0 : static unsigned int setup_center_error(
307 : const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
308 : int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
309 : const uint8_t *const src, const int src_stride, const uint8_t *const y,
310 : int y_stride, const uint8_t *second_pred, int w, int h, int offset,
311 : int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) {
312 : #if CONFIG_VP9_HIGHBITDEPTH
313 : uint64_t besterr;
314 : if (second_pred != NULL) {
// The averaging buffer uses 16-bit samples when the current frame buffer is
// flagged YV12_FLAG_HIGHBITDEPTH, and 8-bit samples otherwise.
315 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
316 : DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
317 : vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
318 : y_stride);
319 : besterr =
320 : vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
321 : } else {
322 : DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
323 : vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
324 : besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
325 : }
326 : } else {
327 : besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
328 : }
329 : *distortion = (uint32_t)besterr;
330 : besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
// Saturate instead of truncating when the 64-bit sum does not fit.
331 : if (besterr >= UINT_MAX) return UINT_MAX;
332 : return (uint32_t)besterr;
333 : #else
334 : uint32_t besterr;
// xd is only needed by the high-bit-depth variant above.
335 : (void)xd;
336 0 : if (second_pred != NULL) {
337 : DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
338 0 : vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
339 0 : besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
340 : } else {
341 0 : besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
342 : }
343 0 : *distortion = besterr;
344 0 : besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
345 0 : return besterr;
346 : #endif // CONFIG_VP9_HIGHBITDEPTH
347 : }
348 :
// Integer division of n by d rounded to the nearest integer instead of
// truncated: half of d is added to n when n and d have the same sign and
// subtracted when their signs differ, before the truncating divide.
// d must not be 0; the function does not check for it.
349 0 : static INLINE int divide_and_round(const int n, const int d) {
350 0 : return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
351 : }
352 :
// Returns non-zero when cost_list[0] is strictly smaller than each of
// cost_list[1] .. cost_list[4]. Reads exactly five entries.
353 0 : static INLINE int is_cost_list_wellbehaved(int *cost_list) {
354 0 : return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
355 0 : cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
356 : }
357 :
358 : // Returns surface minima estimate at given precision in 1/2^n bits.
359 : // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
360 : // For a given set of costs S0, S1, S2, S3, S4 at points
361 : // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
362 : // the solution for the location of the minima (x0, y0) is given by:
363 : // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
364 : // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
365 : // The code below is an integerized version of that.
// The column estimate is written to *ic and the row estimate to *ir, each
// scaled by 2^bits (the 1/2 factor is folded in as 1 << (bits - 1)).
// A denominator of zero would divide by zero in divide_and_round(). The call
// sites in this file only reach here after is_cost_list_wellbehaved()
// succeeded, which makes both denominators strictly positive.
366 0 : static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
367 0 : *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
368 0 : (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
369 0 : *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
370 0 : (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
371 0 : }
372 :
// Sub-pixel "search" that performs no refinement: it only evaluates the
// error at the incoming position via setup_center_error() and returns it.
// It shares the signature of the other sub-pixel search functions, so most
// parameters are unused here.
// Side effect: SETUP_SUBPEL_SEARCH multiplies *bestmv by 8 in place.
// Returns UINT_MAX when the scaled bestmv differs from ref_mv by more than
// MAX_FULL_PEL_VAL << 3 in either component.
373 0 : uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
374 : const MV *ref_mv, int allow_hp,
375 : int error_per_bit,
376 : const vp9_variance_fn_ptr_t *vfp,
377 : int forced_stop, int iters_per_step,
378 : int *cost_list, int *mvjcost, int *mvcost[2],
379 : uint32_t *distortion, uint32_t *sse1,
380 : const uint8_t *second_pred, int w, int h) {
381 0 : SETUP_SUBPEL_SEARCH;
382 0 : besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
383 : src_stride, y, y_stride, second_pred, w, h,
384 : offset, mvjcost, mvcost, sse1, distortion);
// Everything below is declared by SETUP_SUBPEL_SEARCH or passed in but never
// used by this variant; the casts silence unused-variable warnings.
385 : (void)halfiters;
386 : (void)quarteriters;
387 : (void)eighthiters;
388 : (void)whichdir;
389 : (void)allow_hp;
390 : (void)forced_stop;
391 : (void)hstep;
392 : (void)rr;
393 : (void)rc;
394 : (void)minr;
395 : (void)minc;
396 : (void)maxr;
397 : (void)maxc;
398 : (void)tr;
399 : (void)tc;
400 : (void)sse;
401 : (void)thismse;
402 : (void)cost_list;
403 :
// Reject a result that lies too far from the reference vector.
404 0 : if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
405 0 : (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
406 0 : return UINT_MAX;
407 :
408 0 : return besterr;
409 : }
410 :
// Sub-pixel refinement with the most aggressive pruning of the variants in
// this file.
//
// When cost_list is supplied, none of its five entries is INT_MAX and the
// centre is strictly the cheapest, a single position predicted by
// get_cost_surf_min() is probed in place of the half- and quarter-step
// passes. Otherwise the usual FIRST_LEVEL_CHECKS / SECOND_LEVEL_CHECKS passes
// run at hstep 4 and, unless forced_stop == 2, at hstep 2.
// A final pass runs only when allow_hp, use_mv_hp(ref_mv) and
// forced_stop == 0 all hold.
//
// On return *bestmv holds the best position found (already scaled by 8 by
// SETUP_SUBPEL_SEARCH), *distortion and *sse1 belong to that position, and
// the return value is its total cost, or UINT_MAX when the result differs
// from ref_mv by more than MAX_FULL_PEL_VAL << 3 in either component.
411 0 : uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
412 : const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
413 : int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
414 : int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
415 : uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
416 : int h) {
417 0 : SETUP_SUBPEL_SEARCH;
418 0 : besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
419 : src_stride, y, y_stride, second_pred, w, h,
420 : offset, mvjcost, mvcost, sse1, distortion);
// Warning suppression only: several of these names are in fact used below.
421 : (void)halfiters;
422 : (void)quarteriters;
423 : (void)eighthiters;
424 : (void)whichdir;
425 : (void)allow_hp;
426 : (void)forced_stop;
427 : (void)hstep;
428 :
429 0 : if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
430 0 : cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
431 0 : cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
432 : int ir, ic;
433 : unsigned int minpt;
// bits == 2 yields the estimate in quarter units; doubling it converts to
// the eighth units used by tr/tc.
434 0 : get_cost_surf_min(cost_list, &ir, &ic, 2);
435 0 : if (ir != 0 || ic != 0) {
436 0 : CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
437 : }
438 : } else {
439 0 : FIRST_LEVEL_CHECKS;
440 0 : if (halfiters > 1) {
441 0 : SECOND_LEVEL_CHECKS;
442 : }
443 :
// Re-centre on the best position before the finer pass.
444 0 : tr = br;
445 0 : tc = bc;
446 :
447 : // Each subsequent iteration checks at least one point in common with
448 : // the last iteration could be 2 ( if diag selected) 1/4 pel
449 : // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
450 0 : if (forced_stop != 2) {
451 0 : hstep >>= 1;
452 0 : FIRST_LEVEL_CHECKS;
453 0 : if (quarteriters > 1) {
454 0 : SECOND_LEVEL_CHECKS;
455 : }
456 : }
457 : }
458 :
459 0 : tr = br;
460 0 : tc = bc;
461 :
// NOTE(review): on the cost-list path above hstep is never halved, so this
// pass then runs with hstep == 2 rather than 1 -- confirm this is intended.
462 0 : if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
463 0 : hstep >>= 1;
464 0 : FIRST_LEVEL_CHECKS;
465 0 : if (eighthiters > 1) {
466 0 : SECOND_LEVEL_CHECKS;
467 : }
468 : }
469 :
470 0 : bestmv->row = br;
471 0 : bestmv->col = bc;
472 :
// Reject a result that lies too far from the reference vector.
473 0 : if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
474 0 : (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
475 0 : return UINT_MAX;
476 :
477 0 : return besterr;
478 : }
479 :
// Sub-pixel refinement that can replace only the first (hstep 4) pass with a
// single predicted probe.
//
// When cost_list is supplied, none of its five entries is INT_MAX and the
// centre is strictly the cheapest, one position predicted by
// get_cost_surf_min() is probed; otherwise FIRST_LEVEL_CHECKS /
// SECOND_LEVEL_CHECKS run at hstep 4. The hstep 2 pass then always runs
// unless forced_stop == 2, and the hstep 1 pass runs only when allow_hp,
// use_mv_hp(ref_mv) and forced_stop == 0 all hold.
//
// On return *bestmv holds the best position found (already scaled by 8 by
// SETUP_SUBPEL_SEARCH), *distortion and *sse1 belong to that position, and
// the return value is its total cost, or UINT_MAX when the result differs
// from ref_mv by more than MAX_FULL_PEL_VAL << 3 in either component.
480 0 : uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
481 : const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
482 : int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
483 : int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
484 : uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
485 : int h) {
486 0 : SETUP_SUBPEL_SEARCH;
487 0 : besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
488 : src_stride, y, y_stride, second_pred, w, h,
489 : offset, mvjcost, mvcost, sse1, distortion);
490 0 : if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
491 0 : cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
492 0 : cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
493 : unsigned int minpt;
494 : int ir, ic;
// bits == 1 yields the estimate in half units; multiplying by hstep (4 at
// this point) converts it to the eighth units used by tr/tc.
495 0 : get_cost_surf_min(cost_list, &ir, &ic, 1);
496 0 : if (ir != 0 || ic != 0) {
497 0 : CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
498 : }
499 : } else {
500 0 : FIRST_LEVEL_CHECKS;
501 0 : if (halfiters > 1) {
502 0 : SECOND_LEVEL_CHECKS;
503 : }
504 : }
505 :
506 : // Each subsequent iteration checks at least one point in common with
507 : // the last iteration could be 2 ( if diag selected) 1/4 pel
508 :
509 : // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
510 0 : if (forced_stop != 2) {
// Re-centre on the best position before the finer pass.
511 0 : tr = br;
512 0 : tc = bc;
513 0 : hstep >>= 1;
514 0 : FIRST_LEVEL_CHECKS;
515 0 : if (quarteriters > 1) {
516 0 : SECOND_LEVEL_CHECKS;
517 : }
518 : }
519 :
520 0 : if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
521 0 : tr = br;
522 0 : tc = bc;
523 0 : hstep >>= 1;
524 0 : FIRST_LEVEL_CHECKS;
525 0 : if (eighthiters > 1) {
526 0 : SECOND_LEVEL_CHECKS;
527 : }
528 : }
529 : // These lines ensure static analysis doesn't warn that
530 : // tr and tc aren't used after the above point.
531 : (void)tr;
532 : (void)tc;
533 :
534 0 : bestmv->row = br;
535 0 : bestmv->col = bc;
536 :
// Reject a result that lies too far from the reference vector.
537 0 : if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
538 0 : (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
539 0 : return UINT_MAX;
540 :
541 0 : return besterr;
542 : }
543 :
// Sub-pixel refinement that uses cost_list to prune the first (hstep 4) pass
// from five probes to three.
//
// When cost_list is supplied and none of its five entries is INT_MAX, the
// two pairwise comparisons cost_list[1] vs [3] and cost_list[2] vs [4] pick
// one horizontal neighbour, one vertical neighbour and the diagonal between
// them; only those three are probed. Unlike the "_more" variants this path
// does not require is_cost_list_wellbehaved(). Otherwise FIRST_LEVEL_CHECKS /
// SECOND_LEVEL_CHECKS run at hstep 4. The hstep 2 pass then runs unless
// forced_stop == 2, and the hstep 1 pass only when allow_hp,
// use_mv_hp(ref_mv) and forced_stop == 0 all hold.
//
// On return *bestmv holds the best position found (already scaled by 8 by
// SETUP_SUBPEL_SEARCH), *distortion and *sse1 belong to that position, and
// the return value is its total cost, or UINT_MAX when the result differs
// from ref_mv by more than MAX_FULL_PEL_VAL << 3 in either component.
544 0 : uint32_t vp9_find_best_sub_pixel_tree_pruned(
545 : const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
546 : int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
547 : int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
548 : uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
549 : int h) {
550 0 : SETUP_SUBPEL_SEARCH;
551 0 : besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
552 : src_stride, y, y_stride, second_pred, w, h,
553 : offset, mvjcost, mvcost, sse1, distortion);
554 0 : if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
555 0 : cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
556 0 : cost_list[4] != INT_MAX) {
557 : unsigned int left, right, up, down, diag;
// bit 0: 0 when cost_list[1] < cost_list[3], else 1 (column direction);
// bit 1: 0 when cost_list[2] < cost_list[4], else 2 (row direction).
558 0 : whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
559 0 : (cost_list[2] < cost_list[4] ? 0 : 2);
560 0 : switch (whichdir) {
561 : case 0:
562 0 : CHECK_BETTER(left, tr, tc - hstep);
563 0 : CHECK_BETTER(down, tr + hstep, tc);
564 0 : CHECK_BETTER(diag, tr + hstep, tc - hstep);
565 0 : break;
566 : case 1:
567 0 : CHECK_BETTER(right, tr, tc + hstep);
568 0 : CHECK_BETTER(down, tr + hstep, tc);
569 0 : CHECK_BETTER(diag, tr + hstep, tc + hstep);
570 0 : break;
571 : case 2:
572 0 : CHECK_BETTER(left, tr, tc - hstep);
573 0 : CHECK_BETTER(up, tr - hstep, tc);
574 0 : CHECK_BETTER(diag, tr - hstep, tc - hstep);
575 0 : break;
576 : case 3:
577 0 : CHECK_BETTER(right, tr, tc + hstep);
578 0 : CHECK_BETTER(up, tr - hstep, tc);
579 0 : CHECK_BETTER(diag, tr - hstep, tc + hstep);
580 0 : break;
581 : }
582 : } else {
583 0 : FIRST_LEVEL_CHECKS;
584 0 : if (halfiters > 1) {
585 0 : SECOND_LEVEL_CHECKS;
586 : }
587 : }
588 :
// Re-centre on the best position before the finer pass.
589 0 : tr = br;
590 0 : tc = bc;
591 :
592 : // Each subsequent iteration checks at least one point in common with
593 : // the last iteration could be 2 ( if diag selected) 1/4 pel
594 :
595 : // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
596 0 : if (forced_stop != 2) {
597 0 : hstep >>= 1;
598 0 : FIRST_LEVEL_CHECKS;
599 0 : if (quarteriters > 1) {
600 0 : SECOND_LEVEL_CHECKS;
601 : }
602 0 : tr = br;
603 0 : tc = bc;
604 : }
605 :
606 0 : if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
607 0 : hstep >>= 1;
608 0 : FIRST_LEVEL_CHECKS;
609 0 : if (eighthiters > 1) {
610 0 : SECOND_LEVEL_CHECKS;
611 : }
612 0 : tr = br;
613 0 : tc = bc;
614 : }
615 : // These lines ensure static analysis doesn't warn that
616 : // tr and tc aren't used after the above point.
617 : (void)tr;
618 : (void)tc;
619 :
620 0 : bestmv->row = br;
621 0 : bestmv->col = bc;
622 :
// Reject a result that lies too far from the reference vector.
623 0 : if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
624 0 : (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
625 0 : return UINT_MAX;
626 :
627 0 : return besterr;
628 : }
629 :
// Axis-aligned {row, col} probe offsets, four per refinement round, for step
// sizes 4, 2 and 1. vp9_find_best_sub_pixel_tree() walks this table four
// entries at a time.
630 : /* clang-format off */
631 : static const MV search_step_table[12] = {
632 : // left, right, up, down
633 : { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
634 : { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
635 : { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
636 : };
637 : /* clang-format on */
638 :
// Unpruned sub-pixel refinement.
//
// Runs up to three rounds with step sizes 4, 2 and 1. The number of rounds is
// 3 - forced_stop, and it is reduced from 3 to 2 unless both allow_hp and
// use_mv_hp(ref_mv) hold. Each round probes the four axis neighbours from
// search_step_table, then the diagonal between the cheaper horizontal and the
// cheaper vertical neighbour, moves the best position accordingly, and
// optionally runs SECOND_LEVEL_CHECKS_BEST when iters_per_step > 1.
// cost_list is not used by this variant.
//
// Side effect: *bestmv is multiplied by 8 early on and finally overwritten
// with the best position found. *distortion and *sse1 belong to that
// position. Returns its total cost, or UINT_MAX when the result differs from
// ref_mv by more than MAX_FULL_PEL_VAL << 3 in either component.
639 0 : uint32_t vp9_find_best_sub_pixel_tree(
640 : const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
641 : int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
642 : int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
643 : uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
644 : int h) {
645 0 : const uint8_t *const z = x->plane[0].src.buf;
646 0 : const uint8_t *const src_address = z;
647 0 : const int src_stride = x->plane[0].src.stride;
648 0 : const MACROBLOCKD *xd = &x->e_mbd;
649 0 : unsigned int besterr = UINT_MAX;
650 : unsigned int sse;
651 : int thismse;
652 0 : const int y_stride = xd->plane[0].pre[0].stride;
// offset is taken from bestmv before it is scaled by 8 below.
653 0 : const int offset = bestmv->row * y_stride + bestmv->col;
654 0 : const uint8_t *const y = xd->plane[0].pre[0].buf;
655 :
656 0 : int rr = ref_mv->row;
657 0 : int rc = ref_mv->col;
658 0 : int br = bestmv->row * 8;
659 0 : int bc = bestmv->col * 8;
660 0 : int hstep = 4;
661 0 : int iter, round = 3 - forced_stop;
662 0 : const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX);
663 0 : const int maxc = VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX);
664 0 : const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX);
665 0 : const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX);
666 0 : int tr = br;
667 0 : int tc = bc;
668 0 : const MV *search_step = search_step_table;
669 0 : int idx, best_idx = -1;
670 : unsigned int cost_array[5];
671 : int kr, kc;
672 :
// Drop the finest (step 1) round unless high-precision vectors are usable.
673 0 : if (!(allow_hp && use_mv_hp(ref_mv)))
674 0 : if (round == 3) round = 2;
675 :
676 0 : bestmv->row *= 8;
677 0 : bestmv->col *= 8;
678 :
679 0 : besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
680 : src_stride, y, y_stride, second_pred, w, h,
681 : offset, mvjcost, mvcost, sse1, distortion);
682 :
683 : (void)cost_list; // to silence compiler warning
684 :
685 0 : for (iter = 0; iter < round; ++iter) {
686 : // Check vertical and horizontal sub-pixel positions.
687 0 : for (idx = 0; idx < 4; ++idx) {
688 0 : tr = br + search_step[idx].row;
689 0 : tc = bc + search_step[idx].col;
690 0 : if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
691 0 : const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
692 : MV this_mv;
693 0 : this_mv.row = tr;
694 0 : this_mv.col = tc;
695 0 : if (second_pred == NULL)
696 0 : thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
697 : src_stride, &sse);
698 : else
699 0 : thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
700 : src_address, src_stride, &sse, second_pred);
701 0 : cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
702 : mvcost, error_per_bit);
703 :
704 0 : if (cost_array[idx] < besterr) {
705 0 : best_idx = idx;
706 0 : besterr = cost_array[idx];
707 0 : *distortion = thismse;
708 0 : *sse1 = sse;
709 : }
710 : } else {
// Out-of-window probes get the worst possible cost.
711 0 : cost_array[idx] = UINT_MAX;
712 : }
713 : }
714 :
715 : // Check diagonal sub-pixel position
// Ties go to the negative direction (left / up) because of the <=.
716 0 : kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
717 0 : kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
718 :
719 0 : tc = bc + kc;
720 0 : tr = br + kr;
721 0 : if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
722 0 : const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
723 0 : MV this_mv = { tr, tc };
724 0 : if (second_pred == NULL)
725 0 : thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
726 : src_stride, &sse);
727 : else
728 0 : thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
729 : src_stride, &sse, second_pred);
730 0 : cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
731 : error_per_bit);
732 :
733 0 : if (cost_array[4] < besterr) {
734 0 : best_idx = 4;
735 0 : besterr = cost_array[4];
736 0 : *distortion = thismse;
737 0 : *sse1 = sse;
738 : }
739 : } else {
// idx is 4 here (left over from the loop above), so this writes
// cost_array[4], the diagonal slot.
740 0 : cost_array[idx] = UINT_MAX;
741 : }
742 :
// Move the centre to whichever probe won this round, if any.
743 0 : if (best_idx < 4 && best_idx >= 0) {
744 0 : br += search_step[best_idx].row;
745 0 : bc += search_step[best_idx].col;
746 0 : } else if (best_idx == 4) {
747 0 : br = tr;
748 0 : bc = tc;
749 : }
750 :
// The macro expands to a braced block that uses the kr/kc set above.
751 0 : if (iters_per_step > 1 && best_idx != -1) SECOND_LEVEL_CHECKS_BEST;
752 :
753 0 : tr = br;
754 0 : tc = bc;
755 :
// Advance to the next, finer group of four offsets.
756 0 : search_step += 4;
757 0 : hstep >>= 1;
758 0 : best_idx = -1;
759 : }
760 :
761 : // Each subsequent iteration checks at least one point in common with
762 : // the last iteration could be 2 ( if diag selected) 1/4 pel
763 :
764 : // These lines ensure static analysis doesn't warn that
765 : // tr and tc aren't used after the above point.
766 : (void)tr;
767 : (void)tc;
768 :
769 0 : bestmv->row = br;
770 0 : bestmv->col = bc;
771 :
// Reject a result that lies too far from the reference vector.
772 0 : if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
773 0 : (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
774 0 : return UINT_MAX;
775 :
776 0 : return besterr;
777 : }
778 :
// The sub-pixel helper macros end here. CHECK_BETTER is defined again further
// down with a different, argument-less form for the integer-pel searches.
779 : #undef MVC
780 : #undef CHECK_BETTER
781 :
// Returns 1 when the square of half-width range centred on (row, col) lies
// entirely inside mv_limits, and 0 otherwise.
// The four comparisons are combined with bitwise &, so all of them are always
// evaluated (no short-circuiting).
782 0 : static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
783 : int range) {
784 0 : return ((row - range) >= mv_limits->row_min) &
785 0 : ((row + range) <= mv_limits->row_max) &
786 0 : ((col - range) >= mv_limits->col_min) &
787 0 : ((col + range) <= mv_limits->col_max);
788 : }
789 :
// Returns non-zero when mv lies inside mv_limits, with all four bounds
// inclusive.
790 0 : static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
791 0 : return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
792 0 : (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
793 : }
794 :
// Argument-less CHECK_BETTER for the integer-pel pattern search.
// It expects thissad, bestsad, use_mvcost, x, this_mv, fcenter_mv,
// sad_per_bit, best_site and i in the expanding scope.
// The motion-vector cost is only added (and only when use_mvcost is set) if
// the raw SAD already beats bestsad; the comparison is then repeated with the
// cost included. On success bestsad and best_site are updated.
// Note that thissad is modified in place when the cost is added.
795 : #define CHECK_BETTER \
796 : { \
797 : if (thissad < bestsad) { \
798 : if (use_mvcost) \
799 : thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
800 : if (thissad < bestsad) { \
801 : bestsad = thissad; \
802 : best_site = i; \
803 : } \
804 : } \
805 : }
806 :
// Dimensions of the candidate tables passed to the pattern search.
807 : #define MAX_PATTERN_SCALES 11
808 : #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale
809 : #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
810 :
811 : // Calculate and return a sad+mvcost list around an integer best pel.
// Fills cost_list[0..4]: entry 0 is the cost at best_mv and entries 1..4 are
// the costs at its four neighbours, in the order of the neighbors table below
// ({row, col} offsets: (0,-1), (1,0), (0,1), (-1,0)).
// Every entry is fn_ptr->vf() plus a motion-vector cost relative to
// ref_mv >> 3. Neighbours outside x->mv_limits get INT_MAX.
// NOTE(review): the centre entry adds mvsad_err_cost() scaled by sadpb while
// the neighbours add mv_err_cost() scaled by x->errorperbit -- confirm that
// mixing the two cost models within one list is intended.
812 0 : static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
813 : int sadpb,
814 : const vp9_variance_fn_ptr_t *fn_ptr,
815 : const MV *best_mv, int *cost_list) {
816 : static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
817 0 : const struct buf_2d *const what = &x->plane[0].src;
818 0 : const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
819 0 : const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
820 0 : int br = best_mv->row;
821 0 : int bc = best_mv->col;
822 : MV this_mv;
823 : int i;
824 : unsigned int sse;
825 :
826 0 : this_mv.row = br;
827 0 : this_mv.col = bc;
828 0 : cost_list[0] =
829 0 : fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
830 0 : in_what->stride, &sse) +
831 0 : mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
// Fast path: all four neighbours are known to be inside the limits, so the
// per-neighbour range test can be skipped.
832 0 : if (check_bounds(&x->mv_limits, br, bc, 1)) {
833 0 : for (i = 0; i < 4; i++) {
// This this_mv shadows the function-level one declared above.
834 0 : const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
835 0 : cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
836 : get_buf_from_mv(in_what, &this_mv),
837 0 : in_what->stride, &sse) +
838 0 : mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
839 : x->mvcost, x->errorperbit);
840 : }
841 : } else {
// Slow path: test each neighbour individually and mark the ones outside the
// limits as unusable.
842 0 : for (i = 0; i < 4; i++) {
843 0 : const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
844 0 : if (!is_mv_in(&x->mv_limits, &this_mv))
845 0 : cost_list[i + 1] = INT_MAX;
846 : else
847 0 : cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
848 : get_buf_from_mv(in_what, &this_mv),
849 0 : in_what->stride, &sse) +
850 0 : mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
851 : x->mvcost, x->errorperbit);
852 : }
853 : }
854 0 : }
855 :
856 : // Generic pattern search over integer-pel positions at multiple scales.
857 : // Scale s evaluates the first num_candidates[s] offsets of candidates[s],
858 : // with 1 << s passed to check_bounds() as the range. ref_mv is clamped to
859 : // x->mv_limits in place and used as the start point; the final position is
860 : // written to *best_mv and the function returns bestsad.
861 0 : static int vp9_pattern_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
862 : int sad_per_bit, int do_init_search,
863 : int *cost_list, const vp9_variance_fn_ptr_t *vfp,
864 : int use_mvcost, const MV *center_mv, MV *best_mv,
865 : const int num_candidates[MAX_PATTERN_SCALES],
866 : const MV candidates[MAX_PATTERN_SCALES]
867 : [MAX_PATTERN_CANDIDATES]) {
868 0 : const MACROBLOCKD *const xd = &x->e_mbd;
869 : static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
870 : 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,  // search_param i -> largest scale 10 - i
871 : };
872 : int i, s, t;
873 0 : const struct buf_2d *const what = &x->plane[0].src;
874 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
875 : int br, bc;  // row / col of the current search center
876 0 : int bestsad = INT_MAX;
877 : int thissad;
878 0 : int k = -1;  // index in candidates[s] of the most recent winning candidate
879 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };  // presumably 1/8-pel -> integer-pel; confirm units of center_mv
880 0 : int best_init_s = search_param_to_steps[search_param];
881 : // adjust ref_mv to make sure it is within MV range
882 0 : clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
883 : x->mv_limits.row_min, x->mv_limits.row_max);
884 0 : br = ref_mv->row;
885 0 : bc = ref_mv->col;
886 :
887 : // Work out the start point for the search
888 0 : bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
889 0 : in_what->stride) +
890 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
891 :
892 : // Search all possible scales up to the search param around the center point
893 : // pick the scale of the point that is best as the starting scale of
894 : // further steps around it.
895 0 : if (do_init_search) {
896 0 : s = best_init_s;
897 0 : best_init_s = -1;  // stays -1 if no candidate at any scale sets best_site
898 0 : for (t = 0; t <= s; ++t) {
899 0 : int best_site = -1;
900 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {  // helper not visible here; when true the per-candidate is_mv_in() test is skipped
901 0 : for (i = 0; i < num_candidates[t]; i++) {
902 0 : const MV this_mv = { br + candidates[t][i].row,
903 0 : bc + candidates[t][i].col };
904 0 : thissad =
905 0 : vfp->sdf(what->buf, what->stride,
906 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
907 0 : CHECK_BETTER  // macro defined outside this view; the code below relies on it setting best_site (and presumably bestsad, using use_mvcost) when thissad wins
908 : }
909 : } else {
910 0 : for (i = 0; i < num_candidates[t]; i++) {
911 0 : const MV this_mv = { br + candidates[t][i].row,
912 0 : bc + candidates[t][i].col };
913 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
914 0 : thissad =
915 0 : vfp->sdf(what->buf, what->stride,
916 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
917 0 : CHECK_BETTER
918 : }
919 : }
920 0 : if (best_site == -1) {
921 0 : continue;
922 : } else {
923 0 : best_init_s = t;  // remember the last scale (and candidate) that produced a winner
924 0 : k = best_site;
925 : }
926 : }
927 0 : if (best_init_s != -1) {
928 0 : br += candidates[best_init_s][k].row;  // move the center to the winning candidate
929 0 : bc += candidates[best_init_s][k].col;
930 : }
931 : }
932 :
933 : // If the center point is still the best, just skip this and move to
934 : // the refinement step.
935 0 : if (best_init_s != -1) {
936 0 : int best_site = -1;
937 0 : s = best_init_s;
938 :
939 : do {
940 : // No need to search all 6 points the 1st time if initial search was used
941 0 : if (!do_init_search || s != best_init_s) {
942 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
943 0 : for (i = 0; i < num_candidates[s]; i++) {
944 0 : const MV this_mv = { br + candidates[s][i].row,
945 0 : bc + candidates[s][i].col };
946 0 : thissad =
947 0 : vfp->sdf(what->buf, what->stride,
948 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
949 0 : CHECK_BETTER
950 : }
951 : } else {
952 0 : for (i = 0; i < num_candidates[s]; i++) {
953 0 : const MV this_mv = { br + candidates[s][i].row,
954 0 : bc + candidates[s][i].col };
955 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
956 0 : thissad =
957 0 : vfp->sdf(what->buf, what->stride,
958 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
959 0 : CHECK_BETTER
960 : }
961 : }
962 :
963 0 : if (best_site == -1) {
964 0 : continue;  // jumps to the while (s--) test, i.e. moves on to the next smaller scale
965 : } else {
966 0 : br += candidates[s][best_site].row;
967 0 : bc += candidates[s][best_site].col;
968 0 : k = best_site;
969 : }
970 : }
971 :
972 : do {  // refine at scale s: re-test only candidate k and its two ring neighbours until nothing improves
973 : int next_chkpts_indices[PATTERN_CANDIDATES_REF];
974 0 : best_site = -1;
975 0 : next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;  // previous index, wrapping
976 0 : next_chkpts_indices[1] = k;
977 0 : next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;  // next index, wrapping
978 :
979 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
980 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
981 0 : const MV this_mv = {
982 0 : br + candidates[s][next_chkpts_indices[i]].row,
983 0 : bc + candidates[s][next_chkpts_indices[i]].col
984 : };
985 0 : thissad =
986 0 : vfp->sdf(what->buf, what->stride,
987 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
988 0 : CHECK_BETTER
989 : }
990 : } else {
991 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
992 0 : const MV this_mv = {
993 0 : br + candidates[s][next_chkpts_indices[i]].row,
994 0 : bc + candidates[s][next_chkpts_indices[i]].col
995 : };
996 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
997 0 : thissad =
998 0 : vfp->sdf(what->buf, what->stride,
999 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1000 0 : CHECK_BETTER
1001 : }
1002 : }
1003 :
1004 0 : if (best_site != -1) {
1005 0 : k = next_chkpts_indices[best_site];  // here best_site indexes next_chkpts_indices, not candidates[s]
1006 0 : br += candidates[s][k].row;
1007 0 : bc += candidates[s][k].col;
1008 : }
1009 0 : } while (best_site != -1);
1010 0 : } while (s--);  // scales best_init_s down to 0
1011 : }
1012 :
1013 : // Returns the one-away integer pel sad values around the best as follows:
1014 : // cost_list[0]: cost at the best integer pel
1015 : // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel
1016 : // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
1017 : // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel
1018 : // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel
1019 0 : if (cost_list) {
1020 0 : const MV best_mv = { br, bc };  // local value; shadows the best_mv output parameter inside this block
1021 0 : calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
1022 : }
1023 0 : best_mv->row = br;
1024 0 : best_mv->col = bc;
1025 0 : return bestsad;
1026 : }
1027 :
1028 : // Variant of vp9_pattern_search() that fills cost_list itself. When scale 0 has
1029 : // exactly 4 candidates and cost_list is non-NULL, the scale-0 stage records the sdf value of each neighbor it tests in cost_list.
1030 : // TODO(debargha): Merge this function with the one above. Also remove
1031 : // use_mvcost option since it is always 1, to save unnecessary branches.
1032 0 : static int vp9_pattern_search_sad(
1033 : const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
1034 : int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
1035 : int use_mvcost, const MV *center_mv, MV *best_mv,
1036 : const int num_candidates[MAX_PATTERN_SCALES],
1037 : const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
1038 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1039 : static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
1040 : 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,  // search_param i -> largest scale 10 - i
1041 : };
1042 : int i, s, t;
1043 0 : const struct buf_2d *const what = &x->plane[0].src;
1044 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1045 : int br, bc;  // row / col of the current search center
1046 0 : int bestsad = INT_MAX;
1047 : int thissad;
1048 0 : int k = -1;  // index in candidates[s] of the most recent winning candidate
1049 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };  // presumably 1/8-pel -> integer-pel; confirm units of center_mv
1050 0 : int best_init_s = search_param_to_steps[search_param];
1051 : // adjust ref_mv to make sure it is within MV range
1052 0 : clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1053 : x->mv_limits.row_min, x->mv_limits.row_max);
1054 0 : br = ref_mv->row;
1055 0 : bc = ref_mv->col;
1056 0 : if (cost_list != NULL) {  // preset to INT_MAX so the final stage can tell whether the scale-0 stage wrote cost_list[0]
1057 0 : cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
1058 : INT_MAX;
1059 : }
1060 :
1061 : // Work out the start point for the search
1062 0 : bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
1063 0 : in_what->stride) +
1064 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1065 :
1066 : // Search all possible scales up to the search param around the center point
1067 : // pick the scale of the point that is best as the starting scale of
1068 : // further steps around it.
1069 0 : if (do_init_search) {
1070 0 : s = best_init_s;
1071 0 : best_init_s = -1;  // stays -1 if no candidate at any scale sets best_site
1072 0 : for (t = 0; t <= s; ++t) {
1073 0 : int best_site = -1;
1074 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {  // helper not visible here; when true the per-candidate is_mv_in() test is skipped
1075 0 : for (i = 0; i < num_candidates[t]; i++) {
1076 0 : const MV this_mv = { br + candidates[t][i].row,
1077 0 : bc + candidates[t][i].col };
1078 0 : thissad =
1079 0 : vfp->sdf(what->buf, what->stride,
1080 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1081 0 : CHECK_BETTER  // macro defined outside this view; the code below relies on it setting best_site (and presumably bestsad, using use_mvcost) when thissad wins
1082 : }
1083 : } else {
1084 0 : for (i = 0; i < num_candidates[t]; i++) {
1085 0 : const MV this_mv = { br + candidates[t][i].row,
1086 0 : bc + candidates[t][i].col };
1087 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1088 0 : thissad =
1089 0 : vfp->sdf(what->buf, what->stride,
1090 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1091 0 : CHECK_BETTER
1092 : }
1093 : }
1094 0 : if (best_site == -1) {
1095 0 : continue;
1096 : } else {
1097 0 : best_init_s = t;  // remember the last scale (and candidate) that produced a winner
1098 0 : k = best_site;
1099 : }
1100 : }
1101 0 : if (best_init_s != -1) {
1102 0 : br += candidates[best_init_s][k].row;  // move the center to the winning candidate
1103 0 : bc += candidates[best_init_s][k].col;
1104 : }
1105 : }
1106 :
1107 : // If the center point is still the best, just skip this and move to
1108 : // the refinement step.
1109 0 : if (best_init_s != -1) {
1110 0 : int do_sad = (num_candidates[0] == 4 && cost_list != NULL);  // 1: scale 0 is handled separately below so neighbor values can be recorded
1111 0 : int best_site = -1;
1112 0 : s = best_init_s;
1113 :
1114 0 : for (; s >= do_sad; s--) {  // scales best_init_s down to do_sad (scale 0 is excluded when do_sad is 1)
1115 0 : if (!do_init_search || s != best_init_s) {
1116 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1117 0 : for (i = 0; i < num_candidates[s]; i++) {
1118 0 : const MV this_mv = { br + candidates[s][i].row,
1119 0 : bc + candidates[s][i].col };
1120 0 : thissad =
1121 0 : vfp->sdf(what->buf, what->stride,
1122 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1123 0 : CHECK_BETTER
1124 : }
1125 : } else {
1126 0 : for (i = 0; i < num_candidates[s]; i++) {
1127 0 : const MV this_mv = { br + candidates[s][i].row,
1128 0 : bc + candidates[s][i].col };
1129 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1130 0 : thissad =
1131 0 : vfp->sdf(what->buf, what->stride,
1132 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1133 0 : CHECK_BETTER
1134 : }
1135 : }
1136 :
1137 0 : if (best_site == -1) {
1138 0 : continue;  // next smaller scale
1139 : } else {
1140 0 : br += candidates[s][best_site].row;
1141 0 : bc += candidates[s][best_site].col;
1142 0 : k = best_site;
1143 : }
1144 : }
1145 :
1146 : do {  // refine at scale s: re-test only candidate k and its two ring neighbours until nothing improves
1147 : int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1148 0 : best_site = -1;
1149 0 : next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;  // previous index, wrapping
1150 0 : next_chkpts_indices[1] = k;
1151 0 : next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;  // next index, wrapping
1152 :
1153 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1154 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1155 0 : const MV this_mv = {
1156 0 : br + candidates[s][next_chkpts_indices[i]].row,
1157 0 : bc + candidates[s][next_chkpts_indices[i]].col
1158 : };
1159 0 : thissad =
1160 0 : vfp->sdf(what->buf, what->stride,
1161 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1162 0 : CHECK_BETTER
1163 : }
1164 : } else {
1165 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1166 0 : const MV this_mv = {
1167 0 : br + candidates[s][next_chkpts_indices[i]].row,
1168 0 : bc + candidates[s][next_chkpts_indices[i]].col
1169 : };
1170 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1171 0 : thissad =
1172 0 : vfp->sdf(what->buf, what->stride,
1173 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1174 0 : CHECK_BETTER
1175 : }
1176 : }
1177 :
1178 0 : if (best_site != -1) {
1179 0 : k = next_chkpts_indices[best_site];  // here best_site indexes next_chkpts_indices, not candidates[s]
1180 0 : br += candidates[s][k].row;
1181 0 : bc += candidates[s][k].col;
1182 : }
1183 0 : } while (best_site != -1);
1184 : }
1185 :
1186 : // Note: If we enter the if below, then cost_list must be non-NULL.
1187 0 : if (s == 0) {  // the loop above leaves s == do_sad - 1, so this holds only when do_sad was 1
1188 0 : cost_list[0] = bestsad;
1189 0 : if (!do_init_search || s != best_init_s) {
1190 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1191 0 : for (i = 0; i < num_candidates[s]; i++) {
1192 0 : const MV this_mv = { br + candidates[s][i].row,
1193 0 : bc + candidates[s][i].col };
1194 0 : cost_list[i + 1] = thissad =  // the raw sdf value is stored before CHECK_BETTER runs
1195 0 : vfp->sdf(what->buf, what->stride,
1196 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1197 0 : CHECK_BETTER
1198 : }
1199 : } else {
1200 0 : for (i = 0; i < num_candidates[s]; i++) {
1201 0 : const MV this_mv = { br + candidates[s][i].row,
1202 0 : bc + candidates[s][i].col };
1203 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) continue;  // slot keeps the INT_MAX preset from function entry
1204 0 : cost_list[i + 1] = thissad =
1205 0 : vfp->sdf(what->buf, what->stride,
1206 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1207 0 : CHECK_BETTER
1208 : }
1209 : }
1210 :
1211 0 : if (best_site != -1) {
1212 0 : br += candidates[s][best_site].row;
1213 0 : bc += candidates[s][best_site].col;
1214 0 : k = best_site;
1215 : }
1216 : }
1217 0 : while (best_site != -1) {  // keep re-centering at scale 0 while the previous pass found a better neighbor
1218 : int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1219 0 : best_site = -1;
1220 0 : next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1221 0 : next_chkpts_indices[1] = k;
1222 0 : next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1223 0 : cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;  // neighbor slots are rebuilt for the new center
1224 0 : cost_list[((k + 2) % 4) + 1] = cost_list[0];  // the slot opposite to move direction k is the previous center
1225 0 : cost_list[0] = bestsad;
1226 :
1227 0 : if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1228 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1229 0 : const MV this_mv = {
1230 0 : br + candidates[s][next_chkpts_indices[i]].row,
1231 0 : bc + candidates[s][next_chkpts_indices[i]].col
1232 : };
1233 0 : cost_list[next_chkpts_indices[i] + 1] = thissad =
1234 0 : vfp->sdf(what->buf, what->stride,
1235 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1236 0 : CHECK_BETTER
1237 : }
1238 : } else {
1239 0 : for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1240 0 : const MV this_mv = {
1241 0 : br + candidates[s][next_chkpts_indices[i]].row,
1242 0 : bc + candidates[s][next_chkpts_indices[i]].col
1243 : };
1244 0 : if (!is_mv_in(&x->mv_limits, &this_mv)) {
1245 0 : cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
1246 0 : continue;
1247 : }
1248 0 : cost_list[next_chkpts_indices[i] + 1] = thissad =
1249 0 : vfp->sdf(what->buf, what->stride,
1250 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1251 0 : CHECK_BETTER
1252 : }
1253 : }
1254 :
1255 0 : if (best_site != -1) {
1256 0 : k = next_chkpts_indices[best_site];
1257 0 : br += candidates[s][k].row;
1258 0 : bc += candidates[s][k].col;
1259 : }
1260 : }
1261 : }
1262 : }
1263 :
1264 : // Returns the one-away integer pel sad values around the best as follows:
1265 : // cost_list[0]: sad at the best integer pel
1266 : // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel
1267 : // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
1268 : // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel
1269 : // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel
1270 0 : if (cost_list) {
1271 : static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
1272 0 : if (cost_list[0] == INT_MAX) {  // still the entry preset, i.e. not written by the scale-0 stage: compute the 4 neighbors here (no MV cost is added on this path)
1273 0 : cost_list[0] = bestsad;
1274 0 : if (check_bounds(&x->mv_limits, br, bc, 1)) {
1275 0 : for (i = 0; i < 4; i++) {
1276 0 : const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1277 0 : cost_list[i + 1] =
1278 0 : vfp->sdf(what->buf, what->stride,
1279 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1280 : }
1281 : } else {
1282 0 : for (i = 0; i < 4; i++) {
1283 0 : const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1284 0 : if (!is_mv_in(&x->mv_limits, &this_mv))
1285 0 : cost_list[i + 1] = INT_MAX;
1286 : else
1287 0 : cost_list[i + 1] =
1288 0 : vfp->sdf(what->buf, what->stride,
1289 : get_buf_from_mv(in_what, &this_mv), in_what->stride);
1290 : }
1291 : }
1292 : } else {
1293 0 : if (use_mvcost) {  // slots recorded during the scale-0 stage get the MV cost added here
1294 0 : for (i = 0; i < 4; i++) {
1295 0 : const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1296 0 : if (cost_list[i + 1] != INT_MAX) {
1297 0 : cost_list[i + 1] +=  // NOTE(review): one slot may hold a former cost_list[0] (a bestsad value, which for the start point already includes mvsad_err_cost), so MV cost may be added twice there - confirm intended
1298 0 : mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1299 : }
1300 : }
1301 : }
1302 : }
1303 : }
1304 0 : best_mv->row = br;
1305 0 : best_mv->col = bc;
1306 0 : return bestsad;
1307 : }
1308 :
1309 0 : int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
1310 : const MV *center_mv, const vp9_variance_fn_ptr_t *vfp,
1311 : int use_mvcost) {  // Returns vfp->vf() between the source block and the block at best_mv, plus mv_err_cost() relative to center_mv when use_mvcost is non-zero.
1312 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1313 0 : const struct buf_2d *const what = &x->plane[0].src;
1314 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1315 0 : const MV mv = { best_mv->row * 8, best_mv->col * 8 };  // best_mv scaled by 8 for mv_err_cost(); presumably integer-pel -> 1/8-pel units (confirm)
1316 : uint32_t unused;  // receives vf()'s last out-parameter; the value is discarded
1317 : #if CONFIG_VP9_HIGHBITDEPTH
1318 : uint64_t err =  // sum is formed in 64 bits so it can be saturated below
1319 : vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
1320 : in_what->stride, &unused);
1321 : err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1322 : x->errorperbit)
1323 : : 0);
1324 : if (err >= INT_MAX) return INT_MAX;  // saturate instead of overflowing the int return value
1325 : return (int)err;
1326 : #else
1327 0 : return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
1328 0 : in_what->stride, &unused) +
1329 0 : (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1330 : x->errorperbit)
1331 0 : : 0);
1332 : #endif
1333 : }
1334 :
1335 0 : int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
1336 : const MV *center_mv, const uint8_t *second_pred,
1337 : const vp9_variance_fn_ptr_t *vfp, int use_mvcost) {  // Same shape as vp9_get_mvpred_var(), but calls vfp->svaf() with second_pred; adds mv_err_cost() when use_mvcost is non-zero.
1338 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1339 0 : const struct buf_2d *const what = &x->plane[0].src;
1340 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1341 0 : const MV mv = { best_mv->row * 8, best_mv->col * 8 };  // best_mv scaled by 8 for mv_err_cost(); presumably integer-pel -> 1/8-pel units (confirm)
1342 : unsigned int unused;  // receives svaf()'s out-parameter; the value is discarded
1343 :
1344 0 : return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,  // the two 0 arguments are presumably zero sub-pixel offsets - confirm against the svaf prototype
1345 0 : what->buf, what->stride, &unused, second_pred) +
1346 0 : (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,  // NOTE(review): no INT_MAX saturation here, unlike the CONFIG_VP9_HIGHBITDEPTH path of vp9_get_mvpred_var() - confirm whether needed
1347 : x->errorperbit)
1348 0 : : 0);
1349 : }
1350 :
1351 0 : static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
1352 : int sad_per_bit, int do_init_search, int *cost_list,
1353 : const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
1354 : const MV *center_mv, MV *best_mv) {  // Runs vp9_pattern_search() with the hexagon tables below; all arguments are forwarded unchanged.
1355 : // First scale has 8-closest points, the rest have 6 points in hex shape
1356 : // at increasing scales
1357 : static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
1358 : 6, 6, 6, 6, 6 };
1359 : // Note that the largest candidate step at each scale is 2^scale
1360 : /* clang-format off */
1361 : static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {  // entries are { row, col } offsets; rows with 6 initializers leave the remaining slots zero, and only the first hex_num_candidates[s] are read
1362 : { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
1363 : { -1, 0 } },
1364 : { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
1365 : { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
1366 : { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
1367 : { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
1368 : { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
1369 : { -32, 0 } },
1370 : { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
1371 : { -64, 0 } },
1372 : { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
1373 : { -128, 0 } },
1374 : { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
1375 : { -256, 0 } },
1376 : { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
1377 : { -512, 0 } },
1378 : { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
1379 : { -512, 1024 }, { -1024, 0 } }
1380 : };
1381 : /* clang-format on */
1382 0 : return vp9_pattern_search(
1383 : x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
1384 : use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates);
1385 : }
1386 :
1387 0 : static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
1388 : int sad_per_bit, int do_init_search, int *cost_list,
1389 : const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
1390 : const MV *center_mv, MV *best_mv) {  // Runs vp9_pattern_search_sad() with the diamond tables below; all arguments are forwarded unchanged.
1391 : // First scale has 4-closest points, the rest have 8 points in diamond
1392 : // shape at increasing scales
1393 : static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
1394 : 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,  // 4 candidates at scale 0 is the condition vp9_pattern_search_sad() tests before recording cost_list during the search
1395 : };
1396 : // Note that the largest candidate step at each scale is 2^scale
1397 : /* clang-format off */
1398 : static const MV
1399 : bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {  // entries are { row, col } offsets; scale 0 is in the order left, bottom, right, top used by the cost_list layout
1400 : { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
1401 : { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
1402 : { -1, 1 }, { -2, 0 } },
1403 : { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
1404 : { -2, 2 }, { -4, 0 } },
1405 : { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
1406 : { -4, 4 }, { -8, 0 } },
1407 : { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
1408 : { -8, 8 }, { -16, 0 } },
1409 : { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
1410 : { 0, 32 }, { -16, 16 }, { -32, 0 } },
1411 : { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
1412 : { 0, 64 }, { -32, 32 }, { -64, 0 } },
1413 : { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
1414 : { 0, 128 }, { -64, 64 }, { -128, 0 } },
1415 : { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
1416 : { 0, 256 }, { -128, 128 }, { -256, 0 } },
1417 : { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
1418 : { 0, 512 }, { -256, 256 }, { -512, 0 } },
1419 : { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
1420 : { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } }
1421 : };
1422 : /* clang-format on */
1423 0 : return vp9_pattern_search_sad(
1424 : x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
1425 : use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates);
1426 : }
1427 :
1428 0 : static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
1429 : int sad_per_bit, int do_init_search, int *cost_list,
1430 : const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
1431 : const MV *center_mv, MV *best_mv) {  // Runs vp9_pattern_search() with the square tables below; all arguments are forwarded unchanged.
1432 : // All scales have 8 closest points in square shape
1433 : static const int square_num_candidates[MAX_PATTERN_SCALES] = {
1434 : 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1435 : };
1436 : // Note that the largest candidate step at each scale is 2^scale
1437 : /* clang-format off */
1438 : static const MV
1439 : square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {  // entries are { row, col } offsets; scale s uses the corners and edge midpoints of a square of half-width 1 << s
1440 : { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
1441 : { -1, 1 }, { -1, 0 } },
1442 : { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
1443 : { -2, 2 }, { -2, 0 } },
1444 : { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
1445 : { -4, 4 }, { -4, 0 } },
1446 : { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
1447 : { -8, 8 }, { -8, 0 } },
1448 : { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
1449 : { 0, 16 }, { -16, 16 }, { -16, 0 } },
1450 : { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
1451 : { 0, 32 }, { -32, 32 }, { -32, 0 } },
1452 : { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
1453 : { 0, 64 }, { -64, 64 }, { -64, 0 } },
1454 : { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
1455 : { 0, 128 }, { -128, 128 }, { -128, 0 } },
1456 : { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
1457 : { 0, 256 }, { -256, 256 }, { -256, 0 } },
1458 : { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
1459 : { 0, 512 }, { -512, 512 }, { -512, 0 } },
1460 : { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
1461 : { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } }
1462 : };
1463 : /* clang-format on */
1464 0 : return vp9_pattern_search(
1465 : x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
1466 : use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates);
1467 : }
1468 :
1469 0 : static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
1470 : int sad_per_bit,
1471 : int do_init_search, // must be zero for fast_hex
1472 : int *cost_list, const vp9_variance_fn_ptr_t *vfp,
1473 : int use_mvcost, const MV *center_mv, MV *best_mv) {  // hex_search() with search_param raised to at least MAX_MVSEARCH_STEPS - 2.
1474 0 : return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),  // a larger search_param selects a smaller starting scale (at most 1, assuming MAX_MVSEARCH_STEPS == 11 - confirm)
1475 : sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
1476 : center_mv, best_mv);
1477 : }
1478 :
1479 0 : static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
1480 : int sad_per_bit, int do_init_search, int *cost_list,
1481 : const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
1482 : const MV *center_mv, MV *best_mv) {  // bigdia_search() with search_param raised to at least MAX_MVSEARCH_STEPS - 2.
1483 0 : return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),  // a larger search_param selects a smaller starting scale (at most 1, assuming MAX_MVSEARCH_STEPS == 11 - confirm)
1484 : sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
1485 : center_mv, best_mv);
1486 : }
1487 :
1488 : #undef CHECK_BETTER
1489 :
1490 : // Exhaustive motion search around a given centre position with a given
1491 : // step size. The centre is center_mv clamped to x->mv_limits; offsets span [-range, range] clipped to the limits. The best position is written to *best_mv and its cost is returned. MV cost is measured against ref_mv.
1492 0 : static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
1493 : int range, int step, int sad_per_bit,
1494 : const vp9_variance_fn_ptr_t *fn_ptr,
1495 : const MV *center_mv) {
1496 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1497 0 : const struct buf_2d *const what = &x->plane[0].src;
1498 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1499 0 : MV fcenter_mv = { center_mv->row, center_mv->col };  // copied as-is (no >> 3 here), so center_mv is presumably already in integer-pel units - confirm against callers
1500 0 : unsigned int best_sad = INT_MAX;
1501 : int r, c, i;
1502 : int start_col, end_col, start_row, end_row;
1503 0 : int col_step = (step > 1) ? step : 4;  // with step == 1 columns advance by 4 because four positions are evaluated per iteration
1504 :
1505 0 : assert(step >= 1);
1506 :
1507 0 : clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1508 : x->mv_limits.row_min, x->mv_limits.row_max);
1509 0 : *best_mv = fcenter_mv;
1510 0 : best_sad =
1511 0 : fn_ptr->sdf(what->buf, what->stride,
1512 : get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
1513 0 : mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
1514 0 : start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);  // offsets relative to fcenter_mv, clipped so every tested MV stays inside mv_limits
1515 0 : start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
1516 0 : end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
1517 0 : end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
1518 :
1519 0 : for (r = start_row; r <= end_row; r += step) {
1520 0 : for (c = start_col; c <= end_col; c += col_step) {
1521 : // Step > 1 means we are not checking every location in this pass.
1522 0 : if (step > 1) {
1523 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
1524 0 : unsigned int sad =
1525 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
1526 : in_what->stride);
1527 0 : if (sad < best_sad) {  // MV cost is only computed when the raw value already beats the best
1528 0 : sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1529 0 : if (sad < best_sad) {
1530 0 : best_sad = sad;
1531 0 : *best_mv = mv;
1532 : }
1533 : }
1534 : } else {
1535 : // 4 sads in a single call if we are checking every location
1536 0 : if (c + 3 <= end_col) {
1537 : unsigned int sads[4];
1538 : const uint8_t *addrs[4];
1539 0 : for (i = 0; i < 4; ++i) {
1540 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1541 0 : addrs[i] = get_buf_from_mv(in_what, &mv);
1542 : }
1543 0 : fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
1544 :
1545 0 : for (i = 0; i < 4; ++i) {
1546 0 : if (sads[i] < best_sad) {
1547 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1548 0 : const unsigned int sad =
1549 0 : sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1550 0 : if (sad < best_sad) {
1551 0 : best_sad = sad;
1552 0 : *best_mv = mv;
1553 : }
1554 : }
1555 : }
1556 : } else {
1557 0 : for (i = 0; i < end_col - c; ++i) {  // NOTE(review): visits columns c .. end_col - 1, so column end_col is not evaluated on this path although the enclosing loop bound treats end_col as inclusive - confirm whether intended
1558 0 : const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1559 0 : unsigned int sad =
1560 0 : fn_ptr->sdf(what->buf, what->stride,
1561 : get_buf_from_mv(in_what, &mv), in_what->stride);
1562 0 : if (sad < best_sad) {
1563 0 : sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1564 0 : if (sad < best_sad) {
1565 0 : best_sad = sad;
1566 0 : *best_mv = mv;
1567 : }
1568 : }
1569 : }
1570 : }
1571 : }
1572 : }
1573 : }
1574 :
1575 0 : return best_sad;  // unsigned int converted to the int return type
1576 : }
1577 :
1578 0 : int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
1579 : MV *ref_mv, MV *best_mv, int search_param,
1580 : int sad_per_bit, int *num00,
1581 : const vp9_variance_fn_ptr_t *fn_ptr,
1582 : const MV *center_mv) {  // Step search over the site tables in cfg, starting at ref_mv (clamped in place). Writes the best position to *best_mv, a count to *num00 (see below), and returns the best cost.
1583 : int i, j, step;
1584 :
1585 0 : const MACROBLOCKD *const xd = &x->e_mbd;
1586 0 : uint8_t *what = x->plane[0].src.buf;
1587 0 : const int what_stride = x->plane[0].src.stride;
1588 : const uint8_t *in_what;
1589 0 : const int in_what_stride = xd->plane[0].pre[0].stride;
1590 : const uint8_t *best_address;  // buffer address corresponding to the current *best_mv
1591 :
1592 0 : unsigned int bestsad = INT_MAX;
1593 0 : int best_site = -1;
1594 0 : int last_site = -1;
1595 :
1596 : int ref_row;
1597 : int ref_col;
1598 :
1599 : // search_param determines the length of the initial step and hence the number
1600 : // of iterations.
1601 : // 0 = initial step (MAX_FIRST_STEP) pel
1602 : // 1 = (MAX_FIRST_STEP/2) pel,
1603 : // 2 = (MAX_FIRST_STEP/4) pel...
1604 : // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
1605 0 : const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];  // site MV offsets, starting at the first site of step search_param
1606 0 : const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];  // matching buffer offsets; added to best_address below
1607 0 : const int tot_steps = cfg->total_steps - search_param;
1608 :
1609 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };  // presumably 1/8-pel -> integer-pel; confirm units of center_mv
1610 0 : clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1611 : x->mv_limits.row_min, x->mv_limits.row_max);
1612 0 : ref_row = ref_mv->row;
1613 0 : ref_col = ref_mv->col;
1614 0 : *num00 = 0;
1615 0 : best_mv->row = ref_row;
1616 0 : best_mv->col = ref_col;
1617 :
1618 : // Work out the start point for the search
1619 0 : in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
1620 0 : best_address = in_what;
1621 :
1622 : // Check the starting position
1623 0 : bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1624 0 : mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
1625 :
1626 0 : i = 0;  // running site index; it advances across all steps and is never reset inside the loop
1627 :
1628 0 : for (step = 0; step < tot_steps; step++) {
1629 0 : int all_in = 1, t;
1630 :
1631 : // All_in is true if every one of the points we are checking are within
1632 : // the bounds of the image. Only sites i..i+3 are tested - presumably the step's extreme up/down/left/right offsets (depends on how cfg was built; confirm).
1633 0 : all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
1634 0 : all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
1635 0 : all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
1636 0 : all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
1637 :
1638 : // If all the pixels are within the bounds we don't check whether the
1639 : // search point is valid in this loop, otherwise we check each point
1640 : // for validity..
1641 0 : if (all_in) {
1642 : unsigned int sad_array[4];
1643 :
1644 0 : for (j = 0; j < cfg->searches_per_step; j += 4) {  // sites are processed four at a time; assumes searches_per_step is a multiple of 4 - TODO confirm
1645 : unsigned char const *block_offset[4];
1646 :
1647 0 : for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
1648 :
1649 0 : fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1650 : sad_array);
1651 :
1652 0 : for (t = 0; t < 4; t++, i++) {
1653 0 : if (sad_array[t] < bestsad) {  // MV cost is only computed when the raw value already beats the best
1654 0 : const MV this_mv = { best_mv->row + ss_mv[i].row,
1655 0 : best_mv->col + ss_mv[i].col };
1656 0 : sad_array[t] +=
1657 0 : mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1658 0 : if (sad_array[t] < bestsad) {
1659 0 : bestsad = sad_array[t];
1660 0 : best_site = i;
1661 : }
1662 : }
1663 : }
1664 : }
1665 : } else {
1666 0 : for (j = 0; j < cfg->searches_per_step; j++) {
1667 : // Trap illegal vectors
1668 0 : const MV this_mv = { best_mv->row + ss_mv[i].row,
1669 0 : best_mv->col + ss_mv[i].col };
1670 :
1671 0 : if (is_mv_in(&x->mv_limits, &this_mv)) {
1672 0 : const uint8_t *const check_here = ss_os[i] + best_address;
1673 0 : unsigned int thissad =
1674 0 : fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1675 :
1676 0 : if (thissad < bestsad) {
1677 0 : thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1678 0 : if (thissad < bestsad) {
1679 0 : bestsad = thissad;
1680 0 : best_site = i;
1681 : }
1682 : }
1683 : }
1684 0 : i++;
1685 : }
1686 : }
1687 0 : if (best_site != last_site) {  // best_site changed, i.e. a site in this step beat the previous best
1688 0 : best_mv->row += ss_mv[best_site].row;
1689 0 : best_mv->col += ss_mv[best_site].col;
1690 0 : best_address += ss_os[best_site];
1691 0 : last_site = best_site;
1692 : #if defined(NEW_DIAMOND_SEARCH)
1693 : while (1) {  // optional: keep moving in the same winning direction while it keeps improving
1694 : const MV this_mv = { best_mv->row + ss_mv[best_site].row,
1695 : best_mv->col + ss_mv[best_site].col };
1696 : if (is_mv_in(&x->mv_limits, &this_mv)) {
1697 : const uint8_t *const check_here = ss_os[best_site] + best_address;
1698 : unsigned int thissad =
1699 : fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1700 : if (thissad < bestsad) {
1701 : thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1702 : if (thissad < bestsad) {
1703 : bestsad = thissad;
1704 : best_mv->row += ss_mv[best_site].row;
1705 : best_mv->col += ss_mv[best_site].col;
1706 : best_address += ss_os[best_site];
1707 : continue;
1708 : }
1709 : }
1710 : }
1711 : break;
1712 : }
1713 : #endif
1714 0 : } else if (best_address == in_what) {  // no move in this step and the best is still the start position
1715 0 : (*num00)++;
1716 : }
1717 : }
1718 0 : return bestsad;  // unsigned int converted to the int return type
1719 : }
1720 :
// Coarse-to-fine 1-D search for the offset into |ref| that best matches |src|.
//
// Candidate offsets lie in [0, bw] with bw = 4 << bwl. Each candidate is scored
// with vpx_vector_var(&ref[offset], src, bwl); a lower score wins and ties keep
// the earlier candidate. The search first samples every 16th offset and then
// probes one position on either side of the current best at distances
// 8, 4, 2 and 1.
//
// Returns the winning offset re-centred around zero, i.e. offset - bw / 2.
static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  const int bw = 4 << bwl;
  int best_sad = INT_MAX;
  int center = 0;
  int pos, step;

  // Coarse pass: stride of 16 across the whole range.
  for (pos = 0; pos <= bw; pos += 16) {
    const int sad = vpx_vector_var(&ref[pos], src, bwl);
    if (sad < best_sad) {
      best_sad = sad;
      center = pos;
    }
  }

  // Refinement passes. Both probes of a pass are taken relative to the best
  // position found by the previous pass (|anchor|), not relative to each
  // other.
  for (step = 8; step >= 1; step >>= 1) {
    const int anchor = center;
    int side;

    for (side = -1; side <= 1; side += 2) {
      const int cand = anchor + side * step;
      int sad;

      // Skip candidates that fall outside the searchable range.
      if (cand < 0 || cand > bw) continue;

      sad = vpx_vector_var(&ref[cand], src, bwl);
      if (sad < best_sad) {
        best_sad = sad;
        center = cand;
      }
    }
  }

  return center - (bw >> 1);
}
1785 :
1786 : static const MV search_pos[4] = {
1787 : { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
1788 : };
1789 :
1790 0 : unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
1791 : BLOCK_SIZE bsize, int mi_row,
1792 : int mi_col) {
1793 0 : MACROBLOCKD *xd = &x->e_mbd;
1794 0 : MODE_INFO *mi = xd->mi[0];
1795 0 : struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
1796 : DECLARE_ALIGNED(16, int16_t, hbuf[128]);
1797 : DECLARE_ALIGNED(16, int16_t, vbuf[128]);
1798 : DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
1799 : DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
1800 : int idx;
1801 0 : const int bw = 4 << b_width_log2_lookup[bsize];
1802 0 : const int bh = 4 << b_height_log2_lookup[bsize];
1803 0 : const int search_width = bw << 1;
1804 0 : const int search_height = bh << 1;
1805 0 : const int src_stride = x->plane[0].src.stride;
1806 0 : const int ref_stride = xd->plane[0].pre[0].stride;
1807 : uint8_t const *ref_buf, *src_buf;
1808 0 : MV *tmp_mv = &xd->mi[0]->mv[0].as_mv;
1809 : unsigned int best_sad, tmp_sad, this_sad[4];
1810 : MV this_mv;
1811 0 : const int norm_factor = 3 + (bw >> 5);
1812 0 : const YV12_BUFFER_CONFIG *scaled_ref_frame =
1813 0 : vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
1814 :
1815 0 : if (scaled_ref_frame) {
1816 : int i;
1817 : // Swap out the reference frame for a version that's been scaled to
1818 : // match the resolution of the current frame, allowing the existing
1819 : // motion search code to be used without additional modifications.
1820 0 : for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
1821 0 : vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
1822 : }
1823 :
1824 : #if CONFIG_VP9_HIGHBITDEPTH
1825 : {
1826 : unsigned int this_sad;
1827 : tmp_mv->row = 0;
1828 : tmp_mv->col = 0;
1829 : this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
1830 : xd->plane[0].pre[0].buf, ref_stride);
1831 :
1832 : if (scaled_ref_frame) {
1833 : int i;
1834 : for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
1835 : }
1836 : return this_sad;
1837 : }
1838 : #endif
1839 :
1840 : // Set up prediction 1-D reference set
1841 0 : ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
1842 0 : for (idx = 0; idx < search_width; idx += 16) {
1843 0 : vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
1844 0 : ref_buf += 16;
1845 : }
1846 :
1847 0 : ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
1848 0 : for (idx = 0; idx < search_height; ++idx) {
1849 0 : vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
1850 0 : ref_buf += ref_stride;
1851 : }
1852 :
1853 : // Set up src 1-D reference set
1854 0 : for (idx = 0; idx < bw; idx += 16) {
1855 0 : src_buf = x->plane[0].src.buf + idx;
1856 0 : vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
1857 : }
1858 :
1859 0 : src_buf = x->plane[0].src.buf;
1860 0 : for (idx = 0; idx < bh; ++idx) {
1861 0 : src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
1862 0 : src_buf += src_stride;
1863 : }
1864 :
1865 : // Find the best match per 1-D search
1866 0 : tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
1867 0 : tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
1868 :
1869 0 : this_mv = *tmp_mv;
1870 0 : src_buf = x->plane[0].src.buf;
1871 0 : ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
1872 0 : best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
1873 :
1874 : {
1875 0 : const uint8_t *const pos[4] = {
1876 0 : ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
1877 : };
1878 :
1879 0 : cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
1880 : }
1881 :
1882 0 : for (idx = 0; idx < 4; ++idx) {
1883 0 : if (this_sad[idx] < best_sad) {
1884 0 : best_sad = this_sad[idx];
1885 0 : tmp_mv->row = search_pos[idx].row + this_mv.row;
1886 0 : tmp_mv->col = search_pos[idx].col + this_mv.col;
1887 : }
1888 : }
1889 :
1890 0 : if (this_sad[0] < this_sad[3])
1891 0 : this_mv.row -= 1;
1892 : else
1893 0 : this_mv.row += 1;
1894 :
1895 0 : if (this_sad[1] < this_sad[2])
1896 0 : this_mv.col -= 1;
1897 : else
1898 0 : this_mv.col += 1;
1899 :
1900 0 : ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
1901 :
1902 0 : tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
1903 0 : if (best_sad > tmp_sad) {
1904 0 : *tmp_mv = this_mv;
1905 0 : best_sad = tmp_sad;
1906 : }
1907 :
1908 0 : tmp_mv->row *= 8;
1909 0 : tmp_mv->col *= 8;
1910 :
1911 0 : if (scaled_ref_frame) {
1912 : int i;
1913 0 : for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
1914 : }
1915 :
1916 0 : return best_sad;
1917 : }
1918 :
1919 : // Runs sequence of diamond searches in smaller steps for RD.
1920 : /* do_refine: If last step (1-away) of n-step search doesn't pick the center
1921 : point as the best match, we will do a final 1-away diamond
1922 : refining search */
1923 0 : static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
1924 : int step_param, int sadpb, int further_steps,
1925 : int do_refine, int *cost_list,
1926 : const vp9_variance_fn_ptr_t *fn_ptr,
1927 : const MV *ref_mv, MV *dst_mv) {
1928 : MV temp_mv;
1929 0 : int thissme, n, num00 = 0;
1930 0 : int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
1931 : step_param, sadpb, &n, fn_ptr, ref_mv);
1932 0 : if (bestsme < INT_MAX)
1933 0 : bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1934 0 : *dst_mv = temp_mv;
1935 :
1936 : // If there won't be more n-step search, check to see if refining search is
1937 : // needed.
1938 0 : if (n > further_steps) do_refine = 0;
1939 :
1940 0 : while (n < further_steps) {
1941 0 : ++n;
1942 :
1943 0 : if (num00) {
1944 0 : num00--;
1945 : } else {
1946 0 : thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
1947 : step_param + n, sadpb, &num00, fn_ptr,
1948 : ref_mv);
1949 0 : if (thissme < INT_MAX)
1950 0 : thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
1951 :
1952 : // check to see if refining search is needed.
1953 0 : if (num00 > further_steps - n) do_refine = 0;
1954 :
1955 0 : if (thissme < bestsme) {
1956 0 : bestsme = thissme;
1957 0 : *dst_mv = temp_mv;
1958 : }
1959 : }
1960 : }
1961 :
1962 : // final 1-away diamond refining search
1963 0 : if (do_refine) {
1964 0 : const int search_range = 8;
1965 0 : MV best_mv = *dst_mv;
1966 0 : thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
1967 : ref_mv);
1968 0 : if (thissme < INT_MAX)
1969 0 : thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
1970 0 : if (thissme < bestsme) {
1971 0 : bestsme = thissme;
1972 0 : *dst_mv = best_mv;
1973 : }
1974 : }
1975 :
1976 : // Return cost list.
1977 0 : if (cost_list) {
1978 0 : calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
1979 : }
1980 0 : return bestsme;
1981 : }
1982 :
1983 : #define MIN_RANGE 7
1984 : #define MAX_RANGE 256
1985 : #define MIN_INTERVAL 1
1986 : // Runs an limited range exhaustive mesh search using a pattern set
1987 : // according to the encode speed profile.
1988 0 : static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
1989 : MV *centre_mv_full, int sadpb, int *cost_list,
1990 : const vp9_variance_fn_ptr_t *fn_ptr,
1991 : const MV *ref_mv, MV *dst_mv) {
1992 0 : const SPEED_FEATURES *const sf = &cpi->sf;
1993 0 : MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
1994 0 : MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
1995 : int bestsme;
1996 : int i;
1997 0 : int interval = sf->mesh_patterns[0].interval;
1998 0 : int range = sf->mesh_patterns[0].range;
1999 : int baseline_interval_divisor;
2000 :
2001 : // Keep track of number of exhaustive calls (this frame in this thread).
2002 0 : ++(*x->ex_search_count_ptr);
2003 :
2004 : // Trap illegal values for interval and range for this function.
2005 0 : if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
2006 : (interval > range))
2007 0 : return INT_MAX;
2008 :
2009 0 : baseline_interval_divisor = range / interval;
2010 :
2011 : // Check size of proposed first range against magnitude of the centre
2012 : // value used as a starting point.
2013 0 : range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
2014 0 : range = VPXMIN(range, MAX_RANGE);
2015 0 : interval = VPXMAX(interval, range / baseline_interval_divisor);
2016 :
2017 : // initial search
2018 0 : bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
2019 : sadpb, fn_ptr, &temp_mv);
2020 :
2021 0 : if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
2022 : // Progressive searches with range and step size decreasing each time
2023 : // till we reach a step size of 1. Then break out.
2024 0 : for (i = 1; i < MAX_MESH_STEP; ++i) {
2025 : // First pass with coarser step and longer range
2026 0 : bestsme = exhuastive_mesh_search(
2027 : x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
2028 : sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
2029 :
2030 0 : if (sf->mesh_patterns[i].interval == 1) break;
2031 : }
2032 : }
2033 :
2034 0 : if (bestsme < INT_MAX)
2035 0 : bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2036 0 : *dst_mv = temp_mv;
2037 :
2038 : // Return cost list.
2039 0 : if (cost_list) {
2040 0 : calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
2041 : }
2042 0 : return bestsme;
2043 : }
2044 :
2045 0 : int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
2046 : int sad_per_bit, int distance,
2047 : const vp9_variance_fn_ptr_t *fn_ptr,
2048 : const MV *center_mv, MV *best_mv) {
2049 : int r, c;
2050 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2051 0 : const struct buf_2d *const what = &x->plane[0].src;
2052 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2053 0 : const int row_min = VPXMAX(ref_mv->row - distance, x->mv_limits.row_min);
2054 0 : const int row_max = VPXMIN(ref_mv->row + distance, x->mv_limits.row_max);
2055 0 : const int col_min = VPXMAX(ref_mv->col - distance, x->mv_limits.col_min);
2056 0 : const int col_max = VPXMIN(ref_mv->col + distance, x->mv_limits.col_max);
2057 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2058 0 : int best_sad =
2059 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2060 0 : in_what->stride) +
2061 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2062 0 : *best_mv = *ref_mv;
2063 :
2064 0 : for (r = row_min; r < row_max; ++r) {
2065 0 : for (c = col_min; c < col_max; ++c) {
2066 0 : const MV mv = { r, c };
2067 0 : const int sad =
2068 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
2069 0 : in_what->stride) +
2070 0 : mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2071 0 : if (sad < best_sad) {
2072 0 : best_sad = sad;
2073 0 : *best_mv = mv;
2074 : }
2075 : }
2076 : }
2077 0 : return best_sad;
2078 : }
2079 :
2080 0 : int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
2081 : int sad_per_bit, int distance,
2082 : const vp9_variance_fn_ptr_t *fn_ptr,
2083 : const MV *center_mv, MV *best_mv) {
2084 : int r;
2085 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2086 0 : const struct buf_2d *const what = &x->plane[0].src;
2087 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2088 0 : const int row_min = VPXMAX(ref_mv->row - distance, x->mv_limits.row_min);
2089 0 : const int row_max = VPXMIN(ref_mv->row + distance, x->mv_limits.row_max);
2090 0 : const int col_min = VPXMAX(ref_mv->col - distance, x->mv_limits.col_min);
2091 0 : const int col_max = VPXMIN(ref_mv->col + distance, x->mv_limits.col_max);
2092 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2093 0 : unsigned int best_sad =
2094 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2095 : in_what->stride) +
2096 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2097 0 : *best_mv = *ref_mv;
2098 :
2099 0 : for (r = row_min; r < row_max; ++r) {
2100 0 : int c = col_min;
2101 0 : const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
2102 :
2103 0 : if (fn_ptr->sdx3f != NULL) {
2104 0 : while ((c + 2) < col_max) {
2105 : int i;
2106 : DECLARE_ALIGNED(16, uint32_t, sads[3]);
2107 :
2108 0 : fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
2109 : sads);
2110 :
2111 0 : for (i = 0; i < 3; ++i) {
2112 0 : unsigned int sad = sads[i];
2113 0 : if (sad < best_sad) {
2114 0 : const MV mv = { r, c };
2115 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2116 0 : if (sad < best_sad) {
2117 0 : best_sad = sad;
2118 0 : *best_mv = mv;
2119 : }
2120 : }
2121 0 : ++check_here;
2122 0 : ++c;
2123 : }
2124 : }
2125 : }
2126 :
2127 0 : while (c < col_max) {
2128 0 : unsigned int sad =
2129 0 : fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
2130 0 : if (sad < best_sad) {
2131 0 : const MV mv = { r, c };
2132 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2133 0 : if (sad < best_sad) {
2134 0 : best_sad = sad;
2135 0 : *best_mv = mv;
2136 : }
2137 : }
2138 0 : ++check_here;
2139 0 : ++c;
2140 : }
2141 : }
2142 :
2143 0 : return best_sad;
2144 : }
2145 :
2146 0 : int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
2147 : int sad_per_bit, int distance,
2148 : const vp9_variance_fn_ptr_t *fn_ptr,
2149 : const MV *center_mv, MV *best_mv) {
2150 : int r;
2151 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2152 0 : const struct buf_2d *const what = &x->plane[0].src;
2153 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2154 0 : const int row_min = VPXMAX(ref_mv->row - distance, x->mv_limits.row_min);
2155 0 : const int row_max = VPXMIN(ref_mv->row + distance, x->mv_limits.row_max);
2156 0 : const int col_min = VPXMAX(ref_mv->col - distance, x->mv_limits.col_min);
2157 0 : const int col_max = VPXMIN(ref_mv->col + distance, x->mv_limits.col_max);
2158 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2159 0 : unsigned int best_sad =
2160 0 : fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2161 : in_what->stride) +
2162 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
2163 0 : *best_mv = *ref_mv;
2164 :
2165 0 : for (r = row_min; r < row_max; ++r) {
2166 0 : int c = col_min;
2167 0 : const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
2168 :
2169 0 : if (fn_ptr->sdx8f != NULL) {
2170 0 : while ((c + 7) < col_max) {
2171 : int i;
2172 : DECLARE_ALIGNED(16, uint32_t, sads[8]);
2173 :
2174 0 : fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
2175 : sads);
2176 :
2177 0 : for (i = 0; i < 8; ++i) {
2178 0 : unsigned int sad = sads[i];
2179 0 : if (sad < best_sad) {
2180 0 : const MV mv = { r, c };
2181 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2182 0 : if (sad < best_sad) {
2183 0 : best_sad = sad;
2184 0 : *best_mv = mv;
2185 : }
2186 : }
2187 0 : ++check_here;
2188 0 : ++c;
2189 : }
2190 : }
2191 : }
2192 :
2193 0 : if (fn_ptr->sdx3f != NULL) {
2194 0 : while ((c + 2) < col_max) {
2195 : int i;
2196 : DECLARE_ALIGNED(16, uint32_t, sads[3]);
2197 :
2198 0 : fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
2199 : sads);
2200 :
2201 0 : for (i = 0; i < 3; ++i) {
2202 0 : unsigned int sad = sads[i];
2203 0 : if (sad < best_sad) {
2204 0 : const MV mv = { r, c };
2205 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2206 0 : if (sad < best_sad) {
2207 0 : best_sad = sad;
2208 0 : *best_mv = mv;
2209 : }
2210 : }
2211 0 : ++check_here;
2212 0 : ++c;
2213 : }
2214 : }
2215 : }
2216 :
2217 0 : while (c < col_max) {
2218 0 : unsigned int sad =
2219 0 : fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
2220 0 : if (sad < best_sad) {
2221 0 : const MV mv = { r, c };
2222 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
2223 0 : if (sad < best_sad) {
2224 0 : best_sad = sad;
2225 0 : *best_mv = mv;
2226 : }
2227 : }
2228 0 : ++check_here;
2229 0 : ++c;
2230 : }
2231 : }
2232 :
2233 0 : return best_sad;
2234 : }
2235 :
2236 0 : int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
2237 : int search_range,
2238 : const vp9_variance_fn_ptr_t *fn_ptr,
2239 : const MV *center_mv) {
2240 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2241 0 : const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2242 0 : const struct buf_2d *const what = &x->plane[0].src;
2243 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2244 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2245 0 : const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
2246 0 : unsigned int best_sad =
2247 0 : fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
2248 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2249 : int i, j;
2250 :
2251 0 : for (i = 0; i < search_range; i++) {
2252 0 : int best_site = -1;
2253 0 : const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
2254 0 : ((ref_mv->row + 1) < x->mv_limits.row_max) &
2255 0 : ((ref_mv->col - 1) > x->mv_limits.col_min) &
2256 0 : ((ref_mv->col + 1) < x->mv_limits.col_max);
2257 :
2258 0 : if (all_in) {
2259 : unsigned int sads[4];
2260 0 : const uint8_t *const positions[4] = { best_address - in_what->stride,
2261 0 : best_address - 1, best_address + 1,
2262 0 : best_address + in_what->stride };
2263 :
2264 0 : fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
2265 :
2266 0 : for (j = 0; j < 4; ++j) {
2267 0 : if (sads[j] < best_sad) {
2268 0 : const MV mv = { ref_mv->row + neighbors[j].row,
2269 0 : ref_mv->col + neighbors[j].col };
2270 0 : sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2271 0 : if (sads[j] < best_sad) {
2272 0 : best_sad = sads[j];
2273 0 : best_site = j;
2274 : }
2275 : }
2276 : }
2277 : } else {
2278 0 : for (j = 0; j < 4; ++j) {
2279 0 : const MV mv = { ref_mv->row + neighbors[j].row,
2280 0 : ref_mv->col + neighbors[j].col };
2281 :
2282 0 : if (is_mv_in(&x->mv_limits, &mv)) {
2283 0 : unsigned int sad =
2284 0 : fn_ptr->sdf(what->buf, what->stride,
2285 : get_buf_from_mv(in_what, &mv), in_what->stride);
2286 0 : if (sad < best_sad) {
2287 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2288 0 : if (sad < best_sad) {
2289 0 : best_sad = sad;
2290 0 : best_site = j;
2291 : }
2292 : }
2293 : }
2294 : }
2295 : }
2296 :
2297 0 : if (best_site == -1) {
2298 0 : break;
2299 : } else {
2300 0 : ref_mv->row += neighbors[best_site].row;
2301 0 : ref_mv->col += neighbors[best_site].col;
2302 0 : best_address = get_buf_from_mv(in_what, ref_mv);
2303 : }
2304 : }
2305 :
2306 0 : return best_sad;
2307 : }
2308 :
2309 : // This function is called when we do joint motion search in comp_inter_inter
2310 : // mode.
2311 0 : int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
2312 : int search_range,
2313 : const vp9_variance_fn_ptr_t *fn_ptr,
2314 : const MV *center_mv, const uint8_t *second_pred) {
2315 0 : const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
2316 : { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
2317 0 : const MACROBLOCKD *const xd = &x->e_mbd;
2318 0 : const struct buf_2d *const what = &x->plane[0].src;
2319 0 : const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2320 0 : const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2321 0 : unsigned int best_sad = INT_MAX;
2322 : int i, j;
2323 0 : clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
2324 : x->mv_limits.row_min, x->mv_limits.row_max);
2325 0 : best_sad =
2326 0 : fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2327 : in_what->stride, second_pred) +
2328 0 : mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2329 :
2330 0 : for (i = 0; i < search_range; ++i) {
2331 0 : int best_site = -1;
2332 :
2333 0 : for (j = 0; j < 8; ++j) {
2334 0 : const MV mv = { ref_mv->row + neighbors[j].row,
2335 0 : ref_mv->col + neighbors[j].col };
2336 :
2337 0 : if (is_mv_in(&x->mv_limits, &mv)) {
2338 0 : unsigned int sad =
2339 0 : fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
2340 : in_what->stride, second_pred);
2341 0 : if (sad < best_sad) {
2342 0 : sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2343 0 : if (sad < best_sad) {
2344 0 : best_sad = sad;
2345 0 : best_site = j;
2346 : }
2347 : }
2348 : }
2349 : }
2350 :
2351 0 : if (best_site == -1) {
2352 0 : break;
2353 : } else {
2354 0 : ref_mv->row += neighbors[best_site].row;
2355 0 : ref_mv->col += neighbors[best_site].col;
2356 : }
2357 : }
2358 0 : return best_sad;
2359 : }
2360 :
2361 : #define MIN_EX_SEARCH_LIMIT 128
2362 0 : static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
2363 0 : const SPEED_FEATURES *const sf = &cpi->sf;
2364 0 : const int max_ex =
2365 0 : VPXMAX(MIN_EX_SEARCH_LIMIT,
2366 : (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
2367 :
2368 0 : return sf->allow_exhaustive_searches &&
2369 0 : (sf->exhaustive_searches_thresh < INT_MAX) &&
2370 0 : (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
2371 : }
2372 :
2373 0 : int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
2374 : MV *mvp_full, int step_param, int error_per_bit,
2375 : int *cost_list, const MV *ref_mv, MV *tmp_mv,
2376 : int var_max, int rd) {
2377 0 : const SPEED_FEATURES *const sf = &cpi->sf;
2378 0 : const SEARCH_METHODS method = sf->mv.search_method;
2379 0 : vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
2380 0 : int var = 0;
2381 0 : if (cost_list) {
2382 0 : cost_list[0] = INT_MAX;
2383 0 : cost_list[1] = INT_MAX;
2384 0 : cost_list[2] = INT_MAX;
2385 0 : cost_list[3] = INT_MAX;
2386 0 : cost_list[4] = INT_MAX;
2387 : }
2388 :
2389 : // Keep track of number of searches (this frame in this thread).
2390 0 : ++(*x->m_search_count_ptr);
2391 :
2392 0 : switch (method) {
2393 : case FAST_DIAMOND:
2394 0 : var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
2395 : cost_list, fn_ptr, 1, ref_mv, tmp_mv);
2396 0 : break;
2397 : case FAST_HEX:
2398 0 : var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
2399 : cost_list, fn_ptr, 1, ref_mv, tmp_mv);
2400 0 : break;
2401 : case HEX:
2402 0 : var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2403 : fn_ptr, 1, ref_mv, tmp_mv);
2404 0 : break;
2405 : case SQUARE:
2406 0 : var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2407 : fn_ptr, 1, ref_mv, tmp_mv);
2408 0 : break;
2409 : case BIGDIA:
2410 0 : var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2411 : fn_ptr, 1, ref_mv, tmp_mv);
2412 0 : break;
2413 : case NSTEP:
2414 0 : var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
2415 : MAX_MVSEARCH_STEPS - 1 - step_param, 1,
2416 : cost_list, fn_ptr, ref_mv, tmp_mv);
2417 :
2418 : // Should we allow a follow on exhaustive search?
2419 0 : if (is_exhaustive_allowed(cpi, x)) {
2420 0 : int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
2421 0 : exhuastive_thr >>=
2422 0 : 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
2423 :
2424 : // Threshold variance for an exhaustive full search.
2425 0 : if (var > exhuastive_thr) {
2426 : int var_ex;
2427 : MV tmp_mv_ex;
2428 0 : var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit,
2429 : cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
2430 :
2431 0 : if (var_ex < var) {
2432 0 : var = var_ex;
2433 0 : *tmp_mv = tmp_mv_ex;
2434 : }
2435 : }
2436 : }
2437 0 : break;
2438 0 : default: assert(0 && "Invalid search method.");
2439 : }
2440 :
2441 0 : if (method != NSTEP && rd && var < var_max)
2442 0 : var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
2443 :
2444 0 : return var;
2445 : }
|