Line data Source code
1 : /*
2 : * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include "./vp8_rtcd.h"
12 : #include "./vpx_dsp_rtcd.h"
13 : #include "onyx_int.h"
14 : #include "mcomp.h"
15 : #include "vpx_mem/vpx_mem.h"
16 : #include "vpx_config.h"
17 : #include <stdio.h>
18 : #include <limits.h>
19 : #include <math.h>
20 : #include "vp8/common/findnearmv.h"
21 : #include "vp8/common/common.h"
22 : #include "vpx_dsp/vpx_dsp_common.h"
23 :
24 : #ifdef VP8_ENTROPY_STATS
25 : static int mv_ref_ct[31][4][2];
26 : static int mv_mode_cts[4][2];
27 : #endif
28 :
/* Returns a weighted cost estimate for coding `mv` relative to `ref`.
 * The row and column differences are shifted right by 1 and used as
 * indices into mvcost[0] (rows) and mvcost[1] (columns); the two table
 * entries are summed, multiplied by Weight and shifted right by 7.
 * mvcost is dereferenced unconditionally here, so it must not be NULL.
 */
29 0 : int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
30 : /* MV costing is based on the distribution of vectors in the previous
31 : * frame and as such will tend to over state the cost of vectors. In
32 : * addition coding a new vector can have a knock on effect on the cost
33 : * of subsequent vectors and the quality of prediction from NEAR and
34 : * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
35 : * limited extent, for some account to be taken of these factors.
36 : */
37 0 : return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
38 0 : mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
39 0 : Weight) >>
40 : 7;
41 : }
42 :
/* Motion-vector cost term that callers add to a distortion score.
 * Looks up the halved (>> 1) row and column differences between `mv` and
 * `ref` in mvcost[0] and mvcost[1], multiplies the summed entries by
 * error_per_bit, adds 128 and shifts right by 8.
 * Returns 0 when mvcost is NULL.
 */
43 0 : static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
44 : int error_per_bit) {
45 : /* Ignore mv costing if mvcost is NULL */
46 0 : if (mvcost) {
47 0 : return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
48 0 : mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
49 0 : error_per_bit +
50 0 : 128) >>
51 : 8;
52 : }
53 0 : return 0;
54 : }
55 :
/* SAD-domain counterpart of the MV cost term.
 * The row and column differences between `mv` and `ref` index
 * mvsadcost[0] and mvsadcost[1] directly (no halving); the summed entries
 * are multiplied by error_per_bit, 128 is added and the result is shifted
 * right by 8. Returns 0 when mvsadcost is NULL.
 */
56 0 : static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
57 : int error_per_bit) {
58 : /* Calculate sad error cost on full pixel basis. */
59 : /* Ignore mv costing if mvsadcost is NULL */
60 0 : if (mvsadcost) {
61 0 : return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
62 0 : mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
63 0 : error_per_bit +
64 0 : 128) >>
65 : 8;
66 : }
67 0 : return 0;
68 : }
69 :
/* Builds the search-site table x->ss[] with four sites per step.
 * Entry 0 is the zero displacement. Then, starting at Len = MAX_FIRST_STEP
 * and halving Len after each pass until it reaches 0, four entries are
 * appended per pass in this order: row -Len, row +Len, col -Len, col +Len.
 * Each entry stores the displacement in .mv and the matching buffer offset
 * (row * stride + col) in .offset.
 * On return x->ss_count holds the number of entries written and
 * x->searches_per_step is 4.
 */
70 0 : void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
71 : int Len;
72 0 : int search_site_count = 0;
73 :
74 : /* Generate offsets for 4 search sites per step. */
75 0 : Len = MAX_FIRST_STEP;
/* Site 0: the centre (no displacement). */
76 0 : x->ss[search_site_count].mv.col = 0;
77 0 : x->ss[search_site_count].mv.row = 0;
78 0 : x->ss[search_site_count].offset = 0;
79 0 : search_site_count++;
80 :
81 0 : while (Len > 0) {
82 : /* Compute offsets for search sites. */
/* Row -Len, same column. */
83 0 : x->ss[search_site_count].mv.col = 0;
84 0 : x->ss[search_site_count].mv.row = -Len;
85 0 : x->ss[search_site_count].offset = -Len * stride;
86 0 : search_site_count++;
87 :
88 : /* Compute offsets for search sites. */
/* Row +Len, same column. */
89 0 : x->ss[search_site_count].mv.col = 0;
90 0 : x->ss[search_site_count].mv.row = Len;
91 0 : x->ss[search_site_count].offset = Len * stride;
92 0 : search_site_count++;
93 :
94 : /* Compute offsets for search sites. */
/* Same row, column -Len. */
95 0 : x->ss[search_site_count].mv.col = -Len;
96 0 : x->ss[search_site_count].mv.row = 0;
97 0 : x->ss[search_site_count].offset = -Len;
98 0 : search_site_count++;
99 :
100 : /* Compute offsets for search sites. */
/* Same row, column +Len. */
101 0 : x->ss[search_site_count].mv.col = Len;
102 0 : x->ss[search_site_count].mv.row = 0;
103 0 : x->ss[search_site_count].offset = Len;
104 0 : search_site_count++;
105 :
106 : /* Contract. */
107 0 : Len /= 2;
108 : }
109 :
110 0 : x->ss_count = search_site_count;
111 0 : x->searches_per_step = 4;
112 0 : }
113 :
/* Builds the search-site table x->ss[] with eight sites per step.
 * Entry 0 is the zero displacement. Then, starting at Len = MAX_FIRST_STEP
 * and halving Len after each pass until it reaches 0, eight entries are
 * appended per pass: the four axis-aligned displacements (row -Len,
 * row +Len, col -Len, col +Len) followed by the four diagonals
 * (-Len,-Len), (row -Len, col +Len), (row +Len, col -Len), (+Len,+Len).
 * Each entry stores the displacement in .mv and the matching buffer offset
 * (row * stride + col) in .offset.
 * On return x->ss_count holds the number of entries written and
 * x->searches_per_step is 8.
 */
114 0 : void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
115 : int Len;
116 0 : int search_site_count = 0;
117 :
118 : /* Generate offsets for 8 search sites per step. */
119 0 : Len = MAX_FIRST_STEP;
/* Site 0: the centre (no displacement). */
120 0 : x->ss[search_site_count].mv.col = 0;
121 0 : x->ss[search_site_count].mv.row = 0;
122 0 : x->ss[search_site_count].offset = 0;
123 0 : search_site_count++;
124 :
125 0 : while (Len > 0) {
126 : /* Compute offsets for search sites. */
/* Row -Len, same column. */
127 0 : x->ss[search_site_count].mv.col = 0;
128 0 : x->ss[search_site_count].mv.row = -Len;
129 0 : x->ss[search_site_count].offset = -Len * stride;
130 0 : search_site_count++;
131 :
132 : /* Compute offsets for search sites. */
/* Row +Len, same column. */
133 0 : x->ss[search_site_count].mv.col = 0;
134 0 : x->ss[search_site_count].mv.row = Len;
135 0 : x->ss[search_site_count].offset = Len * stride;
136 0 : search_site_count++;
137 :
138 : /* Compute offsets for search sites. */
/* Same row, column -Len. */
139 0 : x->ss[search_site_count].mv.col = -Len;
140 0 : x->ss[search_site_count].mv.row = 0;
141 0 : x->ss[search_site_count].offset = -Len;
142 0 : search_site_count++;
143 :
144 : /* Compute offsets for search sites. */
/* Same row, column +Len. */
145 0 : x->ss[search_site_count].mv.col = Len;
146 0 : x->ss[search_site_count].mv.row = 0;
147 0 : x->ss[search_site_count].offset = Len;
148 0 : search_site_count++;
149 :
150 : /* Compute offsets for search sites. */
/* Diagonal: row -Len, column -Len. */
151 0 : x->ss[search_site_count].mv.col = -Len;
152 0 : x->ss[search_site_count].mv.row = -Len;
153 0 : x->ss[search_site_count].offset = -Len * stride - Len;
154 0 : search_site_count++;
155 :
156 : /* Compute offsets for search sites. */
/* Diagonal: row -Len, column +Len. */
157 0 : x->ss[search_site_count].mv.col = Len;
158 0 : x->ss[search_site_count].mv.row = -Len;
159 0 : x->ss[search_site_count].offset = -Len * stride + Len;
160 0 : search_site_count++;
161 :
162 : /* Compute offsets for search sites. */
/* Diagonal: row +Len, column -Len. */
163 0 : x->ss[search_site_count].mv.col = -Len;
164 0 : x->ss[search_site_count].mv.row = Len;
165 0 : x->ss[search_site_count].offset = Len * stride - Len;
166 0 : search_site_count++;
167 :
168 : /* Compute offsets for search sites. */
/* Diagonal: row +Len, column +Len. */
169 0 : x->ss[search_site_count].mv.col = Len;
170 0 : x->ss[search_site_count].mv.row = Len;
171 0 : x->ss[search_site_count].offset = Len * stride + Len;
172 0 : search_site_count++;
173 :
174 : /* Contract. */
175 0 : Len /= 2;
176 : }
177 :
178 0 : x->ss_count = search_site_count;
179 0 : x->searches_per_step = 8;
180 0 : }
181 :
182 : /*
183 : * To avoid the penalty for crossing cache-line read, preload the reference
184 : * area in a small buffer, which is aligned to make sure there won't be crossing
185 : * cache-line read while reading from this buffer. This reduced the cpu
186 : * cycles spent on reading ref data in sub-pixel filter functions.
187 : * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
188 : * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
189 : * could reduce the area.
190 : */
191 :
192 : /* estimated cost of a motion vector (r,c) */
/*
 * Helper macros for vp8_find_best_sub_pixel_step_iteratively(), which
 * follows them and #undef's them afterwards. None of them is
 * self-contained: each expands to code that reads (and, for CHECK_BETTER,
 * writes) locals of the enclosing function -- mvcost, rr, rc,
 * error_per_bit, y, y_stride, offset, z, b, vfp, sse, thismse, besterr,
 * br, bc, distortion, sse1 and the minr/maxr/minc/maxc limits.
 * Several arguments are expanded more than once and IFMVCV/CHECK_BETTER
 * use them unparenthesised, so only pass simple, side-effect-free
 * expressions. ERR is defined here but not used by that function.
 */
193 : #define MVC(r, c) \
194 : (mvcost \
195 : ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
196 : : 0)
197 : /* pointer to predictor base of a motionvector */
198 : #define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
199 : /* convert motion vector component to offset for svf calc */
200 : #define SP(x) (((x)&3) << 1)
201 : /* returns subpixel variance error function. */
202 : #define DIST(r, c) \
203 : vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
/* runs s when (r,c) lies inside minr..maxr / minc..maxc, otherwise runs e;
 * the expansion already ends with ';' */
204 : #define IFMVCV(r, c, s, e) \
205 : if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
206 : /* returns distortion + motion vector cost */
207 : #define ERR(r, c) (MVC(r, c) + DIST(r, c))
208 : /* checks if (r,c) has better score than previous best */
/* In range: v = MVC + DIST; when v < besterr, besterr, br, bc, *distortion
 * and *sse1 are updated. Out of range: v = UINT_MAX and nothing else
 * changes. */
209 : #define CHECK_BETTER(v, r, c) \
210 : IFMVCV(r, c, \
211 : { \
212 : thismse = DIST(r, c); \
213 : if ((v = (MVC(r, c) + thismse)) < besterr) { \
214 : besterr = v; \
215 : br = r; \
216 : bc = c; \
217 : *distortion = thismse; \
218 : *sse1 = sse; \
219 : } \
220 : }, \
221 : v = UINT_MAX;)
222 :
/*
 * Iterative sub-pixel motion refinement around the position in *bestmv.
 *
 * On entry bestmv->as_mv.row/col are used directly as row/column offsets
 * into the reference buffer. They are scaled by 4 into the working
 * coordinates (br, bc); ref_mv is shifted right by 1 into (rr, rc) for the
 * same coordinate system.
 *
 * Search: up to three passes with step 2 (the loop commented "1/2 pel"),
 * then up to three passes with step 1 ("1/4 pel"); each counter starts at 4
 * and is pre-decremented in the loop condition. A pass evaluates the left,
 * right, up and down neighbours of the current centre (tr, tc) and then one
 * diagonal, chosen on the side of the cheaper horizontal and the cheaper
 * vertical neighbour. A pass that leaves (br, bc) unchanged ends its loop.
 * Candidates outside [minr, maxr] x [minc, maxc] are not evaluated and
 * score UINT_MAX.
 *
 * Outputs: *bestmv receives (br * 2, bc * 2); *distortion and *sse1 hold
 * the vf/svf result and the SSE of the best candidate found.
 * Returns besterr (distortion plus MV cost), or INT_MAX when the final
 * vector differs from ref_mv by more than MAX_FULL_PEL_VAL << 3 in either
 * component.
 */
223 0 : int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
224 : int_mv *bestmv, int_mv *ref_mv,
225 : int error_per_bit,
226 : const vp8_variance_fn_ptr_t *vfp,
227 : int *mvcost[2], int *distortion,
228 : unsigned int *sse1) {
/* z: source block being matched. */
229 0 : unsigned char *z = (*(b->base_src) + b->src);
230 :
/* rr/rc: reference MV; br/bc: best position so far; tr/tc: centre of the
 * current pass. All in the same (x4) working coordinates. */
231 0 : int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
232 0 : int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
233 0 : int tr = br, tc = bc;
234 : unsigned int besterr;
235 : unsigned int left, right, up, down, diag;
236 : unsigned int sse;
237 : unsigned int whichdir;
238 0 : unsigned int halfiters = 4;
239 0 : unsigned int quarteriters = 4;
240 : int thismse;
241 :
/* Search window: the intersection of the macroblock's MV limits (scaled
 * by 4) and a band of (1 << mvlong_width) - 1 around the reference MV. */
242 0 : int minc = VPXMAX(x->mv_col_min * 4,
243 : (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
244 0 : int maxc = VPXMIN(x->mv_col_max * 4,
245 : (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
246 0 : int minr = VPXMAX(x->mv_row_min * 4,
247 : (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
248 0 : int maxr = VPXMIN(x->mv_row_max * 4,
249 : (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
250 :
251 : int y_stride;
252 : int offset;
253 0 : int pre_stride = x->e_mbd.pre.y_stride;
254 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
255 :
/* On x86 the reference area is first copied into xd->y_buf (stride 32)
 * and the search reads from that copy; other targets read the reference
 * frame in place. */
256 : #if ARCH_X86 || ARCH_X86_64
257 0 : MACROBLOCKD *xd = &x->e_mbd;
258 0 : unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
259 0 : bestmv->as_mv.col;
260 : unsigned char *y;
261 : int buf_r1, buf_r2, buf_c1;
262 :
263 : /* Clamping to avoid out-of-range data access */
/* buf_r1 / buf_r2: rows copied above / below the block (at most 3 each);
 * buf_c1: columns copied to its left (at most 3). */
264 0 : buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
265 0 : ? (bestmv->as_mv.row - x->mv_row_min)
266 0 : : 3;
267 0 : buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
268 0 : ? (x->mv_row_max - bestmv->as_mv.row)
269 0 : : 3;
270 0 : buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
271 0 : ? (bestmv->as_mv.col - x->mv_col_min)
272 0 : : 3;
273 0 : y_stride = 32;
274 :
275 : /* Copy to intermediate buffer before searching. */
276 0 : vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
277 0 : y_stride, 16 + buf_r1 + buf_r2);
278 0 : y = xd->y_buf + y_stride * buf_r1 + buf_c1;
279 : #else
280 : unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
281 : bestmv->as_mv.col;
282 : y_stride = pre_stride;
283 : #endif
284 :
/* PRE() subtracts this so that the starting position maps back to y. */
285 0 : offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
286 :
287 : /* central mv */
288 0 : bestmv->as_mv.row *= 8;
289 0 : bestmv->as_mv.col *= 8;
290 :
291 : /* calculate central point error */
292 0 : besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
293 0 : *distortion = besterr;
294 0 : besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
295 :
296 : /* TODO: Each subsequent iteration checks at least one point in common
297 : * with the last iteration could be 2 ( if diag selected)
298 : */
299 0 : while (--halfiters) {
300 : /* 1/2 pel */
301 0 : CHECK_BETTER(left, tr, tc - 2);
302 0 : CHECK_BETTER(right, tr, tc + 2);
303 0 : CHECK_BETTER(up, tr - 2, tc);
304 0 : CHECK_BETTER(down, tr + 2, tc);
305 :
/* bit 0: right was not worse than left; bit 1: down was not worse than up */
306 0 : whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
307 :
308 0 : switch (whichdir) {
309 0 : case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
310 0 : case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
311 0 : case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
312 0 : case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
313 : }
314 :
315 : /* no reason to check the same one again. */
316 0 : if (tr == br && tc == bc) break;
317 :
318 0 : tr = br;
319 0 : tc = bc;
320 : }
321 :
322 : /* TODO: Each subsequent iteration checks at least one point in common
323 : * with the last iteration could be 2 ( if diag selected)
324 : */
325 :
326 : /* 1/4 pel */
327 0 : while (--quarteriters) {
328 0 : CHECK_BETTER(left, tr, tc - 1);
329 0 : CHECK_BETTER(right, tr, tc + 1);
330 0 : CHECK_BETTER(up, tr - 1, tc);
331 0 : CHECK_BETTER(down, tr + 1, tc);
332 :
333 0 : whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
334 :
335 0 : switch (whichdir) {
336 0 : case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
337 0 : case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
338 0 : case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
339 0 : case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
340 : }
341 :
342 : /* no reason to check the same one again. */
343 0 : if (tr == br && tc == bc) break;
344 :
345 0 : tr = br;
346 0 : tc = bc;
347 : }
348 :
/* Write the result back at twice the working resolution. */
349 0 : bestmv->as_mv.row = br * 2;
350 0 : bestmv->as_mv.col = bc * 2;
351 :
/* Reject a result that is too far from the reference MV. */
352 0 : if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
353 0 : (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
354 0 : return INT_MAX;
355 : }
356 :
357 0 : return besterr;
358 : }
359 : #undef MVC
360 : #undef PRE
361 : #undef SP
362 : #undef DIST
363 : #undef IFMVCV
364 : #undef ERR
365 : #undef CHECK_BETTER
366 :
/*
 * Two-stage sub-pixel refinement around the position in *bestmv.
 *
 * On entry bestmv->as_mv.row/col are used directly as row/column offsets
 * into the reference buffer; they are then multiplied by 8 and all further
 * arithmetic is in those x8 units.
 *
 * Stage 1 probes the start position and its neighbours at distance 4
 * (left, right, up, down, then one diagonal on the side of the cheaper
 * horizontal and cheaper vertical neighbour). Stage 2 (commented "quarter
 * pels" below) repeats the same pattern at distance 2 around the stage-1
 * winner. Every candidate is scored as vfp->svf() distortion plus
 * mv_err_cost(); whenever a candidate beats bestmse, *bestmv, *distortion
 * and *sse1 are updated to it.
 *
 * Returns the best score found (distortion plus MV cost).
 * Unlike the iterative variant, no MV-limit checks are applied to the
 * candidates in this function.
 */
367 0 : int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
368 : int_mv *bestmv, int_mv *ref_mv,
369 : int error_per_bit,
370 : const vp8_variance_fn_ptr_t *vfp,
371 : int *mvcost[2], int *distortion,
372 : unsigned int *sse1) {
373 0 : int bestmse = INT_MAX;
374 : int_mv startmv;
375 : int_mv this_mv;
/* z: source block being matched. */
376 0 : unsigned char *z = (*(b->base_src) + b->src);
377 : int left, right, up, down, diag;
378 : unsigned int sse;
379 : int whichdir;
380 : int thismse;
381 : int y_stride;
382 0 : int pre_stride = x->e_mbd.pre.y_stride;
383 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
384 :
/* On x86 the reference area (one extra row above and one extra column to
 * the left of the start position) is copied into xd->y_buf with stride 32
 * and the search reads from that copy; other targets read the reference
 * frame in place. */
385 : #if ARCH_X86 || ARCH_X86_64
386 0 : MACROBLOCKD *xd = &x->e_mbd;
387 0 : unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
388 0 : bestmv->as_mv.col;
389 : unsigned char *y;
390 :
391 0 : y_stride = 32;
392 : /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
393 0 : vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
394 0 : y = xd->y_buf + y_stride + 1;
395 : #else
396 : unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
397 : bestmv->as_mv.col;
398 : y_stride = pre_stride;
399 : #endif
400 :
401 : /* central mv */
402 0 : bestmv->as_mv.row *= 8;
403 0 : bestmv->as_mv.col *= 8;
404 0 : startmv = *bestmv;
405 :
406 : /* calculate central point error */
407 0 : bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
408 0 : *distortion = bestmse;
409 0 : bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
410 :
411 : /* go left then right and check error */
412 0 : this_mv.as_mv.row = startmv.as_mv.row;
/* startmv is a multiple of 8 here, so (v - 8) | 4 equals v - 4. */
413 0 : this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
414 : /* "halfpix" horizontal variance */
415 0 : thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
416 0 : left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
417 :
418 0 : if (left < bestmse) {
419 0 : *bestmv = this_mv;
420 0 : bestmse = left;
421 0 : *distortion = thismse;
422 0 : *sse1 = sse;
423 : }
424 :
/* col goes from start - 4 to start + 4 */
425 0 : this_mv.as_mv.col += 8;
426 : /* "halfpix" horizontal variance */
427 0 : thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
428 0 : right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
429 :
430 0 : if (right < bestmse) {
431 0 : *bestmv = this_mv;
432 0 : bestmse = right;
433 0 : *distortion = thismse;
434 0 : *sse1 = sse;
435 : }
436 :
437 : /* go up then down and check error */
438 0 : this_mv.as_mv.col = startmv.as_mv.col;
439 0 : this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
440 : /* "halfpix" vertical variance */
441 0 : thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
442 0 : up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
443 :
444 0 : if (up < bestmse) {
445 0 : *bestmv = this_mv;
446 0 : bestmse = up;
447 0 : *distortion = thismse;
448 0 : *sse1 = sse;
449 : }
450 :
451 0 : this_mv.as_mv.row += 8;
452 : /* "halfpix" vertical variance */
453 0 : thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
454 0 : down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
455 :
456 0 : if (down < bestmse) {
457 0 : *bestmv = this_mv;
458 0 : bestmse = down;
459 0 : *distortion = thismse;
460 0 : *sse1 = sse;
461 : }
462 :
463 : /* now check 1 more diagonal */
/* bit 0: right was not worse than left; bit 1: down was not worse than up */
464 0 : whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
465 0 : this_mv = startmv;
466 :
467 0 : switch (whichdir) {
468 : case 0:
469 0 : this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
470 0 : this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
471 : /* "halfpix" horizontal/vertical variance */
472 0 : thismse =
473 0 : vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
474 0 : break;
475 : case 1:
476 0 : this_mv.as_mv.col += 4;
477 0 : this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
478 : /* "halfpix" horizontal/vertical variance */
479 0 : thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
480 0 : break;
481 : case 2:
482 0 : this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
483 0 : this_mv.as_mv.row += 4;
484 : /* "halfpix" horizontal/vertical variance */
485 0 : thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
486 0 : break;
487 : case 3:
488 : default:
489 0 : this_mv.as_mv.col += 4;
490 0 : this_mv.as_mv.row += 4;
491 : /* "halfpix" horizontal/vertical variance */
492 0 : thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
493 0 : break;
494 : }
495 :
496 0 : diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
497 :
498 0 : if (diag < bestmse) {
499 0 : *bestmv = this_mv;
500 0 : bestmse = diag;
501 0 : *distortion = thismse;
502 0 : *sse1 = sse;
503 : }
504 :
505 : /* time to check quarter pels. */
/* If the stage-1 winner is above / left of the start, move y back one row /
 * column so that the (component & 7) fractions passed to svf below are
 * measured from the sample at or before the winner. */
506 0 : if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;
507 :
508 0 : if (bestmv->as_mv.col < startmv.as_mv.col) y--;
509 :
510 0 : startmv = *bestmv;
511 :
512 : /* go left then right and check error */
513 0 : this_mv.as_mv.row = startmv.as_mv.row;
514 :
/* With a non-zero column fraction, col - 2 stays within the same sample.
 * Otherwise the candidate crosses into the previous column: (col - 8) | 6
 * equals col - 2, read from y - 1 with fraction 6. */
515 0 : if (startmv.as_mv.col & 7) {
516 0 : this_mv.as_mv.col = startmv.as_mv.col - 2;
517 0 : thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
518 0 : this_mv.as_mv.row & 7, z, b->src_stride, &sse);
519 : } else {
520 0 : this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
521 0 : thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
522 : b->src_stride, &sse);
523 : }
524 :
525 0 : left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
526 :
527 0 : if (left < bestmse) {
528 0 : *bestmv = this_mv;
529 0 : bestmse = left;
530 0 : *distortion = thismse;
531 0 : *sse1 = sse;
532 : }
533 :
/* col goes from start - 2 to start + 2 */
534 0 : this_mv.as_mv.col += 4;
535 0 : thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
536 : z, b->src_stride, &sse);
537 0 : right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
538 :
539 0 : if (right < bestmse) {
540 0 : *bestmv = this_mv;
541 0 : bestmse = right;
542 0 : *distortion = thismse;
543 0 : *sse1 = sse;
544 : }
545 :
546 : /* go up then down and check error */
547 0 : this_mv.as_mv.col = startmv.as_mv.col;
548 :
/* Same split as for the column: stay in the row when it has a fraction,
 * otherwise step into the previous row with fraction 6. */
549 0 : if (startmv.as_mv.row & 7) {
550 0 : this_mv.as_mv.row = startmv.as_mv.row - 2;
551 0 : thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
552 0 : this_mv.as_mv.row & 7, z, b->src_stride, &sse);
553 : } else {
554 0 : this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
555 0 : thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
556 : b->src_stride, &sse);
557 : }
558 :
559 0 : up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
560 :
561 0 : if (up < bestmse) {
562 0 : *bestmv = this_mv;
563 0 : bestmse = up;
564 0 : *distortion = thismse;
565 0 : *sse1 = sse;
566 : }
567 :
568 0 : this_mv.as_mv.row += 4;
569 0 : thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
570 : z, b->src_stride, &sse);
571 0 : down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
572 :
573 0 : if (down < bestmse) {
574 0 : *bestmv = this_mv;
575 0 : bestmse = down;
576 0 : *distortion = thismse;
577 0 : *sse1 = sse;
578 : }
579 :
580 : /* now check 1 more diagonal */
581 0 : whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
582 :
583 0 : this_mv = startmv;
584 :
/* whichdir is always 0..3 here, so one case below always assigns thismse. */
585 0 : switch (whichdir) {
586 : case 0:
587 :
588 0 : if (startmv.as_mv.row & 7) {
589 0 : this_mv.as_mv.row -= 2;
590 :
591 0 : if (startmv.as_mv.col & 7) {
592 0 : this_mv.as_mv.col -= 2;
593 0 : thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
594 0 : this_mv.as_mv.row & 7, z, b->src_stride, &sse);
595 : } else {
596 0 : this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
597 0 : thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
598 : b->src_stride, &sse);
599 : }
600 : } else {
601 0 : this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
602 :
603 0 : if (startmv.as_mv.col & 7) {
604 0 : this_mv.as_mv.col -= 2;
605 0 : thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
606 : z, b->src_stride, &sse);
607 : } else {
608 0 : this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
609 0 : thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
610 : &sse);
611 : }
612 : }
613 :
614 0 : break;
615 : case 1:
616 0 : this_mv.as_mv.col += 2;
617 :
618 0 : if (startmv.as_mv.row & 7) {
619 0 : this_mv.as_mv.row -= 2;
620 0 : thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
621 0 : this_mv.as_mv.row & 7, z, b->src_stride, &sse);
622 : } else {
623 0 : this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
624 0 : thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
625 : b->src_stride, &sse);
626 : }
627 :
628 0 : break;
629 : case 2:
630 0 : this_mv.as_mv.row += 2;
631 :
632 0 : if (startmv.as_mv.col & 7) {
633 0 : this_mv.as_mv.col -= 2;
634 0 : thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
635 0 : this_mv.as_mv.row & 7, z, b->src_stride, &sse);
636 : } else {
637 0 : this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
638 0 : thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
639 : b->src_stride, &sse);
640 : }
641 :
642 0 : break;
643 : case 3:
644 0 : this_mv.as_mv.col += 2;
645 0 : this_mv.as_mv.row += 2;
646 0 : thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
647 0 : this_mv.as_mv.row & 7, z, b->src_stride, &sse);
648 0 : break;
649 : }
650 :
651 0 : diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
652 :
653 0 : if (diag < bestmse) {
654 0 : *bestmv = this_mv;
655 0 : bestmse = diag;
656 0 : *distortion = thismse;
657 0 : *sse1 = sse;
658 : }
659 :
660 0 : return bestmse;
661 : }
662 :
/*
 * Single-stage sub-pixel refinement around the position in *bestmv.
 *
 * On entry bestmv->as_mv.row/col are used directly as row/column offsets
 * into the reference buffer; they are then multiplied by 8 and all further
 * arithmetic is in those x8 units.
 *
 * The start position and its neighbours at distance 4 are scored: left,
 * right, up, down, then one diagonal on the side of the cheaper horizontal
 * and cheaper vertical neighbour. Each candidate's score is vfp->svf()
 * distortion plus mv_err_cost(); whenever a candidate beats bestmse,
 * *bestmv, *distortion and *sse1 are updated to it.
 *
 * Returns the best score found (distortion plus MV cost). No MV-limit
 * checks are applied to the candidates in this function.
 */
663 0 : int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
664 : int_mv *bestmv, int_mv *ref_mv,
665 : int error_per_bit,
666 : const vp8_variance_fn_ptr_t *vfp,
667 : int *mvcost[2], int *distortion,
668 : unsigned int *sse1) {
669 0 : int bestmse = INT_MAX;
670 : int_mv startmv;
671 : int_mv this_mv;
/* z: source block being matched. */
672 0 : unsigned char *z = (*(b->base_src) + b->src);
673 : int left, right, up, down, diag;
674 : unsigned int sse;
675 : int whichdir;
676 : int thismse;
677 : int y_stride;
678 0 : int pre_stride = x->e_mbd.pre.y_stride;
679 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
680 :
/* On x86 the reference area (one extra row above and one extra column to
 * the left of the start position) is copied into xd->y_buf with stride 32
 * and the search reads from that copy; other targets read the reference
 * frame in place. */
681 : #if ARCH_X86 || ARCH_X86_64
682 0 : MACROBLOCKD *xd = &x->e_mbd;
683 0 : unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
684 0 : bestmv->as_mv.col;
685 : unsigned char *y;
686 :
687 0 : y_stride = 32;
688 : /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
689 0 : vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
690 0 : y = xd->y_buf + y_stride + 1;
691 : #else
692 : unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
693 : bestmv->as_mv.col;
694 : y_stride = pre_stride;
695 : #endif
696 :
697 : /* central mv */
698 0 : bestmv->as_mv.row *= 8;
699 0 : bestmv->as_mv.col *= 8;
700 0 : startmv = *bestmv;
701 :
702 : /* calculate central point error */
703 0 : bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
704 0 : *distortion = bestmse;
705 0 : bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
706 :
707 : /* go left then right and check error */
708 0 : this_mv.as_mv.row = startmv.as_mv.row;
/* startmv is a multiple of 8 here, so (v - 8) | 4 equals v - 4. */
709 0 : this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
710 : /* "halfpix" horizontal variance */
711 0 : thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
712 0 : left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
713 :
714 0 : if (left < bestmse) {
715 0 : *bestmv = this_mv;
716 0 : bestmse = left;
717 0 : *distortion = thismse;
718 0 : *sse1 = sse;
719 : }
720 :
/* col goes from start - 4 to start + 4 */
721 0 : this_mv.as_mv.col += 8;
722 : /* "halfpix" horizontal variance */
723 0 : thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
724 0 : right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
725 :
726 0 : if (right < bestmse) {
727 0 : *bestmv = this_mv;
728 0 : bestmse = right;
729 0 : *distortion = thismse;
730 0 : *sse1 = sse;
731 : }
732 :
733 : /* go up then down and check error */
734 0 : this_mv.as_mv.col = startmv.as_mv.col;
735 0 : this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
736 : /* "halfpix" vertical variance */
737 0 : thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
738 0 : up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
739 :
740 0 : if (up < bestmse) {
741 0 : *bestmv = this_mv;
742 0 : bestmse = up;
743 0 : *distortion = thismse;
744 0 : *sse1 = sse;
745 : }
746 :
747 0 : this_mv.as_mv.row += 8;
748 : /* "halfpix" vertical variance */
749 0 : thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
750 0 : down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
751 :
752 0 : if (down < bestmse) {
753 0 : *bestmv = this_mv;
754 0 : bestmse = down;
755 0 : *distortion = thismse;
756 0 : *sse1 = sse;
757 : }
758 :
759 : /* now check 1 more diagonal - */
/* bit 0: right was not worse than left; bit 1: down was not worse than up */
760 0 : whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
761 0 : this_mv = startmv;
762 :
763 0 : switch (whichdir) {
764 : case 0:
765 0 : this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
766 0 : this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
767 : /* "halfpix" horizontal/vertical variance */
768 0 : thismse =
769 0 : vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
770 0 : break;
771 : case 1:
772 0 : this_mv.as_mv.col += 4;
773 0 : this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
774 : /* "halfpix" horizontal/vertical variance */
775 0 : thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
776 0 : break;
777 : case 2:
778 0 : this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
779 0 : this_mv.as_mv.row += 4;
780 : /* "halfpix" horizontal/vertical variance */
781 0 : thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
782 0 : break;
783 : case 3:
784 : default:
785 0 : this_mv.as_mv.col += 4;
786 0 : this_mv.as_mv.row += 4;
787 : /* "halfpix" horizontal/vertical variance */
788 0 : thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
789 0 : break;
790 : }
791 :
792 0 : diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
793 :
794 0 : if (diag < bestmse) {
795 0 : *bestmv = this_mv;
796 0 : bestmse = diag;
797 0 : *distortion = thismse;
798 0 : *sse1 = sse;
799 : }
800 :
801 0 : return bestmse;
802 : }
803 :
/*
 * Helper macros for the full-pixel pattern search (used by vp8_hex_search
 * below). They are not self-contained: they read and write locals of the
 * enclosing function (x, br, bc, all_in, this_mv, thissad, bestsad,
 * best_site, i, fcenter_mv, mvsadcost, sad_per_bit). Each expands to a
 * brace-enclosed block, and the visible call sites use them without a
 * trailing semicolon.
 */
/* Sets all_in to 1 when (br - range, br + range) and (bc - range,
 * bc + range) all lie inside x's mv_row/mv_col min..max limits, else 0. */
804 : #define CHECK_BOUNDS(range) \
805 : { \
806 : all_in = 1; \
807 : all_in &= ((br - range) >= x->mv_row_min); \
808 : all_in &= ((br + range) <= x->mv_row_max); \
809 : all_in &= ((bc - range) >= x->mv_col_min); \
810 : all_in &= ((bc + range) <= x->mv_col_max); \
811 : }
812 :
/* Skips the rest of the current loop iteration (via `continue`) when
 * this_mv is outside x's MV limits; only valid inside a loop body. */
813 : #define CHECK_POINT \
814 : { \
815 : if (this_mv.as_mv.col < x->mv_col_min) continue; \
816 : if (this_mv.as_mv.col > x->mv_col_max) continue; \
817 : if (this_mv.as_mv.row < x->mv_row_min) continue; \
818 : if (this_mv.as_mv.row > x->mv_row_max) continue; \
819 : }
820 :
/* Two-step comparison: the MV cost from mvsad_err_cost() is added only if
 * the raw thissad already beats bestsad; if the total still beats bestsad,
 * bestsad is updated and the loop index i is recorded in best_site. */
821 : #define CHECK_BETTER \
822 : { \
823 : if (thissad < bestsad) { \
824 : thissad += \
825 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
826 : if (thissad < bestsad) { \
827 : bestsad = thissad; \
828 : best_site = i; \
829 : } \
830 : } \
831 : }
832 :
/* Follow-up check points: six rows of three MV offsets each. vp8_hex_search
 * indexes it as next_chkpts[k][i] with k in 0..5 and i in 0..2, and adds the
 * chosen entry to its current best position (br, bc). */
833 : static const MV next_chkpts[6][3] = {
834 : { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
835 : { { 1, -2 }, { 2, 0 }, { 1, 2 } }, { { 2, 0 }, { 1, 2 }, { -1, 2 } },
836 : { { 1, 2 }, { -1, 2 }, { -2, 0 } }, { { -1, 2 }, { -2, 0 }, { -1, -2 } }
837 : };
838 :
839 0 : int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
840 : int_mv *best_mv, int search_param, int sad_per_bit,
841 : const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
842 : int *mvcost[2], int_mv *center_mv) {
843 0 : MV hex[6] = {
844 : { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
845 : };
846 0 : MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
847 : int i, j;
848 :
849 0 : unsigned char *what = (*(b->base_src) + b->src);
850 0 : int what_stride = b->src_stride;
851 0 : int pre_stride = x->e_mbd.pre.y_stride;
852 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
853 :
854 0 : int in_what_stride = pre_stride;
855 : int br, bc;
856 : int_mv this_mv;
857 : unsigned int bestsad;
858 : unsigned int thissad;
859 : unsigned char *base_offset;
860 : unsigned char *this_offset;
861 0 : int k = -1;
862 : int all_in;
863 0 : int best_site = -1;
864 0 : int hex_range = 127;
865 0 : int dia_range = 8;
866 :
867 : int_mv fcenter_mv;
868 0 : fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
869 0 : fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
870 :
871 : (void)mvcost;
872 :
873 : /* adjust ref_mv to make sure it is within MV range */
874 0 : vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
875 : x->mv_row_max);
876 0 : br = ref_mv->as_mv.row;
877 0 : bc = ref_mv->as_mv.col;
878 :
879 : /* Work out the start point for the search */
880 0 : base_offset = (unsigned char *)(base_pre + d->offset);
881 0 : this_offset = base_offset + (br * (pre_stride)) + bc;
882 0 : this_mv.as_mv.row = br;
883 0 : this_mv.as_mv.col = bc;
884 0 : bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
885 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
886 :
887 : #if CONFIG_MULTI_RES_ENCODING
888 : /* Lower search range based on prediction info */
889 0 : if (search_param >= 6)
890 0 : goto cal_neighbors;
891 0 : else if (search_param >= 5)
892 0 : hex_range = 4;
893 0 : else if (search_param >= 4)
894 0 : hex_range = 6;
895 0 : else if (search_param >= 3)
896 0 : hex_range = 15;
897 0 : else if (search_param >= 2)
898 0 : hex_range = 31;
899 0 : else if (search_param >= 1)
900 0 : hex_range = 63;
901 :
902 0 : dia_range = 8;
903 : #else
904 : (void)search_param;
905 : #endif
906 :
907 : /* hex search */
908 0 : CHECK_BOUNDS(2)
909 :
910 0 : if (all_in) {
911 0 : for (i = 0; i < 6; ++i) {
912 0 : this_mv.as_mv.row = br + hex[i].row;
913 0 : this_mv.as_mv.col = bc + hex[i].col;
914 0 : this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
915 0 : this_mv.as_mv.col;
916 0 : thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
917 0 : CHECK_BETTER
918 : }
919 : } else {
920 0 : for (i = 0; i < 6; ++i) {
921 0 : this_mv.as_mv.row = br + hex[i].row;
922 0 : this_mv.as_mv.col = bc + hex[i].col;
923 0 : CHECK_POINT
924 0 : this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
925 0 : this_mv.as_mv.col;
926 0 : thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
927 0 : CHECK_BETTER
928 : }
929 : }
930 :
931 0 : if (best_site == -1) {
932 0 : goto cal_neighbors;
933 : } else {
934 0 : br += hex[best_site].row;
935 0 : bc += hex[best_site].col;
936 0 : k = best_site;
937 : }
938 :
939 0 : for (j = 1; j < hex_range; ++j) {
940 0 : best_site = -1;
941 0 : CHECK_BOUNDS(2)
942 :
943 0 : if (all_in) {
944 0 : for (i = 0; i < 3; ++i) {
945 0 : this_mv.as_mv.row = br + next_chkpts[k][i].row;
946 0 : this_mv.as_mv.col = bc + next_chkpts[k][i].col;
947 0 : this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
948 0 : this_mv.as_mv.col;
949 0 : thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
950 0 : CHECK_BETTER
951 : }
952 : } else {
953 0 : for (i = 0; i < 3; ++i) {
954 0 : this_mv.as_mv.row = br + next_chkpts[k][i].row;
955 0 : this_mv.as_mv.col = bc + next_chkpts[k][i].col;
956 0 : CHECK_POINT
957 0 : this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
958 0 : this_mv.as_mv.col;
959 0 : thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
960 0 : CHECK_BETTER
961 : }
962 : }
963 :
964 0 : if (best_site == -1) {
965 0 : break;
966 : } else {
967 0 : br += next_chkpts[k][best_site].row;
968 0 : bc += next_chkpts[k][best_site].col;
969 0 : k += 5 + best_site;
970 0 : if (k >= 12) {
971 0 : k -= 12;
972 0 : } else if (k >= 6) {
973 0 : k -= 6;
974 : }
975 : }
976 : }
977 :
978 : /* check 4 1-away neighbors */
979 : cal_neighbors:
980 0 : for (j = 0; j < dia_range; ++j) {
981 0 : best_site = -1;
982 0 : CHECK_BOUNDS(1)
983 :
984 0 : if (all_in) {
985 0 : for (i = 0; i < 4; ++i) {
986 0 : this_mv.as_mv.row = br + neighbors[i].row;
987 0 : this_mv.as_mv.col = bc + neighbors[i].col;
988 0 : this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
989 0 : this_mv.as_mv.col;
990 0 : thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
991 0 : CHECK_BETTER
992 : }
993 : } else {
994 0 : for (i = 0; i < 4; ++i) {
995 0 : this_mv.as_mv.row = br + neighbors[i].row;
996 0 : this_mv.as_mv.col = bc + neighbors[i].col;
997 0 : CHECK_POINT
998 0 : this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
999 0 : this_mv.as_mv.col;
1000 0 : thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
1001 0 : CHECK_BETTER
1002 : }
1003 : }
1004 :
1005 0 : if (best_site == -1) {
1006 0 : break;
1007 : } else {
1008 0 : br += neighbors[best_site].row;
1009 0 : bc += neighbors[best_site].col;
1010 : }
1011 : }
1012 :
1013 0 : best_mv->as_mv.row = br;
1014 0 : best_mv->as_mv.col = bc;
1015 :
1016 0 : return bestsad;
1017 : }
1018 : #undef CHECK_BOUNDS
1019 : #undef CHECK_POINT
1020 : #undef CHECK_BETTER
1021 :
1022 0 : int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1023 : int_mv *best_mv, int search_param, int sad_per_bit,
1024 : int *num00, vp8_variance_fn_ptr_t *fn_ptr,
1025 : int *mvcost[2], int_mv *center_mv) {
1026 : int i, j, step;
1027 :
1028 0 : unsigned char *what = (*(b->base_src) + b->src);
1029 0 : int what_stride = b->src_stride;
1030 : unsigned char *in_what;
1031 0 : int pre_stride = x->e_mbd.pre.y_stride;
1032 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1033 0 : int in_what_stride = pre_stride;
1034 : unsigned char *best_address;
1035 :
1036 : int tot_steps;
1037 : int_mv this_mv;
1038 :
1039 : unsigned int bestsad;
1040 : unsigned int thissad;
1041 0 : int best_site = 0;
1042 0 : int last_site = 0;
1043 :
1044 : int ref_row;
1045 : int ref_col;
1046 : int this_row_offset;
1047 : int this_col_offset;
1048 : search_site *ss;
1049 :
1050 : unsigned char *check_here;
1051 :
1052 : int *mvsadcost[2];
1053 : int_mv fcenter_mv;
1054 :
1055 0 : mvsadcost[0] = x->mvsadcost[0];
1056 0 : mvsadcost[1] = x->mvsadcost[1];
1057 0 : fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1058 0 : fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1059 :
1060 0 : vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1061 : x->mv_row_max);
1062 0 : ref_row = ref_mv->as_mv.row;
1063 0 : ref_col = ref_mv->as_mv.col;
1064 0 : *num00 = 0;
1065 0 : best_mv->as_mv.row = ref_row;
1066 0 : best_mv->as_mv.col = ref_col;
1067 :
1068 : /* Work out the start point for the search */
1069 0 : in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
1070 : ref_col);
1071 0 : best_address = in_what;
1072 :
1073 : /* Check the starting position */
1074 0 : bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1075 0 : mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1076 :
1077 : /* search_param determines the length of the initial step and hence
1078 : * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
1079 : * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1080 : */
1081 0 : ss = &x->ss[search_param * x->searches_per_step];
1082 0 : tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1083 :
1084 0 : i = 1;
1085 :
1086 0 : for (step = 0; step < tot_steps; ++step) {
1087 0 : for (j = 0; j < x->searches_per_step; ++j) {
1088 : /* Trap illegal vectors */
1089 0 : this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1090 0 : this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1091 :
1092 0 : if ((this_col_offset > x->mv_col_min) &&
1093 0 : (this_col_offset < x->mv_col_max) &&
1094 0 : (this_row_offset > x->mv_row_min) &&
1095 0 : (this_row_offset < x->mv_row_max))
1096 :
1097 : {
1098 0 : check_here = ss[i].offset + best_address;
1099 0 : thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1100 :
1101 0 : if (thissad < bestsad) {
1102 0 : this_mv.as_mv.row = this_row_offset;
1103 0 : this_mv.as_mv.col = this_col_offset;
1104 0 : thissad +=
1105 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1106 :
1107 0 : if (thissad < bestsad) {
1108 0 : bestsad = thissad;
1109 0 : best_site = i;
1110 : }
1111 : }
1112 : }
1113 :
1114 0 : i++;
1115 : }
1116 :
1117 0 : if (best_site != last_site) {
1118 0 : best_mv->as_mv.row += ss[best_site].mv.row;
1119 0 : best_mv->as_mv.col += ss[best_site].mv.col;
1120 0 : best_address += ss[best_site].offset;
1121 0 : last_site = best_site;
1122 0 : } else if (best_address == in_what) {
1123 0 : (*num00)++;
1124 : }
1125 : }
1126 :
1127 0 : this_mv.as_mv.row = best_mv->as_mv.row << 3;
1128 0 : this_mv.as_mv.col = best_mv->as_mv.col << 3;
1129 :
1130 0 : return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1131 0 : mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1132 : }
1133 :
1134 0 : int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1135 : int_mv *best_mv, int search_param, int sad_per_bit,
1136 : int *num00, vp8_variance_fn_ptr_t *fn_ptr,
1137 : int *mvcost[2], int_mv *center_mv) {
1138 : int i, j, step;
1139 :
1140 0 : unsigned char *what = (*(b->base_src) + b->src);
1141 0 : int what_stride = b->src_stride;
1142 : unsigned char *in_what;
1143 0 : int pre_stride = x->e_mbd.pre.y_stride;
1144 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1145 0 : int in_what_stride = pre_stride;
1146 : unsigned char *best_address;
1147 :
1148 : int tot_steps;
1149 : int_mv this_mv;
1150 :
1151 : unsigned int bestsad;
1152 : unsigned int thissad;
1153 0 : int best_site = 0;
1154 0 : int last_site = 0;
1155 :
1156 : int ref_row;
1157 : int ref_col;
1158 : int this_row_offset;
1159 : int this_col_offset;
1160 : search_site *ss;
1161 :
1162 : unsigned char *check_here;
1163 :
1164 : int *mvsadcost[2];
1165 : int_mv fcenter_mv;
1166 :
1167 0 : mvsadcost[0] = x->mvsadcost[0];
1168 0 : mvsadcost[1] = x->mvsadcost[1];
1169 0 : fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1170 0 : fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1171 :
1172 0 : vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
1173 : x->mv_row_max);
1174 0 : ref_row = ref_mv->as_mv.row;
1175 0 : ref_col = ref_mv->as_mv.col;
1176 0 : *num00 = 0;
1177 0 : best_mv->as_mv.row = ref_row;
1178 0 : best_mv->as_mv.col = ref_col;
1179 :
1180 : /* Work out the start point for the search */
1181 0 : in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
1182 : ref_col);
1183 0 : best_address = in_what;
1184 :
1185 : /* Check the starting position */
1186 0 : bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
1187 0 : mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1188 :
1189 : /* search_param determines the length of the initial step and hence the
1190 : * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
1191 : * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
1192 : */
1193 0 : ss = &x->ss[search_param * x->searches_per_step];
1194 0 : tot_steps = (x->ss_count / x->searches_per_step) - search_param;
1195 :
1196 0 : i = 1;
1197 :
1198 0 : for (step = 0; step < tot_steps; ++step) {
1199 0 : int all_in = 1, t;
1200 :
1201 : /* To know if all neighbor points are within the bounds, 4 bounds
1202 : * checking are enough instead of checking 4 bounds for each
1203 : * points.
1204 : */
1205 0 : all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
1206 0 : all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
1207 0 : all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
1208 0 : all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
1209 :
1210 0 : if (all_in) {
1211 : unsigned int sad_array[4];
1212 :
1213 0 : for (j = 0; j < x->searches_per_step; j += 4) {
1214 : const unsigned char *block_offset[4];
1215 :
1216 0 : for (t = 0; t < 4; ++t) {
1217 0 : block_offset[t] = ss[i + t].offset + best_address;
1218 : }
1219 :
1220 0 : fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1221 : sad_array);
1222 :
1223 0 : for (t = 0; t < 4; t++, i++) {
1224 0 : if (sad_array[t] < bestsad) {
1225 0 : this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
1226 0 : this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
1227 0 : sad_array[t] +=
1228 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1229 :
1230 0 : if (sad_array[t] < bestsad) {
1231 0 : bestsad = sad_array[t];
1232 0 : best_site = i;
1233 : }
1234 : }
1235 : }
1236 : }
1237 : } else {
1238 0 : for (j = 0; j < x->searches_per_step; ++j) {
1239 : /* Trap illegal vectors */
1240 0 : this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
1241 0 : this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
1242 :
1243 0 : if ((this_col_offset > x->mv_col_min) &&
1244 0 : (this_col_offset < x->mv_col_max) &&
1245 0 : (this_row_offset > x->mv_row_min) &&
1246 0 : (this_row_offset < x->mv_row_max)) {
1247 0 : check_here = ss[i].offset + best_address;
1248 0 : thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1249 :
1250 0 : if (thissad < bestsad) {
1251 0 : this_mv.as_mv.row = this_row_offset;
1252 0 : this_mv.as_mv.col = this_col_offset;
1253 0 : thissad +=
1254 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1255 :
1256 0 : if (thissad < bestsad) {
1257 0 : bestsad = thissad;
1258 0 : best_site = i;
1259 : }
1260 : }
1261 : }
1262 0 : i++;
1263 : }
1264 : }
1265 :
1266 0 : if (best_site != last_site) {
1267 0 : best_mv->as_mv.row += ss[best_site].mv.row;
1268 0 : best_mv->as_mv.col += ss[best_site].mv.col;
1269 0 : best_address += ss[best_site].offset;
1270 0 : last_site = best_site;
1271 0 : } else if (best_address == in_what) {
1272 0 : (*num00)++;
1273 : }
1274 : }
1275 :
1276 0 : this_mv.as_mv.row = best_mv->as_mv.row * 8;
1277 0 : this_mv.as_mv.col = best_mv->as_mv.col * 8;
1278 :
1279 0 : return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1280 0 : mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1281 : }
1282 :
1283 0 : int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1284 : int sad_per_bit, int distance,
1285 : vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1286 : int_mv *center_mv) {
1287 0 : unsigned char *what = (*(b->base_src) + b->src);
1288 0 : int what_stride = b->src_stride;
1289 : unsigned char *in_what;
1290 0 : int pre_stride = x->e_mbd.pre.y_stride;
1291 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1292 0 : int in_what_stride = pre_stride;
1293 0 : int mv_stride = pre_stride;
1294 : unsigned char *bestaddress;
1295 0 : int_mv *best_mv = &d->bmi.mv;
1296 : int_mv this_mv;
1297 : unsigned int bestsad;
1298 : unsigned int thissad;
1299 : int r, c;
1300 :
1301 : unsigned char *check_here;
1302 :
1303 0 : int ref_row = ref_mv->as_mv.row;
1304 0 : int ref_col = ref_mv->as_mv.col;
1305 :
1306 0 : int row_min = ref_row - distance;
1307 0 : int row_max = ref_row + distance;
1308 0 : int col_min = ref_col - distance;
1309 0 : int col_max = ref_col + distance;
1310 :
1311 : int *mvsadcost[2];
1312 : int_mv fcenter_mv;
1313 :
1314 0 : mvsadcost[0] = x->mvsadcost[0];
1315 0 : mvsadcost[1] = x->mvsadcost[1];
1316 0 : fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1317 0 : fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1318 :
1319 : /* Work out the mid point for the search */
1320 0 : in_what = base_pre + d->offset;
1321 0 : bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1322 :
1323 0 : best_mv->as_mv.row = ref_row;
1324 0 : best_mv->as_mv.col = ref_col;
1325 :
1326 : /* Baseline value at the centre */
1327 0 : bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1328 0 : mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1329 :
1330 : /* Apply further limits to prevent us looking using vectors that
1331 : * stretch beyiond the UMV border
1332 : */
1333 0 : if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1334 :
1335 0 : if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1336 :
1337 0 : if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1338 :
1339 0 : if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1340 :
1341 0 : for (r = row_min; r < row_max; ++r) {
1342 0 : this_mv.as_mv.row = r;
1343 0 : check_here = r * mv_stride + in_what + col_min;
1344 :
1345 0 : for (c = col_min; c < col_max; ++c) {
1346 0 : thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1347 :
1348 0 : this_mv.as_mv.col = c;
1349 0 : thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1350 :
1351 0 : if (thissad < bestsad) {
1352 0 : bestsad = thissad;
1353 0 : best_mv->as_mv.row = r;
1354 0 : best_mv->as_mv.col = c;
1355 0 : bestaddress = check_here;
1356 : }
1357 :
1358 0 : check_here++;
1359 : }
1360 : }
1361 :
1362 0 : this_mv.as_mv.row = best_mv->as_mv.row << 3;
1363 0 : this_mv.as_mv.col = best_mv->as_mv.col << 3;
1364 :
1365 0 : return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1366 0 : mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1367 : }
1368 :
1369 0 : int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1370 : int sad_per_bit, int distance,
1371 : vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1372 : int_mv *center_mv) {
1373 0 : unsigned char *what = (*(b->base_src) + b->src);
1374 0 : int what_stride = b->src_stride;
1375 : unsigned char *in_what;
1376 0 : int pre_stride = x->e_mbd.pre.y_stride;
1377 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1378 0 : int in_what_stride = pre_stride;
1379 0 : int mv_stride = pre_stride;
1380 : unsigned char *bestaddress;
1381 0 : int_mv *best_mv = &d->bmi.mv;
1382 : int_mv this_mv;
1383 : unsigned int bestsad;
1384 : unsigned int thissad;
1385 : int r, c;
1386 :
1387 : unsigned char *check_here;
1388 :
1389 0 : int ref_row = ref_mv->as_mv.row;
1390 0 : int ref_col = ref_mv->as_mv.col;
1391 :
1392 0 : int row_min = ref_row - distance;
1393 0 : int row_max = ref_row + distance;
1394 0 : int col_min = ref_col - distance;
1395 0 : int col_max = ref_col + distance;
1396 :
1397 : unsigned int sad_array[3];
1398 :
1399 : int *mvsadcost[2];
1400 : int_mv fcenter_mv;
1401 :
1402 0 : mvsadcost[0] = x->mvsadcost[0];
1403 0 : mvsadcost[1] = x->mvsadcost[1];
1404 0 : fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1405 0 : fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1406 :
1407 : /* Work out the mid point for the search */
1408 0 : in_what = base_pre + d->offset;
1409 0 : bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1410 :
1411 0 : best_mv->as_mv.row = ref_row;
1412 0 : best_mv->as_mv.col = ref_col;
1413 :
1414 : /* Baseline value at the centre */
1415 0 : bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1416 0 : mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1417 :
1418 : /* Apply further limits to prevent us looking using vectors that stretch
1419 : * beyond the UMV border
1420 : */
1421 0 : if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1422 :
1423 0 : if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1424 :
1425 0 : if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1426 :
1427 0 : if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1428 :
1429 0 : for (r = row_min; r < row_max; ++r) {
1430 0 : this_mv.as_mv.row = r;
1431 0 : check_here = r * mv_stride + in_what + col_min;
1432 0 : c = col_min;
1433 :
1434 0 : while ((c + 2) < col_max) {
1435 : int i;
1436 :
1437 0 : fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1438 :
1439 0 : for (i = 0; i < 3; ++i) {
1440 0 : thissad = sad_array[i];
1441 :
1442 0 : if (thissad < bestsad) {
1443 0 : this_mv.as_mv.col = c;
1444 0 : thissad +=
1445 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1446 :
1447 0 : if (thissad < bestsad) {
1448 0 : bestsad = thissad;
1449 0 : best_mv->as_mv.row = r;
1450 0 : best_mv->as_mv.col = c;
1451 0 : bestaddress = check_here;
1452 : }
1453 : }
1454 :
1455 0 : check_here++;
1456 0 : c++;
1457 : }
1458 : }
1459 :
1460 0 : while (c < col_max) {
1461 0 : thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1462 :
1463 0 : if (thissad < bestsad) {
1464 0 : this_mv.as_mv.col = c;
1465 0 : thissad +=
1466 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1467 :
1468 0 : if (thissad < bestsad) {
1469 0 : bestsad = thissad;
1470 0 : best_mv->as_mv.row = r;
1471 0 : best_mv->as_mv.col = c;
1472 0 : bestaddress = check_here;
1473 : }
1474 : }
1475 :
1476 0 : check_here++;
1477 0 : c++;
1478 : }
1479 : }
1480 :
1481 0 : this_mv.as_mv.row = best_mv->as_mv.row << 3;
1482 0 : this_mv.as_mv.col = best_mv->as_mv.col << 3;
1483 :
1484 0 : return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1485 0 : mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1486 : }
1487 :
1488 0 : int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1489 : int sad_per_bit, int distance,
1490 : vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1491 : int_mv *center_mv) {
1492 0 : unsigned char *what = (*(b->base_src) + b->src);
1493 0 : int what_stride = b->src_stride;
1494 0 : int pre_stride = x->e_mbd.pre.y_stride;
1495 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1496 : unsigned char *in_what;
1497 0 : int in_what_stride = pre_stride;
1498 0 : int mv_stride = pre_stride;
1499 : unsigned char *bestaddress;
1500 0 : int_mv *best_mv = &d->bmi.mv;
1501 : int_mv this_mv;
1502 : unsigned int bestsad;
1503 : unsigned int thissad;
1504 : int r, c;
1505 :
1506 : unsigned char *check_here;
1507 :
1508 0 : int ref_row = ref_mv->as_mv.row;
1509 0 : int ref_col = ref_mv->as_mv.col;
1510 :
1511 0 : int row_min = ref_row - distance;
1512 0 : int row_max = ref_row + distance;
1513 0 : int col_min = ref_col - distance;
1514 0 : int col_max = ref_col + distance;
1515 :
1516 : DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
1517 : unsigned int sad_array[3];
1518 :
1519 : int *mvsadcost[2];
1520 : int_mv fcenter_mv;
1521 :
1522 0 : mvsadcost[0] = x->mvsadcost[0];
1523 0 : mvsadcost[1] = x->mvsadcost[1];
1524 0 : fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1525 0 : fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1526 :
1527 : /* Work out the mid point for the search */
1528 0 : in_what = base_pre + d->offset;
1529 0 : bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1530 :
1531 0 : best_mv->as_mv.row = ref_row;
1532 0 : best_mv->as_mv.col = ref_col;
1533 :
1534 : /* Baseline value at the centre */
1535 0 : bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1536 0 : mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1537 :
1538 : /* Apply further limits to prevent us looking using vectors that stretch
1539 : * beyond the UMV border
1540 : */
1541 0 : if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1542 :
1543 0 : if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1544 :
1545 0 : if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1546 :
1547 0 : if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1548 :
1549 0 : for (r = row_min; r < row_max; ++r) {
1550 0 : this_mv.as_mv.row = r;
1551 0 : check_here = r * mv_stride + in_what + col_min;
1552 0 : c = col_min;
1553 :
1554 0 : while ((c + 7) < col_max) {
1555 : int i;
1556 :
1557 0 : fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
1558 :
1559 0 : for (i = 0; i < 8; ++i) {
1560 0 : thissad = sad_array8[i];
1561 :
1562 0 : if (thissad < bestsad) {
1563 0 : this_mv.as_mv.col = c;
1564 0 : thissad +=
1565 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1566 :
1567 0 : if (thissad < bestsad) {
1568 0 : bestsad = thissad;
1569 0 : best_mv->as_mv.row = r;
1570 0 : best_mv->as_mv.col = c;
1571 0 : bestaddress = check_here;
1572 : }
1573 : }
1574 :
1575 0 : check_here++;
1576 0 : c++;
1577 : }
1578 : }
1579 :
1580 0 : while ((c + 2) < col_max) {
1581 : int i;
1582 :
1583 0 : fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
1584 :
1585 0 : for (i = 0; i < 3; ++i) {
1586 0 : thissad = sad_array[i];
1587 :
1588 0 : if (thissad < bestsad) {
1589 0 : this_mv.as_mv.col = c;
1590 0 : thissad +=
1591 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1592 :
1593 0 : if (thissad < bestsad) {
1594 0 : bestsad = thissad;
1595 0 : best_mv->as_mv.row = r;
1596 0 : best_mv->as_mv.col = c;
1597 0 : bestaddress = check_here;
1598 : }
1599 : }
1600 :
1601 0 : check_here++;
1602 0 : c++;
1603 : }
1604 : }
1605 :
1606 0 : while (c < col_max) {
1607 0 : thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1608 :
1609 0 : if (thissad < bestsad) {
1610 0 : this_mv.as_mv.col = c;
1611 0 : thissad +=
1612 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1613 :
1614 0 : if (thissad < bestsad) {
1615 0 : bestsad = thissad;
1616 0 : best_mv->as_mv.row = r;
1617 0 : best_mv->as_mv.col = c;
1618 0 : bestaddress = check_here;
1619 : }
1620 : }
1621 :
1622 0 : check_here++;
1623 0 : c++;
1624 : }
1625 : }
1626 :
1627 0 : this_mv.as_mv.row = best_mv->as_mv.row * 8;
1628 0 : this_mv.as_mv.col = best_mv->as_mv.col * 8;
1629 :
1630 0 : return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1631 0 : mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1632 : }
1633 :
1634 0 : int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1635 : int_mv *ref_mv, int error_per_bit,
1636 : int search_range, vp8_variance_fn_ptr_t *fn_ptr,
1637 : int *mvcost[2], int_mv *center_mv) {
1638 0 : MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
1639 : int i, j;
1640 : short this_row_offset, this_col_offset;
1641 :
1642 0 : int what_stride = b->src_stride;
1643 0 : int pre_stride = x->e_mbd.pre.y_stride;
1644 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1645 0 : int in_what_stride = pre_stride;
1646 0 : unsigned char *what = (*(b->base_src) + b->src);
1647 0 : unsigned char *best_address =
1648 0 : (unsigned char *)(base_pre + d->offset +
1649 0 : (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
1650 : unsigned char *check_here;
1651 : int_mv this_mv;
1652 : unsigned int bestsad;
1653 : unsigned int thissad;
1654 :
1655 : int *mvsadcost[2];
1656 : int_mv fcenter_mv;
1657 :
1658 0 : mvsadcost[0] = x->mvsadcost[0];
1659 0 : mvsadcost[1] = x->mvsadcost[1];
1660 0 : fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1661 0 : fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1662 :
1663 0 : bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
1664 0 : mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1665 :
1666 0 : for (i = 0; i < search_range; ++i) {
1667 0 : int best_site = -1;
1668 :
1669 0 : for (j = 0; j < 4; ++j) {
1670 0 : this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1671 0 : this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1672 :
1673 0 : if ((this_col_offset > x->mv_col_min) &&
1674 0 : (this_col_offset < x->mv_col_max) &&
1675 0 : (this_row_offset > x->mv_row_min) &&
1676 0 : (this_row_offset < x->mv_row_max)) {
1677 0 : check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
1678 : best_address;
1679 0 : thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1680 :
1681 0 : if (thissad < bestsad) {
1682 0 : this_mv.as_mv.row = this_row_offset;
1683 0 : this_mv.as_mv.col = this_col_offset;
1684 0 : thissad +=
1685 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1686 :
1687 0 : if (thissad < bestsad) {
1688 0 : bestsad = thissad;
1689 0 : best_site = j;
1690 : }
1691 : }
1692 : }
1693 : }
1694 :
1695 0 : if (best_site == -1) {
1696 0 : break;
1697 : } else {
1698 0 : ref_mv->as_mv.row += neighbors[best_site].row;
1699 0 : ref_mv->as_mv.col += neighbors[best_site].col;
1700 0 : best_address += (neighbors[best_site].row) * in_what_stride +
1701 0 : neighbors[best_site].col;
1702 : }
1703 : }
1704 :
1705 0 : this_mv.as_mv.row = ref_mv->as_mv.row << 3;
1706 0 : this_mv.as_mv.col = ref_mv->as_mv.col << 3;
1707 :
1708 0 : return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1709 0 : mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1710 : }
1711 :
1712 0 : int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
1713 : int_mv *ref_mv, int error_per_bit,
1714 : int search_range, vp8_variance_fn_ptr_t *fn_ptr,
1715 : int *mvcost[2], int_mv *center_mv) {
1716 0 : MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
1717 : int i, j;
1718 : short this_row_offset, this_col_offset;
1719 :
1720 0 : int what_stride = b->src_stride;
1721 0 : int pre_stride = x->e_mbd.pre.y_stride;
1722 0 : unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1723 0 : int in_what_stride = pre_stride;
1724 0 : unsigned char *what = (*(b->base_src) + b->src);
1725 0 : unsigned char *best_address =
1726 0 : (unsigned char *)(base_pre + d->offset +
1727 0 : (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
1728 : unsigned char *check_here;
1729 : int_mv this_mv;
1730 : unsigned int bestsad;
1731 : unsigned int thissad;
1732 :
1733 : int *mvsadcost[2];
1734 : int_mv fcenter_mv;
1735 :
1736 0 : mvsadcost[0] = x->mvsadcost[0];
1737 0 : mvsadcost[1] = x->mvsadcost[1];
1738 0 : fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1739 0 : fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1740 :
1741 0 : bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
1742 0 : mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
1743 :
1744 0 : for (i = 0; i < search_range; ++i) {
1745 0 : int best_site = -1;
1746 0 : int all_in = 1;
1747 :
1748 0 : all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
1749 0 : all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
1750 0 : all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
1751 0 : all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
1752 :
1753 0 : if (all_in) {
1754 : unsigned int sad_array[4];
1755 : const unsigned char *block_offset[4];
1756 0 : block_offset[0] = best_address - in_what_stride;
1757 0 : block_offset[1] = best_address - 1;
1758 0 : block_offset[2] = best_address + 1;
1759 0 : block_offset[3] = best_address + in_what_stride;
1760 :
1761 0 : fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1762 : sad_array);
1763 :
1764 0 : for (j = 0; j < 4; ++j) {
1765 0 : if (sad_array[j] < bestsad) {
1766 0 : this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
1767 0 : this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
1768 0 : sad_array[j] +=
1769 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1770 :
1771 0 : if (sad_array[j] < bestsad) {
1772 0 : bestsad = sad_array[j];
1773 0 : best_site = j;
1774 : }
1775 : }
1776 : }
1777 : } else {
1778 0 : for (j = 0; j < 4; ++j) {
1779 0 : this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
1780 0 : this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
1781 :
1782 0 : if ((this_col_offset > x->mv_col_min) &&
1783 0 : (this_col_offset < x->mv_col_max) &&
1784 0 : (this_row_offset > x->mv_row_min) &&
1785 0 : (this_row_offset < x->mv_row_max)) {
1786 0 : check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
1787 : best_address;
1788 0 : thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1789 :
1790 0 : if (thissad < bestsad) {
1791 0 : this_mv.as_mv.row = this_row_offset;
1792 0 : this_mv.as_mv.col = this_col_offset;
1793 0 : thissad +=
1794 0 : mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
1795 :
1796 0 : if (thissad < bestsad) {
1797 0 : bestsad = thissad;
1798 0 : best_site = j;
1799 : }
1800 : }
1801 : }
1802 : }
1803 : }
1804 :
1805 0 : if (best_site == -1) {
1806 0 : break;
1807 : } else {
1808 0 : ref_mv->as_mv.row += neighbors[best_site].row;
1809 0 : ref_mv->as_mv.col += neighbors[best_site].col;
1810 0 : best_address += (neighbors[best_site].row) * in_what_stride +
1811 0 : neighbors[best_site].col;
1812 : }
1813 : }
1814 :
1815 0 : this_mv.as_mv.row = ref_mv->as_mv.row * 8;
1816 0 : this_mv.as_mv.col = ref_mv->as_mv.col * 8;
1817 :
1818 0 : return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
1819 0 : mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1820 : }
1821 :
1822 : #ifdef VP8_ENTROPY_STATS
1823 : void print_mode_context(void) {
1824 : FILE *f = fopen("modecont.c", "w");
1825 : int i, j;
1826 :
1827 : fprintf(f, "#include \"entropy.h\"\n");
1828 : fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
1829 : fprintf(f, "{\n");
1830 :
1831 : for (j = 0; j < 6; ++j) {
1832 : fprintf(f, " { /* %d */\n", j);
1833 : fprintf(f, " ");
1834 :
1835 : for (i = 0; i < 4; ++i) {
1836 : int overal_prob;
1837 : int this_prob;
1838 : int count;
1839 :
1840 : /* Overall probs */
1841 : count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
1842 :
1843 : if (count)
1844 : overal_prob = 256 * mv_mode_cts[i][0] / count;
1845 : else
1846 : overal_prob = 128;
1847 :
1848 : if (overal_prob == 0) overal_prob = 1;
1849 :
1850 : /* context probs */
1851 : count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
1852 :
1853 : if (count)
1854 : this_prob = 256 * mv_ref_ct[j][i][0] / count;
1855 : else
1856 : this_prob = 128;
1857 :
1858 : if (this_prob == 0) this_prob = 1;
1859 :
1860 : fprintf(f, "%5d, ", this_prob);
1861 : }
1862 :
1863 : fprintf(f, " },\n");
1864 : }
1865 :
1866 : fprintf(f, "};\n");
1867 : fclose(f);
1868 : }
1869 :
1870 : /* MV ref count VP8_ENTROPY_STATS stats code */
1871 : #ifdef VP8_ENTROPY_STATS
1872 : void init_mv_ref_counts() {
1873 : memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
1874 : memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
1875 : }
1876 :
1877 : void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
1878 : if (m == ZEROMV) {
1879 : ++mv_ref_ct[ct[0]][0][0];
1880 : ++mv_mode_cts[0][0];
1881 : } else {
1882 : ++mv_ref_ct[ct[0]][0][1];
1883 : ++mv_mode_cts[0][1];
1884 :
1885 : if (m == NEARESTMV) {
1886 : ++mv_ref_ct[ct[1]][1][0];
1887 : ++mv_mode_cts[1][0];
1888 : } else {
1889 : ++mv_ref_ct[ct[1]][1][1];
1890 : ++mv_mode_cts[1][1];
1891 :
1892 : if (m == NEARMV) {
1893 : ++mv_ref_ct[ct[2]][2][0];
1894 : ++mv_mode_cts[2][0];
1895 : } else {
1896 : ++mv_ref_ct[ct[2]][2][1];
1897 : ++mv_mode_cts[2][1];
1898 :
1899 : if (m == NEWMV) {
1900 : ++mv_ref_ct[ct[3]][3][0];
1901 : ++mv_mode_cts[3][0];
1902 : } else {
1903 : ++mv_ref_ct[ct[3]][3][1];
1904 : ++mv_mode_cts[3][1];
1905 : }
1906 : }
1907 : }
1908 : }
1909 : }
1910 :
1911 : #endif /* END MV ref count VP8_ENTROPY_STATS stats code */
1912 :
1913 : #endif
|