LCOV - code coverage report
Current view: top level - media/libvpx/libvpx/vp8/encoder - mcomp.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 944 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 16 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "./vp8_rtcd.h"
      12             : #include "./vpx_dsp_rtcd.h"
      13             : #include "onyx_int.h"
      14             : #include "mcomp.h"
      15             : #include "vpx_mem/vpx_mem.h"
      16             : #include "vpx_config.h"
      17             : #include <stdio.h>
      18             : #include <limits.h>
      19             : #include <math.h>
      20             : #include "vp8/common/findnearmv.h"
      21             : #include "vp8/common/common.h"
      22             : #include "vpx_dsp/vpx_dsp_common.h"
      23             : 
      /* Counter tables that exist only in builds defining VP8_ENTROPY_STATS.
       * Nothing in the visible part of this file reads or writes them; going by
       * the names they presumably tally MV reference / mode statistics - TODO
       * confirm against the code that updates them.
       */
      24             : #ifdef VP8_ENTROPY_STATS
      25             : static int mv_ref_ct[31][4][2];
      26             : static int mv_mode_cts[4][2];
      27             : #endif
      28             : 
      29           0 : int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
      30             :   /* MV costing is based on the distribution of vectors in the previous
      31             :    * frame and as such will tend to over state the cost of vectors. In
      32             :    * addition coding a new vector can have a knock on effect on the cost
      33             :    * of subsequent vectors and the quality of prediction from NEAR and
      34             :    * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
      35             :    * limited extent, for some account to be taken of these factors.
      36             :    */
      37           0 :   return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
      38           0 :            mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
      39           0 :           Weight) >>
      40             :          7;
      41             : }
      42             : 
      43           0 : static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
      44             :                        int error_per_bit) {
      45             :   /* Ignore mv costing if mvcost is NULL */
      46           0 :   if (mvcost) {
      47           0 :     return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
      48           0 :              mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
      49           0 :                 error_per_bit +
      50           0 :             128) >>
      51             :            8;
      52             :   }
      53           0 :   return 0;
      54             : }
      55             : 
      56           0 : static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
      57             :                           int error_per_bit) {
      58             :   /* Calculate sad error cost on full pixel basis. */
      59             :   /* Ignore mv costing if mvsadcost is NULL */
      60           0 :   if (mvsadcost) {
      61           0 :     return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
      62           0 :              mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
      63           0 :                 error_per_bit +
      64           0 :             128) >>
      65             :            8;
      66             :   }
      67           0 :   return 0;
      68             : }
      69             : 
      70           0 : void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
      71             :   int Len;
      72           0 :   int search_site_count = 0;
      73             : 
      74             :   /* Generate offsets for 4 search sites per step. */
      75           0 :   Len = MAX_FIRST_STEP;
      76           0 :   x->ss[search_site_count].mv.col = 0;
      77           0 :   x->ss[search_site_count].mv.row = 0;
      78           0 :   x->ss[search_site_count].offset = 0;
      79           0 :   search_site_count++;
      80             : 
      81           0 :   while (Len > 0) {
      82             :     /* Compute offsets for search sites. */
      83           0 :     x->ss[search_site_count].mv.col = 0;
      84           0 :     x->ss[search_site_count].mv.row = -Len;
      85           0 :     x->ss[search_site_count].offset = -Len * stride;
      86           0 :     search_site_count++;
      87             : 
      88             :     /* Compute offsets for search sites. */
      89           0 :     x->ss[search_site_count].mv.col = 0;
      90           0 :     x->ss[search_site_count].mv.row = Len;
      91           0 :     x->ss[search_site_count].offset = Len * stride;
      92           0 :     search_site_count++;
      93             : 
      94             :     /* Compute offsets for search sites. */
      95           0 :     x->ss[search_site_count].mv.col = -Len;
      96           0 :     x->ss[search_site_count].mv.row = 0;
      97           0 :     x->ss[search_site_count].offset = -Len;
      98           0 :     search_site_count++;
      99             : 
     100             :     /* Compute offsets for search sites. */
     101           0 :     x->ss[search_site_count].mv.col = Len;
     102           0 :     x->ss[search_site_count].mv.row = 0;
     103           0 :     x->ss[search_site_count].offset = Len;
     104           0 :     search_site_count++;
     105             : 
     106             :     /* Contract. */
     107           0 :     Len /= 2;
     108             :   }
     109             : 
     110           0 :   x->ss_count = search_site_count;
     111           0 :   x->searches_per_step = 4;
     112           0 : }
     113             : 
     114           0 : void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
     115             :   int Len;
     116           0 :   int search_site_count = 0;
     117             : 
     118             :   /* Generate offsets for 8 search sites per step. */
     119           0 :   Len = MAX_FIRST_STEP;
     120           0 :   x->ss[search_site_count].mv.col = 0;
     121           0 :   x->ss[search_site_count].mv.row = 0;
     122           0 :   x->ss[search_site_count].offset = 0;
     123           0 :   search_site_count++;
     124             : 
     125           0 :   while (Len > 0) {
     126             :     /* Compute offsets for search sites. */
     127           0 :     x->ss[search_site_count].mv.col = 0;
     128           0 :     x->ss[search_site_count].mv.row = -Len;
     129           0 :     x->ss[search_site_count].offset = -Len * stride;
     130           0 :     search_site_count++;
     131             : 
     132             :     /* Compute offsets for search sites. */
     133           0 :     x->ss[search_site_count].mv.col = 0;
     134           0 :     x->ss[search_site_count].mv.row = Len;
     135           0 :     x->ss[search_site_count].offset = Len * stride;
     136           0 :     search_site_count++;
     137             : 
     138             :     /* Compute offsets for search sites. */
     139           0 :     x->ss[search_site_count].mv.col = -Len;
     140           0 :     x->ss[search_site_count].mv.row = 0;
     141           0 :     x->ss[search_site_count].offset = -Len;
     142           0 :     search_site_count++;
     143             : 
     144             :     /* Compute offsets for search sites. */
     145           0 :     x->ss[search_site_count].mv.col = Len;
     146           0 :     x->ss[search_site_count].mv.row = 0;
     147           0 :     x->ss[search_site_count].offset = Len;
     148           0 :     search_site_count++;
     149             : 
     150             :     /* Compute offsets for search sites. */
     151           0 :     x->ss[search_site_count].mv.col = -Len;
     152           0 :     x->ss[search_site_count].mv.row = -Len;
     153           0 :     x->ss[search_site_count].offset = -Len * stride - Len;
     154           0 :     search_site_count++;
     155             : 
     156             :     /* Compute offsets for search sites. */
     157           0 :     x->ss[search_site_count].mv.col = Len;
     158           0 :     x->ss[search_site_count].mv.row = -Len;
     159           0 :     x->ss[search_site_count].offset = -Len * stride + Len;
     160           0 :     search_site_count++;
     161             : 
     162             :     /* Compute offsets for search sites. */
     163           0 :     x->ss[search_site_count].mv.col = -Len;
     164           0 :     x->ss[search_site_count].mv.row = Len;
     165           0 :     x->ss[search_site_count].offset = Len * stride - Len;
     166           0 :     search_site_count++;
     167             : 
     168             :     /* Compute offsets for search sites. */
     169           0 :     x->ss[search_site_count].mv.col = Len;
     170           0 :     x->ss[search_site_count].mv.row = Len;
     171           0 :     x->ss[search_site_count].offset = Len * stride + Len;
     172           0 :     search_site_count++;
     173             : 
     174             :     /* Contract. */
     175           0 :     Len /= 2;
     176             :   }
     177             : 
     178           0 :   x->ss_count = search_site_count;
     179           0 :   x->searches_per_step = 8;
     180           0 : }
     181             : 
     182             : /*
     183             :  * To avoid the penalty of reads that cross a cache line, the reference area
     184             :  * is preloaded into a small buffer that is aligned so that reads from it
     185             :  * never straddle a cache line. This reduces the CPU cycles spent reading
     186             :  * reference data in the sub-pixel filter functions.
     187             :  * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a 22 rows
     188             :  * x 32 cols area is copied, which is enough for a 16x16 macroblock. Later, for
     189             :  * SPLITMV, the area could be reduced.
     190             :  */
     191             : 
     192             : /* Helper macros for the iterative sub-pixel search defined right after them.
                        * None of them is self-contained: the expansions read and write locals of
                        * the enclosing function (mvcost, rr, rc, error_per_bit, y, y_stride,
                        * offset, vfp, z, b, sse, minc, maxc, minr, maxr, besterr, br, bc,
                        * thismse, distortion, sse1).
                        *
                        * MVC: estimated cost of a motion vector (r,c). Evaluates to 0 when mvcost
                        * is NULL; otherwise the table entries at (r - rr) and (c - rc) are
                        * summed, multiplied by error_per_bit, rounded with +128 and shifted
                        * right by 8. */
     193             : #define MVC(r, c)                                                             \
     194             :   (mvcost                                                                     \
     195             :        ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
     196             :        : 0)
     197             : /* PRE: pointer to the predictor base of motion vector (r,c). r and c are
                        * shifted right by 2 to address whole samples relative to y, and `offset`
                        * is subtracted from the result. */
     198             : #define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
     199             : /* SP: convert a motion vector component to the offset passed to the svf
                        * call (the low two bits of x, doubled). */
     200             : #define SP(x) (((x)&3) << 1)
     201             : /* DIST: value returned by the sub-pixel variance function vfp->svf for the
                        * predictor at (r,c) against the source block z. The call is handed &sse,
                        * presumably to receive the SSE - CHECK_BETTER copies sse to *sse1. */
     202             : #define DIST(r, c) \
     203             :   vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
                       /* IFMVCV: execute `s` when (r,c) lies inside [minr, maxr] x [minc, maxc],
                        * otherwise execute `e`. Expands to a bare if/else followed by ';', and
                        * r and c are substituted without parentheses. */
     204             : #define IFMVCV(r, c, s, e) \
     205             :   if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
     206             : /* ERR: distortion + motion vector cost of (r,c). Not referenced by the
                        * function that follows; CHECK_BETTER spells the sum out itself. */
     207             : #define ERR(r, c) (MVC(r, c) + DIST(r, c))
     208             : /* CHECK_BETTER: score (r,c) into v and keep it if it beats the best so far.
                        * In range: thismse = DIST, v = MVC + thismse, and when v < besterr the
                        * locals besterr/br/bc and the outputs *distortion/*sse1 are updated.
                        * Out of range: v is set to UINT_MAX so the point can never be preferred
                        * in the later left/right and up/down comparisons. */
     209             : #define CHECK_BETTER(v, r, c)                           \
     210             :   IFMVCV(r, c,                                          \
     211             :          {                                              \
     212             :            thismse = DIST(r, c);                        \
     213             :            if ((v = (MVC(r, c) + thismse)) < besterr) { \
     214             :              besterr = v;                               \
     215             :              br = r;                                    \
     216             :              bc = c;                                    \
     217             :              *distortion = thismse;                     \
     218             :              *sse1 = sse;                               \
     219             :            }                                            \
     220             :          },                                             \
     221             :          v = UINT_MAX;)
     222             : 
     /* Iteratively refine the vector in *bestmv around its starting position.
      *
      * On entry bestmv's row/col are scaled by 4 into the working coordinates
      * br/bc. Up to three passes step by 2 (the "1/2 pel" loop) and then up to
      * three passes step by 1 (the "1/4 pel" loop). Each pass probes left, right,
      * up and down of the current centre plus the one diagonal lying between the
      * cheaper horizontal and the cheaper vertical neighbour; a loop ends early
      * when a pass does not move the best position.
      *
      * Outputs: *bestmv receives the winning position (br * 2, bc * 2).
      * *distortion is set from the centre evaluation and sse1 is handed to
      * vfp->vf for it; both are overwritten whenever CHECK_BETTER accepts a
      * candidate.
      *
      * Returns besterr (distortion plus MV cost term), or INT_MAX when the
      * result differs from ref_mv by more than MAX_FULL_PEL_VAL << 3 in either
      * component.
      *
      * When ARCH_X86 or ARCH_X86_64 is set, the reference area is first copied
      * into xd->y_buf (stride 32) with vfp->copymem and the search reads from
      * that copy; otherwise it reads the prediction buffer directly.
      */
     223           0 : int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     224             :                                              int_mv *bestmv, int_mv *ref_mv,
     225             :                                              int error_per_bit,
     226             :                                              const vp8_variance_fn_ptr_t *vfp,
     227             :                                              int *mvcost[2], int *distortion,
     228             :                                              unsigned int *sse1) {
     229           0 :   unsigned char *z = (*(b->base_src) + b->src);
     230             : 
                         /* rr/rc: reference vector halved (used by MVC). br/bc: best position so
                          * far, starting at bestmv * 4. tr/tc: centre of the current pass. */
     231           0 :   int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
     232           0 :   int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
     233           0 :   int tr = br, tc = bc;
     234             :   unsigned int besterr;
     235             :   unsigned int left, right, up, down, diag;
     236             :   unsigned int sse;
     237             :   unsigned int whichdir;
                         /* Both loops are written as while (--n), so a start value of 4 allows at
                          * most 3 passes each. */
     238           0 :   unsigned int halfiters = 4;
     239           0 :   unsigned int quarteriters = 4;
     240             :   int thismse;
     241             : 
                         /* Window tested by IFMVCV: the tighter of the x->mv_*_min/max limits
                          * (scaled by 4) and a band of (1 << mvlong_width) - 1 on either side of
                          * the halved reference vector. */
     242           0 :   int minc = VPXMAX(x->mv_col_min * 4,
     243             :                     (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
     244           0 :   int maxc = VPXMIN(x->mv_col_max * 4,
     245             :                     (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
     246           0 :   int minr = VPXMAX(x->mv_row_min * 4,
     247             :                     (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
     248           0 :   int maxr = VPXMIN(x->mv_row_max * 4,
     249             :                     (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
     250             : 
     251             :   int y_stride;
     252             :   int offset;
     253           0 :   int pre_stride = x->e_mbd.pre.y_stride;
     254           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
     255             : 
     256             : #if ARCH_X86 || ARCH_X86_64
     257           0 :   MACROBLOCKD *xd = &x->e_mbd;
                         /* y_0: predictor position for the starting vector in the real buffer. */
     258           0 :   unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
     259           0 :                        bestmv->as_mv.col;
     260             :   unsigned char *y;
     261             :   int buf_r1, buf_r2, buf_c1;
     262             : 
     263             :   /* Clamping to avoid out-of-range data access */
                         /* buf_r1 / buf_r2 / buf_c1: rows above, rows below and columns to the
                          * left of the block that are included in the copy - 3, or the distance
                          * to the matching x->mv_* limit when bestmv is closer than 3 to it. */
     264           0 :   buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
     265           0 :                ? (bestmv->as_mv.row - x->mv_row_min)
     266           0 :                : 3;
     267           0 :   buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
     268           0 :                ? (x->mv_row_max - bestmv->as_mv.row)
     269           0 :                : 3;
     270           0 :   buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
     271           0 :                ? (bestmv->as_mv.col - x->mv_col_min)
     272           0 :                : 3;
     273           0 :   y_stride = 32;
     274             : 
     275             :   /* Copy to intermediate buffer before searching. */
     276           0 :   vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
     277           0 :                y_stride, 16 + buf_r1 + buf_r2);
                         /* y addresses the same sample as y_0, but inside the copy. */
     278           0 :   y = xd->y_buf + y_stride * buf_r1 + buf_c1;
     279             : #else
     280             :   unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
     281             :                      bestmv->as_mv.col;
     282             :   y_stride = pre_stride;
     283             : #endif
     284             : 
                         /* Position of the starting vector in y_stride units. PRE() subtracts it,
                          * so PRE(br, bc) for the unmodified start evaluates to y itself. */
     285           0 :   offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
     286             : 
     287             :   /* central mv */
                         /* Scaled by 8 in place before being passed to mv_err_cost() below. */
     288           0 :   bestmv->as_mv.row *= 8;
     289           0 :   bestmv->as_mv.col *= 8;
     290             : 
     291             :   /* calculate central point error */
     292           0 :   besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
     293           0 :   *distortion = besterr;
     294           0 :   besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
     295             : 
     296             :   /* TODO: Each subsequent iteration checks at least one point in common
     297             :    * with the last iteration could be 2 ( if diag selected)
     298             :    */
     299           0 :   while (--halfiters) {
     300             :     /* 1/2 pel */
     301           0 :     CHECK_BETTER(left, tr, tc - 2);
     302           0 :     CHECK_BETTER(right, tr, tc + 2);
     303           0 :     CHECK_BETTER(up, tr - 2, tc);
     304           0 :     CHECK_BETTER(down, tr + 2, tc);
     305             : 
                           /* bit 0 set: right is no worse than left; bit 1 set: down is no worse
                            * than up. */
     306           0 :     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
     307             : 
                           /* Probe only the diagonal between the two preferred directions. */
     308           0 :     switch (whichdir) {
     309           0 :       case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
     310           0 :       case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
     311           0 :       case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
     312           0 :       case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
     313             :     }
     314             : 
     315             :     /* no reason to check the same one again. */
     316           0 :     if (tr == br && tc == bc) break;
     317             : 
                           /* Re-centre the next pass on the new best position. */
     318           0 :     tr = br;
     319           0 :     tc = bc;
     320             :   }
     321             : 
     322             :   /* TODO: Each subsequent iteration checks at least one point in common
     323             :    * with the last iteration could be 2 ( if diag selected)
     324             :    */
     325             : 
     326             :   /* 1/4 pel */
                         /* Same procedure as the loop above with a step of 1 instead of 2. */
     327           0 :   while (--quarteriters) {
     328           0 :     CHECK_BETTER(left, tr, tc - 1);
     329           0 :     CHECK_BETTER(right, tr, tc + 1);
     330           0 :     CHECK_BETTER(up, tr - 1, tc);
     331           0 :     CHECK_BETTER(down, tr + 1, tc);
     332             : 
     333           0 :     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
     334             : 
     335           0 :     switch (whichdir) {
     336           0 :       case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
     337           0 :       case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
     338           0 :       case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
     339           0 :       case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
     340             :     }
     341             : 
     342             :     /* no reason to check the same one again. */
     343           0 :     if (tr == br && tc == bc) break;
     344             : 
     345           0 :     tr = br;
     346           0 :     tc = bc;
     347             :   }
     348             : 
                         /* br/bc started at 4x the incoming bestmv while bestmv itself was
                          * rescaled to 8x above, so the result is doubled when stored back. */
     349           0 :   bestmv->as_mv.row = br * 2;
     350           0 :   bestmv->as_mv.col = bc * 2;
     351             : 
                         /* Reject a result that ended up more than MAX_FULL_PEL_VAL << 3 away
                          * from ref_mv in either component. */
     352           0 :   if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
     353           0 :       (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
     354           0 :     return INT_MAX;
     355             :   }
     356             : 
                         /* besterr is unsigned int; it is converted to int on return. */
     357           0 :   return besterr;
     358             : }
                       /* The helper macros are only meant for the function above. */
     359             : #undef MVC
     360             : #undef PRE
     361             : #undef SP
     362             : #undef DIST
     363             : #undef IFMVCV
     364             : #undef ERR
     365             : #undef CHECK_BETTER
     366             : 
     367           0 : int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     368             :                                  int_mv *bestmv, int_mv *ref_mv,
     369             :                                  int error_per_bit,
     370             :                                  const vp8_variance_fn_ptr_t *vfp,
     371             :                                  int *mvcost[2], int *distortion,
     372             :                                  unsigned int *sse1) {
     373           0 :   int bestmse = INT_MAX;
     374             :   int_mv startmv;
     375             :   int_mv this_mv;
     376           0 :   unsigned char *z = (*(b->base_src) + b->src);
     377             :   int left, right, up, down, diag;
     378             :   unsigned int sse;
     379             :   int whichdir;
     380             :   int thismse;
     381             :   int y_stride;
     382           0 :   int pre_stride = x->e_mbd.pre.y_stride;
     383           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
     384             : 
     385             : #if ARCH_X86 || ARCH_X86_64
     386           0 :   MACROBLOCKD *xd = &x->e_mbd;
     387           0 :   unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
     388           0 :                        bestmv->as_mv.col;
     389             :   unsigned char *y;
     390             : 
     391           0 :   y_stride = 32;
     392             :   /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
     393           0 :   vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
     394           0 :   y = xd->y_buf + y_stride + 1;
     395             : #else
     396             :   unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
     397             :                      bestmv->as_mv.col;
     398             :   y_stride = pre_stride;
     399             : #endif
     400             : 
     401             :   /* central mv */
     402           0 :   bestmv->as_mv.row *= 8;
     403           0 :   bestmv->as_mv.col *= 8;
     404           0 :   startmv = *bestmv;
     405             : 
     406             :   /* calculate central point error */
     407           0 :   bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
     408           0 :   *distortion = bestmse;
     409           0 :   bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
     410             : 
     411             :   /* go left then right and check error */
     412           0 :   this_mv.as_mv.row = startmv.as_mv.row;
     413           0 :   this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
     414             :   /* "halfpix" horizontal variance */
     415           0 :   thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
     416           0 :   left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     417             : 
     418           0 :   if (left < bestmse) {
     419           0 :     *bestmv = this_mv;
     420           0 :     bestmse = left;
     421           0 :     *distortion = thismse;
     422           0 :     *sse1 = sse;
     423             :   }
     424             : 
     425           0 :   this_mv.as_mv.col += 8;
     426             :   /* "halfpix" horizontal variance */
     427           0 :   thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
     428           0 :   right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     429             : 
     430           0 :   if (right < bestmse) {
     431           0 :     *bestmv = this_mv;
     432           0 :     bestmse = right;
     433           0 :     *distortion = thismse;
     434           0 :     *sse1 = sse;
     435             :   }
     436             : 
     437             :   /* go up then down and check error */
     438           0 :   this_mv.as_mv.col = startmv.as_mv.col;
     439           0 :   this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
     440             :   /* "halfpix" vertical variance */
     441           0 :   thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
     442           0 :   up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     443             : 
     444           0 :   if (up < bestmse) {
     445           0 :     *bestmv = this_mv;
     446           0 :     bestmse = up;
     447           0 :     *distortion = thismse;
     448           0 :     *sse1 = sse;
     449             :   }
     450             : 
     451           0 :   this_mv.as_mv.row += 8;
     452             :   /* "halfpix" vertical variance */
     453           0 :   thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
     454           0 :   down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     455             : 
     456           0 :   if (down < bestmse) {
     457           0 :     *bestmv = this_mv;
     458           0 :     bestmse = down;
     459           0 :     *distortion = thismse;
     460           0 :     *sse1 = sse;
     461             :   }
     462             : 
     463             :   /* now check 1 more diagonal */
     464           0 :   whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
     465           0 :   this_mv = startmv;
     466             : 
     467           0 :   switch (whichdir) {
     468             :     case 0:
     469           0 :       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
     470           0 :       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
     471             :       /* "halfpix" horizontal/vertical variance */
     472           0 :       thismse =
     473           0 :           vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
     474           0 :       break;
     475             :     case 1:
     476           0 :       this_mv.as_mv.col += 4;
     477           0 :       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
     478             :       /* "halfpix" horizontal/vertical variance */
     479           0 :       thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
     480           0 :       break;
     481             :     case 2:
     482           0 :       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
     483           0 :       this_mv.as_mv.row += 4;
     484             :       /* "halfpix" horizontal/vertical variance */
     485           0 :       thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
     486           0 :       break;
     487             :     case 3:
     488             :     default:
     489           0 :       this_mv.as_mv.col += 4;
     490           0 :       this_mv.as_mv.row += 4;
     491             :       /* "halfpix" horizontal/vertical variance */
     492           0 :       thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
     493           0 :       break;
     494             :   }
     495             : 
     496           0 :   diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     497             : 
     498           0 :   if (diag < bestmse) {
     499           0 :     *bestmv = this_mv;
     500           0 :     bestmse = diag;
     501           0 :     *distortion = thismse;
     502           0 :     *sse1 = sse;
     503             :   }
     504             : 
     505             :   /* time to check quarter pels. */
     506           0 :   if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;
     507             : 
     508           0 :   if (bestmv->as_mv.col < startmv.as_mv.col) y--;
     509             : 
     510           0 :   startmv = *bestmv;
     511             : 
     512             :   /* go left then right and check error */
     513           0 :   this_mv.as_mv.row = startmv.as_mv.row;
     514             : 
     515           0 :   if (startmv.as_mv.col & 7) {
     516           0 :     this_mv.as_mv.col = startmv.as_mv.col - 2;
     517           0 :     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
     518           0 :                        this_mv.as_mv.row & 7, z, b->src_stride, &sse);
     519             :   } else {
     520           0 :     this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
     521           0 :     thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
     522             :                        b->src_stride, &sse);
     523             :   }
     524             : 
     525           0 :   left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     526             : 
     527           0 :   if (left < bestmse) {
     528           0 :     *bestmv = this_mv;
     529           0 :     bestmse = left;
     530           0 :     *distortion = thismse;
     531           0 :     *sse1 = sse;
     532             :   }
     533             : 
     534           0 :   this_mv.as_mv.col += 4;
     535           0 :   thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
     536             :                      z, b->src_stride, &sse);
     537           0 :   right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     538             : 
     539           0 :   if (right < bestmse) {
     540           0 :     *bestmv = this_mv;
     541           0 :     bestmse = right;
     542           0 :     *distortion = thismse;
     543           0 :     *sse1 = sse;
     544             :   }
     545             : 
     546             :   /* go up then down and check error */
     547           0 :   this_mv.as_mv.col = startmv.as_mv.col;
     548             : 
     549           0 :   if (startmv.as_mv.row & 7) {
     550           0 :     this_mv.as_mv.row = startmv.as_mv.row - 2;
     551           0 :     thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
     552           0 :                        this_mv.as_mv.row & 7, z, b->src_stride, &sse);
     553             :   } else {
     554           0 :     this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
     555           0 :     thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
     556             :                        b->src_stride, &sse);
     557             :   }
     558             : 
     559           0 :   up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     560             : 
     561           0 :   if (up < bestmse) {
     562           0 :     *bestmv = this_mv;
     563           0 :     bestmse = up;
     564           0 :     *distortion = thismse;
     565           0 :     *sse1 = sse;
     566             :   }
     567             : 
     568           0 :   this_mv.as_mv.row += 4;
     569           0 :   thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
     570             :                      z, b->src_stride, &sse);
     571           0 :   down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     572             : 
     573           0 :   if (down < bestmse) {
     574           0 :     *bestmv = this_mv;
     575           0 :     bestmse = down;
     576           0 :     *distortion = thismse;
     577           0 :     *sse1 = sse;
     578             :   }
     579             : 
     580             :   /* now check 1 more diagonal */
     581           0 :   whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
     582             : 
     583           0 :   this_mv = startmv;
     584             : 
     585           0 :   switch (whichdir) {
     586             :     case 0:
     587             : 
     588           0 :       if (startmv.as_mv.row & 7) {
     589           0 :         this_mv.as_mv.row -= 2;
     590             : 
     591           0 :         if (startmv.as_mv.col & 7) {
     592           0 :           this_mv.as_mv.col -= 2;
     593           0 :           thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
     594           0 :                              this_mv.as_mv.row & 7, z, b->src_stride, &sse);
     595             :         } else {
     596           0 :           this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
     597           0 :           thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
     598             :                              b->src_stride, &sse);
     599             :         }
     600             :       } else {
     601           0 :         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
     602             : 
     603           0 :         if (startmv.as_mv.col & 7) {
     604           0 :           this_mv.as_mv.col -= 2;
     605           0 :           thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
     606             :                              z, b->src_stride, &sse);
     607             :         } else {
     608           0 :           this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
     609           0 :           thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
     610             :                              &sse);
     611             :         }
     612             :       }
     613             : 
     614           0 :       break;
     615             :     case 1:
     616           0 :       this_mv.as_mv.col += 2;
     617             : 
     618           0 :       if (startmv.as_mv.row & 7) {
     619           0 :         this_mv.as_mv.row -= 2;
     620           0 :         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
     621           0 :                            this_mv.as_mv.row & 7, z, b->src_stride, &sse);
     622             :       } else {
     623           0 :         this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
     624           0 :         thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
     625             :                            b->src_stride, &sse);
     626             :       }
     627             : 
     628           0 :       break;
     629             :     case 2:
     630           0 :       this_mv.as_mv.row += 2;
     631             : 
     632           0 :       if (startmv.as_mv.col & 7) {
     633           0 :         this_mv.as_mv.col -= 2;
     634           0 :         thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
     635           0 :                            this_mv.as_mv.row & 7, z, b->src_stride, &sse);
     636             :       } else {
     637           0 :         this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
     638           0 :         thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
     639             :                            b->src_stride, &sse);
     640             :       }
     641             : 
     642           0 :       break;
     643             :     case 3:
     644           0 :       this_mv.as_mv.col += 2;
     645           0 :       this_mv.as_mv.row += 2;
     646           0 :       thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
     647           0 :                          this_mv.as_mv.row & 7, z, b->src_stride, &sse);
     648           0 :       break;
     649             :   }
     650             : 
     651           0 :   diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     652             : 
     653           0 :   if (diag < bestmse) {
     654           0 :     *bestmv = this_mv;
     655           0 :     bestmse = diag;
     656           0 :     *distortion = thismse;
     657           0 :     *sse1 = sse;
     658             :   }
     659             : 
     660           0 :   return bestmse;
     661             : }
     662             : /* Refines a full-pel motion vector to half-pel precision.
                     :  *
                     :  * On entry *bestmv holds a full-pel vector. It is multiplied by 8 in
                     :  * place, so on return it is expressed in units where one full pel is 8
                     :  * and a half pel is 4. The centre, its left/right/up/down half-pel
                     :  * neighbours and a single diagonal are scored as variance plus
                     :  * mv_err_cost(); the cheapest candidate is kept.
                     :  *
                     :  *   x             reference plane (e_mbd.pre) and, on x86, scratch y_buf
                     :  *   b             source block: base_src, src and src_stride are read
                     :  *   d             d->offset locates the block inside the reference plane
                     :  *   bestmv        in: full-pel start vector; out: best vector found
                     :  *   ref_mv, mvcost, error_per_bit
                     :  *                 forwarded unchanged to mv_err_cost()
                     :  *   vfp           supplies vf, svf and (x86 only) copymem
                     :  *   distortion    out: variance term of the winner, without the MV cost
                     :  *   sse1          out: sse value reported by vf/svf for the winner
                     :  *
                     :  * Returns the winner's variance plus its MV cost.
                     :  */
     663           0 : int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     664             :                                   int_mv *bestmv, int_mv *ref_mv,
     665             :                                   int error_per_bit,
     666             :                                   const vp8_variance_fn_ptr_t *vfp,
     667             :                                   int *mvcost[2], int *distortion,
     668             :                                   unsigned int *sse1) {
     669           0 :   int bestmse = INT_MAX;
     670             :   int_mv startmv;
     671             :   int_mv this_mv;
     672           0 :   unsigned char *z = (*(b->base_src) + b->src);
     673             :   int left, right, up, down, diag;
     674             :   unsigned int sse;
     675             :   int whichdir;
     676             :   int thismse;
     677             :   int y_stride;
     678           0 :   int pre_stride = x->e_mbd.pre.y_stride;
     679           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
     680             : 
     681             : #if ARCH_X86 || ARCH_X86_64
     682           0 :   MACROBLOCKD *xd = &x->e_mbd;
     683           0 :   unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
     684           0 :                        bestmv->as_mv.col;
     685             :   unsigned char *y;
     686             : 
     687           0 :   y_stride = 32;
     688             :   /* Copy 18 rows x 32 cols area to intermediate buffer before searching.
                     :    * The copy starts one row above and one column left of y_0, so y
                     :    * (y_buf + y_stride + 1) addresses the copied y_0 pixel. */
     689           0 :   vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
     690           0 :   y = xd->y_buf + y_stride + 1;
     691             : #else
     692             :   unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
     693             :                      bestmv->as_mv.col;
     694             :   y_stride = pre_stride;
     695             : #endif
     696             : 
     697             :   /* central mv: scale the full-pel vector so that one full pel is 8 */
     698           0 :   bestmv->as_mv.row *= 8;
     699           0 :   bestmv->as_mv.col *= 8;
     700           0 :   startmv = *bestmv;
     701             : 
     702             :   /* calculate central point error */
     703           0 :   bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
     704           0 :   *distortion = bestmse;
     705           0 :   bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
     706             : 
     707             :   /* go left then right and check error; startmv.col is a multiple of 8
                     :    * here, so (col - 8) | 4 is the position 4 units to its left */
     708           0 :   this_mv.as_mv.row = startmv.as_mv.row;
     709           0 :   this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
     710             :   /* "halfpix" horizontal variance */
     711           0 :   thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
     712           0 :   left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     713             : 
     714           0 :   if (left < bestmse) {
     715           0 :     *bestmv = this_mv;
     716           0 :     bestmse = left;
     717           0 :     *distortion = thismse;
     718           0 :     *sse1 = sse;
     719             :   }
     720             : 
     721           0 :   this_mv.as_mv.col += 8;
     722             :   /* "halfpix" horizontal variance */
     723           0 :   thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
     724           0 :   right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     725             : 
     726           0 :   if (right < bestmse) {
     727           0 :     *bestmv = this_mv;
     728           0 :     bestmse = right;
     729           0 :     *distortion = thismse;
     730           0 :     *sse1 = sse;
     731             :   }
     732             : 
     733             :   /* go up then down and check error; same (v - 8) | 4 step, on the row */
     734           0 :   this_mv.as_mv.col = startmv.as_mv.col;
     735           0 :   this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
     736             :   /* "halfpix" vertical variance */
     737           0 :   thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
     738           0 :   up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     739             : 
     740           0 :   if (up < bestmse) {
     741           0 :     *bestmv = this_mv;
     742           0 :     bestmse = up;
     743           0 :     *distortion = thismse;
     744           0 :     *sse1 = sse;
     745             :   }
     746             : 
     747           0 :   this_mv.as_mv.row += 8;
     748             :   /* "halfpix" vertical variance */
     749           0 :   thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
     750           0 :   down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     751             : 
     752           0 :   if (down < bestmse) {
     753           0 :     *bestmv = this_mv;
     754           0 :     bestmse = down;
     755           0 :     *distortion = thismse;
     756           0 :     *sse1 = sse;
     757             :   }
     758             : 
     759             :   /* now check 1 more diagonal - whichdir bit 0 is set when right was not
                     :    * worse than left, bit 1 when down was not worse than up */
     760           0 :   whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
     761           0 :   this_mv = startmv;
     762             : 
     763           0 :   switch (whichdir) {
     764             :     case 0: /* left + up */
     765           0 :       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
     766           0 :       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
     767             :       /* "halfpix" horizontal/vertical variance */
     768           0 :       thismse =
     769           0 :           vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
     770           0 :       break;
     771             :     case 1: /* right + up */
     772           0 :       this_mv.as_mv.col += 4;
     773           0 :       this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
     774             :       /* "halfpix" horizontal/vertical variance */
     775           0 :       thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
     776           0 :       break;
     777             :     case 2: /* left + down */
     778           0 :       this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
     779           0 :       this_mv.as_mv.row += 4;
     780             :       /* "halfpix" horizontal/vertical variance */
     781           0 :       thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
     782           0 :       break;
     783             :     case 3: /* right + down */
     784             :     default:
     785           0 :       this_mv.as_mv.col += 4;
     786           0 :       this_mv.as_mv.row += 4;
     787             :       /* "halfpix" horizontal/vertical variance */
     788           0 :       thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
     789           0 :       break;
     790             :   }
     791             : 
     792           0 :   diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
     793             : 
     794           0 :   if (diag < bestmse) {
     795           0 :     *bestmv = this_mv;
     796           0 :     bestmse = diag;
     797           0 :     *distortion = thismse;
     798           0 :     *sse1 = sse;
     799             :   }
     800             : 
     801           0 :   return bestmse;
     802             : }
     803             : /* Helper macros for vp8_hex_search(). They expand in place and rely on
                     :  * that function's locals (x, br, bc, all_in, this_mv, thissad, bestsad,
                     :  * best_site, i, fcenter_mv, mvsadcost, sad_per_bit).
                     :  *
                     :  * CHECK_BOUNDS(range): sets all_in to 1 only if moving `range` in every
                     :  *   direction from (br, bc) stays within x->mv_{row,col}_{min,max}.
                     :  * CHECK_POINT: `continue`s the enclosing loop if this_mv lies outside
                     :  *   those limits, so it must be used directly inside a loop body.
                     :  * CHECK_BETTER: if the raw thissad already beats bestsad, adds the
                     :  *   mvsad_err_cost() term and, if still lower, records it in bestsad and
                     :  *   stores the loop index i in best_site.
                     :  */
     804             : #define CHECK_BOUNDS(range)                    \
     805             :   {                                            \
     806             :     all_in = 1;                                \
     807             :     all_in &= ((br - range) >= x->mv_row_min); \
     808             :     all_in &= ((br + range) <= x->mv_row_max); \
     809             :     all_in &= ((bc - range) >= x->mv_col_min); \
     810             :     all_in &= ((bc + range) <= x->mv_col_max); \
     811             :   }
     812             : 
     813             : #define CHECK_POINT                                  \
     814             :   {                                                  \
     815             :     if (this_mv.as_mv.col < x->mv_col_min) continue; \
     816             :     if (this_mv.as_mv.col > x->mv_col_max) continue; \
     817             :     if (this_mv.as_mv.row < x->mv_row_min) continue; \
     818             :     if (this_mv.as_mv.row > x->mv_row_max) continue; \
     819             :   }
     820             : 
     821             : #define CHECK_BETTER                                                     \
     822             :   {                                                                      \
     823             :     if (thissad < bestsad) {                                             \
     824             :       thissad +=                                                         \
     825             :           mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
     826             :       if (thissad < bestsad) {                                           \
     827             :         bestsad = thissad;                                               \
     828             :         best_site = i;                                                   \
     829             :       }                                                                  \
     830             :     }                                                                    \
     831             :   }
     832             : /* Follow-up check points for vp8_hex_search(). Row k lists the three
                     :  * offsets tried after the previous move used direction k; compared with
                     :  * the hex[] table in that function, next_chkpts[k][i] equals
                     :  * hex[(k + 5 + i) % 6], i.e. direction k and its two neighbours.
                     :  */
     833             : static const MV next_chkpts[6][3] = {
     834             :   { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
     835             :   { { 1, -2 }, { 2, 0 }, { 1, 2 } },    { { 2, 0 }, { 1, 2 }, { -1, 2 } },
     836             :   { { 1, 2 }, { -1, 2 }, { -2, 0 } },   { { -1, 2 }, { -2, 0 }, { -1, -2 } }
     837             : };
     838             : /* Full-pel hexagon-pattern motion search scored by SAD plus MV cost.
                     :  *
                     :  * ref_mv is clamped to the x->mv_* limits and used as the start point.
                     :  * A six-point hexagon is tested first; while some point improves on the
                     :  * best cost the centre moves there and the three points of
                     :  * next_chkpts[k] are tested (the loop runs j = 1 .. hex_range - 1).
                     :  * Finally the four 1-away neighbours are refined for at most dia_range
                     :  * rounds.
                     :  *
                     :  *   x, b, d       reference plane, source block and block offset
                     :  *   ref_mv        in/out: start vector, passed to vp8_clamp_mv()
                     :  *   best_mv       out: row/col of the best position found
                     :  *   search_param  only read when CONFIG_MULTI_RES_ENCODING is set, where
                     :  *                 larger values shrink hex_range and >= 6 skips the
                     :  *                 hexagon stage entirely
                     :  *   sad_per_bit, mvsadcost
                     :  *                 forwarded to mvsad_err_cost()
                     :  *   vfp           only vfp->sdf is used
                     :  *   mvcost        unused
                     :  *   center_mv     shifted right by 3 to form the centre passed to
                     :  *                 mvsad_err_cost()
                     :  *
                     :  * Returns bestsad, which includes the mvsad_err_cost() term.
                     :  */
     839           0 : int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
     840             :                    int_mv *best_mv, int search_param, int sad_per_bit,
     841             :                    const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
     842             :                    int *mvcost[2], int_mv *center_mv) {
     843           0 :   MV hex[6] = {
     844             :     { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
     845             :   };
     846           0 :   MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
     847             :   int i, j;
     848             : 
     849           0 :   unsigned char *what = (*(b->base_src) + b->src);
     850           0 :   int what_stride = b->src_stride;
     851           0 :   int pre_stride = x->e_mbd.pre.y_stride;
     852           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
     853             : 
     854           0 :   int in_what_stride = pre_stride;
     855             :   int br, bc;
     856             :   int_mv this_mv;
     857             :   unsigned int bestsad;
     858             :   unsigned int thissad;
     859             :   unsigned char *base_offset;
     860             :   unsigned char *this_offset;
     861           0 :   int k = -1;
     862             :   int all_in;
     863           0 :   int best_site = -1;
     864           0 :   int hex_range = 127;
     865           0 :   int dia_range = 8;
     866             : 
     867             :   int_mv fcenter_mv;
     868           0 :   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
     869           0 :   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
     870             : 
     871             :   (void)mvcost;
     872             : 
     873             :   /* adjust ref_mv to make sure it is within MV range */
     874           0 :   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
     875             :                x->mv_row_max);
     876           0 :   br = ref_mv->as_mv.row;
     877           0 :   bc = ref_mv->as_mv.col;
     878             : 
     879             :   /* Work out the start point for the search */
     880           0 :   base_offset = (unsigned char *)(base_pre + d->offset);
     881           0 :   this_offset = base_offset + (br * (pre_stride)) + bc;
     882           0 :   this_mv.as_mv.row = br;
     883           0 :   this_mv.as_mv.col = bc;
     884           0 :   bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
     885           0 :             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
     886             : 
     887             : #if CONFIG_MULTI_RES_ENCODING
     888             :   /* Lower search range based on prediction info */
     889           0 :   if (search_param >= 6)
     890           0 :     goto cal_neighbors;
     891           0 :   else if (search_param >= 5)
     892           0 :     hex_range = 4;
     893           0 :   else if (search_param >= 4)
     894           0 :     hex_range = 6;
     895           0 :   else if (search_param >= 3)
     896           0 :     hex_range = 15;
     897           0 :   else if (search_param >= 2)
     898           0 :     hex_range = 31;
     899           0 :   else if (search_param >= 1)
     900           0 :     hex_range = 63;
     901             : 
     902           0 :   dia_range = 8;
     903             : #else
     904             :   (void)search_param;
     905             : #endif
     906             : 
     907             :   /* hex search: test all six hexagon points around the start; the
                     :    * per-point range test (CHECK_POINT) is skipped when CHECK_BOUNDS
                     :    * shows the whole hexagon is within the MV limits */
     908           0 :   CHECK_BOUNDS(2)
     909             : 
     910           0 :   if (all_in) {
     911           0 :     for (i = 0; i < 6; ++i) {
     912           0 :       this_mv.as_mv.row = br + hex[i].row;
     913           0 :       this_mv.as_mv.col = bc + hex[i].col;
     914           0 :       this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
     915           0 :                     this_mv.as_mv.col;
     916           0 :       thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
     917           0 :       CHECK_BETTER
     918             :     }
     919             :   } else {
     920           0 :     for (i = 0; i < 6; ++i) {
     921           0 :       this_mv.as_mv.row = br + hex[i].row;
     922           0 :       this_mv.as_mv.col = bc + hex[i].col;
     923           0 :       CHECK_POINT
     924           0 :       this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
     925           0 :                     this_mv.as_mv.col;
     926           0 :       thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
     927           0 :       CHECK_BETTER
     928             :     }
     929             :   }
     930             :   /* best_site == -1: no hexagon point beat the start, go to the 1-away stage */
     931           0 :   if (best_site == -1) {
     932           0 :     goto cal_neighbors;
     933             :   } else {
     934           0 :     br += hex[best_site].row;
     935           0 :     bc += hex[best_site].col;
     936           0 :     k = best_site;
     937             :   }
     938             : 
     939           0 :   for (j = 1; j < hex_range; ++j) {
     940           0 :     best_site = -1;
     941           0 :     CHECK_BOUNDS(2)
     942             : 
     943           0 :     if (all_in) {
     944           0 :       for (i = 0; i < 3; ++i) {
     945           0 :         this_mv.as_mv.row = br + next_chkpts[k][i].row;
     946           0 :         this_mv.as_mv.col = bc + next_chkpts[k][i].col;
     947           0 :         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
     948           0 :                       this_mv.as_mv.col;
     949           0 :         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
     950           0 :         CHECK_BETTER
     951             :       }
     952             :     } else {
     953           0 :       for (i = 0; i < 3; ++i) {
     954           0 :         this_mv.as_mv.row = br + next_chkpts[k][i].row;
     955           0 :         this_mv.as_mv.col = bc + next_chkpts[k][i].col;
     956           0 :         CHECK_POINT
     957           0 :         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
     958           0 :                       this_mv.as_mv.col;
     959           0 :         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
     960           0 :         CHECK_BETTER
     961             :       }
     962             :     }
     963             : 
     964           0 :     if (best_site == -1) {
     965           0 :       break;
     966             :     } else {
     967           0 :       br += next_chkpts[k][best_site].row;
     968           0 :       bc += next_chkpts[k][best_site].col;
     969           0 :       k += 5 + best_site; /* at most 5 + 5 + 2 = 12; reduced mod 6 below */
     970           0 :       if (k >= 12) {
     971           0 :         k -= 12;
     972           0 :       } else if (k >= 6) {
     973           0 :         k -= 6;
     974             :       }
     975             :     }
     976             :   }
     977             : 
     978             : /* check 4 1-away neighbors; repeated while one of them improves the cost,
                     :  * for at most dia_range rounds */
     979             : cal_neighbors:
     980           0 :   for (j = 0; j < dia_range; ++j) {
     981           0 :     best_site = -1;
     982           0 :     CHECK_BOUNDS(1)
     983             : 
     984           0 :     if (all_in) {
     985           0 :       for (i = 0; i < 4; ++i) {
     986           0 :         this_mv.as_mv.row = br + neighbors[i].row;
     987           0 :         this_mv.as_mv.col = bc + neighbors[i].col;
     988           0 :         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
     989           0 :                       this_mv.as_mv.col;
     990           0 :         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
     991           0 :         CHECK_BETTER
     992             :       }
     993             :     } else {
     994           0 :       for (i = 0; i < 4; ++i) {
     995           0 :         this_mv.as_mv.row = br + neighbors[i].row;
     996           0 :         this_mv.as_mv.col = bc + neighbors[i].col;
     997           0 :         CHECK_POINT
     998           0 :         this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
     999           0 :                       this_mv.as_mv.col;
    1000           0 :         thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
    1001           0 :         CHECK_BETTER
    1002             :       }
    1003             :     }
    1004             : 
    1005           0 :     if (best_site == -1) {
    1006           0 :       break;
    1007             :     } else {
    1008           0 :       br += neighbors[best_site].row;
    1009           0 :       bc += neighbors[best_site].col;
    1010             :     }
    1011             :   }
    1012             : 
    1013           0 :   best_mv->as_mv.row = br;
    1014           0 :   best_mv->as_mv.col = bc;
    1015             : 
    1016           0 :   return bestsad;
    1017             : }
    1018             : #undef CHECK_BOUNDS
    1019             : #undef CHECK_POINT
    1020             : #undef CHECK_BETTER
    1021             : /* Full-pel diamond search over the site table x->ss, scored by SAD plus
                     :  * MV cost.
                     :  *
                     :  * ref_mv is passed to vp8_clamp_mv() and then used as the start point.
                     :  * Each step tests x->searches_per_step sites around the current best;
                     :  * sites are consumed consecutively starting at ss[1], and only positions
                     :  * strictly inside the x->mv_* limits are evaluated.
                     :  *
                     :  *   x, b, d       reference plane, source block and block offset
                     :  *   ref_mv        in/out: start vector
                     :  *   best_mv       out: best full-pel vector found
                     :  *   search_param  the site table is entered at
                     :  *                 search_param * x->searches_per_step and that many
                     :  *                 fewer steps are run
                     :  *   sad_per_bit   forwarded to mvsad_err_cost()
                     :  *   num00         out: number of steps that produced no move while the
                     :  *                 best position was still the start point
                     :  *   fn_ptr        supplies sdf (search) and vf (final score)
                     :  *   mvcost, center_mv
                     :  *                 forwarded to mv_err_cost() for the final score;
                     :  *                 center_mv >> 3 is the centre for mvsad_err_cost()
                     :  *
                     :  * Returns fn_ptr->vf() at the best position plus mv_err_cost() of the
                     :  * best vector scaled by 8, weighted with x->errorperbit.
                     :  */
    1022           0 : int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
    1023             :                              int_mv *best_mv, int search_param, int sad_per_bit,
    1024             :                              int *num00, vp8_variance_fn_ptr_t *fn_ptr,
    1025             :                              int *mvcost[2], int_mv *center_mv) {
    1026             :   int i, j, step;
    1027             : 
    1028           0 :   unsigned char *what = (*(b->base_src) + b->src);
    1029           0 :   int what_stride = b->src_stride;
    1030             :   unsigned char *in_what;
    1031           0 :   int pre_stride = x->e_mbd.pre.y_stride;
    1032           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    1033           0 :   int in_what_stride = pre_stride;
    1034             :   unsigned char *best_address;
    1035             : 
    1036             :   int tot_steps;
    1037             :   int_mv this_mv;
    1038             : 
    1039             :   unsigned int bestsad;
    1040             :   unsigned int thissad;
    1041           0 :   int best_site = 0;
    1042           0 :   int last_site = 0;
    1043             : 
    1044             :   int ref_row;
    1045             :   int ref_col;
    1046             :   int this_row_offset;
    1047             :   int this_col_offset;
    1048             :   search_site *ss;
    1049             : 
    1050             :   unsigned char *check_here;
    1051             : 
    1052             :   int *mvsadcost[2];
    1053             :   int_mv fcenter_mv;
    1054             : 
    1055           0 :   mvsadcost[0] = x->mvsadcost[0];
    1056           0 :   mvsadcost[1] = x->mvsadcost[1];
    1057           0 :   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    1058           0 :   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    1059             : 
    1060           0 :   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
    1061             :                x->mv_row_max);
    1062           0 :   ref_row = ref_mv->as_mv.row;
    1063           0 :   ref_col = ref_mv->as_mv.col;
    1064           0 :   *num00 = 0;
    1065           0 :   best_mv->as_mv.row = ref_row;
    1066           0 :   best_mv->as_mv.col = ref_col;
    1067             : 
    1068             :   /* Work out the start point for the search */
    1069           0 :   in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
    1070             :                               ref_col);
    1071           0 :   best_address = in_what;
    1072             : 
    1073             :   /* Check the starting position */
    1074           0 :   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
    1075           0 :             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1076             : 
    1077             :   /* search_param determines the length of the initial step and hence
    1078             :    * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
    1079             :    * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
    1080             :    */
    1081           0 :   ss = &x->ss[search_param * x->searches_per_step];
    1082           0 :   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
    1083             :   /* i runs on across all steps; ss[0] is never tested in the loop below */
    1084           0 :   i = 1;
    1085             : 
    1086           0 :   for (step = 0; step < tot_steps; ++step) {
    1087           0 :     for (j = 0; j < x->searches_per_step; ++j) {
    1088             :       /* Trap illegal vectors */
    1089           0 :       this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
    1090           0 :       this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
    1091             : 
    1092           0 :       if ((this_col_offset > x->mv_col_min) &&
    1093           0 :           (this_col_offset < x->mv_col_max) &&
    1094           0 :           (this_row_offset > x->mv_row_min) &&
    1095           0 :           (this_row_offset < x->mv_row_max))
    1096             : 
    1097             :       {
    1098           0 :         check_here = ss[i].offset + best_address;
    1099           0 :         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
    1100             :         /* only pay for the MV cost when the raw SAD is already lower */
    1101           0 :         if (thissad < bestsad) {
    1102           0 :           this_mv.as_mv.row = this_row_offset;
    1103           0 :           this_mv.as_mv.col = this_col_offset;
    1104           0 :           thissad +=
    1105           0 :               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1106             : 
    1107           0 :           if (thissad < bestsad) {
    1108           0 :             bestsad = thissad;
    1109           0 :             best_site = i;
    1110             :           }
    1111             :         }
    1112             :       }
    1113             : 
    1114           0 :       i++;
    1115             :     }
    1116             :     /* move the centre if this step found a better site; otherwise count it */
    1117           0 :     if (best_site != last_site) {
    1118           0 :       best_mv->as_mv.row += ss[best_site].mv.row;
    1119           0 :       best_mv->as_mv.col += ss[best_site].mv.col;
    1120           0 :       best_address += ss[best_site].offset;
    1121           0 :       last_site = best_site;
    1122           0 :     } else if (best_address == in_what) {
    1123           0 :       (*num00)++;
    1124             :     }
    1125             :   }
    1126             :   /* Scale by 8 for mv_err_cost(). Multiply instead of shifting: the
                     :    * components can be negative and left-shifting a negative value is
                     :    * undefined behaviour in C. thissad is reused below only as the
                     :    * sse output slot of vf(). */
    1127           0 :   this_mv.as_mv.row = best_mv->as_mv.row * 8;
    1128           0 :   this_mv.as_mv.col = best_mv->as_mv.col * 8;
    1129             : 
    1130           0 :   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
    1131           0 :          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
    1132             : }
    1133             : 
    1134           0 : int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
    1135             :                              int_mv *best_mv, int search_param, int sad_per_bit,
    1136             :                              int *num00, vp8_variance_fn_ptr_t *fn_ptr,
    1137             :                              int *mvcost[2], int_mv *center_mv) {
    1138             :   int i, j, step;
    1139             : 
    1140           0 :   unsigned char *what = (*(b->base_src) + b->src);
    1141           0 :   int what_stride = b->src_stride;
    1142             :   unsigned char *in_what;
    1143           0 :   int pre_stride = x->e_mbd.pre.y_stride;
    1144           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    1145           0 :   int in_what_stride = pre_stride;
    1146             :   unsigned char *best_address;
    1147             : 
    1148             :   int tot_steps;
    1149             :   int_mv this_mv;
    1150             : 
    1151             :   unsigned int bestsad;
    1152             :   unsigned int thissad;
    1153           0 :   int best_site = 0;
    1154           0 :   int last_site = 0;
    1155             : 
    1156             :   int ref_row;
    1157             :   int ref_col;
    1158             :   int this_row_offset;
    1159             :   int this_col_offset;
    1160             :   search_site *ss;
    1161             : 
    1162             :   unsigned char *check_here;
    1163             : 
    1164             :   int *mvsadcost[2];
    1165             :   int_mv fcenter_mv;
    1166             : 
    1167           0 :   mvsadcost[0] = x->mvsadcost[0];
    1168           0 :   mvsadcost[1] = x->mvsadcost[1];
    1169           0 :   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    1170           0 :   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    1171             : 
    1172           0 :   vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
    1173             :                x->mv_row_max);
    1174           0 :   ref_row = ref_mv->as_mv.row;
    1175           0 :   ref_col = ref_mv->as_mv.col;
    1176           0 :   *num00 = 0;
    1177           0 :   best_mv->as_mv.row = ref_row;
    1178           0 :   best_mv->as_mv.col = ref_col;
    1179             : 
    1180             :   /* Work out the start point for the search */
    1181           0 :   in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
    1182             :                               ref_col);
    1183           0 :   best_address = in_what;
    1184             : 
    1185             :   /* Check the starting position */
    1186           0 :   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
    1187           0 :             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1188             : 
    1189             :   /* search_param determines the length of the initial step and hence the
    1190             :    * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
    1191             :    * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
    1192             :    */
    1193           0 :   ss = &x->ss[search_param * x->searches_per_step];
    1194           0 :   tot_steps = (x->ss_count / x->searches_per_step) - search_param;
    1195             : 
    1196           0 :   i = 1;
    1197             : 
    1198           0 :   for (step = 0; step < tot_steps; ++step) {
    1199           0 :     int all_in = 1, t;
    1200             : 
    1201             :     /* To know if all neighbor points are within the bounds, 4 bounds
    1202             :      * checking are enough instead of checking 4 bounds for each
    1203             :      * points.
    1204             :      */
    1205           0 :     all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
    1206           0 :     all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
    1207           0 :     all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
    1208           0 :     all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
    1209             : 
    1210           0 :     if (all_in) {
    1211             :       unsigned int sad_array[4];
    1212             : 
    1213           0 :       for (j = 0; j < x->searches_per_step; j += 4) {
    1214             :         const unsigned char *block_offset[4];
    1215             : 
    1216           0 :         for (t = 0; t < 4; ++t) {
    1217           0 :           block_offset[t] = ss[i + t].offset + best_address;
    1218             :         }
    1219             : 
    1220           0 :         fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
    1221             :                        sad_array);
    1222             : 
    1223           0 :         for (t = 0; t < 4; t++, i++) {
    1224           0 :           if (sad_array[t] < bestsad) {
    1225           0 :             this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
    1226           0 :             this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
    1227           0 :             sad_array[t] +=
    1228           0 :                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1229             : 
    1230           0 :             if (sad_array[t] < bestsad) {
    1231           0 :               bestsad = sad_array[t];
    1232           0 :               best_site = i;
    1233             :             }
    1234             :           }
    1235             :         }
    1236             :       }
    1237             :     } else {
    1238           0 :       for (j = 0; j < x->searches_per_step; ++j) {
    1239             :         /* Trap illegal vectors */
    1240           0 :         this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
    1241           0 :         this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
    1242             : 
    1243           0 :         if ((this_col_offset > x->mv_col_min) &&
    1244           0 :             (this_col_offset < x->mv_col_max) &&
    1245           0 :             (this_row_offset > x->mv_row_min) &&
    1246           0 :             (this_row_offset < x->mv_row_max)) {
    1247           0 :           check_here = ss[i].offset + best_address;
    1248           0 :           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
    1249             : 
    1250           0 :           if (thissad < bestsad) {
    1251           0 :             this_mv.as_mv.row = this_row_offset;
    1252           0 :             this_mv.as_mv.col = this_col_offset;
    1253           0 :             thissad +=
    1254           0 :                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1255             : 
    1256           0 :             if (thissad < bestsad) {
    1257           0 :               bestsad = thissad;
    1258           0 :               best_site = i;
    1259             :             }
    1260             :           }
    1261             :         }
    1262           0 :         i++;
    1263             :       }
    1264             :     }
    1265             : 
    1266           0 :     if (best_site != last_site) {
    1267           0 :       best_mv->as_mv.row += ss[best_site].mv.row;
    1268           0 :       best_mv->as_mv.col += ss[best_site].mv.col;
    1269           0 :       best_address += ss[best_site].offset;
    1270           0 :       last_site = best_site;
    1271           0 :     } else if (best_address == in_what) {
    1272           0 :       (*num00)++;
    1273             :     }
    1274             :   }
    1275             : 
    1276           0 :   this_mv.as_mv.row = best_mv->as_mv.row * 8;
    1277           0 :   this_mv.as_mv.col = best_mv->as_mv.col * 8;
    1278             : 
    1279           0 :   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
    1280           0 :          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
    1281             : }
    1282             : 
    1283           0 : int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
    1284             :                           int sad_per_bit, int distance,
    1285             :                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
    1286             :                           int_mv *center_mv) {
    1287           0 :   unsigned char *what = (*(b->base_src) + b->src);
    1288           0 :   int what_stride = b->src_stride;
    1289             :   unsigned char *in_what;
    1290           0 :   int pre_stride = x->e_mbd.pre.y_stride;
    1291           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    1292           0 :   int in_what_stride = pre_stride;
    1293           0 :   int mv_stride = pre_stride;
    1294             :   unsigned char *bestaddress;
    1295           0 :   int_mv *best_mv = &d->bmi.mv;
    1296             :   int_mv this_mv;
    1297             :   unsigned int bestsad;
    1298             :   unsigned int thissad;
    1299             :   int r, c;
    1300             : 
    1301             :   unsigned char *check_here;
    1302             : 
    1303           0 :   int ref_row = ref_mv->as_mv.row;
    1304           0 :   int ref_col = ref_mv->as_mv.col;
    1305             : 
    1306           0 :   int row_min = ref_row - distance;
    1307           0 :   int row_max = ref_row + distance;
    1308           0 :   int col_min = ref_col - distance;
    1309           0 :   int col_max = ref_col + distance;
    1310             : 
    1311             :   int *mvsadcost[2];
    1312             :   int_mv fcenter_mv;
    1313             : 
    1314           0 :   mvsadcost[0] = x->mvsadcost[0];
    1315           0 :   mvsadcost[1] = x->mvsadcost[1];
    1316           0 :   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    1317           0 :   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    1318             : 
    1319             :   /* Work out the mid point for the search */
    1320           0 :   in_what = base_pre + d->offset;
    1321           0 :   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
    1322             : 
    1323           0 :   best_mv->as_mv.row = ref_row;
    1324           0 :   best_mv->as_mv.col = ref_col;
    1325             : 
    1326             :   /* Baseline value at the centre */
    1327           0 :   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
    1328           0 :             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1329             : 
    1330             :   /* Apply further limits to prevent us looking using vectors that
    1331             :    * stretch beyiond the UMV border
    1332             :    */
    1333           0 :   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
    1334             : 
    1335           0 :   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
    1336             : 
    1337           0 :   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
    1338             : 
    1339           0 :   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
    1340             : 
    1341           0 :   for (r = row_min; r < row_max; ++r) {
    1342           0 :     this_mv.as_mv.row = r;
    1343           0 :     check_here = r * mv_stride + in_what + col_min;
    1344             : 
    1345           0 :     for (c = col_min; c < col_max; ++c) {
    1346           0 :       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
    1347             : 
    1348           0 :       this_mv.as_mv.col = c;
    1349           0 :       thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1350             : 
    1351           0 :       if (thissad < bestsad) {
    1352           0 :         bestsad = thissad;
    1353           0 :         best_mv->as_mv.row = r;
    1354           0 :         best_mv->as_mv.col = c;
    1355           0 :         bestaddress = check_here;
    1356             :       }
    1357             : 
    1358           0 :       check_here++;
    1359             :     }
    1360             :   }
    1361             : 
    1362           0 :   this_mv.as_mv.row = best_mv->as_mv.row << 3;
    1363           0 :   this_mv.as_mv.col = best_mv->as_mv.col << 3;
    1364             : 
    1365           0 :   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
    1366           0 :          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
    1367             : }
    1368             : 
    1369           0 : int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
    1370             :                           int sad_per_bit, int distance,
    1371             :                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
    1372             :                           int_mv *center_mv) {
    1373           0 :   unsigned char *what = (*(b->base_src) + b->src);
    1374           0 :   int what_stride = b->src_stride;
    1375             :   unsigned char *in_what;
    1376           0 :   int pre_stride = x->e_mbd.pre.y_stride;
    1377           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    1378           0 :   int in_what_stride = pre_stride;
    1379           0 :   int mv_stride = pre_stride;
    1380             :   unsigned char *bestaddress;
    1381           0 :   int_mv *best_mv = &d->bmi.mv;
    1382             :   int_mv this_mv;
    1383             :   unsigned int bestsad;
    1384             :   unsigned int thissad;
    1385             :   int r, c;
    1386             : 
    1387             :   unsigned char *check_here;
    1388             : 
    1389           0 :   int ref_row = ref_mv->as_mv.row;
    1390           0 :   int ref_col = ref_mv->as_mv.col;
    1391             : 
    1392           0 :   int row_min = ref_row - distance;
    1393           0 :   int row_max = ref_row + distance;
    1394           0 :   int col_min = ref_col - distance;
    1395           0 :   int col_max = ref_col + distance;
    1396             : 
    1397             :   unsigned int sad_array[3];
    1398             : 
    1399             :   int *mvsadcost[2];
    1400             :   int_mv fcenter_mv;
    1401             : 
    1402           0 :   mvsadcost[0] = x->mvsadcost[0];
    1403           0 :   mvsadcost[1] = x->mvsadcost[1];
    1404           0 :   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    1405           0 :   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    1406             : 
    1407             :   /* Work out the mid point for the search */
    1408           0 :   in_what = base_pre + d->offset;
    1409           0 :   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
    1410             : 
    1411           0 :   best_mv->as_mv.row = ref_row;
    1412           0 :   best_mv->as_mv.col = ref_col;
    1413             : 
    1414             :   /* Baseline value at the centre */
    1415           0 :   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
    1416           0 :             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1417             : 
    1418             :   /* Apply further limits to prevent us looking using vectors that stretch
    1419             :    * beyond the UMV border
    1420             :    */
    1421           0 :   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
    1422             : 
    1423           0 :   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
    1424             : 
    1425           0 :   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
    1426             : 
    1427           0 :   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
    1428             : 
    1429           0 :   for (r = row_min; r < row_max; ++r) {
    1430           0 :     this_mv.as_mv.row = r;
    1431           0 :     check_here = r * mv_stride + in_what + col_min;
    1432           0 :     c = col_min;
    1433             : 
    1434           0 :     while ((c + 2) < col_max) {
    1435             :       int i;
    1436             : 
    1437           0 :       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
    1438             : 
    1439           0 :       for (i = 0; i < 3; ++i) {
    1440           0 :         thissad = sad_array[i];
    1441             : 
    1442           0 :         if (thissad < bestsad) {
    1443           0 :           this_mv.as_mv.col = c;
    1444           0 :           thissad +=
    1445           0 :               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1446             : 
    1447           0 :           if (thissad < bestsad) {
    1448           0 :             bestsad = thissad;
    1449           0 :             best_mv->as_mv.row = r;
    1450           0 :             best_mv->as_mv.col = c;
    1451           0 :             bestaddress = check_here;
    1452             :           }
    1453             :         }
    1454             : 
    1455           0 :         check_here++;
    1456           0 :         c++;
    1457             :       }
    1458             :     }
    1459             : 
    1460           0 :     while (c < col_max) {
    1461           0 :       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
    1462             : 
    1463           0 :       if (thissad < bestsad) {
    1464           0 :         this_mv.as_mv.col = c;
    1465           0 :         thissad +=
    1466           0 :             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1467             : 
    1468           0 :         if (thissad < bestsad) {
    1469           0 :           bestsad = thissad;
    1470           0 :           best_mv->as_mv.row = r;
    1471           0 :           best_mv->as_mv.col = c;
    1472           0 :           bestaddress = check_here;
    1473             :         }
    1474             :       }
    1475             : 
    1476           0 :       check_here++;
    1477           0 :       c++;
    1478             :     }
    1479             :   }
    1480             : 
    1481           0 :   this_mv.as_mv.row = best_mv->as_mv.row << 3;
    1482           0 :   this_mv.as_mv.col = best_mv->as_mv.col << 3;
    1483             : 
    1484           0 :   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
    1485           0 :          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
    1486             : }
    1487             : 
    1488           0 : int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
    1489             :                           int sad_per_bit, int distance,
    1490             :                           vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
    1491             :                           int_mv *center_mv) {
    1492           0 :   unsigned char *what = (*(b->base_src) + b->src);
    1493           0 :   int what_stride = b->src_stride;
    1494           0 :   int pre_stride = x->e_mbd.pre.y_stride;
    1495           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    1496             :   unsigned char *in_what;
    1497           0 :   int in_what_stride = pre_stride;
    1498           0 :   int mv_stride = pre_stride;
    1499             :   unsigned char *bestaddress;
    1500           0 :   int_mv *best_mv = &d->bmi.mv;
    1501             :   int_mv this_mv;
    1502             :   unsigned int bestsad;
    1503             :   unsigned int thissad;
    1504             :   int r, c;
    1505             : 
    1506             :   unsigned char *check_here;
    1507             : 
    1508           0 :   int ref_row = ref_mv->as_mv.row;
    1509           0 :   int ref_col = ref_mv->as_mv.col;
    1510             : 
    1511           0 :   int row_min = ref_row - distance;
    1512           0 :   int row_max = ref_row + distance;
    1513           0 :   int col_min = ref_col - distance;
    1514           0 :   int col_max = ref_col + distance;
    1515             : 
    1516             :   DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
    1517             :   unsigned int sad_array[3];
    1518             : 
    1519             :   int *mvsadcost[2];
    1520             :   int_mv fcenter_mv;
    1521             : 
    1522           0 :   mvsadcost[0] = x->mvsadcost[0];
    1523           0 :   mvsadcost[1] = x->mvsadcost[1];
    1524           0 :   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    1525           0 :   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    1526             : 
    1527             :   /* Work out the mid point for the search */
    1528           0 :   in_what = base_pre + d->offset;
    1529           0 :   bestaddress = in_what + (ref_row * pre_stride) + ref_col;
    1530             : 
    1531           0 :   best_mv->as_mv.row = ref_row;
    1532           0 :   best_mv->as_mv.col = ref_col;
    1533             : 
    1534             :   /* Baseline value at the centre */
    1535           0 :   bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
    1536           0 :             mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1537             : 
    1538             :   /* Apply further limits to prevent us looking using vectors that stretch
    1539             :    * beyond the UMV border
    1540             :    */
    1541           0 :   if (col_min < x->mv_col_min) col_min = x->mv_col_min;
    1542             : 
    1543           0 :   if (col_max > x->mv_col_max) col_max = x->mv_col_max;
    1544             : 
    1545           0 :   if (row_min < x->mv_row_min) row_min = x->mv_row_min;
    1546             : 
    1547           0 :   if (row_max > x->mv_row_max) row_max = x->mv_row_max;
    1548             : 
    1549           0 :   for (r = row_min; r < row_max; ++r) {
    1550           0 :     this_mv.as_mv.row = r;
    1551           0 :     check_here = r * mv_stride + in_what + col_min;
    1552           0 :     c = col_min;
    1553             : 
    1554           0 :     while ((c + 7) < col_max) {
    1555             :       int i;
    1556             : 
    1557           0 :       fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
    1558             : 
    1559           0 :       for (i = 0; i < 8; ++i) {
    1560           0 :         thissad = sad_array8[i];
    1561             : 
    1562           0 :         if (thissad < bestsad) {
    1563           0 :           this_mv.as_mv.col = c;
    1564           0 :           thissad +=
    1565           0 :               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1566             : 
    1567           0 :           if (thissad < bestsad) {
    1568           0 :             bestsad = thissad;
    1569           0 :             best_mv->as_mv.row = r;
    1570           0 :             best_mv->as_mv.col = c;
    1571           0 :             bestaddress = check_here;
    1572             :           }
    1573             :         }
    1574             : 
    1575           0 :         check_here++;
    1576           0 :         c++;
    1577             :       }
    1578             :     }
    1579             : 
    1580           0 :     while ((c + 2) < col_max) {
    1581             :       int i;
    1582             : 
    1583           0 :       fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
    1584             : 
    1585           0 :       for (i = 0; i < 3; ++i) {
    1586           0 :         thissad = sad_array[i];
    1587             : 
    1588           0 :         if (thissad < bestsad) {
    1589           0 :           this_mv.as_mv.col = c;
    1590           0 :           thissad +=
    1591           0 :               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1592             : 
    1593           0 :           if (thissad < bestsad) {
    1594           0 :             bestsad = thissad;
    1595           0 :             best_mv->as_mv.row = r;
    1596           0 :             best_mv->as_mv.col = c;
    1597           0 :             bestaddress = check_here;
    1598             :           }
    1599             :         }
    1600             : 
    1601           0 :         check_here++;
    1602           0 :         c++;
    1603             :       }
    1604             :     }
    1605             : 
    1606           0 :     while (c < col_max) {
    1607           0 :       thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
    1608             : 
    1609           0 :       if (thissad < bestsad) {
    1610           0 :         this_mv.as_mv.col = c;
    1611           0 :         thissad +=
    1612           0 :             mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
    1613             : 
    1614           0 :         if (thissad < bestsad) {
    1615           0 :           bestsad = thissad;
    1616           0 :           best_mv->as_mv.row = r;
    1617           0 :           best_mv->as_mv.col = c;
    1618           0 :           bestaddress = check_here;
    1619             :         }
    1620             :       }
    1621             : 
    1622           0 :       check_here++;
    1623           0 :       c++;
    1624             :     }
    1625             :   }
    1626             : 
    1627           0 :   this_mv.as_mv.row = best_mv->as_mv.row * 8;
    1628           0 :   this_mv.as_mv.col = best_mv->as_mv.col * 8;
    1629             : 
    1630           0 :   return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
    1631           0 :          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
    1632             : }
    1633             : 
    1634           0 : int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    1635             :                               int_mv *ref_mv, int error_per_bit,
    1636             :                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
    1637             :                               int *mvcost[2], int_mv *center_mv) {
    1638           0 :   MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
    1639             :   int i, j;
    1640             :   short this_row_offset, this_col_offset;
    1641             : 
    1642           0 :   int what_stride = b->src_stride;
    1643           0 :   int pre_stride = x->e_mbd.pre.y_stride;
    1644           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    1645           0 :   int in_what_stride = pre_stride;
    1646           0 :   unsigned char *what = (*(b->base_src) + b->src);
    1647           0 :   unsigned char *best_address =
    1648           0 :       (unsigned char *)(base_pre + d->offset +
    1649           0 :                         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
    1650             :   unsigned char *check_here;
    1651             :   int_mv this_mv;
    1652             :   unsigned int bestsad;
    1653             :   unsigned int thissad;
    1654             : 
    1655             :   int *mvsadcost[2];
    1656             :   int_mv fcenter_mv;
    1657             : 
    1658           0 :   mvsadcost[0] = x->mvsadcost[0];
    1659           0 :   mvsadcost[1] = x->mvsadcost[1];
    1660           0 :   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    1661           0 :   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    1662             : 
    1663           0 :   bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
    1664           0 :             mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
    1665             : 
    1666           0 :   for (i = 0; i < search_range; ++i) {
    1667           0 :     int best_site = -1;
    1668             : 
    1669           0 :     for (j = 0; j < 4; ++j) {
    1670           0 :       this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
    1671           0 :       this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
    1672             : 
    1673           0 :       if ((this_col_offset > x->mv_col_min) &&
    1674           0 :           (this_col_offset < x->mv_col_max) &&
    1675           0 :           (this_row_offset > x->mv_row_min) &&
    1676           0 :           (this_row_offset < x->mv_row_max)) {
    1677           0 :         check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
    1678             :                      best_address;
    1679           0 :         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
    1680             : 
    1681           0 :         if (thissad < bestsad) {
    1682           0 :           this_mv.as_mv.row = this_row_offset;
    1683           0 :           this_mv.as_mv.col = this_col_offset;
    1684           0 :           thissad +=
    1685           0 :               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
    1686             : 
    1687           0 :           if (thissad < bestsad) {
    1688           0 :             bestsad = thissad;
    1689           0 :             best_site = j;
    1690             :           }
    1691             :         }
    1692             :       }
    1693             :     }
    1694             : 
    1695           0 :     if (best_site == -1) {
    1696           0 :       break;
    1697             :     } else {
    1698           0 :       ref_mv->as_mv.row += neighbors[best_site].row;
    1699           0 :       ref_mv->as_mv.col += neighbors[best_site].col;
    1700           0 :       best_address += (neighbors[best_site].row) * in_what_stride +
    1701           0 :                       neighbors[best_site].col;
    1702             :     }
    1703             :   }
    1704             : 
    1705           0 :   this_mv.as_mv.row = ref_mv->as_mv.row << 3;
    1706           0 :   this_mv.as_mv.col = ref_mv->as_mv.col << 3;
    1707             : 
    1708           0 :   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
    1709           0 :          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
    1710             : }
    1711             : 
    1712           0 : int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    1713             :                               int_mv *ref_mv, int error_per_bit,
    1714             :                               int search_range, vp8_variance_fn_ptr_t *fn_ptr,
    1715             :                               int *mvcost[2], int_mv *center_mv) {
    1716           0 :   MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
    1717             :   int i, j;
    1718             :   short this_row_offset, this_col_offset;
    1719             : 
    1720           0 :   int what_stride = b->src_stride;
    1721           0 :   int pre_stride = x->e_mbd.pre.y_stride;
    1722           0 :   unsigned char *base_pre = x->e_mbd.pre.y_buffer;
    1723           0 :   int in_what_stride = pre_stride;
    1724           0 :   unsigned char *what = (*(b->base_src) + b->src);
    1725           0 :   unsigned char *best_address =
    1726           0 :       (unsigned char *)(base_pre + d->offset +
    1727           0 :                         (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
    1728             :   unsigned char *check_here;
    1729             :   int_mv this_mv;
    1730             :   unsigned int bestsad;
    1731             :   unsigned int thissad;
    1732             : 
    1733             :   int *mvsadcost[2];
    1734             :   int_mv fcenter_mv;
    1735             : 
    1736           0 :   mvsadcost[0] = x->mvsadcost[0];
    1737           0 :   mvsadcost[1] = x->mvsadcost[1];
    1738           0 :   fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
    1739           0 :   fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
    1740             : 
    1741           0 :   bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
    1742           0 :             mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
    1743             : 
    1744           0 :   for (i = 0; i < search_range; ++i) {
    1745           0 :     int best_site = -1;
    1746           0 :     int all_in = 1;
    1747             : 
    1748           0 :     all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
    1749           0 :     all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
    1750           0 :     all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
    1751           0 :     all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
    1752             : 
    1753           0 :     if (all_in) {
    1754             :       unsigned int sad_array[4];
    1755             :       const unsigned char *block_offset[4];
    1756           0 :       block_offset[0] = best_address - in_what_stride;
    1757           0 :       block_offset[1] = best_address - 1;
    1758           0 :       block_offset[2] = best_address + 1;
    1759           0 :       block_offset[3] = best_address + in_what_stride;
    1760             : 
    1761           0 :       fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
    1762             :                      sad_array);
    1763             : 
    1764           0 :       for (j = 0; j < 4; ++j) {
    1765           0 :         if (sad_array[j] < bestsad) {
    1766           0 :           this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
    1767           0 :           this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
    1768           0 :           sad_array[j] +=
    1769           0 :               mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
    1770             : 
    1771           0 :           if (sad_array[j] < bestsad) {
    1772           0 :             bestsad = sad_array[j];
    1773           0 :             best_site = j;
    1774             :           }
    1775             :         }
    1776             :       }
    1777             :     } else {
    1778           0 :       for (j = 0; j < 4; ++j) {
    1779           0 :         this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
    1780           0 :         this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
    1781             : 
    1782           0 :         if ((this_col_offset > x->mv_col_min) &&
    1783           0 :             (this_col_offset < x->mv_col_max) &&
    1784           0 :             (this_row_offset > x->mv_row_min) &&
    1785           0 :             (this_row_offset < x->mv_row_max)) {
    1786           0 :           check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
    1787             :                        best_address;
    1788           0 :           thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
    1789             : 
    1790           0 :           if (thissad < bestsad) {
    1791           0 :             this_mv.as_mv.row = this_row_offset;
    1792           0 :             this_mv.as_mv.col = this_col_offset;
    1793           0 :             thissad +=
    1794           0 :                 mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
    1795             : 
    1796           0 :             if (thissad < bestsad) {
    1797           0 :               bestsad = thissad;
    1798           0 :               best_site = j;
    1799             :             }
    1800             :           }
    1801             :         }
    1802             :       }
    1803             :     }
    1804             : 
    1805           0 :     if (best_site == -1) {
    1806           0 :       break;
    1807             :     } else {
    1808           0 :       ref_mv->as_mv.row += neighbors[best_site].row;
    1809           0 :       ref_mv->as_mv.col += neighbors[best_site].col;
    1810           0 :       best_address += (neighbors[best_site].row) * in_what_stride +
    1811           0 :                       neighbors[best_site].col;
    1812             :     }
    1813             :   }
    1814             : 
    1815           0 :   this_mv.as_mv.row = ref_mv->as_mv.row * 8;
    1816           0 :   this_mv.as_mv.col = ref_mv->as_mv.col * 8;
    1817             : 
    1818           0 :   return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
    1819           0 :          mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
    1820             : }
    1821             : 
    1822             : #ifdef VP8_ENTROPY_STATS
    1823             : void print_mode_context(void) {
    1824             :   FILE *f = fopen("modecont.c", "w");
    1825             :   int i, j;
    1826             : 
    1827             :   fprintf(f, "#include \"entropy.h\"\n");
    1828             :   fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
    1829             :   fprintf(f, "{\n");
    1830             : 
    1831             :   for (j = 0; j < 6; ++j) {
    1832             :     fprintf(f, "  { /* %d */\n", j);
    1833             :     fprintf(f, "    ");
    1834             : 
    1835             :     for (i = 0; i < 4; ++i) {
    1836             :       int overal_prob;
    1837             :       int this_prob;
    1838             :       int count;
    1839             : 
    1840             :       /* Overall probs */
    1841             :       count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
    1842             : 
    1843             :       if (count)
    1844             :         overal_prob = 256 * mv_mode_cts[i][0] / count;
    1845             :       else
    1846             :         overal_prob = 128;
    1847             : 
    1848             :       if (overal_prob == 0) overal_prob = 1;
    1849             : 
    1850             :       /* context probs */
    1851             :       count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
    1852             : 
    1853             :       if (count)
    1854             :         this_prob = 256 * mv_ref_ct[j][i][0] / count;
    1855             :       else
    1856             :         this_prob = 128;
    1857             : 
    1858             :       if (this_prob == 0) this_prob = 1;
    1859             : 
    1860             :       fprintf(f, "%5d, ", this_prob);
    1861             :     }
    1862             : 
    1863             :     fprintf(f, "  },\n");
    1864             :   }
    1865             : 
    1866             :   fprintf(f, "};\n");
    1867             :   fclose(f);
    1868             : }
    1869             : 
    1870             : /* MV ref count VP8_ENTROPY_STATS stats code */
    1871             : #ifdef VP8_ENTROPY_STATS
    1872             : void init_mv_ref_counts() {
    1873             :   memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
    1874             :   memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
    1875             : }
    1876             : 
    1877             : void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
    1878             :   if (m == ZEROMV) {
    1879             :     ++mv_ref_ct[ct[0]][0][0];
    1880             :     ++mv_mode_cts[0][0];
    1881             :   } else {
    1882             :     ++mv_ref_ct[ct[0]][0][1];
    1883             :     ++mv_mode_cts[0][1];
    1884             : 
    1885             :     if (m == NEARESTMV) {
    1886             :       ++mv_ref_ct[ct[1]][1][0];
    1887             :       ++mv_mode_cts[1][0];
    1888             :     } else {
    1889             :       ++mv_ref_ct[ct[1]][1][1];
    1890             :       ++mv_mode_cts[1][1];
    1891             : 
    1892             :       if (m == NEARMV) {
    1893             :         ++mv_ref_ct[ct[2]][2][0];
    1894             :         ++mv_mode_cts[2][0];
    1895             :       } else {
    1896             :         ++mv_ref_ct[ct[2]][2][1];
    1897             :         ++mv_mode_cts[2][1];
    1898             : 
    1899             :         if (m == NEWMV) {
    1900             :           ++mv_ref_ct[ct[3]][3][0];
    1901             :           ++mv_mode_cts[3][0];
    1902             :         } else {
    1903             :           ++mv_ref_ct[ct[3]][3][1];
    1904             :           ++mv_mode_cts[3][1];
    1905             :         }
    1906             :       }
    1907             :     }
    1908             :   }
    1909             : }
    1910             : 
    1911             : #endif /* END MV ref count VP8_ENTROPY_STATS stats code */
    1912             : 
    1913             : #endif

Generated by: LCOV version 1.13