LCOV - code coverage report
Current view: top level - third_party/aom/av1/encoder - rdopt.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 4336 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 96 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <assert.h>
      13             : #include <math.h>
      14             : 
      15             : #include "./aom_dsp_rtcd.h"
      16             : #include "./av1_rtcd.h"
      17             : 
      18             : #include "aom_dsp/aom_dsp_common.h"
      19             : #include "aom_dsp/blend.h"
      20             : #include "aom_mem/aom_mem.h"
      21             : #include "aom_ports/mem.h"
      22             : #include "aom_ports/system_state.h"
      23             : 
      24             : #include "av1/common/common.h"
      25             : #include "av1/common/common_data.h"
      26             : #include "av1/common/entropy.h"
      27             : #include "av1/common/entropymode.h"
      28             : #include "av1/common/idct.h"
      29             : #include "av1/common/mvref_common.h"
      30             : #include "av1/common/pred_common.h"
      31             : #include "av1/common/quant_common.h"
      32             : #include "av1/common/reconinter.h"
      33             : #include "av1/common/reconintra.h"
      34             : #include "av1/common/scan.h"
      35             : #include "av1/common/seg_common.h"
      36             : #if CONFIG_LV_MAP
      37             : #include "av1/common/txb_common.h"
      38             : #endif
      39             : #if CONFIG_WARPED_MOTION
      40             : #include "av1/common/warped_motion.h"
      41             : #endif  // CONFIG_WARPED_MOTION
      42             : 
      43             : #include "av1/encoder/aq_variance.h"
      44             : #include "av1/encoder/av1_quantize.h"
      45             : #include "av1/encoder/cost.h"
      46             : #include "av1/encoder/encodemb.h"
      47             : #include "av1/encoder/encodemv.h"
      48             : #include "av1/encoder/encoder.h"
      49             : #if CONFIG_LV_MAP
      50             : #include "av1/encoder/encodetxb.h"
      51             : #endif
      52             : #include "av1/encoder/hybrid_fwd_txfm.h"
      53             : #include "av1/encoder/mcomp.h"
      54             : #if CONFIG_PALETTE
      55             : #include "av1/encoder/palette.h"
      56             : #endif  // CONFIG_PALETTE
      57             : #include "av1/encoder/ratectrl.h"
      58             : #include "av1/encoder/rd.h"
      59             : #include "av1/encoder/rdopt.h"
      60             : #include "av1/encoder/tokenize.h"
      61             : #if CONFIG_PVQ
      62             : #include "av1/encoder/pvq_encoder.h"
      63             : #endif  // CONFIG_PVQ
      64             : #if CONFIG_PVQ || CONFIG_DAALA_DIST
      65             : #include "av1/common/pvq.h"
      66             : #endif  // CONFIG_PVQ || CONFIG_DAALA_DIST
      67             : #if CONFIG_DUAL_FILTER  // NOTE(review): which filter direction each column of filter_sets selects is not visible here -- confirm at the use site
      68             : #define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)  // row count of filter_sets: one row per ordered pair of filter indices
      69             : #if USE_EXTRA_FILTER
      70             : static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {  // all ordered pairs over indices 0..3, first index varying slowest
      71             :   { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
      72             :   { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
      73             :   { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
      74             : };
      75             : #else   // USE_EXTRA_FILTER
      76             : static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {  // all ordered pairs over indices 0..2, first index varying slowest
      77             :   { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
      78             :   { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
      79             : };
      80             : #endif  // USE_EXTRA_FILTER
      81             : #endif  // CONFIG_DUAL_FILTER
      82             : 
      83             : #if CONFIG_EXT_REFS  // Each <REF>_MODE_MASK below ORs (1 << frame) for every frame type in the list except <REF> itself (INTRA_FRAME is always included).
      84             : 
      85             : #define LAST_FRAME_MODE_MASK                                      \
      86             :   ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
      87             :    (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
      88             : #define LAST2_FRAME_MODE_MASK                                    \
      89             :   ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
      90             :    (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
      91             : #define LAST3_FRAME_MODE_MASK                                    \
      92             :   ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
      93             :    (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
      94             : #define GOLDEN_FRAME_MODE_MASK                                   \
      95             :   ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
      96             :    (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
      97             : #define BWDREF_FRAME_MODE_MASK                                   \
      98             :   ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
      99             :    (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
     100             : #define ALTREF_FRAME_MODE_MASK                                   \
     101             :   ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
     102             :    (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))
     103             : 
     104             : #else  // !CONFIG_EXT_REFS: same scheme over LAST, GOLDEN, ALTREF and INTRA only
     105             : 
     106             : #define LAST_FRAME_MODE_MASK \
     107             :   ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
     108             : #define GOLDEN_FRAME_MODE_MASK \
     109             :   ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
     110             : #define ALTREF_FRAME_MODE_MASK \
     111             :   ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
     112             : 
     113             : #endif  // CONFIG_EXT_REFS
     114             : 
     115             : #if CONFIG_EXT_REFS
     116             : #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)  // bits for ALTREF_FRAME and BWDREF_FRAME, plus bit 0
     117             : #else
     118             : #define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)  // bit for ALTREF_FRAME, plus bit 0
     119             : #endif  // CONFIG_EXT_REFS
     120             : 
     121             : #define MIN_EARLY_TERM_INDEX 3
     122             : #define NEW_MV_DISCOUNT_FACTOR 8
     123             : 
     124             : #if CONFIG_EXT_INTRA
     125             : #define ANGLE_SKIP_THRESH 10
     126             : #define FILTER_FAST_SEARCH 1
     127             : #endif  // CONFIG_EXT_INTRA
     128             : 
     129             : const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671,    // vert: entries [0..3]
     130             :                                   -7.7051, -3.2234, -3.6193, 3.4533 };  // horz: entries [4..7]
     131             : 
     132             : typedef struct {  // A prediction mode paired with two reference-frame slots.
     133             :   PREDICTION_MODE mode;
     134             :   MV_REFERENCE_FRAME ref_frame[2];
     135             : } MODE_DEFINITION;
     136             : 
     137             : typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;  // Two reference-frame slots with no mode attached.
     138             : 
     139             : struct rdcost_block_args {  // Argument bundle. NOTE(review): the code that fills and reads it is not visible here, so the per-field notes below are assumptions to confirm.
     140             :   const AV1_COMP *cpi;
     141             :   MACROBLOCK *x;
     142             :   ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];  // 2 * MAX_MIB_SIZE entropy-context entries
     143             :   ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];  // 2 * MAX_MIB_SIZE entropy-context entries
     144             :   RD_STATS rd_stats;
     145             :   int64_t this_rd;  // presumably the cost accumulated so far -- confirm
     146             :   int64_t best_rd;  // presumably the best cost to beat -- confirm
     147             :   int exit_early;  // presumably set once the evaluation can stop -- confirm
     148             :   int use_fast_coef_costing;
     149             : };
     150             : 
     151             : #define LAST_NEW_MV_INDEX 6  // NOTE(review): in av1_mode_order, index 6 is { NEWMV, { GOLDEN_FRAME, NONE_FRAME } } only when CONFIG_EXT_REFS is off; with it on, index 6 is the DC_PRED entry and that NEWMV entry sits at index 12 -- confirm intent
     152             : static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {  // Ordered { mode, { ref_frame[0], ref_frame[1] } } entries; ref_frame[1] is NONE_FRAME where only one frame is named. Entries present depend on the CONFIG_* switches below.
     153             :   { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
     154             : #if CONFIG_EXT_REFS
     155             :   { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
     156             :   { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
     157             :   { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
     158             : #endif  // CONFIG_EXT_REFS
     159             :   { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
     160             :   { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
     161             : 
     162             :   { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
     163             : 
     164             :   { NEWMV, { LAST_FRAME, NONE_FRAME } },
     165             : #if CONFIG_EXT_REFS
     166             :   { NEWMV, { LAST2_FRAME, NONE_FRAME } },
     167             :   { NEWMV, { LAST3_FRAME, NONE_FRAME } },
     168             :   { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
     169             : #endif  // CONFIG_EXT_REFS
     170             :   { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
     171             :   { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
     172             : 
     173             :   { NEARMV, { LAST_FRAME, NONE_FRAME } },
     174             : #if CONFIG_EXT_REFS
     175             :   { NEARMV, { LAST2_FRAME, NONE_FRAME } },
     176             :   { NEARMV, { LAST3_FRAME, NONE_FRAME } },
     177             :   { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
     178             : #endif  // CONFIG_EXT_REFS
     179             :   { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
     180             :   { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
     181             : 
     182             :   { ZEROMV, { LAST_FRAME, NONE_FRAME } },
     183             : #if CONFIG_EXT_REFS
     184             :   { ZEROMV, { LAST2_FRAME, NONE_FRAME } },
     185             :   { ZEROMV, { LAST3_FRAME, NONE_FRAME } },
     186             :   { ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
     187             : #endif  // CONFIG_EXT_REFS
     188             :   { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
     189             :   { ZEROMV, { ALTREF_FRAME, NONE_FRAME } },
     190             : 
     191             : // TODO(zoeliu): May need to reconsider the order on the modes to check
     192             : 
     193             : #if CONFIG_EXT_INTER
     194             :   { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
     195             : #if CONFIG_EXT_REFS
     196             :   { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
     197             :   { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
     198             : #endif  // CONFIG_EXT_REFS
     199             :   { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     200             : #if CONFIG_EXT_REFS
     201             :   { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
     202             :   { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
     203             :   { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
     204             :   { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     205             : #endif  // CONFIG_EXT_REFS
     206             : 
     207             : #else  // CONFIG_EXT_INTER
     208             : 
     209             :   { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
     210             : #if CONFIG_EXT_REFS
     211             :   { NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
     212             :   { NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
     213             : #endif  // CONFIG_EXT_REFS
     214             :   { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     215             : #if CONFIG_EXT_REFS
     216             :   { NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
     217             :   { NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
     218             :   { NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
     219             :   { NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     220             : #endif  // CONFIG_EXT_REFS
     221             : #endif  // CONFIG_EXT_INTER
     222             : 
     223             :   { TM_PRED, { INTRA_FRAME, NONE_FRAME } },
     224             : 
     225             : #if CONFIG_ALT_INTRA
     226             :   { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
     227             : #if CONFIG_SMOOTH_HV
     228             :   { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
     229             :   { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
     230             : #endif  // CONFIG_SMOOTH_HV
     231             : #endif  // CONFIG_ALT_INTRA
     232             : 
     233             : #if CONFIG_EXT_INTER
     234             :   { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
     235             :   { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
     236             :   { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
     237             :   { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
     238             :   { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
     239             :   { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
     240             :   { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
     241             : 
     242             : #if CONFIG_EXT_REFS
     243             :   { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
     244             :   { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
     245             :   { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
     246             :   { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
     247             :   { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
     248             :   { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
     249             :   { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
     250             : 
     251             :   { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
     252             :   { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
     253             :   { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
     254             :   { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
     255             :   { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
     256             :   { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
     257             :   { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
     258             : #endif  // CONFIG_EXT_REFS
     259             : 
     260             :   { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     261             :   { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     262             :   { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     263             :   { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     264             :   { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     265             :   { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     266             :   { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     267             : 
     268             : #if CONFIG_EXT_REFS
     269             :   { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
     270             :   { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
     271             :   { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
     272             :   { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
     273             :   { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
     274             :   { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
     275             :   { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
     276             : 
     277             :   { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
     278             :   { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
     279             :   { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
     280             :   { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
     281             :   { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
     282             :   { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
     283             :   { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
     284             : 
     285             :   { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
     286             :   { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
     287             :   { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
     288             :   { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
     289             :   { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
     290             :   { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
     291             :   { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
     292             : 
     293             :   { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     294             :   { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     295             :   { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     296             :   { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     297             :   { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     298             :   { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     299             :   { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     300             : #endif  // CONFIG_EXT_REFS
     301             : 
     302             : #else  // CONFIG_EXT_INTER
     303             : 
     304             :   { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
     305             :   { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
     306             : #if CONFIG_EXT_REFS
     307             :   { NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
     308             :   { NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
     309             :   { NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
     310             :   { NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
     311             : #endif  // CONFIG_EXT_REFS
     312             :   { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     313             :   { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     314             : 
     315             : #if CONFIG_EXT_REFS
     316             :   { NEARMV, { LAST_FRAME, BWDREF_FRAME } },
     317             :   { NEWMV, { LAST_FRAME, BWDREF_FRAME } },
     318             :   { NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
     319             :   { NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
     320             :   { NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
     321             :   { NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
     322             :   { NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     323             :   { NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     324             : #endif  // CONFIG_EXT_REFS
     325             : 
     326             :   { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
     327             : #if CONFIG_EXT_REFS
     328             :   { ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
     329             :   { ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
     330             : #endif  // CONFIG_EXT_REFS
     331             :   { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
     332             : 
     333             : #if CONFIG_EXT_REFS
     334             :   { ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
     335             :   { ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
     336             :   { ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
     337             :   { ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
     338             : #endif  // CONFIG_EXT_REFS
     339             : 
     340             : #endif  // CONFIG_EXT_INTER
     341             : 
     342             :   { H_PRED, { INTRA_FRAME, NONE_FRAME } },
     343             :   { V_PRED, { INTRA_FRAME, NONE_FRAME } },
     344             :   { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
     345             :   { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
     346             :   { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
     347             :   { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
     348             :   { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
     349             :   { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
     350             : 
     351             : #if CONFIG_EXT_INTER
     352             :   { ZEROMV, { LAST_FRAME, INTRA_FRAME } },
     353             :   { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
     354             :   { NEARMV, { LAST_FRAME, INTRA_FRAME } },
     355             :   { NEWMV, { LAST_FRAME, INTRA_FRAME } },
     356             : 
     357             : #if CONFIG_EXT_REFS
     358             :   { ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
     359             :   { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
     360             :   { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
     361             :   { NEWMV, { LAST2_FRAME, INTRA_FRAME } },
     362             : 
     363             :   { ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
     364             :   { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
     365             :   { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
     366             :   { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
     367             : #endif  // CONFIG_EXT_REFS
     368             : 
     369             :   { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
     370             :   { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
     371             :   { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
     372             :   { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
     373             : 
     374             : #if CONFIG_EXT_REFS
     375             :   { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
     376             :   { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
     377             :   { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
     378             :   { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
     379             : #endif  // CONFIG_EXT_REFS
     380             : 
     381             :   { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
     382             :   { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
     383             :   { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
     384             :   { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
     385             : #endif  // CONFIG_EXT_INTER
     386             : };
     387             : 
     388             : #if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
     389           0 : static INLINE int write_uniform_cost(int n, int v) {  // Returns the cost of coding v as (l - 1) or l bits, each priced at av1_cost_bit(128, 0); presumably n is the number of possible values -- confirm with callers.
     390           0 :   const int l = get_unsigned_bits(n);  // bit count reported by get_unsigned_bits for n
     391           0 :   const int m = (1 << l) - n;  // threshold: values below m get the shorter code
     392           0 :   if (l == 0) return 0;  // zero bits needed, so zero cost
     393           0 :   if (v < m)
     394           0 :     return (l - 1) * av1_cost_bit(128, 0);  // short code: l - 1 bits
     395             :   else
     396           0 :     return l * av1_cost_bit(128, 0);  // long code: l bits
     397             : }
     398             : #endif  // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
     399             : 
     400             : // Constants for the prune 1 and prune 2 decision boundaries: a midpoint (MID) and a margin (MARGIN) each for CORR and EDST.
     401             : #define FAST_EXT_TX_CORR_MID 0.0
     402             : #define FAST_EXT_TX_EDST_MID 0.1
     403             : #define FAST_EXT_TX_CORR_MARGIN 0.5
     404             : #define FAST_EXT_TX_EDST_MARGIN 0.3
     405             : 
     406             : #if CONFIG_DAALA_DIST
     407             : static int od_compute_var_4x4(od_coeff *x, int stride) {  // Integer variance of the 4x4 block starting at x, whose rows are `stride` elements apart: (sum(t^2) - sum(t)^2 / 16) / 16.
     408             :   int sum;
     409             :   int s2;
     410             :   int i;
     411             :   sum = 0;  // running sum of the 16 samples
     412             :   s2 = 0;  // running sum of their squares
     413             :   for (i = 0; i < 4; i++) {
     414             :     int j;
     415             :     for (j = 0; j < 4; j++) {
     416             :       int t;
     417             : 
     418             :       t = x[i * stride + j];
     419             :       sum += t;
     420             :       s2 += t * t;
     421             :     }
     422             :   }
     423             :   // TODO(yushin) : Check whether any changes are required for high bit depth.
     424             :   return (s2 - (sum * sum >> 4)) >> 4;  // both divisions by 16 are done as >> 4
     425             : }
     426             : 
     427             : /* OD_DIST_LP_MID controls the frequency weighting filter used for computing
     428             :    the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
     429             :    is applied both horizontally and vertically. For X=5, the filter is
     430             :    a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
     431             : #define OD_DIST_LP_MID (5)  // X, the centre tap of the [1 X 1] filter
     432             : #define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)  // sum of the three taps, i.e. the (X + 2) divisor of one filter pass
     433             : 
     434             : static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,  // Distortion of one 8x8 block: activity^2 * (normalized sum of squared e_lp + vardist).
     435             :                                   od_coeff *y, od_coeff *e_lp, int stride) {  // x, y and e_lp are all indexed with the same row stride
     436             :   double sum;
     437             :   int min_var;
     438             :   double mean_var;
     439             :   double var_stat;
     440             :   double activity;
     441             :   double calibration;
     442             :   int i;
     443             :   int j;
     444             :   double vardist;
     445             : 
     446             :   vardist = 0;
     447             :   OD_ASSERT(qm != OD_FLAT_QM);  // qm is only checked here; it does not enter the computation
     448             :   (void)qm;
     449             : #if 1
     450             :   min_var = INT_MAX;
     451             :   mean_var = 0;
     452             :   for (i = 0; i < 3; i++) {  // nine overlapping 4x4 windows at offsets 0, 2 and 4 in each direction
     453             :     for (j = 0; j < 3; j++) {
     454             :       int varx;
     455             :       int vary;
     456             :       varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
     457             :       vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
     458             :       min_var = OD_MINI(min_var, varx);  // minimum is taken over the x variances only
     459             :       mean_var += 1. / (1 + varx);  // accumulates reciprocals; turned into a harmonic mean below
     460             :       /* The cast to (double) is to avoid an overflow before the sqrt.*/
     461             :       vardist += varx - 2 * sqrt(varx * (double)vary) + vary;  // equals (sqrt(varx) - sqrt(vary))^2
     462             :     }
     463             :   }
     464             :   /* We use a different variance statistic depending on whether activity
     465             :      masking is used, since the harmonic mean appeared slightly worse with
     466             :      masking off. The calibration constant just ensures that we preserve the
     467             :      rate compared to activity=1. */
     468             :   if (use_activity_masking) {
     469             :     calibration = 1.95;
     470             :     var_stat = 9. / mean_var;  // harmonic mean of (1 + varx) over the nine windows
     471             :   } else {
     472             :     calibration = 1.62;
     473             :     var_stat = min_var;  // smallest x variance among the nine windows
     474             :   }
     475             :   /* calibration (1.95 or 1.62, set above) is a calibration constant, 0.25 is
     476             :      a noise floor and 1/6 is the activity masking constant. */
     477             :   activity = calibration * pow(.25 + var_stat, -1. / 6);
     478             : #else  // disabled alternative: no activity weighting
     479             :   activity = 1;
     480             : #endif  // 1
     481             :   sum = 0;
     482             :   for (i = 0; i < 8; i++) {  // sum of squared e_lp samples over the 8x8 block
     483             :     for (j = 0; j < 8; j++)
     484             :       sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
     485             :   }
     486             :   /* Normalize the filter to unit DC response. */
     487             :   sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
     488             :                OD_DIST_LP_NORM);  // NORM^4: NORM^2 per sample for the horizontal plus vertical pass, squared because sum holds squares
     489             :   return activity * activity * (sum + vardist);
     490             : }
     491             : 
     492             : // Note : Inputs x and y are in a pixel domain and are indexed with a row stride of bsize_w. Returns the distortion of the bsize_w x bsize_h block as a double.
     493             : static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
     494             :                               od_coeff *y, int bsize_w, int bsize_h,
     495             :                               int qindex) {
     496             :   int i;
     497             :   double sum;
     498             :   sum = 0;
     499             : 
     500             :   assert(bsize_w >= 8 && bsize_h >= 8);  // NOTE(review): the 8x8 tiling below also appears to assume both sizes are multiples of 8 -- confirm callers
     501             : 
     502             :   if (qm == OD_FLAT_QM) {  // flat matrix: plain sum of squared differences
     503             :     for (i = 0; i < bsize_w * bsize_h; i++) {
     504             :       double tmp;
     505             :       tmp = x[i] - y[i];
     506             :       sum += tmp * tmp;
     507             :     }
     508             :   } else {
     509             :     int j;
     510             :     DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);  // error x - y; NOTE(review): needs bsize_w * bsize_h <= MAX_TX_SQUARE -- confirm callers
     511             :     DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);  // error after the horizontal filter pass
     512             :     DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);  // error after both filter passes (not yet normalized)
     513             :     int mid = OD_DIST_LP_MID;
     514             :     for (i = 0; i < bsize_h; i++) {  // e = x - y
     515             :       for (j = 0; j < bsize_w; j++) {
     516             :         e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
     517             :       }
     518             :     }
     519             :     for (i = 0; i < bsize_h; i++) {  // horizontal [1 mid 1] pass; at both row ends the missing outer tap is replaced by the inner neighbour (weight 2)
     520             :       tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
     521             :       tmp[i * bsize_w + bsize_w - 1] =
     522             :           mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
     523             :       for (j = 1; j < bsize_w - 1; j++) {
     524             :         tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
     525             :                                e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
     526             :       }
     527             :     }
     528             :     for (j = 0; j < bsize_w; j++) {  // vertical pass, first and last rows: same edge treatment as above
     529             :       e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
     530             :       e_lp[(bsize_h - 1) * bsize_w + j] =
     531             :           mid * tmp[(bsize_h - 1) * bsize_w + j] +
     532             :           2 * tmp[(bsize_h - 2) * bsize_w + j];
     533             :     }
     534             :     for (i = 1; i < bsize_h - 1; i++) {  // vertical pass, interior rows
     535             :       for (j = 0; j < bsize_w; j++) {
     536             :         e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
     537             :                                 tmp[(i - 1) * bsize_w + j] +
     538             :                                 tmp[(i + 1) * bsize_w + j];
     539             :       }
     540             :     }
     541             :     for (i = 0; i < bsize_h; i += 8) {  // accumulate the distortion of each 8x8 tile, passing bsize_w as the stride
     542             :       for (j = 0; j < bsize_w; j += 8) {
     543             :         sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
     544             :                                    &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
     545             :                                    bsize_w);
     546             :       }
     547             :     }
     548             :     /* Scale according to linear regression against SSE, for 8x8 blocks. */
     549             :     if (activity_masking) {
     550             :       sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +  // linear in qindex: 2.2 at 99, 1.7 at 210
     551             :              (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);  // plus 2.5 * ((qindex - 99) / 99)^2 when qindex < 99
     552             :     } else {
     553             :       sum *= qindex >= 128  // piecewise linear in qindex with breakpoints at 43 and 128
     554             :                  ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)  // 1.4 at 128, 0.9 at 209
     555             :                  : qindex <= 43
     556             :                        ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)  // 1.5 at 43, 2.0 at 16
     557             :                        : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);  // 1.5 at 43, 1.4 at 128
     558             :     }
     559             :   }
     560             :   return sum;
     561             : }
     562             : 
     563             : int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst,
     564             :                        int dst_stride, int bsw, int bsh, int qm,
     565             :                        int use_activity_masking, int qindex) {
     566             :   int i, j;
     567             :   int64_t d;
     568             :   DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
     569             :   DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);
     570             : 
     571             :   assert(qm == OD_HVS_QM);
     572             : 
     573             :   for (j = 0; j < bsh; j++)
     574             :     for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
     575             : 
     576             :   for (j = 0; j < bsh; j++)
     577             :     for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
     578             : 
     579             :   d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
     580             :                                qindex);
     581             :   return d;
     582             : }
     583             : #endif  // CONFIG_DAALA_DIST
     584             : 
     585           0 : static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
     586             :                                          const uint8_t *src, int src_stride,
     587             :                                          const uint8_t *dst, int dst_stride,
     588             :                                          double *hordist, double *verdist) {
     589           0 :   const int bw = block_size_wide[bsize];
     590           0 :   const int bh = block_size_high[bsize];
     591           0 :   unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
     592             : 
     593           0 :   const int f_index = bsize - BLOCK_16X16;
     594           0 :   if (f_index < 0) {
     595           0 :     const int w_shift = bw == 8 ? 1 : 2;
     596           0 :     const int h_shift = bh == 8 ? 1 : 2;
     597             : #if CONFIG_HIGHBITDEPTH
     598           0 :     if (cpi->common.use_highbitdepth) {
     599           0 :       const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
     600           0 :       const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
     601           0 :       for (int i = 0; i < bh; ++i)
     602           0 :         for (int j = 0; j < bw; ++j) {
     603           0 :           const int index = (j >> w_shift) + ((i >> h_shift) << 2);
     604           0 :           esq[index] +=
     605           0 :               (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
     606           0 :               (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
     607             :         }
     608             :     } else {
     609             : #endif  // CONFIG_HIGHBITDEPTH
     610             : 
     611           0 :       for (int i = 0; i < bh; ++i)
     612           0 :         for (int j = 0; j < bw; ++j) {
     613           0 :           const int index = (j >> w_shift) + ((i >> h_shift) << 2);
     614           0 :           esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
     615           0 :                         (src[j + i * src_stride] - dst[j + i * dst_stride]);
     616             :         }
     617             : #if CONFIG_HIGHBITDEPTH
     618             :     }
     619             : #endif  // CONFIG_HIGHBITDEPTH
     620             :   } else {
     621           0 :     cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
     622           0 :     cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
     623             :                             &esq[1]);
     624           0 :     cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
     625             :                             &esq[2]);
     626           0 :     cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
     627             :                             dst_stride, &esq[3]);
     628           0 :     src += bh / 4 * src_stride;
     629           0 :     dst += bh / 4 * dst_stride;
     630             : 
     631           0 :     cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
     632           0 :     cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
     633             :                             &esq[5]);
     634           0 :     cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
     635             :                             &esq[6]);
     636           0 :     cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
     637             :                             dst_stride, &esq[7]);
     638           0 :     src += bh / 4 * src_stride;
     639           0 :     dst += bh / 4 * dst_stride;
     640             : 
     641           0 :     cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
     642           0 :     cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
     643             :                             &esq[9]);
     644           0 :     cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
     645             :                             &esq[10]);
     646           0 :     cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
     647             :                             dst_stride, &esq[11]);
     648           0 :     src += bh / 4 * src_stride;
     649           0 :     dst += bh / 4 * dst_stride;
     650             : 
     651           0 :     cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
     652           0 :     cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
     653             :                             &esq[13]);
     654           0 :     cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
     655             :                             &esq[14]);
     656           0 :     cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
     657             :                             dst_stride, &esq[15]);
     658             :   }
     659             : 
     660           0 :   double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
     661           0 :                  esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
     662           0 :                  esq[12] + esq[13] + esq[14] + esq[15];
     663           0 :   if (total > 0) {
     664           0 :     const double e_recip = 1.0 / total;
     665           0 :     hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
     666           0 :     hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
     667           0 :     hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
     668           0 :     verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
     669           0 :     verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
     670           0 :     verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
     671             :   } else {
     672           0 :     hordist[0] = verdist[0] = 0.25;
     673           0 :     hordist[1] = verdist[1] = 0.25;
     674           0 :     hordist[2] = verdist[2] = 0.25;
     675             :   }
     676           0 : }
     677             : 
     678           0 : static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
     679             :                             const uint8_t *src, int src_stride,
     680             :                             const uint8_t *dst, int dst_stride) {
     681           0 :   int prune_bitmask = 0;
     682           0 :   double svm_proj_h = 0, svm_proj_v = 0;
     683           0 :   double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
     684           0 :   get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
     685             :                                hdist, vdist);
     686             : 
     687           0 :   svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
     688           0 :                vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
     689           0 :   svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
     690           0 :                hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
     691           0 :   if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
     692           0 :     prune_bitmask |= 1 << FLIPADST_1D;
     693           0 :   else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
     694           0 :     prune_bitmask |= 1 << ADST_1D;
     695             : 
     696           0 :   if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
     697           0 :     prune_bitmask |= 1 << (FLIPADST_1D + 8);
     698           0 :   else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
     699           0 :     prune_bitmask |= 1 << (ADST_1D + 8);
     700             : 
     701           0 :   return prune_bitmask;
     702             : }
     703             : 
     704             : #if CONFIG_EXT_TX
     705           0 : static void get_horver_correlation(const int16_t *diff, int stride, int w,
     706             :                                    int h, double *hcorr, double *vcorr) {
     707             :   // Returns hor/ver correlation coefficient
     708           0 :   const int num = (h - 1) * (w - 1);
     709             :   double num_r;
     710             :   int i, j;
     711           0 :   int64_t xy_sum = 0, xz_sum = 0;
     712           0 :   int64_t x_sum = 0, y_sum = 0, z_sum = 0;
     713           0 :   int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
     714             :   double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
     715           0 :   *hcorr = *vcorr = 1;
     716             : 
     717           0 :   assert(num > 0);
     718           0 :   num_r = 1.0 / num;
     719           0 :   for (i = 1; i < h; ++i) {
     720           0 :     for (j = 1; j < w; ++j) {
     721           0 :       const int16_t x = diff[i * stride + j];
     722           0 :       const int16_t y = diff[i * stride + j - 1];
     723           0 :       const int16_t z = diff[(i - 1) * stride + j];
     724           0 :       xy_sum += x * y;
     725           0 :       xz_sum += x * z;
     726           0 :       x_sum += x;
     727           0 :       y_sum += y;
     728           0 :       z_sum += z;
     729           0 :       x2_sum += x * x;
     730           0 :       y2_sum += y * y;
     731           0 :       z2_sum += z * z;
     732             :     }
     733             :   }
     734           0 :   x_var_n = x2_sum - (x_sum * x_sum) * num_r;
     735           0 :   y_var_n = y2_sum - (y_sum * y_sum) * num_r;
     736           0 :   z_var_n = z2_sum - (z_sum * z_sum) * num_r;
     737           0 :   xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
     738           0 :   xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
     739           0 :   if (x_var_n > 0 && y_var_n > 0) {
     740           0 :     *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
     741           0 :     *hcorr = *hcorr < 0 ? 0 : *hcorr;
     742             :   }
     743           0 :   if (x_var_n > 0 && z_var_n > 0) {
     744           0 :     *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
     745           0 :     *vcorr = *vcorr < 0 ? 0 : *vcorr;
     746             :   }
     747           0 : }
     748             : 
     749           0 : int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
     750             :   double hcorr, vcorr;
     751           0 :   int prune_bitmask = 0;
     752           0 :   get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
     753             : 
     754           0 :   if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
     755           0 :     prune_bitmask |= 1 << IDTX_1D;
     756           0 :   else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
     757           0 :     prune_bitmask |= 1 << DCT_1D;
     758             : 
     759           0 :   if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
     760           0 :     prune_bitmask |= 1 << (IDTX_1D + 8);
     761           0 :   else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
     762           0 :     prune_bitmask |= 1 << (DCT_1D + 8);
     763           0 :   return prune_bitmask;
     764             : }
     765             : 
     766             : // Performance drop: 0.5%, Speed improvement: 24%
     767           0 : static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
     768             :                              MACROBLOCK *x, const MACROBLOCKD *xd,
     769             :                              int adst_flipadst, int dct_idtx) {
     770           0 :   int prune = 0;
     771             : 
     772           0 :   if (adst_flipadst) {
     773           0 :     const struct macroblock_plane *const p = &x->plane[0];
     774           0 :     const struct macroblockd_plane *const pd = &xd->plane[0];
     775           0 :     prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
     776           0 :                               pd->dst.buf, pd->dst.stride);
     777             :   }
     778           0 :   if (dct_idtx) {
     779           0 :     av1_subtract_plane(x, bsize, 0);
     780           0 :     const struct macroblock_plane *const p = &x->plane[0];
     781           0 :     const int bw = 4 << (b_width_log2_lookup[bsize]);
     782           0 :     const int bh = 4 << (b_height_log2_lookup[bsize]);
     783           0 :     prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
     784             :   }
     785             : 
     786           0 :   return prune;
     787             : }
     788             : #endif  // CONFIG_EXT_TX
     789             : 
     790             : // Performance drop: 0.3%, Speed improvement: 5%
     791           0 : static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
     792             :                              const MACROBLOCK *x, const MACROBLOCKD *xd) {
     793           0 :   const struct macroblock_plane *const p = &x->plane[0];
     794           0 :   const struct macroblockd_plane *const pd = &xd->plane[0];
     795           0 :   return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
     796             :                           pd->dst.stride);
     797             : }
     798             : 
     799           0 : static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
     800             :                           const MACROBLOCKD *const xd, int tx_set) {
     801             : #if CONFIG_EXT_TX
     802           0 :   const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL;
     803             : #else
     804             :   const int tx_set_1D[TX_TYPES_1D] = { 0 };
     805             : #endif  // CONFIG_EXT_TX
     806             : 
     807           0 :   switch (cpi->sf.tx_type_search.prune_mode) {
     808           0 :     case NO_PRUNE: return 0; break;
     809             :     case PRUNE_ONE:
     810           0 :       if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
     811           0 :         return 0;
     812           0 :       return prune_one_for_sby(cpi, bsize, x, xd);
     813             :       break;
     814             : #if CONFIG_EXT_TX
     815             :     case PRUNE_TWO:
     816           0 :       if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
     817           0 :         if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
     818           0 :         return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
     819             :       }
     820           0 :       if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
     821           0 :         return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
     822           0 :       return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
     823             :       break;
     824             : #endif  // CONFIG_EXT_TX
     825             :   }
     826           0 :   assert(0);
     827             :   return 0;
     828             : }
     829             : 
     830           0 : static int do_tx_type_search(TX_TYPE tx_type, int prune) {
     831             : // TODO(sarahparker) implement for non ext tx
     832             : #if CONFIG_EXT_TX
     833           0 :   return !(((prune >> vtx_tab[tx_type]) & 1) |
     834           0 :            ((prune >> (htx_tab[tx_type] + 8)) & 1));
     835             : #else
     836             :   // temporary to avoid compiler warnings
     837             :   (void)vtx_tab;
     838             :   (void)htx_tab;
     839             :   (void)tx_type;
     840             :   (void)prune;
     841             :   return 1;
     842             : #endif  // CONFIG_EXT_TX
     843             : }
     844             : 
     845           0 : static void model_rd_from_sse(const AV1_COMP *const cpi,
     846             :                               const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
     847             :                               int plane, int64_t sse, int *rate,
     848             :                               int64_t *dist) {
     849           0 :   const struct macroblockd_plane *const pd = &xd->plane[plane];
     850           0 :   const int dequant_shift =
     851             : #if CONFIG_HIGHBITDEPTH
     852           0 :       (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
     853             : #endif  // CONFIG_HIGHBITDEPTH
     854             :                                                     3;
     855             : 
     856             :   // Fast approximate the modelling function.
     857           0 :   if (cpi->sf.simple_model_rd_from_var) {
     858           0 :     const int64_t square_error = sse;
     859           0 :     int quantizer = (pd->dequant[1] >> dequant_shift);
     860             : 
     861           0 :     if (quantizer < 120)
     862           0 :       *rate = (int)((square_error * (280 - quantizer)) >>
     863             :                     (16 - AV1_PROB_COST_SHIFT));
     864             :     else
     865           0 :       *rate = 0;
     866           0 :     *dist = (square_error * quantizer) >> 8;
     867             :   } else {
     868           0 :     av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
     869           0 :                                  pd->dequant[1] >> dequant_shift, rate, dist);
     870             :   }
     871             : 
     872           0 :   *dist <<= 4;
     873           0 : }
     874             : 
     875           0 : static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
     876             :                             MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
     877             :                             int plane_to, int *out_rate_sum,
     878             :                             int64_t *out_dist_sum, int *skip_txfm_sb,
     879             :                             int64_t *skip_sse_sb) {
     880             :   // Note our transform coeffs are 8 times an orthogonal transform.
     881             :   // Hence quantizer step is also 8 times. To get effective quantizer
     882             :   // we need to divide by 8 before sending to modeling function.
     883             :   int plane;
     884           0 :   const int ref = xd->mi[0]->mbmi.ref_frame[0];
     885             : 
     886           0 :   int64_t rate_sum = 0;
     887           0 :   int64_t dist_sum = 0;
     888           0 :   int64_t total_sse = 0;
     889             : 
     890           0 :   x->pred_sse[ref] = 0;
     891             : 
     892           0 :   for (plane = plane_from; plane <= plane_to; ++plane) {
     893           0 :     struct macroblock_plane *const p = &x->plane[plane];
     894           0 :     struct macroblockd_plane *const pd = &xd->plane[plane];
     895             : #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
     896           0 :     const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
     897             : #else
     898             :     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
     899             : #endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
     900             : 
     901             :     unsigned int sse;
     902             :     int rate;
     903             :     int64_t dist;
     904             : 
     905             : #if CONFIG_CB4X4
     906           0 :     if (x->skip_chroma_rd && plane) continue;
     907             : #endif  // CONFIG_CB4X4
     908             : 
     909             :     // TODO(geza): Write direct sse functions that do not compute
     910             :     // variance as well.
     911           0 :     cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
     912             :                        &sse);
     913             : 
     914           0 :     if (plane == 0) x->pred_sse[ref] = sse;
     915             : 
     916           0 :     total_sse += sse;
     917             : 
     918           0 :     model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
     919             : 
     920           0 :     rate_sum += rate;
     921           0 :     dist_sum += dist;
     922             :   }
     923             : 
     924           0 :   *skip_txfm_sb = total_sse == 0;
     925           0 :   *skip_sse_sb = total_sse << 4;
     926           0 :   *out_rate_sum = (int)rate_sum;
     927           0 :   *out_dist_sum = dist_sum;
     928           0 : }
     929             : 
     930           0 : int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
     931             :                           intptr_t block_size, int64_t *ssz) {
     932             :   int i;
     933           0 :   int64_t error = 0, sqcoeff = 0;
     934             : 
     935           0 :   for (i = 0; i < block_size; i++) {
     936           0 :     const int diff = coeff[i] - dqcoeff[i];
     937           0 :     error += diff * diff;
     938           0 :     sqcoeff += coeff[i] * coeff[i];
     939             :   }
     940             : 
     941           0 :   *ssz = sqcoeff;
     942           0 :   return error;
     943             : }
     944             : 
     945           0 : int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
     946             :                              int block_size) {
     947             :   int i;
     948           0 :   int64_t error = 0;
     949             : 
     950           0 :   for (i = 0; i < block_size; i++) {
     951           0 :     const int diff = coeff[i] - dqcoeff[i];
     952           0 :     error += diff * diff;
     953             :   }
     954             : 
     955           0 :   return error;
     956             : }
     957             : 
     958             : #if CONFIG_HIGHBITDEPTH
     959           0 : int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
     960             :                                  const tran_low_t *dqcoeff, intptr_t block_size,
     961             :                                  int64_t *ssz, int bd) {
     962             :   int i;
     963           0 :   int64_t error = 0, sqcoeff = 0;
     964           0 :   int shift = 2 * (bd - 8);
     965           0 :   int rounding = shift > 0 ? 1 << (shift - 1) : 0;
     966             : 
     967           0 :   for (i = 0; i < block_size; i++) {
     968           0 :     const int64_t diff = coeff[i] - dqcoeff[i];
     969           0 :     error += diff * diff;
     970           0 :     sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
     971             :   }
     972           0 :   assert(error >= 0 && sqcoeff >= 0);
     973           0 :   error = (error + rounding) >> shift;
     974           0 :   sqcoeff = (sqcoeff + rounding) >> shift;
     975             : 
     976           0 :   *ssz = sqcoeff;
     977           0 :   return error;
     978             : }
     979             : #endif  // CONFIG_HIGHBITDEPTH
     980             : 
     981             : #if CONFIG_PVQ
     982             : // Without PVQ, av1_block_error_c() returns two kinds of error:
     983             : // 1) the reconstruction (i.e. decoded) error, and
     984             : // 2) the squared sum of the transformed residue (i.e. 'coeff').
     985             : // However, if PVQ is enabled, coeff does not hold the transformed residue;
     986             : // it holds the transformed original instead.
     987             : // Hence a new parameter, the ref vector (i.e. the transformed predicted
     988             : // signal), is required to derive the residue signal,
     989             : // i.e. coeff - ref = residue (all transformed).
     990             : 
     991             : #if CONFIG_HIGHBITDEPTH
     992             : static int64_t av1_highbd_block_error2_c(const tran_low_t *coeff,
     993             :                                          const tran_low_t *dqcoeff,
     994             :                                          const tran_low_t *ref,
     995             :                                          intptr_t block_size, int64_t *ssz,
     996             :                                          int bd) {
     997             :   int64_t error;
     998             :   int64_t sqcoeff;
     999             :   int shift = 2 * (bd - 8);
    1000             :   int rounding = shift > 0 ? 1 << (shift - 1) : 0;
    1001             :   // Use the existing sse codes for calculating distortion of decoded signal:
    1002             :   // i.e. (orig - decoded)^2
    1003             :   // For high bit depth, throw away ssz until a 32-bit version of
    1004             :   // av1_block_error_fp is written.
    1005             :   int64_t ssz_trash;
    1006             :   error = av1_block_error(coeff, dqcoeff, block_size, &ssz_trash);
    1007             :   // prediction residue^2 = (orig - ref)^2
    1008             :   sqcoeff = av1_block_error(coeff, ref, block_size, &ssz_trash);
    1009             :   error = (error + rounding) >> shift;
    1010             :   sqcoeff = (sqcoeff + rounding) >> shift;
    1011             :   *ssz = sqcoeff;
    1012             :   return error;
    1013             : }
    1014             : #else
    1015             : // TODO(yushin) : Since 4x4 case does not need ssz, better to refactor into
    1016             : // a separate function that does not do the extra computations for ssz.
    1017             : static int64_t av1_block_error2_c(const tran_low_t *coeff,
    1018             :                                   const tran_low_t *dqcoeff,
    1019             :                                   const tran_low_t *ref, intptr_t block_size,
    1020             :                                   int64_t *ssz) {
    1021             :   int64_t error;
    1022             :   // Use the existing sse codes for calculating distortion of decoded signal:
    1023             :   // i.e. (orig - decoded)^2
    1024             :   error = av1_block_error_fp(coeff, dqcoeff, block_size);
    1025             :   // prediction residue^2 = (orig - ref)^2
    1026             :   *ssz = av1_block_error_fp(coeff, ref, block_size);
    1027             :   return error;
    1028             : }
    1029             : #endif  // CONFIG_HIGHBITDEPTH
    1030             : #endif  // CONFIG_PVQ
    1031             : 
    1032             : #if !CONFIG_PVQ || CONFIG_VAR_TX
    1033             : /* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
    1034             :  * decide whether to include cost of a trailing EOB node or not (i.e. we
    1035             :  * can skip this if the last coefficient in this transform block, e.g. the
    1036             :  * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
    1037             :  * were non-zero). */
    1038             : #if !CONFIG_LV_MAP
    1039           0 : static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
    1040             :                        int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
    1041             :                        const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
    1042             :                        int use_fast_coef_costing) {
    1043           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    1044           0 :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    1045           0 :   const struct macroblock_plane *p = &x->plane[plane];
    1046           0 :   const struct macroblockd_plane *pd = &xd->plane[plane];
    1047           0 :   const PLANE_TYPE type = pd->plane_type;
    1048           0 :   const uint16_t *band_count = &band_count_table[tx_size][1];
    1049           0 :   const int eob = p->eobs[block];
    1050           0 :   const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
    1051           0 :   const int tx_size_ctx = txsize_sqr_map[tx_size];
    1052           0 :   unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
    1053           0 :       x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
    1054             :   uint8_t token_cache[MAX_TX_SQUARE];
    1055           0 :   int pt = combine_entropy_contexts(*a, *l);
    1056             :   int c, cost;
    1057           0 :   const int16_t *scan = scan_order->scan;
    1058           0 :   const int16_t *nb = scan_order->neighbors;
    1059           0 :   const int ref = is_inter_block(mbmi);
    1060           0 :   aom_prob *blockz_probs =
    1061           0 :       cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref];
    1062             : 
    1063             : #if CONFIG_HIGHBITDEPTH
    1064           0 :   const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
    1065             : #else
    1066             :   const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
    1067             : #endif  // CONFIG_HIGHBITDEPTH
    1068             : 
    1069             : #if !CONFIG_VAR_TX && !CONFIG_SUPERTX
    1070             :   // Check for consistency of tx_size with mode info
    1071             :   assert(tx_size == get_tx_size(plane, xd));
    1072             : #endif  // !CONFIG_VAR_TX && !CONFIG_SUPERTX
    1073             :   (void)cm;
    1074             : 
    1075           0 :   if (eob == 0) {
    1076             :     // single eob token
    1077           0 :     cost = av1_cost_bit(blockz_probs[pt], 0);
    1078             :   } else {
    1079           0 :     if (use_fast_coef_costing) {
    1080           0 :       int band_left = *band_count++;
    1081             : 
    1082             :       // dc token
    1083           0 :       int v = qcoeff[0];
    1084             :       int16_t prev_t;
    1085           0 :       cost = av1_get_token_cost(v, &prev_t, cat6_bits);
    1086           0 :       cost += (*token_costs)[!prev_t][pt][prev_t];
    1087             : 
    1088           0 :       token_cache[0] = av1_pt_energy_class[prev_t];
    1089           0 :       ++token_costs;
    1090             : 
    1091             :       // ac tokens
    1092           0 :       for (c = 1; c < eob; c++) {
    1093           0 :         const int rc = scan[c];
    1094             :         int16_t t;
    1095             : 
    1096           0 :         v = qcoeff[rc];
    1097           0 :         cost += av1_get_token_cost(v, &t, cat6_bits);
    1098           0 :         cost += (*token_costs)[!t][!prev_t][t];
    1099           0 :         prev_t = t;
    1100           0 :         if (!--band_left) {
    1101           0 :           band_left = *band_count++;
    1102           0 :           ++token_costs;
    1103             :         }
    1104             :       }
    1105             : 
    1106             :       // eob token
    1107           0 :       cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
    1108             : 
    1109             :     } else {  // !use_fast_coef_costing
    1110           0 :       int band_left = *band_count++;
    1111             : 
    1112             :       // dc token
    1113           0 :       int v = qcoeff[0];
    1114             :       int16_t tok;
    1115           0 :       cost = av1_get_token_cost(v, &tok, cat6_bits);
    1116           0 :       cost += (*token_costs)[!tok][pt][tok];
    1117             : 
    1118           0 :       token_cache[0] = av1_pt_energy_class[tok];
    1119           0 :       ++token_costs;
    1120             : 
    1121             :       // ac tokens
    1122           0 :       for (c = 1; c < eob; c++) {
    1123           0 :         const int rc = scan[c];
    1124             : 
    1125           0 :         v = qcoeff[rc];
    1126           0 :         cost += av1_get_token_cost(v, &tok, cat6_bits);
    1127           0 :         pt = get_coef_context(nb, token_cache, c);
    1128           0 :         cost += (*token_costs)[!tok][pt][tok];
    1129           0 :         token_cache[rc] = av1_pt_energy_class[tok];
    1130           0 :         if (!--band_left) {
    1131           0 :           band_left = *band_count++;
    1132           0 :           ++token_costs;
    1133             :         }
    1134             :       }
    1135             : 
    1136             :       // eob token
    1137           0 :       pt = get_coef_context(nb, token_cache, c);
    1138           0 :       cost += (*token_costs)[0][pt][EOB_TOKEN];
    1139             :     }
    1140             :   }
    1141             : 
    1142           0 :   return cost;
    1143             : }
    1144             : #endif  // !CONFIG_LV_MAP
    1145             : // Returns the coefficient cost of one transform block: cost_coeffs() without CONFIG_LV_MAP, av1_cost_coeffs_txb() with it.
    1146           0 : int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
    1147             :                     int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
    1148             :                     const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
    1149             :                     int use_fast_coef_costing) {
    1150             : #if !CONFIG_LV_MAP
    1151           0 :   const AV1_COMMON *const cm = &cpi->common;
    1152           0 :   return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
    1153             :                      use_fast_coef_costing);
    1154             : #else  // !CONFIG_LV_MAP
    1155             :   (void)scan_order;  // not used by the CONFIG_LV_MAP path
    1156             :   (void)use_fast_coef_costing;  // not used by the CONFIG_LV_MAP path
    1157             :   const MACROBLOCKD *xd = &x->e_mbd;
    1158             :   const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    1159             :   const struct macroblockd_plane *pd = &xd->plane[plane];
    1160             :   const BLOCK_SIZE bsize = mbmi->sb_type;
    1161             : #if CONFIG_CB4X4
    1162             : #if CONFIG_CHROMA_2X2
    1163             :   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    1164             : #else
    1165             :   const BLOCK_SIZE plane_bsize =
    1166             :       AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
    1167             : #endif  // CONFIG_CHROMA_2X2
    1168             : #else   // CONFIG_CB4X4
    1169             :   const BLOCK_SIZE plane_bsize =
    1170             :       get_plane_block_size(AOMMAX(BLOCK_8X8, bsize), pd);
    1171             : #endif  // CONFIG_CB4X4
    1172             : // Fill txb_ctx from plane_bsize, tx_size and the a/l contexts, then cost the block with it.
    1173             :   TXB_CTX txb_ctx;
    1174             :   get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
    1175             :   return av1_cost_coeffs_txb(cpi, x, plane, block, &txb_ctx);
    1176             : #endif  // !CONFIG_LV_MAP
    1177             : }
    1178             : #endif  // !CONFIG_PVQ || CONFIG_VAR_TX
    1179             : 
    1180             : // Get the transform block's full size (*width/*height, each optional: may be NULL) and its visible size, clamped to [0, full size] using xd->mb_to_right_edge / xd->mb_to_bottom_edge.
    1181           0 : static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
    1182             :                                BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
    1183             :                                BLOCK_SIZE tx_bsize, int *width, int *height,
    1184             :                                int *visible_width, int *visible_height) {
    1185             : #if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
    1186           0 :   assert(tx_bsize <= plane_bsize);
    1187             : #endif  // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT)
    1188           0 :   int txb_height = block_size_high[tx_bsize];
    1189           0 :   int txb_width = block_size_wide[tx_bsize];
    1190           0 :   const int block_height = block_size_high[plane_bsize];
    1191           0 :   const int block_width = block_size_wide[plane_bsize];
    1192           0 :   const struct macroblockd_plane *const pd = &xd->plane[plane];
    1193             :   // TODO(aconverse@google.com): Investigate using crop_width/height here rather
    1194             :   // than the MI size
    1195           0 :   const int block_rows =  // plane block height, reduced when mb_to_bottom_edge is negative
    1196           0 :       (xd->mb_to_bottom_edge >= 0)
    1197             :           ? block_height
    1198           0 :           : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
    1199           0 :   const int block_cols =  // plane block width, reduced when mb_to_right_edge is negative
    1200           0 :       (xd->mb_to_right_edge >= 0)
    1201             :           ? block_width
    1202           0 :           : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
    1203           0 :   const int tx_unit_size = tx_size_wide_log2[0];  // shift that converts blk_row/blk_col into the units of block_rows/block_cols
    1204           0 :   if (width) *width = txb_width;
    1205           0 :   if (height) *height = txb_height;
    1206           0 :   *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
    1207           0 :   *visible_height =
    1208           0 :       clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
    1209           0 : }
    1210             : 
    1211             : // Compute the pixel domain sum of squared errors between src and dst over the
    1212             : // visible part of the transform block (both visible dimensions must be > 0).
    1213           0 : static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
    1214             :                           int plane, const uint8_t *src, const int src_stride,
    1215             :                           const uint8_t *dst, const int dst_stride, int blk_row,
    1216             :                           int blk_col, const BLOCK_SIZE plane_bsize,
    1217             :                           const BLOCK_SIZE tx_bsize) {
    1218             :   int txb_rows, txb_cols, visible_rows, visible_cols;
    1219           0 :   get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
    1220             :                      &txb_cols, &txb_rows, &visible_cols, &visible_rows);
    1221           0 :   assert(visible_rows > 0);
    1222           0 :   assert(visible_cols > 0);
    1223             : #if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
    1224             :   if ((txb_rows == visible_rows && txb_cols == visible_cols) &&
    1225             :       tx_bsize < BLOCK_SIZES) {
    1226             : #else
    1227           0 :   if (txb_rows == visible_rows && txb_cols == visible_cols) {
    1228             : #endif
    1229             :     unsigned sse;
    1230           0 :     cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);  // block fully visible: vf writes the sse, its return value is not used
    1231           0 :     return sse;
    1232             :   }
    1233             : #if CONFIG_HIGHBITDEPTH  // below: sum only over visible_cols x visible_rows
    1234           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    1235           0 :     uint64_t sse = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
    1236             :                                            visible_cols, visible_rows);
    1237           0 :     return (unsigned int)ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
    1238             :   }
    1239             : #endif  // CONFIG_HIGHBITDEPTH
    1240           0 :   unsigned sse = aom_sse_odd_size(src, src_stride, dst, dst_stride,
    1241             :                                   visible_cols, visible_rows);
    1242           0 :   return sse;
    1243             : }
    1244             : 
    1245             : // Compute the sum of squares of diff over the visible part of the transform block.
    1246           0 : static int64_t sum_squares_visible(const MACROBLOCKD *xd, int plane,
    1247             :                                    const int16_t *diff, const int diff_stride,
    1248             :                                    int blk_row, int blk_col,
    1249             :                                    const BLOCK_SIZE plane_bsize,
    1250             :                                    const BLOCK_SIZE tx_bsize) {
    1251             :   int visible_rows, visible_cols;
    1252           0 :   get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,  // full width/height are not needed, hence NULL
    1253             :                      NULL, &visible_cols, &visible_rows);
    1254           0 :   return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows);
    1255             : }
    1256             : // Writes the distortion (*out_dist) and the residual energy (*out_sse) of one transform block, measured either on transform coefficients or on pixels.
    1257           0 : void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
    1258             :                     BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
    1259             :                     TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
    1260             :                     OUTPUT_STATUS output_status) {
    1261           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    1262           0 :   const struct macroblock_plane *const p = &x->plane[plane];
    1263             : #if CONFIG_DAALA_DIST
    1264             :   int qm = OD_HVS_QM;
    1265             :   int use_activity_masking = 0;
    1266             : #if CONFIG_PVQ
    1267             :   use_activity_masking = x->daala_enc.use_activity_masking;
    1268             : #endif  // CONFIG_PVQ
    1269             :   struct macroblockd_plane *const pd = &xd->plane[plane];
    1270             : #else   // CONFIG_DAALA_DIST
    1271           0 :   const struct macroblockd_plane *const pd = &xd->plane[plane];
    1272             : #endif  // CONFIG_DAALA_DIST
    1273             : // !CONFIG_DAALA_DIST is a compile-time term: with CONFIG_DAALA_DIST set, the pixel-domain branch below is always taken.
    1274           0 :   if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {
    1275             :     // Transform domain distortion computation is more efficient as it does
    1276             :     // not involve an inverse transform, but it is less accurate.
    1277           0 :     const int buffer_length = tx_size_2d[tx_size];
    1278             :     int64_t this_sse;
    1279           0 :     int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
    1280           0 :     tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    1281           0 :     tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    1282             : #if CONFIG_PVQ
    1283             :     tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
    1284             : 
    1285             : #if CONFIG_HIGHBITDEPTH
    1286             :     const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    1287             :     *out_dist = av1_highbd_block_error2_c(coeff, dqcoeff, ref_coeff,
    1288             :                                           buffer_length, &this_sse, bd) >>
    1289             :                 shift;
    1290             : #else
    1291             :     *out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
    1292             :                                    &this_sse) >>
    1293             :                 shift;
    1294             : #endif  // CONFIG_HIGHBITDEPTH
    1295             : #elif CONFIG_HIGHBITDEPTH
    1296           0 :     const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    1297           0 :     *out_dist =
    1298           0 :         av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
    1299             :         shift;
    1300             : #else
    1301             :     *out_dist =
    1302             :         av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
    1303             : #endif  // CONFIG_PVQ
    1304           0 :     *out_sse = this_sse >> shift;  // same shift as applied to *out_dist above
    1305             :   } else {
    1306           0 :     const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
    1307             : #if !CONFIG_PVQ || CONFIG_DAALA_DIST
    1308           0 :     const int bsw = block_size_wide[tx_bsize];
    1309           0 :     const int bsh = block_size_high[tx_bsize];
    1310             : #endif
    1311           0 :     const int src_stride = x->plane[plane].src.stride;
    1312           0 :     const int dst_stride = xd->plane[plane].dst.stride;
    1313             :     // Scale the transform block index to pixel unit.
    1314           0 :     const int src_idx = (blk_row * src_stride + blk_col)
    1315           0 :                         << tx_size_wide_log2[0];
    1316           0 :     const int dst_idx = (blk_row * dst_stride + blk_col)
    1317           0 :                         << tx_size_wide_log2[0];
    1318           0 :     const uint8_t *src = &x->plane[plane].src.buf[src_idx];
    1319           0 :     const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
    1320           0 :     const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    1321           0 :     const uint16_t eob = p->eobs[block];
    1322             : // NOTE(review): cpi was already dereferenced (cpi->sf) in the condition above, so this assert comes late.
    1323           0 :     assert(cpi != NULL);
    1324           0 :     assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
    1325             : // Step 1: *out_sse. Daala distortion for luma blocks of at least 8x8 (CONFIG_DAALA_DIST only), else sum of squares of src_diff.
    1326             : #if CONFIG_DAALA_DIST
    1327             :     if (plane == 0 && bsw >= 8 && bsh >= 8) {
    1328             :       if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
    1329             :         const int pred_stride = block_size_wide[plane_bsize];
    1330             :         const int pred_idx = (blk_row * pred_stride + blk_col)
    1331             :                              << tx_size_wide_log2[0];
    1332             :         const int16_t *pred = &pd->pred[pred_idx];
    1333             :         int i, j;
    1334             :         DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
    1335             : 
    1336             :         for (j = 0; j < bsh; j++)
    1337             :           for (i = 0; i < bsw; i++)
    1338             :             pred8[j * bsw + i] = pred[j * pred_stride + i];
    1339             :         *out_sse = av1_daala_dist(src, src_stride, pred8, bsw, bsw, bsh, qm,
    1340             :                                   use_activity_masking, x->qindex);
    1341             :       } else {
    1342             :         *out_sse = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
    1343             :                                   qm, use_activity_masking, x->qindex);
    1344             :       }
    1345             :     } else
    1346             : #endif  // CONFIG_DAALA_DIST
    1347             :     {
    1348           0 :       const int diff_stride = block_size_wide[plane_bsize];
    1349           0 :       const int diff_idx = (blk_row * diff_stride + blk_col)
    1350           0 :                            << tx_size_wide_log2[0];
    1351           0 :       const int16_t *diff = &p->src_diff[diff_idx];
    1352           0 :       *out_sse = sum_squares_visible(xd, plane, diff, diff_stride, blk_row,
    1353             :                                      blk_col, plane_bsize, tx_bsize);
    1354             : #if CONFIG_HIGHBITDEPTH
    1355           0 :       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    1356           0 :         *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
    1357             : #endif  // CONFIG_HIGHBITDEPTH
    1358             :     }
    1359           0 :     *out_sse *= 16;  // NOTE(review): x16 presumably matches the scale of the transform-domain path - confirm
    1360             : // Step 2: *out_dist. With eob == 0 it is simply *out_sse; otherwise it is measured against the reconstruction.
    1361           0 :     if (eob) {
    1362           0 :       if (output_status == OUTPUT_HAS_DECODED_PIXELS) {  // dst is compared with src directly, no inverse transform here
    1363             : #if CONFIG_DAALA_DIST
    1364             :         if (plane == 0 && bsw >= 8 && bsh >= 8)
    1365             :           *out_dist = av1_daala_dist(src, src_stride, dst, dst_stride, bsw, bsh,
    1366             :                                      qm, use_activity_masking, x->qindex);
    1367             :         else
    1368             : #endif  // CONFIG_DAALA_DIST
    1369           0 :           *out_dist =
    1370           0 :               pixel_sse(cpi, xd, plane, src, src_stride, dst, dst_stride,
    1371             :                         blk_row, blk_col, plane_bsize, tx_bsize);
    1372             :       } else {
    1373             : #if CONFIG_HIGHBITDEPTH
    1374             :         uint8_t *recon;
    1375             :         DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
    1376             : 
    1377           0 :         if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    1378           0 :           recon = CONVERT_TO_BYTEPTR(recon16);
    1379             :         else
    1380           0 :           recon = (uint8_t *)recon16;
    1381             : #else
    1382             :         DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
    1383             : #endif  // CONFIG_HIGHBITDEPTH
    1384             : // Copy the dst pixels into recon (stride MAX_TX_SIZE); the inverse transform below then works on recon (presumably adding the residual - confirm).
    1385             : #if !CONFIG_PVQ
    1386             : #if CONFIG_HIGHBITDEPTH
    1387           0 :         if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    1388           0 :           aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
    1389             :                                    NULL, 0, bsw, bsh, xd->bd);
    1390             :         } else {
    1391             : #endif  // CONFIG_HIGHBITDEPTH
    1392           0 :           aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL,
    1393             :                             0, bsw, bsh);
    1394             : #if CONFIG_HIGHBITDEPTH
    1395             :         }
    1396             : #endif  // CONFIG_HIGHBITDEPTH
    1397             : #else
    1398             :         (void)dst;
    1399             : #endif  // !CONFIG_PVQ
    1400             : 
    1401           0 :         const PLANE_TYPE plane_type = get_plane_type(plane);
    1402           0 :         TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
    1403             : 
    1404           0 :         av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, recon,
    1405             :                                     MAX_TX_SIZE, eob);
    1406             : 
    1407             : #if CONFIG_DAALA_DIST
    1408             :         if (plane == 0 && bsw >= 8 && bsh >= 8) {
    1409             :           *out_dist = av1_daala_dist(src, src_stride, recon, MAX_TX_SIZE, bsw,
    1410             :                                      bsh, qm, use_activity_masking, x->qindex);
    1411             :         } else {
    1412             :           if (plane == 0) {
    1413             :             // Save decoded pixels for inter block in pd->pred to avoid
    1414             :             // block_8x8_rd_txfm_daala_dist() need to produce them
    1415             :             // by calling av1_inverse_transform_block() again.
    1416             :             const int pred_stride = block_size_wide[plane_bsize];
    1417             :             const int pred_idx = (blk_row * pred_stride + blk_col)
    1418             :                                  << tx_size_wide_log2[0];
    1419             :             int16_t *pred = &pd->pred[pred_idx];
    1420             :             int i, j;
    1421             : 
    1422             :             for (j = 0; j < bsh; j++)
    1423             :               for (i = 0; i < bsw; i++)
    1424             :                 pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
    1425             :           }
    1426             : #endif  // CONFIG_DAALA_DIST
    1427           0 :           *out_dist =
    1428           0 :               pixel_sse(cpi, xd, plane, src, src_stride, recon, MAX_TX_SIZE,
    1429             :                         blk_row, blk_col, plane_bsize, tx_bsize);
    1430             : #if CONFIG_DAALA_DIST
    1431             :         }
    1432             : #endif  // CONFIG_DAALA_DIST
    1433             :       }
    1434           0 :       *out_dist *= 16;  // same x16 factor as applied to *out_sse
    1435             :     } else {
    1436           0 :       *out_dist = *out_sse;
    1437             :     }
    1438             :   }
    1439           0 : }
    1440             : // Computes rate and distortion for one transform block (arg is a struct rdcost_block_args *), merges them into args->rd_stats, and sets args->exit_early once the running cost exceeds args->best_rd.
    1441           0 : static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
    1442             :                           BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
    1443           0 :   struct rdcost_block_args *args = arg;
    1444           0 :   MACROBLOCK *const x = args->x;
    1445           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    1446           0 :   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    1447           0 :   const AV1_COMP *cpi = args->cpi;
    1448           0 :   ENTROPY_CONTEXT *a = args->t_above + blk_col;
    1449           0 :   ENTROPY_CONTEXT *l = args->t_left + blk_row;
    1450             : #if !CONFIG_TXK_SEL
    1451           0 :   const AV1_COMMON *cm = &cpi->common;
    1452             : #endif  // NOTE(review): cm and the CALCULATE_RD label exist only under !CONFIG_TXK_SEL, yet the CONFIG_CFL (without CONFIG_EC_ADAPT) and CONFIG_DPCM_INTRA code below reference them - confirm those combinations are never built together.
    1453             :   int64_t rd1, rd2, rd;
    1454             :   RD_STATS this_rd_stats;
    1455             : 
    1456           0 :   assert(tx_size == get_tx_size(plane, xd));
    1457             : 
    1458           0 :   av1_init_rd_stats(&this_rd_stats);
    1459             : // exit_early is set further down once the accumulated cost exceeds best_rd; later blocks then return immediately.
    1460           0 :   if (args->exit_early) return;
    1461             : // Intra blocks: predict first, then compute the residual for this transform block.
    1462           0 :   if (!is_inter_block(mbmi)) {
    1463             : #if CONFIG_CFL
    1464             : 
    1465             : #if CONFIG_EC_ADAPT
    1466             :     FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
    1467             : #else
    1468             :     FRAME_CONTEXT *const ec_ctx = cm->fc;
    1469             : #endif  // CONFIG_EC_ADAPT
    1470             : 
    1471             :     av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
    1472             :                                            blk_row, tx_size, plane_bsize);
    1473             : #else
    1474           0 :     av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
    1475             : #endif
    1476             : #if CONFIG_DPCM_INTRA
    1477             :     const int block_raster_idx =
    1478             :         av1_block_index_to_raster_order(tx_size, block);
    1479             :     const PREDICTION_MODE mode =
    1480             :         (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode;
    1481             :     TX_TYPE tx_type = get_tx_type((plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV,
    1482             :                                   xd, block, tx_size);
    1483             :     if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) {
    1484             :       int8_t skip;
    1485             :       av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col,
    1486             :                                   plane_bsize, tx_size, tx_type, a, l, &skip);
    1487             :       av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
    1488             :                      tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
    1489             :                      OUTPUT_HAS_DECODED_PIXELS);
    1490             :       goto CALCULATE_RD;
    1491             :     }
    1492             : #endif  // CONFIG_DPCM_INTRA
    1493           0 :     av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
    1494             :   }
    1495             : 
    1496             : #if !CONFIG_TXK_SEL
    1497             :   // full forward transform and quantization
    1498           0 :   const int coeff_ctx = combine_entropy_contexts(*a, *l);
    1499           0 :   av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
    1500             :                   coeff_ctx, AV1_XFORM_QUANT_FP);
    1501           0 :   av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
    1502             : // Intra: run the inverse transform first, then measure with OUTPUT_HAS_DECODED_PIXELS. Inter: av1_dist_block() reconstructs internally (OUTPUT_HAS_PREDICTED_PIXELS).
    1503           0 :   if (!is_inter_block(mbmi)) {
    1504           0 :     struct macroblock_plane *const p = &x->plane[plane];
    1505           0 :     av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
    1506           0 :                                        p->eobs[block]);
    1507           0 :     av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
    1508             :                    tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
    1509             :                    OUTPUT_HAS_DECODED_PIXELS);
    1510             :   } else {
    1511           0 :     av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
    1512             :                    tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
    1513             :                    OUTPUT_HAS_PREDICTED_PIXELS);
    1514             :   }
    1515             : #if CONFIG_CFL
    1516             :   if (plane == AOM_PLANE_Y && x->cfl_store_y) {
    1517             :     struct macroblockd_plane *const pd = &xd->plane[plane];
    1518             :     const int dst_stride = pd->dst.stride;
    1519             :     uint8_t *dst =
    1520             :         &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
    1521             :     cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
    1522             :   }
    1523             : #endif
    1524             : #if CONFIG_DPCM_INTRA
    1525             : CALCULATE_RD : {}
    1526             : #endif  // CONFIG_DPCM_INTRA
    1527           0 :   rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
    1528           0 :   if (args->this_rd + rd > args->best_rd) {  // zero-rate cost of the distortion alone already exceeds best_rd: skip the rate computation
    1529           0 :     args->exit_early = 1;
    1530           0 :     return;
    1531             :   }
    1532             : #if !CONFIG_PVQ
    1533           0 :   const PLANE_TYPE plane_type = get_plane_type(plane);
    1534           0 :   const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
    1535           0 :   const SCAN_ORDER *scan_order =
    1536           0 :       get_scan(cm, tx_size, tx_type, is_inter_block(mbmi));
    1537           0 :   this_rd_stats.rate =
    1538           0 :       av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l,
    1539             :                       args->use_fast_coef_costing);
    1540             : #else   // !CONFIG_PVQ
    1541             :   this_rd_stats.rate = x->rate;
    1542             : #endif  // !CONFIG_PVQ
    1543             : #else   // !CONFIG_TXK_SEL
    1544             :   av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
    1545             :                       tx_size, a, l, args->use_fast_coef_costing,
    1546             :                       &this_rd_stats);
    1547             : #endif  // !CONFIG_TXK_SEL
    1548             : 
    1549             : #if !CONFIG_PVQ
    1550             : #if CONFIG_RD_DEBUG
    1551             :   av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
    1552             :                             this_rd_stats.rate);
    1553             : #endif  // CONFIG_RD_DEBUG
    1554           0 :   av1_set_txb_context(x, plane, block, tx_size, a, l);
    1555             : #endif  // !CONFIG_PVQ
    1556             : // rd1: cost with the computed rate and distortion. rd2: cost with zero rate and the sse as distortion. The smaller one is accumulated.
    1557           0 :   rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
    1558           0 :   rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
    1559             : 
    1560             :   // TODO(jingning): temporarily enabled only for luma component
    1561           0 :   rd = AOMMIN(rd1, rd2);
    1562             : 
    1563             : #if CONFIG_DAALA_DIST
    1564             :   if (plane == 0 && plane_bsize >= BLOCK_8X8 &&
    1565             :       (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) {
    1566             :     this_rd_stats.dist = 0;
    1567             :     this_rd_stats.sse = 0;
    1568             :     rd = 0;
    1569             :     x->rate_4x4[block] = this_rd_stats.rate;
    1570             :   }
    1571             : #endif  // CONFIG_DAALA_DIST
    1572             : 
    1573             : #if !CONFIG_PVQ
    1574           0 :   this_rd_stats.skip &= !x->plane[plane].eobs[block];  // clears skip when this block has a nonzero eob
    1575             : #else
    1576             :   this_rd_stats.skip &= x->pvq_skip[plane];
    1577             : #endif  // !CONFIG_PVQ
    1578           0 :   av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
    1579             : 
    1580           0 :   args->this_rd += rd;
    1581             : // Stop the remaining blocks once the accumulated cost is worse than the best known one.
    1582           0 :   if (args->this_rd > args->best_rd) {
    1583           0 :     args->exit_early = 1;
    1584           0 :     return;
    1585             :   }
    1586             : }
    1587             : 
    1588             : #if CONFIG_DAALA_DIST
    1589             : static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
    1590             :                                          int blk_col, BLOCK_SIZE plane_bsize,
    1591             :                                          TX_SIZE tx_size, void *arg) {
    1592             :   struct rdcost_block_args *args = arg;
    1593             :   MACROBLOCK *const x = args->x;
    1594             :   MACROBLOCKD *const xd = &x->e_mbd;
    1595             :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    1596             :   int64_t rd, rd1, rd2;
    1597             :   RD_STATS this_rd_stats;
    1598             :   int qm = OD_HVS_QM;
    1599             :   int use_activity_masking = 0;
    1600             : 
    1601             :   (void)tx_size;
    1602             : 
    1603             :   assert(plane == 0);
    1604             :   assert(plane_bsize >= BLOCK_8X8);
    1605             : #if CONFIG_PVQ
    1606             :   use_activity_masking = x->daala_enc.use_activity_masking;
    1607             : #endif  // CONFIG_PVQ
    1608             :   av1_init_rd_stats(&this_rd_stats);
    1609             : 
    1610             :   if (args->exit_early) return;
    1611             : 
    1612             :   {
    1613             :     const struct macroblock_plane *const p = &x->plane[plane];
    1614             :     struct macroblockd_plane *const pd = &xd->plane[plane];
    1615             : 
    1616             :     const int src_stride = p->src.stride;
    1617             :     const int dst_stride = pd->dst.stride;
    1618             :     const int diff_stride = block_size_wide[plane_bsize];
    1619             : 
    1620             :     const uint8_t *src =
    1621             :         &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
    1622             :     const uint8_t *dst =
    1623             :         &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
    1624             : 
    1625             :     unsigned int tmp1, tmp2;
    1626             :     int qindex = x->qindex;
    1627             :     const int pred_stride = block_size_wide[plane_bsize];
    1628             :     const int pred_idx = (blk_row * pred_stride + blk_col)
    1629             :                          << tx_size_wide_log2[0];
    1630             :     int16_t *pred = &pd->pred[pred_idx];
    1631             :     int i, j;
    1632             :     const int tx_blk_size = 8;
    1633             : 
    1634             :     DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
    1635             : 
    1636             :     for (j = 0; j < tx_blk_size; j++)
    1637             :       for (i = 0; i < tx_blk_size; i++)
    1638             :         pred8[j * tx_blk_size + i] = pred[j * diff_stride + i];
    1639             : 
    1640             :     tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm,
    1641             :                           use_activity_masking, qindex);
    1642             :     tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm,
    1643             :                           use_activity_masking, qindex);
    1644             : 
    1645             :     if (!is_inter_block(mbmi)) {
    1646             :       this_rd_stats.sse = (int64_t)tmp1 * 16;
    1647             :       this_rd_stats.dist = (int64_t)tmp2 * 16;
    1648             :     } else {
    1649             :       // For inter mode, the decoded pixels are provided in pd->pred,
    1650             :       // while the predicted pixels are in dst.
    1651             :       this_rd_stats.sse = (int64_t)tmp2 * 16;
    1652             :       this_rd_stats.dist = (int64_t)tmp1 * 16;
    1653             :     }
    1654             :   }
    1655             : 
    1656             :   rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
    1657             :   if (args->this_rd + rd > args->best_rd) {
    1658             :     args->exit_early = 1;
    1659             :     return;
    1660             :   }
    1661             : 
    1662             :   {
    1663             :     const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
    1664             :     const uint8_t txw_unit = tx_size_wide_unit[tx_size];
    1665             :     const uint8_t txh_unit = tx_size_high_unit[tx_size];
    1666             :     const int step = txw_unit * txh_unit;
    1667             :     int offset_h = tx_size_high_unit[TX_4X4];
    1668             :     // The rate of the current 8x8 block is the sum of four 4x4 blocks in it.
    1669             :     this_rd_stats.rate =
    1670             :         x->rate_4x4[block - max_blocks_wide * offset_h - step] +
    1671             :         x->rate_4x4[block - max_blocks_wide * offset_h] +
    1672             :         x->rate_4x4[block - step] + x->rate_4x4[block];
    1673             :   }
    1674             :   rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
    1675             :   rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
    1676             :   rd = AOMMIN(rd1, rd2);
    1677             : 
    1678             :   args->rd_stats.dist += this_rd_stats.dist;
    1679             :   args->rd_stats.sse += this_rd_stats.sse;
    1680             : 
    1681             :   args->this_rd += rd;
    1682             : 
    1683             :   if (args->this_rd > args->best_rd) {
    1684             :     args->exit_early = 1;
    1685             :     return;
    1686             :   }
    1687             : }
    1688             : #endif  // CONFIG_DAALA_DIST
    1689             : 
    1690           0 : static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
    1691             :                              RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
    1692             :                              BLOCK_SIZE bsize, TX_SIZE tx_size,
    1693             :                              int use_fast_coef_casting) {
    1694           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    1695           0 :   const struct macroblockd_plane *const pd = &xd->plane[plane];
    1696             :   struct rdcost_block_args args;
    1697           0 :   av1_zero(args);
    1698           0 :   args.x = x;
    1699           0 :   args.cpi = cpi;
    1700           0 :   args.best_rd = ref_best_rd;
    1701           0 :   args.use_fast_coef_costing = use_fast_coef_casting;
    1702           0 :   av1_init_rd_stats(&args.rd_stats);
    1703             : 
    1704           0 :   if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
    1705             : 
    1706           0 :   av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
    1707             : 
    1708             : #if CONFIG_DAALA_DIST
    1709             :   if (plane == 0 && bsize >= BLOCK_8X8 &&
    1710             :       (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
    1711             :     av1_foreach_8x8_transformed_block_in_yplane(
    1712             :         xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
    1713             :   else
    1714             : #endif  // CONFIG_DAALA_DIST
    1715           0 :     av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
    1716             :                                            &args);
    1717             : 
    1718           0 :   if (args.exit_early) {
    1719           0 :     av1_invalid_rd_stats(rd_stats);
    1720             :   } else {
    1721           0 :     *rd_stats = args.rd_stats;
    1722             :   }
    1723           0 : }
    1724             : 
    1725             : #if CONFIG_SUPERTX
    1726             : void av1_txfm_rd_in_plane_supertx(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
    1727             :                                   int64_t *distortion, int *skippable,
    1728             :                                   int64_t *sse, int64_t ref_best_rd, int plane,
    1729             :                                   BLOCK_SIZE bsize, TX_SIZE tx_size,
    1730             :                                   int use_fast_coef_casting) {
    1731             :   MACROBLOCKD *const xd = &x->e_mbd;
    1732             :   const struct macroblockd_plane *const pd = &xd->plane[plane];
    1733             :   struct rdcost_block_args args;
    1734             :   av1_zero(args);
    1735             :   args.cpi = cpi;
    1736             :   args.x = x;
    1737             :   args.best_rd = ref_best_rd;
    1738             :   args.use_fast_coef_costing = use_fast_coef_casting;
    1739             : 
    1740             : #if CONFIG_EXT_TX
    1741             :   assert(tx_size < TX_SIZES);
    1742             : #endif  // CONFIG_EXT_TX
    1743             : 
    1744             :   if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
    1745             : 
    1746             :   av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
    1747             : 
    1748             :   block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), tx_size,
    1749             :                 &args);
    1750             : 
    1751             :   if (args.exit_early) {
    1752             :     *rate = INT_MAX;
    1753             :     *distortion = INT64_MAX;
    1754             :     *sse = INT64_MAX;
    1755             :     *skippable = 0;
    1756             :   } else {
    1757             :     *distortion = args.rd_stats.dist;
    1758             :     *rate = args.rd_stats.rate;
    1759             :     *sse = args.rd_stats.sse;
    1760             :     *skippable = !x->plane[plane].eobs[0];
    1761             :   }
    1762             : }
    1763             : #endif  // CONFIG_SUPERTX
    1764             : 
    1765           0 : static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x,
    1766             :                         BLOCK_SIZE bsize, TX_SIZE tx_size) {
    1767           0 :   const AV1_COMMON *const cm = &cpi->common;
    1768           0 :   const MACROBLOCKD *const xd = &x->e_mbd;
    1769           0 :   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    1770             : 
    1771           0 :   const int tx_select =
    1772           0 :       cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
    1773             : 
    1774           0 :   if (tx_select) {
    1775           0 :     const int is_inter = is_inter_block(mbmi);
    1776           0 :     const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
    1777           0 :                                      : intra_tx_size_cat_lookup[bsize];
    1778           0 :     const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
    1779           0 :     const int depth = tx_size_to_depth(coded_tx_size);
    1780           0 :     const int tx_size_ctx = get_tx_size_context(xd);
    1781           0 :     int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
    1782             : #if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
    1783             :     if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size)
    1784             :       r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob,
    1785             :                                 tx_size == quarter_txsize_lookup[bsize]);
    1786             : #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
    1787           0 :     return r_tx_size;
    1788             :   } else {
    1789           0 :     return 0;
    1790             :   }
    1791             : }
    1792             : 
    1793             : // #TODO(angiebird): use this function whenever it's possible
    1794           0 : int av1_tx_type_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
    1795             :                      BLOCK_SIZE bsize, int plane, TX_SIZE tx_size,
    1796             :                      TX_TYPE tx_type) {
    1797           0 :   if (plane > 0) return 0;
    1798             : 
    1799           0 :   const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    1800           0 :   const int is_inter = is_inter_block(mbmi);
    1801             : #if CONFIG_EXT_TX
    1802           0 :   const AV1_COMMON *cm = &cpi->common;
    1803           0 :   if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
    1804           0 :       !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    1805           0 :     const int ext_tx_set =
    1806           0 :         get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
    1807           0 :     if (is_inter) {
    1808           0 :       if (ext_tx_set > 0)
    1809             :         return cpi
    1810           0 :             ->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]][tx_type];
    1811             :     } else {
    1812           0 :       if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
    1813           0 :         return cpi->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
    1814           0 :                                        [mbmi->mode][tx_type];
    1815             :     }
    1816             :   }
    1817             : #else
    1818             :   (void)bsize;
    1819             :   if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
    1820             :       !FIXED_TX_TYPE) {
    1821             :     if (is_inter) {
    1822             :       return cpi->inter_tx_type_costs[tx_size][tx_type];
    1823             :     } else {
    1824             :       return cpi->intra_tx_type_costs[tx_size]
    1825             :                                      [intra_mode_to_tx_type_context[mbmi->mode]]
    1826             :                                      [tx_type];
    1827             :     }
    1828             :   }
    1829             : #endif  // CONFIG_EXT_TX
    1830           0 :   return 0;
    1831             : }
    1832           0 : static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
    1833             :                         RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
    1834             :                         TX_TYPE tx_type, int tx_size) {
    1835           0 :   const AV1_COMMON *const cm = &cpi->common;
    1836           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    1837           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    1838           0 :   int64_t rd = INT64_MAX;
    1839           0 :   aom_prob skip_prob = av1_get_skip_prob(cm, xd);
    1840             :   int s0, s1;
    1841           0 :   const int is_inter = is_inter_block(mbmi);
    1842           0 :   const int tx_select =
    1843           0 :       cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
    1844             : 
    1845           0 :   const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size);
    1846             : 
    1847           0 :   assert(skip_prob > 0);
    1848             : #if CONFIG_EXT_TX && CONFIG_RECT_TX
    1849           0 :   assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
    1850             : #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
    1851             : 
    1852           0 :   s0 = av1_cost_bit(skip_prob, 0);
    1853           0 :   s1 = av1_cost_bit(skip_prob, 1);
    1854             : 
    1855           0 :   mbmi->tx_type = tx_type;
    1856           0 :   mbmi->tx_size = tx_size;
    1857           0 :   txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, tx_size,
    1858             :                    cpi->sf.use_fast_coef_costing);
    1859           0 :   if (rd_stats->rate == INT_MAX) return INT64_MAX;
    1860             : #if !CONFIG_TXK_SEL
    1861           0 :   int plane = 0;
    1862           0 :   rd_stats->rate += av1_tx_type_cost(cpi, xd, bs, plane, tx_size, tx_type);
    1863             : #endif
    1864             : 
    1865           0 :   if (rd_stats->skip) {
    1866           0 :     if (is_inter) {
    1867           0 :       rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
    1868             :     } else {
    1869           0 :       rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select,
    1870             :                   rd_stats->sse);
    1871             :     }
    1872             :   } else {
    1873           0 :     rd = RDCOST(x->rdmult, x->rddiv,
    1874             :                 rd_stats->rate + s0 + r_tx_size * tx_select, rd_stats->dist);
    1875             :   }
    1876             : 
    1877           0 :   if (tx_select) rd_stats->rate += r_tx_size;
    1878             : 
    1879           0 :   if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
    1880           0 :       !(rd_stats->skip))
    1881           0 :     rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
    1882             : 
    1883           0 :   return rd;
    1884             : }
    1885             : 
    1886           0 : static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
    1887             :                             TX_TYPE tx_type, TX_SIZE tx_size) {
    1888           0 :   const MACROBLOCKD *const xd = &x->e_mbd;
    1889           0 :   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    1890           0 :   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    1891           0 :   const int is_inter = is_inter_block(mbmi);
    1892           0 :   int prune = 0;
    1893           0 :   if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
    1894             :     // passing -1 in for tx_type indicates that all 1D
    1895             :     // transforms should be considered for pruning
    1896           0 :     prune = prune_tx_types(cpi, bs, x, xd, -1);
    1897             : 
    1898           0 :   if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
    1899             :   if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
    1900             :     return 1;
    1901           0 :   if (!is_inter && x->use_default_intra_tx_type &&
    1902           0 :       tx_type != get_default_tx_type(0, xd, 0, tx_size))
    1903           0 :     return 1;
    1904           0 :   if (is_inter && x->use_default_inter_tx_type &&
    1905           0 :       tx_type != get_default_tx_type(0, xd, 0, tx_size))
    1906           0 :     return 1;
    1907           0 :   if (max_tx_size >= TX_32X32 && tx_size == TX_4X4) return 1;
    1908             : #if CONFIG_EXT_TX
    1909           0 :   const AV1_COMMON *const cm = &cpi->common;
    1910           0 :   int ext_tx_set =
    1911           0 :       get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
    1912           0 :   if (is_inter) {
    1913           0 :     if (!ext_tx_used_inter[ext_tx_set][tx_type]) return 1;
    1914           0 :     if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
    1915           0 :       if (!do_tx_type_search(tx_type, prune)) return 1;
    1916             :     }
    1917             :   } else {
    1918             :     if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
    1919             :       if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) return 1;
    1920             :     }
    1921           0 :     if (!ext_tx_used_intra[ext_tx_set][tx_type]) return 1;
    1922             :   }
    1923             : #else   // CONFIG_EXT_TX
    1924             :   if (tx_size >= TX_32X32 && tx_type != DCT_DCT) return 1;
    1925             :   if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
    1926             :       !do_tx_type_search(tx_type, prune))
    1927             :     return 1;
    1928             : #endif  // CONFIG_EXT_TX
    1929           0 :   return 0;
    1930             : }
    1931             : 
    1932             : #if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
    1933           0 : static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
    1934             :                                    MACROBLOCK *x, int *r, int64_t *d, int *s,
    1935             :                                    int64_t *sse, int64_t ref_best_rd) {
    1936             :   RD_STATS rd_stats;
    1937           0 :   int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, DCT_DCT,
    1938           0 :                         max_txsize_lookup[bs]);
    1939           0 :   *r = rd_stats.rate;
    1940           0 :   *d = rd_stats.dist;
    1941           0 :   *s = rd_stats.skip;
    1942           0 :   *sse = rd_stats.sse;
    1943           0 :   return rd;
    1944             : }
    1945             : #endif  // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
    1946             : 
    1947           0 : static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
    1948             :                                    RD_STATS *rd_stats, int64_t ref_best_rd,
    1949             :                                    BLOCK_SIZE bs) {
    1950           0 :   const AV1_COMMON *const cm = &cpi->common;
    1951           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    1952           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    1953           0 :   TX_TYPE tx_type, best_tx_type = DCT_DCT;
    1954           0 :   int64_t this_rd, best_rd = INT64_MAX;
    1955           0 :   aom_prob skip_prob = av1_get_skip_prob(cm, xd);
    1956           0 :   int s0 = av1_cost_bit(skip_prob, 0);
    1957           0 :   int s1 = av1_cost_bit(skip_prob, 1);
    1958           0 :   const int is_inter = is_inter_block(mbmi);
    1959           0 :   int prune = 0;
    1960           0 :   const int plane = 0;
    1961             : #if CONFIG_EXT_TX
    1962             :   int ext_tx_set;
    1963             : #endif  // CONFIG_EXT_TX
    1964           0 :   av1_invalid_rd_stats(rd_stats);
    1965             : 
    1966           0 :   mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
    1967             : #if CONFIG_VAR_TX
    1968           0 :   mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
    1969             : #endif  // CONFIG_VAR_TX
    1970             : #if CONFIG_EXT_TX
    1971           0 :   ext_tx_set =
    1972           0 :       get_ext_tx_set(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);
    1973             : #endif  // CONFIG_EXT_TX
    1974             : 
    1975           0 :   if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
    1976             : #if CONFIG_EXT_TX
    1977           0 :     prune = prune_tx_types(cpi, bs, x, xd, ext_tx_set);
    1978             : #else
    1979             :     prune = prune_tx_types(cpi, bs, x, xd, 0);
    1980             : #endif  // CONFIG_EXT_TX
    1981             : #if CONFIG_EXT_TX
    1982           0 :   if (get_ext_tx_types(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used) >
    1983           0 :           1 &&
    1984           0 :       !xd->lossless[mbmi->segment_id]) {
    1985             : #if CONFIG_PVQ
    1986             :     od_rollback_buffer pre_buf, post_buf;
    1987             : 
    1988             :     od_encode_checkpoint(&x->daala_enc, &pre_buf);
    1989             :     od_encode_checkpoint(&x->daala_enc, &post_buf);
    1990             : #endif  // CONFIG_PVQ
    1991             : 
    1992           0 :     for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
    1993             :       RD_STATS this_rd_stats;
    1994           0 :       if (is_inter) {
    1995           0 :         if (x->use_default_inter_tx_type &&
    1996           0 :             tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
    1997           0 :           continue;
    1998           0 :         if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
    1999           0 :         if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
    2000           0 :           if (!do_tx_type_search(tx_type, prune)) continue;
    2001             :         }
    2002             :       } else {
    2003           0 :         if (x->use_default_intra_tx_type &&
    2004           0 :             tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
    2005           0 :           continue;
    2006             :         if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
    2007             :           if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
    2008             :         }
    2009           0 :         if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
    2010             :       }
    2011             : 
    2012           0 :       mbmi->tx_type = tx_type;
    2013             : 
    2014           0 :       txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
    2015           0 :                        mbmi->tx_size, cpi->sf.use_fast_coef_costing);
    2016             : #if CONFIG_PVQ
    2017             :       od_encode_rollback(&x->daala_enc, &pre_buf);
    2018             : #endif  // CONFIG_PVQ
    2019           0 :       if (this_rd_stats.rate == INT_MAX) continue;
    2020           0 :       av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
    2021             : 
    2022           0 :       if (this_rd_stats.skip)
    2023           0 :         this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
    2024             :       else
    2025           0 :         this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
    2026             :                          this_rd_stats.dist);
    2027           0 :       if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
    2028           0 :           !this_rd_stats.skip)
    2029           0 :         this_rd =
    2030           0 :             AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
    2031             : 
    2032           0 :       if (this_rd < best_rd) {
    2033           0 :         best_rd = this_rd;
    2034           0 :         best_tx_type = mbmi->tx_type;
    2035           0 :         *rd_stats = this_rd_stats;
    2036             : #if CONFIG_PVQ
    2037             :         od_encode_checkpoint(&x->daala_enc, &post_buf);
    2038             : #endif  // CONFIG_PVQ
    2039             :       }
    2040             :     }
    2041             : #if CONFIG_PVQ
    2042             :     od_encode_rollback(&x->daala_enc, &post_buf);
    2043             : #endif  // CONFIG_PVQ
    2044             :   } else {
    2045           0 :     mbmi->tx_type = DCT_DCT;
    2046           0 :     txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
    2047             :                      cpi->sf.use_fast_coef_costing);
    2048             :   }
    2049             : #else   // CONFIG_EXT_TX
    2050             :   if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) {
    2051             :     for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
    2052             :       RD_STATS this_rd_stats;
    2053             :       if (!is_inter && x->use_default_intra_tx_type &&
    2054             :           tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
    2055             :         continue;
    2056             :       if (is_inter && x->use_default_inter_tx_type &&
    2057             :           tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
    2058             :         continue;
    2059             :       mbmi->tx_type = tx_type;
    2060             :       txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
    2061             :                        mbmi->tx_size, cpi->sf.use_fast_coef_costing);
    2062             :       if (this_rd_stats.rate == INT_MAX) continue;
    2063             : 
    2064             :       av1_tx_type_cost(cpi, xd, bs, plane, mbmi->tx_size, tx_type);
    2065             :       if (is_inter) {
    2066             :         if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
    2067             :             !do_tx_type_search(tx_type, prune))
    2068             :           continue;
    2069             :       }
    2070             :       if (this_rd_stats.skip)
    2071             :         this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
    2072             :       else
    2073             :         this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
    2074             :                          this_rd_stats.dist);
    2075             :       if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip)
    2076             :         this_rd =
    2077             :             AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
    2078             : 
    2079             :       if (this_rd < best_rd) {
    2080             :         best_rd = this_rd;
    2081             :         best_tx_type = mbmi->tx_type;
    2082             :         *rd_stats = this_rd_stats;
    2083             :       }
    2084             :     }
    2085             :   } else {
    2086             :     mbmi->tx_type = DCT_DCT;
    2087             :     txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
    2088             :                      cpi->sf.use_fast_coef_costing);
    2089             :   }
    2090             : #endif  // CONFIG_EXT_TX
    2091           0 :   mbmi->tx_type = best_tx_type;
    2092           0 : }
    2093             : 
    2094           0 : static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
    2095             :                                     RD_STATS *rd_stats, int64_t ref_best_rd,
    2096             :                                     BLOCK_SIZE bs) {
    2097           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    2098           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    2099             : 
    2100           0 :   mbmi->tx_size = TX_4X4;
    2101           0 :   mbmi->tx_type = DCT_DCT;
    2102             : #if CONFIG_VAR_TX
    2103           0 :   mbmi->min_tx_size = get_min_tx_size(TX_4X4);
    2104             : #endif  // CONFIG_VAR_TX
    2105             : 
    2106           0 :   txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
    2107             :                    cpi->sf.use_fast_coef_costing);
    2108           0 : }
    2109             : 
    2110             : #if CONFIG_TXK_SEL || CONFIG_VAR_TX
    2111           0 : static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
    2112           0 :   int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
    2113           0 :   return num_blk;
    2114             : }
    2115             : #endif  // CONFIG_TXK_SEL || CONFIG_VAR_TX
    2116             : 
                       // Picks the luma transform size and transform type for block `bs` by
                       // evaluating candidates with txfm_yrd() and keeping the lowest RD cost.
                       //
                       // Candidates are tried in up to three phases:
                       //   1. the rectangular size max_txsize_rect_lookup[bs]
                       //      (CONFIG_EXT_TX && CONFIG_RECT_TX only),
                       //   2. the 1:4 / 4:1 size quarter_txsize_lookup[bs]
                       //      (additionally requires CONFIG_RECT_TX_EXT),
                       //   3. sizes from start_tx down to end_tx.
                       //
                       // cpi          encoder instance; speed features and cm are read from it.
                       // x            macroblock being coded; x->e_mbd.mi[0]->mbmi is updated.
                       // rd_stats     out: invalidated on entry, then overwritten with the stats
                       //              of each new best candidate.
                       // ref_best_rd  forwarded unchanged to every txfm_yrd() call.
                       // bs           block size being searched.
                       //
                       // On return mbmi->tx_size / mbmi->tx_type hold the winner. If no
                       // candidate produced rd < INT64_MAX they hold the initial values
                       // max_txsize_lookup[bs] / DCT_DCT and *rd_stats stays invalid.
    2117           0 : static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
    2118             :                                         MACROBLOCK *x, RD_STATS *rd_stats,
    2119             :                                         int64_t ref_best_rd, BLOCK_SIZE bs) {
    2120           0 :   const AV1_COMMON *const cm = &cpi->common;
    2121           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    2122           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    2123           0 :   int64_t rd = INT64_MAX;
    2124             :   int n;
    2125             :   int start_tx, end_tx;
    2126           0 :   int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
    2127           0 :   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    2128           0 :   TX_SIZE best_tx_size = max_tx_size;
    2129           0 :   TX_TYPE best_tx_type = DCT_DCT;
    2130             : #if CONFIG_TXK_SEL
                         // NOTE(review): best_txk_type is not initialised here. If no candidate
                         // ever satisfies rd < best_rd, the memcpy at the end of this function
                         // copies indeterminate bytes into mbmi->txk_type -- confirm that cannot
                         // happen or initialise the array.
    2131             :   TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
    2132             :   const int num_blk = bsize_to_num_blk(bs);
    2133             : #endif  // CONFIG_TXK_SEL
    2134           0 :   const int tx_select = cm->tx_mode == TX_MODE_SELECT;
    2135           0 :   const int is_inter = is_inter_block(mbmi);
    2136             : #if CONFIG_PVQ
                         // Checkpoint of the daala encoder state; restored after each candidate
                         // in the size loop below.
    2137             :   od_rollback_buffer buf;
    2138             :   od_encode_checkpoint(&x->daala_enc, &buf);
    2139             : #endif  // CONFIG_PVQ
    2140             : 
    2141           0 :   av1_invalid_rd_stats(rd_stats);
    2142             : 
    2143             : #if CONFIG_EXT_TX && CONFIG_RECT_TX
                         // Phase 1: rectangular transform. With TX_MODE_SELECT it is tried
                         // whenever is_rect_tx_allowed() says so; otherwise only when the size
                         // implied by cm->tx_mode is itself rectangular.
    2144           0 :   int evaluate_rect_tx = 0;
    2145           0 :   if (tx_select) {
    2146           0 :     evaluate_rect_tx = is_rect_tx_allowed(xd, mbmi);
    2147             :   } else {
    2148           0 :     const TX_SIZE chosen_tx_size =
    2149           0 :         tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
    2150           0 :     evaluate_rect_tx = is_rect_tx(chosen_tx_size);
    2151           0 :     assert(IMPLIES(evaluate_rect_tx, is_rect_tx_allowed(xd, mbmi)));
    2152             :   }
    2153           0 :   if (evaluate_rect_tx) {
    2154           0 :     TX_TYPE tx_start = DCT_DCT;
    2155           0 :     TX_TYPE tx_end = TX_TYPES;
    2156             : #if CONFIG_TXK_SEL
    2157             :     // The tx_type becomes dummy when lv_map is on. The tx_type search will be
    2158             :     // performed in av1_search_txk_type()
    2159             :     tx_end = DCT_DCT + 1;
    2160             : #endif
    2161             :     TX_TYPE tx_type;
    2162           0 :     for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
                             // For ref_mv_idx > 0 only DCT_DCT is evaluated.
    2163           0 :       if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
    2164           0 :       const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
    2165             :       RD_STATS this_rd_stats;
    2166           0 :       int ext_tx_set =
    2167           0 :           get_ext_tx_set(rect_tx_size, bs, is_inter, cm->reduced_tx_set_used);
                             // Skip transform types that are not in the set selected for this
                             // size / prediction kind.
    2168           0 :       if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
    2169           0 :           (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
    2170           0 :         rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type,
    2171             :                       rect_tx_size);
    2172           0 :         if (rd < best_rd) {
    2173             : #if CONFIG_TXK_SEL
    2174             :           memcpy(best_txk_type, mbmi->txk_type,
    2175             :                  sizeof(best_txk_type[0]) * num_blk);
    2176             : #endif
    2177           0 :           best_tx_type = tx_type;
    2178           0 :           best_tx_size = rect_tx_size;
    2179           0 :           best_rd = rd;
    2180           0 :           *rd_stats = this_rd_stats;
    2181             :         }
    2182             :       }
    2183             : #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
                             // Sub-8x8 inter blocks stop after the first tx_type iteration.
                             // NOTE(review): this declaration shadows the function-scope is_inter
                             // (same initialiser); the uses earlier in this loop body refer to the
                             // outer variable.
    2184             :       const int is_inter = is_inter_block(mbmi);
    2185             :       if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
    2186             : #endif  // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    2187             :     }
    2188             :   }
    2189             : 
    2190             : #if CONFIG_RECT_TX_EXT
    2191             :   // test 1:4/4:1 tx
                         // Phase 2: same search as phase 1, but for quarter_txsize_lookup[bs].
                         // Without TX_MODE_SELECT it runs only when the mode-implied size equals
                         // that quarter size.
    2192             :   int evaluate_quarter_tx = 0;
    2193             :   if (is_quarter_tx_allowed(xd, mbmi, is_inter)) {
    2194             :     if (tx_select) {
    2195             :       evaluate_quarter_tx = 1;
    2196             :     } else {
    2197             :       const TX_SIZE chosen_tx_size =
    2198             :           tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
    2199             :       evaluate_quarter_tx = chosen_tx_size == quarter_txsize_lookup[bs];
    2200             :     }
    2201             :   }
    2202             :   if (evaluate_quarter_tx) {
    2203             :     TX_TYPE tx_start = DCT_DCT;
    2204             :     TX_TYPE tx_end = TX_TYPES;
    2205             : #if CONFIG_TXK_SEL
    2206             :     // The tx_type becomes dummy when lv_map is on. The tx_type search will be
    2207             :     // performed in av1_search_txk_type()
    2208             :     tx_end = DCT_DCT + 1;
    2209             : #endif
    2210             :     TX_TYPE tx_type;
    2211             :     for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
    2212             :       if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
    2213             :       const TX_SIZE tx_size = quarter_txsize_lookup[bs];
    2214             :       RD_STATS this_rd_stats;
    2215             :       int ext_tx_set =
    2216             :           get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used);
    2217             :       if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
    2218             :           (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
    2219             :         rd =
    2220             :             txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, tx_size);
    2221             :         if (rd < best_rd) {
    2222             : #if CONFIG_TXK_SEL
    2223             :           memcpy(best_txk_type, mbmi->txk_type,
    2224             :                  sizeof(best_txk_type[0]) * num_blk);
    2225             : #endif
    2226             :           best_tx_type = tx_type;
    2227             :           best_tx_size = tx_size;
    2228             :           best_rd = rd;
    2229             :           *rd_stats = this_rd_stats;
    2230             :         }
    2231             :       }
    2232             : #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    2233             :       const int is_inter = is_inter_block(mbmi);
    2234             :       if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
    2235             : #endif  // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    2236             :     }
    2237             :   }
    2238             : #endif  // CONFIG_RECT_TX_EXT
    2239             : #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
    2240             : 
                         // Phase 3 range. With TX_MODE_SELECT: from the largest size for this
                         // block down to TX_8X8 (when the largest is >= TX_32X32) or TX_4X4.
                         // Otherwise only the single size implied by cm->tx_mode.
    2241           0 :   if (tx_select) {
    2242           0 :     start_tx = max_tx_size;
    2243           0 :     end_tx = (max_tx_size >= TX_32X32) ? TX_8X8 : TX_4X4;
    2244             :   } else {
    2245           0 :     const TX_SIZE chosen_tx_size =
    2246           0 :         tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
    2247           0 :     start_tx = chosen_tx_size;
    2248           0 :     end_tx = chosen_tx_size;
    2249             :   }
    2250             : 
    2251           0 :   last_rd = INT64_MAX;
    2252           0 :   for (n = start_tx; n >= end_tx; --n) {
    2253             : #if CONFIG_EXT_TX && CONFIG_RECT_TX
                           // A rectangular n ends the whole size loop; rectangular candidates
                           // are only evaluated by the phases above.
    2254           0 :     if (is_rect_tx(n)) break;
    2255             : #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
    2256           0 :     TX_TYPE tx_start = DCT_DCT;
    2257           0 :     TX_TYPE tx_end = TX_TYPES;
    2258             : #if CONFIG_TXK_SEL
    2259             :     // The tx_type becomes dummy when lv_map is on. The tx_type search will be
    2260             :     // performed in av1_search_txk_type()
    2261             :     tx_end = DCT_DCT + 1;
    2262             : #endif
    2263             :     TX_TYPE tx_type;
    2264           0 :     for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
    2265             :       RD_STATS this_rd_stats;
    2266           0 :       if (skip_txfm_search(cpi, x, bs, tx_type, n)) continue;
    2267           0 :       rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, n);
    2268             : #if CONFIG_PVQ
    2269             :       od_encode_rollback(&x->daala_enc, &buf);
    2270             : #endif  // CONFIG_PVQ
    2271             :       // Early termination in transform size search.
                             // Triggers (only when sf.tx_size_search_breakout is set) on: an
                             // invalid rd; a skippable non-DCT_DCT result below start_tx; or a
                             // size below max_tx_size whose rd is worse than last_rd.
                             // The `break` leaves only the tx_type loop -- the size loop then
                             // proceeds to the next smaller n. last_rd is not reset per size,
                             // so it is the rd of the most recent candidate that got past this
                             // check, whatever its size or type.
    2272           0 :       if (cpi->sf.tx_size_search_breakout &&
    2273           0 :           (rd == INT64_MAX ||
    2274           0 :            (this_rd_stats.skip == 1 && tx_type != DCT_DCT && n < start_tx) ||
    2275           0 :            (n < (int)max_tx_size && rd > last_rd)))
    2276             :         break;
    2277             : 
    2278           0 :       last_rd = rd;
    2279           0 :       if (rd < best_rd) {
    2280             : #if CONFIG_TXK_SEL
    2281             :         memcpy(best_txk_type, mbmi->txk_type,
    2282             :                sizeof(best_txk_type[0]) * num_blk);
    2283             : #endif
    2284           0 :         best_tx_type = tx_type;
    2285           0 :         best_tx_size = n;
    2286           0 :         best_rd = rd;
    2287           0 :         *rd_stats = this_rd_stats;
    2288             :       }
    2289             : #if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    2290             :       const int is_inter = is_inter_block(mbmi);
    2291             :       if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
    2292             : #endif  // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    2293             :     }
    2294             :   }
                         // Commit the winner to the mode info.
    2295           0 :   mbmi->tx_size = best_tx_size;
    2296           0 :   mbmi->tx_type = best_tx_type;
    2297             : #if CONFIG_TXK_SEL
    2298             :   memcpy(mbmi->txk_type, best_txk_type, sizeof(best_txk_type[0]) * num_blk);
    2299             : #endif
    2300             : 
    2301             : #if CONFIG_VAR_TX
    2302           0 :   mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
    2303             : #endif  // CONFIG_VAR_TX
    2304             : 
    2305             : #if !CONFIG_EXT_TX
    2306             :   if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
    2307             : #endif  // !CONFIG_EXT_TX
    2308             : #if CONFIG_PVQ
                         // Re-run the winning candidate, writing its stats to *rd_stats.
                         // Presumably needed because the encoder state was rolled back after
                         // every trial above -- TODO confirm.
    2309             :   if (best_rd != INT64_MAX) {
    2310             :     txfm_yrd(cpi, x, rd_stats, ref_best_rd, bs, best_tx_type, best_tx_size);
    2311             :   }
    2312             : #endif  // CONFIG_PVQ
    2313           0 : }
    2314             : 
                       // Computes luma RD stats for block `bs` by dispatching to one of three
                       // transform-size strategies:
                       //   - lossless segment               -> choose_smallest_tx_size()
                       //   - sf.tx_size_search_method ==
                       //     USE_LARGESTALL                 -> choose_largest_tx_size()
                       //   - otherwise                      -> choose_tx_size_type_from_rd()
                       // *rd_stats is reset with av1_init_rd_stats() before dispatch.
                       // `bs` must equal the sb_type of the current mode info (asserted).
    2315           0 : static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
    2316             :                             RD_STATS *rd_stats, BLOCK_SIZE bs,
    2317             :                             int64_t ref_best_rd) {
    2318           0 :   MACROBLOCKD *xd = &x->e_mbd;
    2319           0 :   av1_init_rd_stats(rd_stats);
    2320             : 
    2321           0 :   assert(bs == xd->mi[0]->mbmi.sb_type);
    2322             : 
    2323           0 :   if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    2324           0 :     choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
    2325           0 :   } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
    2326           0 :     choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
    2327             :   } else {
    2328           0 :     choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
    2329             :   }
    2330           0 : }
    2331             : 
                       // Returns 1 when `mode` is one of the four diagonal modes listed below
                       // and `best_intra_mode` is neither of the two modes paired with it;
                       // returns 0 for every other combination.
                       //   D117_PRED is kept only if best is V_PRED or D135_PRED
                       //   D63_PRED  is kept only if best is V_PRED or D45_PRED
                       //   D207_PRED is kept only if best is H_PRED or D45_PRED
                       //   D153_PRED is kept only if best is H_PRED or D135_PRED
                       // Presumably callers treat a return of 1 as "skip evaluating `mode`" --
                       // confirm at the call sites.
    2332           0 : static int conditional_skipintra(PREDICTION_MODE mode,
    2333             :                                  PREDICTION_MODE best_intra_mode) {
    2334           0 :   if (mode == D117_PRED && best_intra_mode != V_PRED &&
    2335             :       best_intra_mode != D135_PRED)
    2336           0 :     return 1;
    2337           0 :   if (mode == D63_PRED && best_intra_mode != V_PRED &&
    2338             :       best_intra_mode != D45_PRED)
    2339           0 :     return 1;
    2340           0 :   if (mode == D207_PRED && best_intra_mode != H_PRED &&
    2341             :       best_intra_mode != D45_PRED)
    2342           0 :     return 1;
    2343           0 :   if (mode == D153_PRED && best_intra_mode != H_PRED &&
    2344             :       best_intra_mode != D135_PRED)
    2345           0 :     return 1;
    2346           0 :   return 0;
    2347             : }
    2348             : 
    2349             : // Model based RD estimation for luma intra blocks.
                       //
                       // Runs intra prediction for every transform block of plane 0, asks
                       // model_rd_for_sb() for a rate/distortion estimate, adds `mode_cost`
                       // (plus the angle-delta and filter-intra signalling costs when those
                       // tools are compiled in) to the rate, and returns the RDCOST.
                       //
                       // Side effect: mbmi->tx_size is overwritten with the size derived from
                       // cpi->common.tx_mode for an intra block. The block must be intra
                       // (asserted).
    2350           0 : static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
    2351             :                                BLOCK_SIZE bsize, int mode_cost) {
    2352           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    2353           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    2354           0 :   assert(!is_inter_block(mbmi));
    2355             :   RD_STATS this_rd_stats;
    2356             :   int row, col;
    2357             :   int64_t temp_sse, this_rd;
    2358           0 :   const TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cpi->common.tx_mode, 0);
    2359           0 :   const int stepr = tx_size_high_unit[tx_size];
    2360           0 :   const int stepc = tx_size_wide_unit[tx_size];
    2361           0 :   const int max_blocks_wide = max_block_wide(xd, bsize, 0);
    2362           0 :   const int max_blocks_high = max_block_high(xd, bsize, 0);
    2363           0 :   mbmi->tx_size = tx_size;
    2364             :   // Prediction.
                         // `block` is the running index handed to the predictor; it advances
                         // by stepr * stepc for every transform block visited.
    2365           0 :   const int step = stepr * stepc;
    2366           0 :   int block = 0;
    2367           0 :   for (row = 0; row < max_blocks_high; row += stepr) {
    2368           0 :     for (col = 0; col < max_blocks_wide; col += stepc) {
    2369             : #if CONFIG_CFL
    2370             :       const struct macroblockd_plane *const pd = &xd->plane[0];
    2371             :       const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    2372             : 
    2373             : #if CONFIG_EC_ADAPT
    2374             :       FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
    2375             : #else
    2376             :       FRAME_CONTEXT *const ec_ctx = cpi->common.fc;
    2377             : #endif  // CONFIG_EC_ADAPT
    2378             : 
    2379             :       av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row,
    2380             :                                              tx_size, plane_bsize);
    2381             : #else
    2382           0 :       av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
    2383             : #endif
    2384           0 :       block += step;
    2385             :     }
    2386             :   }
    2387             :   // RD estimation.
                         // Only rate and dist of this_rd_stats feed the result; skip and
                         // temp_sse are filled in by the call but not read afterwards.
    2388           0 :   model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
    2389             :                   &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
    2390             : #if CONFIG_EXT_INTRA
                         // Directional modes also pay for signalling angle_delta[0], coded as a
                         // uniform symbol over 2 * MAX_ANGLE_DELTA + 1 values.
    2391           0 :   if (av1_is_directional_mode(mbmi->mode, bsize)) {
    2392           0 :     mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
    2393           0 :                                     MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
    2394             :   }
    2395             : #endif  // CONFIG_EXT_INTRA
    2396             : #if CONFIG_FILTER_INTRA
                         // DC_PRED pays for the filter-intra on/off bit, plus the filter mode
                         // index when filter intra is in use.
    2397             :   if (mbmi->mode == DC_PRED) {
    2398             :     const aom_prob prob = cpi->common.fc->filter_intra_probs[0];
    2399             :     if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
    2400             :       const int mode = mbmi->filter_intra_mode_info.filter_intra_mode[0];
    2401             :       mode_cost += (av1_cost_bit(prob, 1) +
    2402             :                     write_uniform_cost(FILTER_INTRA_MODES, mode));
    2403             :     } else {
    2404             :       mode_cost += av1_cost_bit(prob, 0);
    2405             :     }
    2406             :   }
    2407             : #endif  // CONFIG_FILTER_INTRA
    2408           0 :   this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + mode_cost,
    2409             :                    this_rd_stats.dist);
    2410           0 :   return this_rd;
    2411             : }
    2412             : 
    2413             : #if CONFIG_PALETTE
    2414             : // Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
    2415             : // new_height'. Extra rows and columns are filled in by copying last valid
    2416             : // row/column.
                       //
                       // The extension is done in place: on entry the map is stored with a row
                       // stride of orig_width, on exit with a row stride of new_width. The
                       // caller must supply a buffer of at least new_width * new_height bytes.
                       // Requires new_width >= orig_width and new_height >= orig_height
                       // (asserted); returns immediately when nothing changes.
    2417           0 : static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
    2418             :                                      int orig_height, int new_width,
    2419             :                                      int new_height) {
    2420             :   int j;
    2421           0 :   assert(new_width >= orig_width);
    2422           0 :   assert(new_height >= orig_height);
    2423           0 :   if (new_width == orig_width && new_height == orig_height) return;
    2424             : 
                         // Rows are relocated last-to-first: each row moves to an equal or
                         // higher offset, so going bottom-up never overwrites a row that has
                         // not been moved yet. memmove is used because a row's old and new
                         // positions can overlap.
    2425           0 :   for (j = orig_height - 1; j >= 0; --j) {
    2426           0 :     memmove(color_map + j * new_width, color_map + j * orig_width, orig_width);
    2427             :     // Copy last column to extra columns.
    2428           0 :     memset(color_map + j * new_width + orig_width,
    2429           0 :            color_map[j * new_width + orig_width - 1], new_width - orig_width);
    2430             :   }
    2431             :   // Copy last row to extra rows.
    2432           0 :   for (j = orig_height; j < new_height; ++j) {
    2433           0 :     memcpy(color_map + j * new_width, color_map + (orig_height - 1) * new_width,
    2434             :            new_width);
    2435             :   }
    2436             : }
    2437             : 
    2438             : #if CONFIG_PALETTE_DELTA_ENCODING
    2439             : // Bias toward using colors in the cache.
    2440             : // TODO(huisu): Try other schemes to improve compression.
                       //
                       // For each of the n_colors centroids (elements 0, stride, 2 * stride, ...
                       // of `centroids`), finds the entry of color_cache[0..n_cache) with the
                       // smallest absolute difference and, when that difference is below 1.5,
                       // replaces the centroid with the cached value. `centroids` is modified
                       // in place; nothing happens when n_cache <= 0.
    2441             : static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
    2442             :                                     int n_colors, int stride,
    2443             :                                     float *centroids) {
    2444             :   if (n_cache <= 0) return;
    2445             :   for (int i = 0; i < n_colors * stride; i += stride) {
    2446             :     float min_diff = fabsf(centroids[i] - color_cache[0]);
    2447             :     int idx = 0;
                           // Strict '<' keeps the lowest cache index on ties.
    2448             :     for (int j = 1; j < n_cache; ++j) {
    2449             :       float this_diff = fabsf(centroids[i] - color_cache[j]);
    2450             :       if (this_diff < min_diff) {
    2451             :         min_diff = this_diff;
    2452             :         idx = j;
    2453             :       }
    2454             :     }
    2455             :     if (min_diff < 1.5) centroids[i] = color_cache[idx];
    2456             :   }
    2457             : }
    2458             : #endif  // CONFIG_PALETTE_DELTA_ENCODING
    2459             : 
                       // RD search over luma palette sizes for an intra block.
                       //
                       // Counts the distinct source colours; when there are between 2 and 64,
                       // tries palette sizes from min(colors, PALETTE_MAX_SIZE) down to 2. For
                       // each size it derives centroids (k-means, or directly lb/ub when there
                       // are exactly PALETTE_MIN_SIZE colours), builds the colour index map,
                       // accumulates the palette signalling cost, prunes with the model RD and
                       // finally runs super_block_yrd().
                       //
                       // palette_ctx             index into the palette-mode probability table.
                       // dc_mode_cost            base mode cost added to every palette candidate.
                       // best_mbmi               in/out: overwritten with *mbmi on a new best;
                       //                         copied back into *mbmi before the final return.
                       // best_palette_color_map  out: receives block_width * block_height index
                       //                         bytes of the best candidate.
                       // best_rd, best_model_rd  in/out: running best RD / model-RD values.
                       // rate, rate_tokenonly,
                       // distortion, skippable   optional outputs (each is NULL-checked), written
                       //                         only when a new best is found.
                       //
                       // Returns the rate overhead of the best palette candidate (its total rate
                       // minus its token-only rate), or 0 when no candidate improved *best_rd.
    2460           0 : static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
    2461             :                                      BLOCK_SIZE bsize, int palette_ctx,
    2462             :                                      int dc_mode_cost, MB_MODE_INFO *best_mbmi,
    2463             :                                      uint8_t *best_palette_color_map,
    2464             :                                      int64_t *best_rd, int64_t *best_model_rd,
    2465             :                                      int *rate, int *rate_tokenonly,
    2466             :                                      int64_t *distortion, int *skippable) {
    2467           0 :   int rate_overhead = 0;
    2468           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    2469           0 :   MODE_INFO *const mic = xd->mi[0];
    2470           0 :   MB_MODE_INFO *const mbmi = &mic->mbmi;
    2471           0 :   assert(!is_inter_block(mbmi));
    2472             :   int this_rate, colors, n;
    2473           0 :   const int src_stride = x->plane[0].src.stride;
    2474           0 :   const uint8_t *const src = x->plane[0].src.buf;
    2475           0 :   uint8_t *const color_map = xd->plane[0].color_index_map;
                         // rows x cols is the area actually analysed; block_width x block_height
                         // is the size the index map is later extended to (see the
                         // extend_palette_color_map() call below).
    2476             :   int block_width, block_height, rows, cols;
    2477           0 :   av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
    2478             :                            &cols);
    2479             : 
    2480           0 :   assert(cpi->common.allow_screen_content_tools);
    2481             : 
    2482             : #if CONFIG_HIGHBITDEPTH
    2483           0 :   if (cpi->common.use_highbitdepth)
    2484           0 :     colors = av1_count_colors_highbd(src, src_stride, rows, cols,
    2485           0 :                                      cpi->common.bit_depth);
    2486             :   else
    2487             : #endif  // CONFIG_HIGHBITDEPTH
    2488           0 :     colors = av1_count_colors(src, src_stride, rows, cols);
    2489             : #if CONFIG_FILTER_INTRA
    2490             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
    2491             : #endif  // CONFIG_FILTER_INTRA
    2492             : 
    2493           0 :   if (colors > 1 && colors <= 64) {
    2494             :     int r, c, i, j, k, palette_mode_cost;
    2495           0 :     const int max_itr = 50;
    2496             :     uint8_t color_order[PALETTE_MAX_SIZE];
    2497           0 :     float *const data = x->palette_buffer->kmeans_data_buf;
    2498             :     float centroids[PALETTE_MAX_SIZE];
    2499             :     float lb, ub, val;
    2500             :     RD_STATS tokenonly_rd_stats;
    2501             :     int64_t this_rd, this_model_rd;
    2502           0 :     PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
                           // Copy the source pixels into the float k-means buffer (row stride
                           // `cols`) while tracking the smallest (lb) and largest (ub) value.
    2503             : #if CONFIG_HIGHBITDEPTH
    2504           0 :     uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    2505           0 :     if (cpi->common.use_highbitdepth)
    2506           0 :       lb = ub = src16[0];
    2507             :     else
    2508             : #endif  // CONFIG_HIGHBITDEPTH
    2509           0 :       lb = ub = src[0];
    2510             : 
    2511             : #if CONFIG_HIGHBITDEPTH
    2512           0 :     if (cpi->common.use_highbitdepth) {
    2513           0 :       for (r = 0; r < rows; ++r) {
    2514           0 :         for (c = 0; c < cols; ++c) {
    2515           0 :           val = src16[r * src_stride + c];
    2516           0 :           data[r * cols + c] = val;
    2517           0 :           if (val < lb)
    2518           0 :             lb = val;
    2519           0 :           else if (val > ub)
    2520           0 :             ub = val;
    2521             :         }
    2522             :       }
    2523             :     } else {
    2524             : #endif  // CONFIG_HIGHBITDEPTH
    2525           0 :       for (r = 0; r < rows; ++r) {
    2526           0 :         for (c = 0; c < cols; ++c) {
    2527           0 :           val = src[r * src_stride + c];
    2528           0 :           data[r * cols + c] = val;
    2529           0 :           if (val < lb)
    2530           0 :             lb = val;
    2531           0 :           else if (val > ub)
    2532           0 :             ub = val;
    2533             :         }
    2534             :       }
    2535             : #if CONFIG_HIGHBITDEPTH
    2536             :     }
    2537             : #endif  // CONFIG_HIGHBITDEPTH
    2538             : 
    2539           0 :     mbmi->mode = DC_PRED;
    2540             : #if CONFIG_FILTER_INTRA
    2541             :     mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
    2542             : #endif  // CONFIG_FILTER_INTRA
    2543             : 
                           // NOTE(review): this early return skips the `*mbmi = *best_mbmi`
                           // restore at the end of the function, so the caller sees
                           // mbmi->mode == DC_PRED (and the filter-intra flag cleared) as set
                           // just above -- confirm callers expect that.
    2544           0 :     if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return 0;
    2545             : 
    2546             : #if CONFIG_PALETTE_DELTA_ENCODING
    2547             :     const MODE_INFO *above_mi = xd->above_mi;
    2548             :     const MODE_INFO *left_mi = xd->left_mi;
    2549             :     uint16_t color_cache[2 * PALETTE_MAX_SIZE];
    2550             :     const int n_cache =
    2551             :         av1_get_palette_cache(above_mi, left_mi, 0, color_cache);
    2552             : #endif  // CONFIG_PALETTE_DELTA_ENCODING
    2553             : 
                           // Try palette sizes from largest to smallest.
    2554           0 :     for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
    2555           0 :          --n) {
    2556           0 :       if (colors == PALETTE_MIN_SIZE) {
    2557             :         // Special case: These colors automatically become the centroids.
    2558           0 :         assert(colors == n);
    2559           0 :         assert(colors == 2);
    2560           0 :         centroids[0] = lb;
    2561           0 :         centroids[1] = ub;
    2562           0 :         k = 2;
    2563             :       } else {
                               // Seed the n centroids at the midpoints of n equal sub-ranges of
                               // [lb, ub], then refine them with k-means.
    2564           0 :         for (i = 0; i < n; ++i) {
    2565           0 :           centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
    2566             :         }
    2567           0 :         av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
    2568             : #if CONFIG_PALETTE_DELTA_ENCODING
    2569             :         optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
    2570             : #endif  // CONFIG_PALETTE_DELTA_ENCODING
    2571           0 :         k = av1_remove_duplicates(centroids, n);
    2572           0 :         if (k < PALETTE_MIN_SIZE) {
    2573             :           // Too few unique colors to create a palette. And DC_PRED will work
    2574             :           // well for that case anyway. So skip.
    2575           0 :           continue;
    2576             :         }
    2577             :       }
    2578             : 
                             // Store the k centroids as integer palette colours, clipped to the
                             // valid pixel range.
    2579             : #if CONFIG_HIGHBITDEPTH
    2580           0 :       if (cpi->common.use_highbitdepth)
    2581           0 :         for (i = 0; i < k; ++i)
    2582           0 :           pmi->palette_colors[i] =
    2583           0 :               clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth);
    2584             :       else
    2585             : #endif  // CONFIG_HIGHBITDEPTH
    2586           0 :         for (i = 0; i < k; ++i)
    2587           0 :           pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
    2588           0 :       pmi->palette_size[0] = k;
    2589             : 
                             // Index map is computed at rows x cols (stride `cols`), then
                             // widened in place to block_width x block_height (stride
                             // block_width).
    2590           0 :       av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
    2591           0 :       extend_palette_color_map(color_map, cols, rows, block_width,
    2592             :                                block_height);
                             // Signalling cost: base mode cost + palette size + first index
                             // (uniform over k) + palette-enabled bit, then the palette colours
                             // and every remaining colour index in the rows x cols area.
                             // The enabled-bit cost reads av1_default_palette_y_mode_prob.
    2593           0 :       palette_mode_cost =
    2594           0 :           dc_mode_cost +
    2595           0 :           cpi->palette_y_size_cost[bsize - BLOCK_8X8][k - PALETTE_MIN_SIZE] +
    2596           0 :           write_uniform_cost(k, color_map[0]) +
    2597           0 :           av1_cost_bit(
    2598             :               av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx],
    2599             :               1);
    2600           0 :       palette_mode_cost += av1_palette_color_cost_y(pmi,
    2601             : #if CONFIG_PALETTE_DELTA_ENCODING
    2602             :                                                     color_cache, n_cache,
    2603             : #endif  // CONFIG_PALETTE_DELTA_ENCODING
    2604           0 :                                                     cpi->common.bit_depth);
    2605           0 :       for (i = 0; i < rows; ++i) {
                               // (0, 0) is skipped: its cost was added via write_uniform_cost().
    2606           0 :         for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
    2607             :           int color_idx;
    2608           0 :           const int color_ctx = av1_get_palette_color_index_context(
    2609             :               color_map, block_width, i, j, k, color_order, &color_idx);
    2610           0 :           assert(color_idx >= 0 && color_idx < k);
    2611           0 :           palette_mode_cost += cpi->palette_y_color_cost[k - PALETTE_MIN_SIZE]
    2612           0 :                                                         [color_ctx][color_idx];
    2613             :         }
    2614             :       }
                             // Model-based pruning: drop the candidate when its model RD exceeds
                             // 1.5x the best model RD seen so far.
    2615           0 :       this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
    2616           0 :       if (*best_model_rd != INT64_MAX &&
    2617           0 :           this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    2618           0 :         continue;
    2619           0 :       if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
    2620           0 :       super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
    2621           0 :       if (tokenonly_rd_stats.rate == INT_MAX) continue;
    2622           0 :       this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
    2623           0 :       this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
                             // The tx-size cost is removed from the token-only rate only after
                             // this_rd was computed, so this_rd and this_rate still include it
                             // while the reported *rate_tokenonly does not.
    2624           0 :       if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
    2625           0 :         tokenonly_rd_stats.rate -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
    2626             :       }
    2627           0 :       if (this_rd < *best_rd) {
    2628           0 :         *best_rd = this_rd;
    2629           0 :         memcpy(best_palette_color_map, color_map,
    2630           0 :                block_width * block_height * sizeof(color_map[0]));
    2631           0 :         *best_mbmi = *mbmi;
    2632           0 :         rate_overhead = this_rate - tokenonly_rd_stats.rate;
    2633           0 :         if (rate) *rate = this_rate;
    2634           0 :         if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
    2635           0 :         if (distortion) *distortion = tokenonly_rd_stats.dist;
    2636           0 :         if (skippable) *skippable = tokenonly_rd_stats.skip;
    2637             :       }
    2638             :     }
    2639             :   }
    2640             : 
                         // Restore the best palette's index map and mode info.
                         // NOTE(review): the best map was saved with block_width * block_height
                         // bytes in the extended (stride block_width) layout, but only
                         // rows * cols bytes are copied back here. When rows < block_height or
                         // cols < block_width the tail of color_map keeps the indices of the
                         // last candidate tried rather than the best one -- confirm whether the
                         // length should be block_width * block_height.
    2641           0 :   if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
    2642           0 :     memcpy(color_map, best_palette_color_map,
    2643           0 :            rows * cols * sizeof(best_palette_color_map[0]));
    2644             :   }
    2645           0 :   *mbmi = *best_mbmi;
    2646           0 :   return rate_overhead;
    2647             : }
    2648             : #endif  // CONFIG_PALETTE
    2649             : 
    2650           0 : static int64_t rd_pick_intra_sub_8x8_y_subblock_mode(
    2651             :     const AV1_COMP *const cpi, MACROBLOCK *x, int row, int col,
    2652             :     PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a,
    2653             :     ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion,
    2654             :     BLOCK_SIZE bsize, TX_SIZE tx_size, int *y_skip, int64_t rd_thresh) {
                         // Searches the intra prediction modes DC_PRED..TM_PRED for one luma
                         // prediction sub-block (bsize < BLOCK_8X8) located at (row, col), given
                         // in units of 4 pixels, and returns the best rate-distortion cost found.
                         //
                         // A mode is accepted only if its RD cost is strictly below the running
                         // best, which starts at rd_thresh. When at least one mode is accepted:
                         //   - *best_mode, *bestrate (mode cost + coefficient rate), *bestratey
                         //     (coefficient rate only) and *bestdistortion describe the winner;
                         //   - the entropy contexts a / l are overwritten with the winner's;
                         //   - *y_skip, if y_skip is non-NULL, is ANDed with the winner's skip flag;
                         //   - the winner's reconstruction is copied back into the dst buffer.
                         // When no mode is accepted the function returns rd_thresh, none of the
                         // outputs above are written and dst is not restored.
                         //
                         // *best_mode is also read on entry: it seeds the directional-mismatch
                         // pruning done through conditional_skipintra().
    2655           0 :   const AV1_COMMON *const cm = &cpi->common;
    2656             :   PREDICTION_MODE mode;
    2657           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    2658           0 :   assert(!is_inter_block(&xd->mi[0]->mbmi));
                         // Start from the caller's threshold so only strictly better modes win.
    2659           0 :   int64_t best_rd = rd_thresh;
    2660           0 :   struct macroblock_plane *p = &x->plane[0];
    2661           0 :   struct macroblockd_plane *pd = &xd->plane[0];
    2662           0 :   const int src_stride = p->src.stride;
    2663           0 :   const int dst_stride = pd->dst.stride;
                         // Top-left pixel of this sub-block in the source and destination planes.
    2664           0 :   const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
    2665           0 :   uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
    2666             : #if CONFIG_CHROMA_2X2
    2667             :   // TODO(jingning): This is a temporary change. The whole function should be
    2668             :   // out when cb4x4 is enabled.
    2669             :   ENTROPY_CONTEXT ta[4], tempa[4];
    2670             :   ENTROPY_CONTEXT tl[4], templ[4];
    2671             : #else
    2672             :   ENTROPY_CONTEXT ta[2], tempa[2];
    2673             :   ENTROPY_CONTEXT tl[2], templ[2];
    2674             : #endif  // CONFIG_CHROMA_2X2
    2675             : 
    2676           0 :   const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
    2677           0 :   const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
    2678           0 :   const int tx_width_unit = tx_size_wide_unit[tx_size];
    2679           0 :   const int tx_height_unit = tx_size_high_unit[tx_size];
    2680           0 :   const int pred_block_width = block_size_wide[bsize];
    2681           0 :   const int pred_block_height = block_size_high[bsize];
    2682           0 :   const int tx_width = tx_size_wide[tx_size];
    2683           0 :   const int tx_height = tx_size_high[tx_size];
    2684           0 :   const int pred_width_in_transform_blocks = pred_block_width / tx_width;
    2685           0 :   const int pred_height_in_transform_blocks = pred_block_height / tx_height;
    2686             :   int idx, idy;
    2687           0 :   int best_can_skip = 0;
                         // Copy of the best reconstruction so far, stored with a row stride of 8.
    2688             :   uint8_t best_dst[8 * 8];
    2689             : #if CONFIG_HIGHBITDEPTH
    2690             :   uint16_t best_dst16[8 * 8];
    2691             : #endif  // CONFIG_HIGHBITDEPTH
    2692           0 :   const int is_lossless = xd->lossless[xd->mi[0]->mbmi.segment_id];
                         // Block size used to select the variance function for the distortion.
    2693             : #if CONFIG_EXT_TX && CONFIG_RECT_TX
    2694           0 :   const int sub_bsize = bsize;
    2695             : #else
    2696             :   const int sub_bsize = BLOCK_4X4;
    2697             : #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
    2698             : 
    2699             : #if CONFIG_PVQ
    2700             :   od_rollback_buffer pre_buf, post_buf;
    2701             :   od_encode_checkpoint(&x->daala_enc, &pre_buf);
    2702             :   od_encode_checkpoint(&x->daala_enc, &post_buf);
    2703             : #endif  // CONFIG_PVQ
    2704             : 
    2705           0 :   assert(bsize < BLOCK_8X8);
    2706           0 :   assert(tx_width < 8 || tx_height < 8);
    2707             : #if CONFIG_EXT_TX && CONFIG_RECT_TX
    2708           0 :   if (is_lossless)
    2709           0 :     assert(tx_width == 4 && tx_height == 4);
    2710             :   else
    2711           0 :     assert(tx_width == pred_block_width && tx_height == pred_block_height);
    2712             : #else
    2713             :   assert(tx_width == 4 && tx_height == 4);
    2714             : #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
    2715             : 
                         // Snapshot the incoming entropy contexts; each candidate mode restarts
                         // from this copy (see the memcpy into tempa / templ inside the loops).
    2716           0 :   memcpy(ta, a, pred_width_in_transform_blocks * sizeof(a[0]));
    2717           0 :   memcpy(tl, l, pred_height_in_transform_blocks * sizeof(l[0]));
    2718             : 
    2719           0 :   xd->mi[0]->mbmi.tx_size = tx_size;
    2720             : 
    2721             : #if CONFIG_PALETTE
    2722           0 :   xd->mi[0]->mbmi.palette_mode_info.palette_size[0] = 0;
    2723             : #endif  // CONFIG_PALETTE
    2724             : 
    2725             : #if CONFIG_HIGHBITDEPTH
                         // High-bit-depth search. It has the same structure as the 8-bit search
                         // further down and always returns from inside this branch.
    2726           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    2727             : #if CONFIG_PVQ
    2728             :     od_encode_checkpoint(&x->daala_enc, &pre_buf);
    2729             : #endif
    2730           0 :     for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    2731             :       int64_t this_rd;
    2732           0 :       int ratey = 0;
    2733           0 :       int64_t distortion = 0;
    2734           0 :       int rate = bmode_costs[mode];
    2735           0 :       int can_skip = 1;
    2736             : 
                             // Skip modes disabled by the speed-feature mask for this tx size.
    2737           0 :       if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
    2738             :             (1 << mode)))
    2739           0 :         continue;
    2740             : 
    2741             :       // Only do the oblique modes if the best so far is
    2742             :       // one of the neighboring directional modes
    2743           0 :       if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
    2744           0 :         if (conditional_skipintra(mode, *best_mode)) continue;
    2745             :       }
    2746             : 
    2747           0 :       memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
    2748           0 :       memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
    2749             : 
    2750           0 :       for (idy = 0; idy < pred_height_in_transform_blocks; ++idy) {
    2751           0 :         for (idx = 0; idx < pred_width_in_transform_blocks; ++idx) {
    2752           0 :           const int block_raster_idx = (row + idy) * 2 + (col + idx);
    2753           0 :           const int block =
    2754           0 :               av1_raster_order_to_block_index(tx_size, block_raster_idx);
    2755           0 :           const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
    2756           0 :           uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
    2757             : #if !CONFIG_PVQ
    2758           0 :           int16_t *const src_diff = av1_raster_block_offset_int16(
    2759           0 :               BLOCK_8X8, block_raster_idx, p->src_diff);
    2760             : #endif
    2761             :           int skip;
    2762           0 :           assert(block < 4);
    2763           0 :           assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
    2764             :                          idx == 0 && idy == 0));
    2765           0 :           assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
    2766             :                          block == 0 || block == 2));
    2767           0 :           xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
    2768           0 :           av1_predict_intra_block(
    2769           0 :               xd, pd->width, pd->height, txsize_to_bsize[tx_size], mode, dst,
    2770             :               dst_stride, dst, dst_stride, col + idx, row + idy, 0);
    2771             : #if !CONFIG_PVQ
    2772           0 :           aom_highbd_subtract_block(tx_height, tx_width, src_diff, 8, src,
    2773             :                                     src_stride, dst, dst_stride, xd->bd);
    2774             : #endif
    2775           0 :           if (is_lossless) {
    2776           0 :             TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
    2777           0 :             const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
    2778           0 :             const int coeff_ctx =
    2779           0 :                 combine_entropy_contexts(tempa[idx], templ[idy]);
    2780             : #if !CONFIG_PVQ
                                   // NOTE(review): this lossless branch quantizes with
                                   // AV1_XFORM_QUANT_FP, while the 8-bit search below selects
                                   // AV1_XFORM_QUANT_B when is_lossless is set - confirm the
                                   // difference is intended.
    2781           0 :             av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
    2782             :                             tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
    2783           0 :             ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
    2784             :                                      tempa + idx, templ + idy,
    2785             :                                      cpi->sf.use_fast_coef_costing);
    2786           0 :             skip = (p->eobs[block] == 0);
    2787           0 :             can_skip &= skip;
    2788           0 :             tempa[idx] = !skip;
    2789           0 :             templ[idy] = !skip;
    2790             : #if CONFIG_EXT_TX
                                   // A rectangular transform spans two context entries along its
                                   // long side; mirror the flag into the second entry.
    2791           0 :             if (tx_size == TX_8X4) {
    2792           0 :               tempa[idx + 1] = tempa[idx];
    2793           0 :             } else if (tx_size == TX_4X8) {
    2794           0 :               templ[idy + 1] = templ[idy];
    2795             :             }
    2796             : #endif  // CONFIG_EXT_TX
    2797             : #else
    2798             :             (void)scan_order;
    2799             : 
    2800             :             av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
    2801             :                             tx_size, coeff_ctx, AV1_XFORM_QUANT_B);
    2802             : 
    2803             :             ratey += x->rate;
    2804             :             skip = x->pvq_skip[0];
    2805             :             tempa[idx] = !skip;
    2806             :             templ[idy] = !skip;
    2807             :             can_skip &= skip;
    2808             : #endif
                                   // distortion is never accumulated in this lossless branch, so
                                   // this early exit compares the rate term only.
    2809           0 :             if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
    2810           0 :               goto next_highbd;
    2811             : #if CONFIG_PVQ
    2812             :             if (!skip)
    2813             : #endif
    2814           0 :               av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
    2815             :                                           DCT_DCT, tx_size, dst, dst_stride,
    2816           0 :                                           p->eobs[block]);
    2817             :           } else {
    2818             :             int64_t dist;
    2819             :             unsigned int tmp;
    2820           0 :             TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
    2821           0 :             const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
    2822           0 :             const int coeff_ctx =
    2823           0 :                 combine_entropy_contexts(tempa[idx], templ[idy]);
    2824             : #if !CONFIG_PVQ
    2825           0 :             av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
    2826             :                             tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
    2827           0 :             av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
    2828             :                            templ + idy);
    2829           0 :             ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order,
    2830             :                                      tempa + idx, templ + idy,
    2831             :                                      cpi->sf.use_fast_coef_costing);
    2832           0 :             skip = (p->eobs[block] == 0);
    2833           0 :             can_skip &= skip;
    2834           0 :             tempa[idx] = !skip;
    2835           0 :             templ[idy] = !skip;
    2836             : #if CONFIG_EXT_TX
    2837           0 :             if (tx_size == TX_8X4) {
    2838           0 :               tempa[idx + 1] = tempa[idx];
    2839           0 :             } else if (tx_size == TX_4X8) {
    2840           0 :               templ[idy + 1] = templ[idy];
    2841             :             }
    2842             : #endif  // CONFIG_EXT_TX
    2843             : #else
    2844             :             (void)scan_order;
    2845             : 
    2846             :             av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8,
    2847             :                             tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
    2848             :             ratey += x->rate;
    2849             :             skip = x->pvq_skip[0];
    2850             :             tempa[idx] = !skip;
    2851             :             templ[idy] = !skip;
    2852             :             can_skip &= skip;
    2853             : #endif
    2854             : #if CONFIG_PVQ
    2855             :             if (!skip)
    2856             : #endif
    2857           0 :               av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
    2858             :                                           tx_type, tx_size, dst, dst_stride,
    2859           0 :                                           p->eobs[block]);
                                   // Distortion is measured on the reconstructed pixels; the value
                                   // written to tmp is scaled by 16 before being accumulated.
    2860           0 :             cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
    2861           0 :             dist = (int64_t)tmp << 4;
    2862           0 :             distortion += dist;
    2863           0 :             if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
    2864           0 :               goto next_highbd;
    2865             :           }
    2866             :         }
    2867             :       }
    2868             : 
    2869           0 :       rate += ratey;
    2870           0 :       this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    2871             : 
    2872           0 :       if (this_rd < best_rd) {
    2873           0 :         *bestrate = rate;
    2874           0 :         *bestratey = ratey;
    2875           0 :         *bestdistortion = distortion;
    2876           0 :         best_rd = this_rd;
    2877           0 :         best_can_skip = can_skip;
    2878           0 :         *best_mode = mode;
    2879           0 :         memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
    2880           0 :         memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
    2881             : #if CONFIG_PVQ
    2882             :         od_encode_checkpoint(&x->daala_enc, &post_buf);
    2883             : #endif
                               // Save this mode's reconstruction.
                               // NOTE(review): the row and column counts are derived from the
                               // number of transform blocks times 4. When the transform size
                               // equals a 4x8 or 8x4 prediction block both counts are 1, so it
                               // looks like only a 4x4 area is saved - confirm against the
                               // block dimensions.
    2884           0 :         for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
    2885           0 :           memcpy(best_dst16 + idy * 8,
    2886           0 :                  CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
    2887             :                  pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
    2888             :         }
    2889             :       }
                             // Landing point when a mode is abandoned early; the empty block gives
                             // the label a statement to attach to.
    2890             :     next_highbd : {}
    2891             : #if CONFIG_PVQ
    2892             :       od_encode_rollback(&x->daala_enc, &pre_buf);
    2893             : #endif
    2894             :     }
    2895             : 
                           // No mode beat the threshold: leave the outputs and dst untouched.
    2896           0 :     if (best_rd >= rd_thresh) return best_rd;
    2897             : 
    2898             : #if CONFIG_PVQ
    2899             :     od_encode_rollback(&x->daala_enc, &post_buf);
    2900             : #endif
    2901             : 
    2902           0 :     if (y_skip) *y_skip &= best_can_skip;
    2903             : 
                           // Put the winning mode's reconstruction back into the frame buffer.
    2904           0 :     for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy) {
    2905           0 :       memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
    2906             :              best_dst16 + idy * 8,
    2907             :              pred_width_in_transform_blocks * 4 * sizeof(uint16_t));
    2908             :     }
    2909             : 
    2910           0 :     return best_rd;
    2911             :   }
    2912             : #endif  // CONFIG_HIGHBITDEPTH
    2913             : 
    2914             : #if CONFIG_PVQ
    2915             :   od_encode_checkpoint(&x->daala_enc, &pre_buf);
    2916             : #endif  // CONFIG_PVQ
    2917             : 
                         // 8-bit search. Unlike the high-bit-depth loops above, these loops walk
                         // the sub-block in 4x4 units and step by the transform size in 4x4 units.
    2918           0 :   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    2919             :     int64_t this_rd;
    2920           0 :     int ratey = 0;
    2921           0 :     int64_t distortion = 0;
    2922           0 :     int rate = bmode_costs[mode];
    2923           0 :     int can_skip = 1;
    2924             : 
    2925           0 :     if (!(cpi->sf.intra_y_mode_mask[txsize_sqr_up_map[tx_size]] &
    2926             :           (1 << mode))) {
    2927           0 :       continue;
    2928             :     }
    2929             : 
    2930             :     // Only do the oblique modes if the best so far is
    2931             :     // one of the neighboring directional modes
    2932           0 :     if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
    2933           0 :       if (conditional_skipintra(mode, *best_mode)) continue;
    2934             :     }
    2935             : 
    2936           0 :     memcpy(tempa, ta, pred_width_in_transform_blocks * sizeof(ta[0]));
    2937           0 :     memcpy(templ, tl, pred_height_in_transform_blocks * sizeof(tl[0]));
    2938             : 
    2939           0 :     for (idy = 0; idy < pred_height_in_4x4_blocks; idy += tx_height_unit) {
    2940           0 :       for (idx = 0; idx < pred_width_in_4x4_blocks; idx += tx_width_unit) {
    2941           0 :         const int block_raster_idx = (row + idy) * 2 + (col + idx);
    2942           0 :         int block = av1_raster_order_to_block_index(tx_size, block_raster_idx);
    2943           0 :         const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
    2944           0 :         uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
    2945             : #if !CONFIG_PVQ
    2946           0 :         int16_t *const src_diff = av1_raster_block_offset_int16(
    2947           0 :             BLOCK_8X8, block_raster_idx, p->src_diff);
    2948             : #endif  // !CONFIG_PVQ
    2949             :         int skip;
    2950           0 :         assert(block < 4);
    2951           0 :         assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
    2952             :                        idx == 0 && idy == 0));
    2953           0 :         assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4,
    2954             :                        block == 0 || block == 2));
    2955           0 :         xd->mi[0]->bmi[block_raster_idx].as_mode = mode;
    2956           0 :         av1_predict_intra_block(xd, pd->width, pd->height,
    2957           0 :                                 txsize_to_bsize[tx_size], mode, dst, dst_stride,
    2958             :                                 dst, dst_stride,
    2959             : #if CONFIG_CB4X4
    2960           0 :                                 2 * (col + idx), 2 * (row + idy),
    2961             : #else
    2962             :                                 col + idx, row + idy,
    2963             : #endif  // CONFIG_CB4X4
    2964             :                                 0);
    2965             : #if !CONFIG_PVQ
    2966           0 :         aom_subtract_block(tx_height, tx_width, src_diff, 8, src, src_stride,
    2967             :                            dst, dst_stride);
    2968             : #endif  // !CONFIG_PVQ
    2969             : 
    2970           0 :         TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size);
    2971           0 :         const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 0);
    2972           0 :         const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]);
    2973             : #if CONFIG_CB4X4
                               // From here on `block` is scaled by 4; get_tx_type() above was
                               // called with the unscaled index.
    2974           0 :         block = 4 * block;
    2975             : #endif  // CONFIG_CB4X4
    2976             : #if !CONFIG_PVQ
    2977           0 :         const AV1_XFORM_QUANT xform_quant =
    2978           0 :             is_lossless ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP;
    2979           0 :         av1_xform_quant(cm, x, 0, block,
    2980             : #if CONFIG_CB4X4
    2981           0 :                         2 * (row + idy), 2 * (col + idx),
    2982             : #else
    2983             :                         row + idy, col + idx,
    2984             : #endif  // CONFIG_CB4X4
    2985             :                         BLOCK_8X8, tx_size, coeff_ctx, xform_quant);
    2986             : 
                               // Called for lossless blocks as well; the high-bit-depth search
                               // above only calls it in its non-lossless branch.
    2987           0 :         av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx,
    2988             :                        templ + idy);
    2989             : 
    2990           0 :         ratey +=
    2991           0 :             av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx,
    2992             :                             templ + idy, cpi->sf.use_fast_coef_costing);
    2993           0 :         skip = (p->eobs[block] == 0);
    2994           0 :         can_skip &= skip;
    2995           0 :         tempa[idx] = !skip;
    2996           0 :         templ[idy] = !skip;
    2997             : #if CONFIG_EXT_TX
    2998           0 :         if (tx_size == TX_8X4) {
    2999           0 :           tempa[idx + 1] = tempa[idx];
    3000           0 :         } else if (tx_size == TX_4X8) {
    3001           0 :           templ[idy + 1] = templ[idy];
    3002             :         }
    3003             : #endif  // CONFIG_EXT_TX
    3004             : #else
    3005             :         (void)scan_order;
    3006             : 
    3007             :         av1_xform_quant(cm, x, 0, block,
    3008             : #if CONFIG_CB4X4
    3009             :                         2 * (row + idy), 2 * (col + idx),
    3010             : #else
    3011             :                         row + idy, col + idx,
    3012             : #endif  // CONFIG_CB4X4
    3013             :                         BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP);
    3014             : 
    3015             :         ratey += x->rate;
    3016             :         skip = x->pvq_skip[0];
    3017             :         tempa[idx] = !skip;
    3018             :         templ[idy] = !skip;
    3019             :         can_skip &= skip;
    3020             : #endif  // !CONFIG_PVQ
    3021             : 
    3022           0 :         if (!is_lossless) {  // To use the pixel domain distortion, we need to
    3023             :                              // calculate inverse txfm *before* calculating RD
    3024             :                              // cost. Compared to calculating the distortion in
    3025             :                              // the frequency domain, the overhead of encoding
    3026             :                              // effort is low.
    3027             : #if CONFIG_PVQ
    3028             :           if (!skip)
    3029             : #endif  // CONFIG_PVQ
    3030           0 :             av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
    3031             :                                         tx_type, tx_size, dst, dst_stride,
    3032           0 :                                         p->eobs[block]);
    3033             :           unsigned int tmp;
    3034           0 :           cpi->fn_ptr[sub_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
    3035           0 :           const int64_t dist = (int64_t)tmp << 4;
    3036           0 :           distortion += dist;
    3037             :         }
    3038             : 
                               // Abandon this mode as soon as its partial cost reaches the best.
    3039           0 :         if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
    3040           0 :           goto next;
    3041             : 
    3042           0 :         if (is_lossless) {  // Calculate inverse txfm *after* RD cost.
    3043             : #if CONFIG_PVQ
    3044             :           if (!skip)
    3045             : #endif  // CONFIG_PVQ
    3046           0 :             av1_inverse_transform_block(xd, BLOCK_OFFSET(pd->dqcoeff, block),
    3047             :                                         DCT_DCT, tx_size, dst, dst_stride,
    3048           0 :                                         p->eobs[block]);
    3049             :         }
    3050             :       }
    3051             :     }
    3052             : 
    3053           0 :     rate += ratey;
    3054           0 :     this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
    3055             : 
    3056           0 :     if (this_rd < best_rd) {
    3057           0 :       *bestrate = rate;
    3058           0 :       *bestratey = ratey;
    3059           0 :       *bestdistortion = distortion;
    3060           0 :       best_rd = this_rd;
    3061           0 :       best_can_skip = can_skip;
    3062           0 :       *best_mode = mode;
    3063           0 :       memcpy(a, tempa, pred_width_in_transform_blocks * sizeof(tempa[0]));
    3064           0 :       memcpy(l, templ, pred_height_in_transform_blocks * sizeof(templ[0]));
    3065             : #if CONFIG_PVQ
    3066             :       od_encode_checkpoint(&x->daala_enc, &post_buf);
    3067             : #endif  // CONFIG_PVQ
                           // Save this mode's reconstruction.
                           // NOTE(review): as in the high-bit-depth path, the copied area is
                           // (transform blocks * 4) in each direction, which looks like 4x4
                           // for a single 4x8 / 8x4 transform - confirm.
    3068           0 :       for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
    3069           0 :         memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
    3070           0 :                pred_width_in_transform_blocks * 4);
    3071             :     }
    3072             :   next : {}
    3073             : #if CONFIG_PVQ
    3074             :     od_encode_rollback(&x->daala_enc, &pre_buf);
    3075             : #endif  // CONFIG_PVQ
    3076             :   }     // mode decision loop
    3077             : 
                         // No mode beat the threshold: leave the outputs and dst untouched.
    3078           0 :   if (best_rd >= rd_thresh) return best_rd;
    3079             : 
    3080             : #if CONFIG_PVQ
    3081             :   od_encode_rollback(&x->daala_enc, &post_buf);
    3082             : #endif  // CONFIG_PVQ
    3083             : 
    3084           0 :   if (y_skip) *y_skip &= best_can_skip;
    3085             : 
                         // Put the winning mode's reconstruction back into the frame buffer.
    3086           0 :   for (idy = 0; idy < pred_height_in_transform_blocks * 4; ++idy)
    3087           0 :     memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
    3088           0 :            pred_width_in_transform_blocks * 4);
    3089             : 
    3090           0 :   return best_rd;
    3091             : }
    3092             : 
    3093           0 : static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi,
    3094             :                                             MACROBLOCK *mb, int *rate,
    3095             :                                             int *rate_y, int64_t *distortion,
    3096             :                                             int *y_skip, int64_t best_rd) {
                         // Picks an intra mode for every luma prediction sub-block of the current
                         // 8x8 coding block by calling rd_pick_intra_sub_8x8_y_subblock_mode() on
                         // each one, and accumulates their rate and distortion.
                         //
                         // Returns INT64_MAX as soon as the accumulated RD cost reaches best_rd;
                         // in that case *rate, *rate_y and *distortion are not written. Otherwise
                         // returns RDCOST of the totals and stores the total rate in *rate, the
                         // coefficient rate plus transform-type cost in *rate_y, and the total
                         // distortion in *distortion. If y_skip is non-NULL, *y_skip is set to 1
                         // first and then passed down to the sub-block search.
    3097           0 :   const MACROBLOCKD *const xd = &mb->e_mbd;
    3098           0 :   MODE_INFO *const mic = xd->mi[0];
    3099           0 :   const MODE_INFO *above_mi = xd->above_mi;
    3100           0 :   const MODE_INFO *left_mi = xd->left_mi;
    3101           0 :   MB_MODE_INFO *const mbmi = &mic->mbmi;
    3102           0 :   assert(!is_inter_block(mbmi));
    3103           0 :   const BLOCK_SIZE bsize = mbmi->sb_type;
    3104           0 :   const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize];
    3105           0 :   const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize];
    3106             :   int idx, idy;
    3107           0 :   int cost = 0;
    3108           0 :   int64_t total_distortion = 0;
    3109           0 :   int tot_rate_y = 0;
    3110           0 :   int64_t total_rd = 0;
                         // Default mode-cost table; replaced per sub-block on key frames below.
    3111           0 :   const int *bmode_costs = cpi->mbmode_cost[0];
    3112           0 :   const int is_lossless = xd->lossless[mbmi->segment_id];
    3113             : #if CONFIG_EXT_TX && CONFIG_RECT_TX
    3114           0 :   const TX_SIZE tx_size = is_lossless ? TX_4X4 : max_txsize_rect_lookup[bsize];
    3115             : #else
    3116             :   const TX_SIZE tx_size = TX_4X4;
    3117             : #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
    3118             : 
    3119             : #if CONFIG_EXT_INTRA
    3120             : #if CONFIG_INTRA_INTERP
    3121             :   mbmi->intra_filter = INTRA_FILTER_LINEAR;
    3122             : #endif  // CONFIG_INTRA_INTERP
    3123             : #endif  // CONFIG_EXT_INTRA
    3124             : #if CONFIG_FILTER_INTRA
    3125             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
    3126             : #endif  // CONFIG_FILTER_INTRA
    3127             : 
    3128             :   // TODO(any): Add search of the tx_type to improve rd performance at the
    3129             :   // expense of speed.
    3130           0 :   mbmi->tx_type = DCT_DCT;
    3131           0 :   mbmi->tx_size = tx_size;
    3132             : 
    3133           0 :   if (y_skip) *y_skip = 1;
    3134             : 
    3135             :   // Pick modes for each prediction sub-block (of size 4x4, 4x8, or 8x4) in this
    3136             :   // 8x8 coding block.
    3137           0 :   for (idy = 0; idy < 2; idy += pred_height_in_4x4_blocks) {
    3138           0 :     for (idx = 0; idx < 2; idx += pred_width_in_4x4_blocks) {
    3139           0 :       PREDICTION_MODE best_mode = DC_PRED;
    3140           0 :       int r = INT_MAX, ry = INT_MAX;
    3141           0 :       int64_t d = INT64_MAX, this_rd = INT64_MAX;
    3142             :       int j;
                             // Raster index of this sub-block's top-left 4x4 unit in the 2x2 grid.
    3143           0 :       const int pred_block_idx = idy * 2 + idx;
    3144           0 :       if (cpi->common.frame_type == KEY_FRAME) {
                               // On key frames the mode cost depends on the above / left modes.
    3145           0 :         const PREDICTION_MODE A =
    3146             :             av1_above_block_mode(mic, above_mi, pred_block_idx);
    3147           0 :         const PREDICTION_MODE L =
    3148             :             av1_left_block_mode(mic, left_mi, pred_block_idx);
    3149             : 
    3150           0 :         bmode_costs = cpi->y_mode_costs[A][L];
    3151             :       }
                             // The sub-block search gets the RD budget that is still unspent.
    3152           0 :       this_rd = rd_pick_intra_sub_8x8_y_subblock_mode(
    3153             :           cpi, mb, idy, idx, &best_mode, bmode_costs,
    3154           0 :           xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r,
    3155             :           &ry, &d, bsize, tx_size, y_skip, best_rd - total_rd);
                             // This early exit is compiled out when CONFIG_DAALA_DIST is enabled.
    3156             : #if !CONFIG_DAALA_DIST
    3157           0 :       if (this_rd >= best_rd - total_rd) return INT64_MAX;
    3158             : #endif  // !CONFIG_DAALA_DIST
    3159           0 :       total_rd += this_rd;
    3160           0 :       cost += r;
    3161           0 :       total_distortion += d;
    3162           0 :       tot_rate_y += ry;
    3163             : 
                             // Record the chosen mode on every 4x4 entry the sub-block covers.
    3164           0 :       mic->bmi[pred_block_idx].as_mode = best_mode;
    3165           0 :       for (j = 1; j < pred_height_in_4x4_blocks; ++j)
    3166           0 :         mic->bmi[pred_block_idx + j * 2].as_mode = best_mode;
    3167           0 :       for (j = 1; j < pred_width_in_4x4_blocks; ++j)
    3168           0 :         mic->bmi[pred_block_idx + j].as_mode = best_mode;
    3169             : 
    3170           0 :       if (total_rd >= best_rd) return INT64_MAX;
    3171             :     }
    3172             :   }
                         // The block-level mode is the one stored in the last (index 3) entry.
    3173           0 :   mbmi->mode = mic->bmi[3].as_mode;
    3174             : 
    3175             : #if CONFIG_DAALA_DIST
                         // Replaces the summed per-sub-block distortion with a single value
                         // computed over the whole 8x8 block.
    3176             :   {
    3177             :     const struct macroblock_plane *p = &mb->plane[0];
    3178             :     const struct macroblockd_plane *pd = &xd->plane[0];
    3179             :     const int src_stride = p->src.stride;
    3180             :     const int dst_stride = pd->dst.stride;
    3181             :     uint8_t *src = p->src.buf;
    3182             :     uint8_t *dst = pd->dst.buf;
    3183             :     int use_activity_masking = 0;
    3184             :     int qm = OD_HVS_QM;
    3185             : 
    3186             : #if CONFIG_PVQ
    3187             :     use_activity_masking = mb->daala_enc.use_activity_masking;
    3188             : #endif  // CONFIG_PVQ
    3189             :     // Daala-defined distortion computed for the block of 8x8 pixels
    3190             :     total_distortion = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8,
    3191             :                                       qm, use_activity_masking, mb->qindex)
    3192             :                        << 4;
    3193             :   }
    3194             : #endif  // CONFIG_DAALA_DIST
    3195             :   // Add in the cost of the transform type
    3196           0 :   if (!is_lossless) {
    3197           0 :     int rate_tx_type = 0;
    3198             : #if CONFIG_EXT_TX
    3199           0 :     if (get_ext_tx_types(tx_size, bsize, 0, cpi->common.reduced_tx_set_used) >
    3200             :         1) {
    3201           0 :       const int eset =
    3202           0 :           get_ext_tx_set(tx_size, bsize, 0, cpi->common.reduced_tx_set_used);
    3203           0 :       rate_tx_type = cpi->intra_tx_type_costs[eset][txsize_sqr_map[tx_size]]
    3204           0 :                                              [mbmi->mode][mbmi->tx_type];
    3205             :     }
    3206             : #else
    3207             :     rate_tx_type =
    3208             :         cpi->intra_tx_type_costs[txsize_sqr_map[tx_size]]
    3209             :                                 [intra_mode_to_tx_type_context[mbmi->mode]]
    3210             :                                 [mbmi->tx_type];
    3211             : #endif  // CONFIG_EXT_TX
    3212           0 :     assert(mbmi->tx_size == tx_size);
                           // The transform-type cost counts towards both the total rate and the
                           // luma (token) rate.
    3213           0 :     cost += rate_tx_type;
    3214           0 :     tot_rate_y += rate_tx_type;
    3215             :   }
    3216             : 
    3217           0 :   *rate = cost;
    3218           0 :   *rate_y = tot_rate_y;
    3219           0 :   *distortion = total_distortion;
    3220             : 
    3221           0 :   return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
    3222             : }
    3223             : 
    3224             : #if CONFIG_FILTER_INTRA
    3225             : // Search the luma filter intra modes not excluded by skip_mask. Return 1 if a filter intra mode is selected (it beat *best_rd); return 0 otherwise.
    3226             : static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
    3227             :                                     int *rate, int *rate_tokenonly,  // outputs, written only when a mode wins
    3228             :                                     int64_t *distortion, int *skippable,  // outputs, written only when a mode wins
    3229             :                                     BLOCK_SIZE bsize, int mode_cost,
    3230             :                                     int64_t *best_rd, int64_t *best_model_rd,  // in/out: running best RD cost and best model estimate
    3231             :                                     uint16_t skip_mask) {  // bit m set => filter intra mode m is not tried
    3232             :   MACROBLOCKD *const xd = &x->e_mbd;
    3233             :   MODE_INFO *const mic = xd->mi[0];
    3234             :   MB_MODE_INFO *mbmi = &mic->mbmi;
    3235             :   int filter_intra_selected_flag = 0;
    3236             :   FILTER_INTRA_MODE mode;
    3237             :   TX_SIZE best_tx_size = TX_4X4;
    3238             :   FILTER_INTRA_MODE_INFO filter_intra_mode_info;
    3239             :   TX_TYPE best_tx_type;  // assigned only when a mode wins; read only if filter_intra_selected_flag is set
    3240             :   // The whole search runs with filter intra enabled and DC_PRED as the prediction mode.
    3241             :   av1_zero(filter_intra_mode_info);
    3242             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
    3243             :   mbmi->mode = DC_PRED;
    3244             : #if CONFIG_PALETTE
    3245             :   mbmi->palette_mode_info.palette_size[0] = 0;
    3246             : #endif  // CONFIG_PALETTE
    3247             : 
    3248             :   for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
    3249             :     int this_rate;
    3250             :     int64_t this_rd, this_model_rd;
    3251             :     RD_STATS tokenonly_rd_stats;
    3252             :     if (skip_mask & (1 << mode)) continue;
    3253             :     mbmi->filter_intra_mode_info.filter_intra_mode[0] = mode;
    3254             :     this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
    3255             :     if (*best_model_rd != INT64_MAX &&
    3256             :         this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    3257             :       continue;  // model estimate is more than 1.5x the best estimate seen: prune
    3258             :     if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;  // track the lowest estimate
    3259             :     super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
    3260             :     if (tokenonly_rd_stats.rate == INT_MAX) continue;  // a rate of INT_MAX is treated as "no result"; skip this mode
    3261             :     this_rate = tokenonly_rd_stats.rate +
    3262             :                 av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 1) +  // cost of coding bit 1 under filter_intra_probs[0]
    3263             :                 write_uniform_cost(FILTER_INTRA_MODES, mode) + mode_cost;
    3264             :     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
    3265             :     // Keep this mode's settings and stats if it improves on the best RD cost so far.
    3266             :     if (this_rd < *best_rd) {
    3267             :       *best_rd = this_rd;
    3268             :       best_tx_size = mic->mbmi.tx_size;
    3269             :       filter_intra_mode_info = mbmi->filter_intra_mode_info;
    3270             :       best_tx_type = mic->mbmi.tx_type;
    3271             :       *rate = this_rate;
    3272             :       *rate_tokenonly = tokenonly_rd_stats.rate;
    3273             :       *distortion = tokenonly_rd_stats.dist;
    3274             :       *skippable = tokenonly_rd_stats.skip;
    3275             :       filter_intra_selected_flag = 1;
    3276             :     }
    3277             :   }
    3278             :   // Write the winning configuration back; mbmi otherwise still holds the last mode tried.
    3279             :   if (filter_intra_selected_flag) {
    3280             :     mbmi->mode = DC_PRED;
    3281             :     mbmi->tx_size = best_tx_size;
    3282             :     mbmi->filter_intra_mode_info.use_filter_intra_mode[0] =
    3283             :         filter_intra_mode_info.use_filter_intra_mode[0];
    3284             :     mbmi->filter_intra_mode_info.filter_intra_mode[0] =
    3285             :         filter_intra_mode_info.filter_intra_mode[0];
    3286             :     mbmi->tx_type = best_tx_type;
    3287             :     return 1;
    3288             :   } else {
    3289             :     return 0;
    3290             :   }
    3291             : }
    3292             : #endif  // CONFIG_FILTER_INTRA
    3293             : 
    3294             : #if CONFIG_EXT_INTRA
    3295             : // Run RD calculation with given luma intra prediction angle, and return
    3296             : // the RD cost (INT64_MAX if the angle is pruned or yields a rate of INT_MAX). Update the best mode info if the RD cost is the best so far.
    3297           0 : static int64_t calc_rd_given_intra_angle(
    3298             :     const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
    3299             :     int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
    3300             :     RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
    3301             :     TX_TYPE *best_tx_type,
    3302             : #if CONFIG_INTRA_INTERP
    3303             :     INTRA_FILTER *best_filter,
    3304             : #endif  // CONFIG_INTRA_INTERP
    3305             :     int64_t *best_rd, int64_t *best_model_rd) {
    3306             :   int this_rate;
    3307             :   RD_STATS tokenonly_rd_stats;
    3308             :   int64_t this_rd, this_model_rd;
    3309           0 :   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
    3310           0 :   assert(!is_inter_block(mbmi));
    3311             :   // Evaluate with the requested delta; prune when the model estimate exceeds 1.5x the best estimate seen.
    3312           0 :   mbmi->angle_delta[0] = angle_delta;
    3313           0 :   this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
    3314           0 :   if (*best_model_rd != INT64_MAX &&
    3315           0 :       this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    3316           0 :     return INT64_MAX;
    3317           0 :   if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
    3318           0 :   super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);  // best_rd_in goes to super_block_yrd; *best_rd is the acceptance bar below
    3319           0 :   if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;
    3320             :   // Full rate = token rate + mode cost + uniform cost of the delta shifted by max_angle_delta.
    3321           0 :   this_rate = tokenonly_rd_stats.rate + mode_cost +
    3322           0 :               write_uniform_cost(2 * max_angle_delta + 1,
    3323           0 :                                  mbmi->angle_delta[0] + max_angle_delta);
    3324           0 :   this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
    3325             :   // Record this configuration through the output pointers when it beats the running best.
    3326           0 :   if (this_rd < *best_rd) {
    3327           0 :     *best_rd = this_rd;
    3328           0 :     *best_angle_delta = mbmi->angle_delta[0];
    3329           0 :     *best_tx_size = mbmi->tx_size;
    3330             : #if CONFIG_INTRA_INTERP
    3331             :     *best_filter = mbmi->intra_filter;
    3332             : #endif  // CONFIG_INTRA_INTERP
    3333           0 :     *best_tx_type = mbmi->tx_type;
    3334           0 :     *rate = this_rate;
    3335           0 :     rd_stats->rate = tokenonly_rd_stats.rate;
    3336           0 :     rd_stats->dist = tokenonly_rd_stats.dist;
    3337           0 :     rd_stats->skip = tokenonly_rd_stats.skip;
    3338             :   }
    3339           0 :   return this_rd;  // returned even when it did not beat *best_rd
    3340             : }
    3341             : 
    3342             : // With given luma directional intra prediction mode, pick the best angle delta.
    3343             : // Return the RD cost corresponding to the best angle delta (the incoming best_rd if no delta improved on it).
    3344           0 : static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
    3345             :                                        int *rate, RD_STATS *rd_stats,
    3346             :                                        BLOCK_SIZE bsize, int mode_cost,
    3347             :                                        int64_t best_rd,
    3348             :                                        int64_t *best_model_rd) {
    3349           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    3350           0 :   MODE_INFO *const mic = xd->mi[0];
    3351           0 :   MB_MODE_INFO *mbmi = &mic->mbmi;
    3352           0 :   assert(!is_inter_block(mbmi));
    3353           0 :   int i, angle_delta, best_angle_delta = 0;
    3354           0 :   int first_try = 1;
    3355             : #if CONFIG_INTRA_INTERP
    3356             :   int p_angle;
    3357             :   const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
    3358             :   INTRA_FILTER filter, best_filter = INTRA_FILTER_LINEAR;
    3359             : #endif  // CONFIG_INTRA_INTERP
    3360             :   int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];  // rd_cost[2 * delta + i]: i = 0 is +delta, i = 1 is -delta
    3361           0 :   TX_SIZE best_tx_size = mic->mbmi.tx_size;
    3362           0 :   TX_TYPE best_tx_type = mbmi->tx_type;
    3363             :   // rd_cost is sized with slack so the neighbour lookup rd_cost[2 * (delta + 1) + i] in pass 2 stays in bounds.
    3364           0 :   for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
    3365             :   // Pass 1: even deltas only (0, 2, ...), both signs.
    3366           0 :   for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    3367             : #if CONFIG_INTRA_INTERP
    3368             :     for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
    3369             :       if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
    3370             :       mic->mbmi.intra_filter = filter;
    3371             : #endif  // CONFIG_INTRA_INTERP
    3372           0 :       for (i = 0; i < 2; ++i) {
    3373           0 :         best_rd_in = (best_rd == INT64_MAX)
    3374             :                          ? INT64_MAX
    3375           0 :                          : (best_rd + (best_rd >> (first_try ? 3 : 5)));  // 1/8 slack on the first try, 1/32 afterwards
    3376           0 :         this_rd = calc_rd_given_intra_angle(
    3377             :             cpi, x, bsize,
    3378             : #if CONFIG_INTRA_INTERP
    3379             :             mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
    3380             : #else
    3381             :           mode_cost,
    3382             : #endif  // CONFIG_INTRA_INTERP
    3383           0 :             best_rd_in, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
    3384             :             rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
    3385             : #if CONFIG_INTRA_INTERP
    3386             :             &best_filter,
    3387             : #endif  // CONFIG_INTRA_INTERP
    3388             :             &best_rd, best_model_rd);
    3389           0 :         rd_cost[2 * angle_delta + i] = this_rd;
    3390           0 :         if (first_try && this_rd == INT64_MAX) return best_rd;  // the very first evaluation gave nothing: give up on this mode
    3391           0 :         first_try = 0;
    3392           0 :         if (angle_delta == 0) {
    3393           0 :           rd_cost[1] = this_rd;  // delta 0 has no sign: mirror the result and skip i = 1
    3394           0 :           break;
    3395             :         }
    3396             :       }
    3397             : #if CONFIG_INTRA_INTERP
    3398             :     }
    3399             : #endif  // CONFIG_INTRA_INTERP
    3400             :   }
    3401             :   // Pass 2: odd deltas, tried only when at least one adjacent even delta of the same sign scored <= best_rd + best_rd / 32.
    3402           0 :   assert(best_rd != INT64_MAX);
    3403           0 :   for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    3404             :     int64_t rd_thresh;
    3405             : #if CONFIG_INTRA_INTERP
    3406             :     for (filter = INTRA_FILTER_LINEAR; filter < INTRA_FILTERS; ++filter) {
    3407             :       if (FILTER_FAST_SEARCH && filter != INTRA_FILTER_LINEAR) continue;
    3408             :       mic->mbmi.intra_filter = filter;
    3409             : #endif  // CONFIG_INTRA_INTERP
    3410           0 :       for (i = 0; i < 2; ++i) {
    3411           0 :         int skip_search = 0;
    3412           0 :         rd_thresh = best_rd + (best_rd >> 5);
    3413           0 :         if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
    3414           0 :             rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
    3415           0 :           skip_search = 1;
    3416           0 :         if (!skip_search) {
    3417           0 :           calc_rd_given_intra_angle(
    3418             :               cpi, x, bsize,
    3419             : #if CONFIG_INTRA_INTERP
    3420             :               mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
    3421             : #else
    3422             :             mode_cost,
    3423             : #endif  // CONFIG_INTRA_INTERP
    3424           0 :               best_rd, (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate,
    3425             :               rd_stats, &best_angle_delta, &best_tx_size, &best_tx_type,
    3426             : #if CONFIG_INTRA_INTERP
    3427             :               &best_filter,
    3428             : #endif  // CONFIG_INTRA_INTERP
    3429             :               &best_rd, best_model_rd);
    3430             :         }
    3431             :       }
    3432             : #if CONFIG_INTRA_INTERP
    3433             :     }
    3434             : #endif  // CONFIG_INTRA_INTERP
    3435             :   }
    3436             :   // With FILTER_FAST_SEARCH only the linear filter was tried above; now try the other filters at the best delta found.
    3437             : #if CONFIG_INTRA_INTERP
    3438             :   if (FILTER_FAST_SEARCH && rd_stats->rate < INT_MAX) {
    3439             :     p_angle = mode_to_angle_map[mbmi->mode] + best_angle_delta * ANGLE_STEP;
    3440             :     if (av1_is_intra_filter_switchable(p_angle)) {
    3441             :       for (filter = INTRA_FILTER_LINEAR + 1; filter < INTRA_FILTERS; ++filter) {
    3442             :         mic->mbmi.intra_filter = filter;
    3443             :         this_rd = calc_rd_given_intra_angle(
    3444             :             cpi, x, bsize,
    3445             :             mode_cost + cpi->intra_filter_cost[intra_filter_ctx][filter],
    3446             :             best_rd, best_angle_delta, MAX_ANGLE_DELTA, rate, rd_stats,
    3447             :             &best_angle_delta, &best_tx_size, &best_tx_type, &best_filter,
    3448             :             &best_rd, best_model_rd);
    3449             :       }
    3450             :     }
    3451             :   }
    3452             : #endif  // CONFIG_INTRA_INTERP
    3453             :   // Commit the winning configuration to the block's mode info.
    3454           0 :   mbmi->tx_size = best_tx_size;
    3455           0 :   mbmi->angle_delta[0] = best_angle_delta;
    3456             : #if CONFIG_INTRA_INTERP
    3457             :   mic->mbmi.intra_filter = best_filter;
    3458             : #endif  // CONFIG_INTRA_INTERP
    3459           0 :   mbmi->tx_type = best_tx_type;
    3460           0 :   return best_rd;
    3461             : }
    3462             : 
    3463             : // Angle histogram bin for a gradient. Indices are sign, integer part (callers clamp to 6), and fractional part in sixteenths (callers clamp to 15) of the gradient value |dx| / |dy|.
    3464             : static const uint8_t gradient_to_angle_bin[2][7][16] = {
    3465             :   {
    3466             :       { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
    3467             :       { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
    3468             :       { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    3469             :       { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    3470             :       { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    3471             :       { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
    3472             :       { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
    3473             :   },
    3474             :   {
    3475             :       { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
    3476             :       { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
    3477             :       { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
    3478             :       { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
    3479             :       { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
    3480             :       { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
    3481             :       { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
    3482             :   },
    3483             : };
    3484             : 
    3485             : /* clang-format off */
    3486             : static const uint8_t mode_to_angle_bin[INTRA_MODES] = {  // histogram bin per intra mode; the estimators below read it only for directional modes
    3487             :   0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
    3488             : #if CONFIG_ALT_INTRA
    3489             :   0,
    3490             : #endif  // CONFIG_ALT_INTRA
    3491             : };
    3492             : /* clang-format on */
    3493             : // Sets directional_mode_skip_mask[mode] = 1 for directional intra modes whose angle bin (plus adjacent bins) holds a small share of the block's squared-gradient histogram.
    3494           0 : static void angle_estimation(const uint8_t *src, int src_stride, int rows,
    3495             :                              int cols, BLOCK_SIZE bsize,
    3496             :                              uint8_t *directional_mode_skip_mask) {
    3497           0 :   memset(directional_mode_skip_mask, 0,
    3498             :          INTRA_MODES * sizeof(*directional_mode_skip_mask));
    3499             :   // Sub-8x8 blocks do not use extra directions.
    3500           0 :   if (bsize < BLOCK_8X8) return;
    3501             :   uint64_t hist[DIRECTIONAL_MODES];  // NOTE(review): table bins range over 0..7, so this presumably relies on DIRECTIONAL_MODES >= 8 - confirm
    3502           0 :   memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
    3503           0 :   src += src_stride;  // start at row 1; row 0 and column 0 serve only as the above/left neighbours
    3504             :   int r, c, dx, dy;
    3505           0 :   for (r = 1; r < rows; ++r) {
    3506           0 :     for (c = 1; c < cols; ++c) {
    3507           0 :       dx = src[c] - src[c - 1];
    3508           0 :       dy = src[c] - src[c - src_stride];
    3509             :       int index;
    3510           0 :       const int temp = dx * dx + dy * dy;  // squared gradient magnitude, used as the histogram weight
    3511           0 :       if (dy == 0) {
    3512           0 :         index = 2;  // fixed bin; also avoids the division by dy below
    3513             :       } else {
    3514           0 :         const int sn = (dx > 0) ^ (dy > 0);  // 1 when exactly one of dx, dy is positive
    3515           0 :         dx = abs(dx);
    3516           0 :         dy = abs(dy);
    3517           0 :         const int remd = (dx % dy) * 16 / dy;  // fractional part of dx / dy in sixteenths
    3518           0 :         const int quot = dx / dy;
    3519           0 :         index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
    3520             :       }
    3521           0 :       hist[index] += temp;
    3522             :     }
    3523           0 :     src += src_stride;
    3524             :   }
    3525             :   // A mode's score is its own bin counted twice plus each existing adjacent bin; bins at the ends do not wrap around.
    3526             :   int i;
    3527           0 :   uint64_t hist_sum = 0;
    3528           0 :   for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
    3529           0 :   for (i = 0; i < INTRA_MODES; ++i) {
    3530           0 :     if (av1_is_directional_mode(i, bsize)) {
    3531           0 :       const uint8_t angle_bin = mode_to_angle_bin[i];
    3532           0 :       uint64_t score = 2 * hist[angle_bin];
    3533           0 :       int weight = 2;
    3534           0 :       if (angle_bin > 0) {
    3535           0 :         score += hist[angle_bin - 1];
    3536           0 :         ++weight;
    3537             :       }
    3538           0 :       if (angle_bin < DIRECTIONAL_MODES - 1) {
    3539           0 :         score += hist[angle_bin + 1];
    3540           0 :         ++weight;
    3541             :       }
    3542           0 :       if (score * ANGLE_SKIP_THRESH < hist_sum * weight)  // cross-multiplied form of score / weight < hist_sum / ANGLE_SKIP_THRESH
    3543           0 :         directional_mode_skip_mask[i] = 1;
    3544             :     }
    3545             :   }
    3546             : }
    3547             : 
    3548             : #if CONFIG_HIGHBITDEPTH
    3549           0 : static void highbd_angle_estimation(const uint8_t *src8, int src_stride,  // same histogram test as angle_estimation(), reading 16-bit samples
    3550             :                                     int rows, int cols, BLOCK_SIZE bsize,
    3551             :                                     uint8_t *directional_mode_skip_mask) {  // out: entry set to 1 for each directional mode to skip
    3552           0 :   memset(directional_mode_skip_mask, 0,
    3553             :          INTRA_MODES * sizeof(*directional_mode_skip_mask));
    3554             :   // Sub-8x8 blocks do not use extra directions.
    3555           0 :   if (bsize < BLOCK_8X8) return;
    3556           0 :   uint16_t *src = CONVERT_TO_SHORTPTR(src8);  // src8 is not read directly; samples are accessed through this uint16_t pointer
    3557             :   uint64_t hist[DIRECTIONAL_MODES];
    3558           0 :   memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
    3559           0 :   src += src_stride;  // start at row 1; row 0 and column 0 serve only as the above/left neighbours
    3560             :   int r, c, dx, dy;
    3561           0 :   for (r = 1; r < rows; ++r) {
    3562           0 :     for (c = 1; c < cols; ++c) {
    3563           0 :       dx = src[c] - src[c - 1];
    3564           0 :       dy = src[c] - src[c - src_stride];
    3565             :       int index;
    3566           0 :       const int temp = dx * dx + dy * dy;  // squared gradient magnitude, used as the histogram weight
    3567           0 :       if (dy == 0) {
    3568           0 :         index = 2;  // fixed bin; also avoids the division by dy below
    3569             :       } else {
    3570           0 :         const int sn = (dx > 0) ^ (dy > 0);  // 1 when exactly one of dx, dy is positive
    3571           0 :         dx = abs(dx);
    3572           0 :         dy = abs(dy);
    3573           0 :         const int remd = (dx % dy) * 16 / dy;  // fractional part of dx / dy in sixteenths
    3574           0 :         const int quot = dx / dy;
    3575           0 :         index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
    3576             :       }
    3577           0 :       hist[index] += temp;
    3578             :     }
    3579           0 :     src += src_stride;
    3580             :   }
    3581             :   // A mode's score is its own bin counted twice plus each existing adjacent bin; bins at the ends do not wrap around.
    3582             :   int i;
    3583           0 :   uint64_t hist_sum = 0;
    3584           0 :   for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
    3585           0 :   for (i = 0; i < INTRA_MODES; ++i) {
    3586           0 :     if (av1_is_directional_mode(i, bsize)) {
    3587           0 :       const uint8_t angle_bin = mode_to_angle_bin[i];
    3588           0 :       uint64_t score = 2 * hist[angle_bin];
    3589           0 :       int weight = 2;
    3590           0 :       if (angle_bin > 0) {
    3591           0 :         score += hist[angle_bin - 1];
    3592           0 :         ++weight;
    3593             :       }
    3594           0 :       if (angle_bin < DIRECTIONAL_MODES - 1) {
    3595           0 :         score += hist[angle_bin + 1];
    3596           0 :         ++weight;
    3597             :       }
    3598           0 :       if (score * ANGLE_SKIP_THRESH < hist_sum * weight)  // cross-multiplied form of score / weight < hist_sum / ANGLE_SKIP_THRESH
    3599           0 :         directional_mode_skip_mask[i] = 1;
    3600             :     }
    3601             :   }
    3602             : }
    3603             : #endif  // CONFIG_HIGHBITDEPTH
    3604             : #endif  // CONFIG_EXT_INTRA
    3605             : 
    3606             : // This function is used only for intra_only frames
    3607           0 : static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
    3608             :                                       int *rate, int *rate_tokenonly,
    3609             :                                       int64_t *distortion, int *skippable,
    3610             :                                       BLOCK_SIZE bsize, int64_t best_rd) {
    3611             :   uint8_t mode_idx;
    3612           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    3613           0 :   MODE_INFO *const mic = xd->mi[0];
    3614           0 :   MB_MODE_INFO *const mbmi = &mic->mbmi;
    3615           0 :   assert(!is_inter_block(mbmi));
    3616           0 :   MB_MODE_INFO best_mbmi = *mbmi;
    3617           0 :   int64_t best_model_rd = INT64_MAX;
    3618             : #if CONFIG_EXT_INTRA
    3619           0 :   const int rows = block_size_high[bsize];
    3620           0 :   const int cols = block_size_wide[bsize];
    3621             : #if CONFIG_INTRA_INTERP
    3622             :   const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
    3623             : #endif  // CONFIG_INTRA_INTERP
    3624             :   int is_directional_mode;
    3625             :   uint8_t directional_mode_skip_mask[INTRA_MODES];
    3626           0 :   const int src_stride = x->plane[0].src.stride;
    3627           0 :   const uint8_t *src = x->plane[0].src.buf;
    3628             : #endif  // CONFIG_EXT_INTRA
    3629             : #if CONFIG_FILTER_INTRA
    3630             :   int beat_best_rd = 0;
    3631             :   uint16_t filter_intra_mode_skip_mask = (1 << FILTER_INTRA_MODES) - 1;
    3632             : #endif  // CONFIG_FILTER_INTRA
    3633             :   const int *bmode_costs;
    3634             : #if CONFIG_PALETTE
    3635           0 :   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
    3636           0 :   uint8_t *best_palette_color_map =
    3637           0 :       cpi->common.allow_screen_content_tools
    3638           0 :           ? x->palette_buffer->best_palette_color_map
    3639           0 :           : NULL;
    3640           0 :   int palette_y_mode_ctx = 0;
    3641           0 :   const int try_palette =
    3642           0 :       cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
    3643             : #endif  // CONFIG_PALETTE
    3644           0 :   const MODE_INFO *above_mi = xd->above_mi;
    3645           0 :   const MODE_INFO *left_mi = xd->left_mi;
    3646           0 :   const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, 0);
    3647           0 :   const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
    3648           0 :   const PREDICTION_MODE FINAL_MODE_SEARCH = TM_PRED + 1;
    3649             : #if CONFIG_PVQ
    3650             :   od_rollback_buffer pre_buf, post_buf;
    3651             : 
    3652             :   od_encode_checkpoint(&x->daala_enc, &pre_buf);
    3653             :   od_encode_checkpoint(&x->daala_enc, &post_buf);
    3654             : #endif  // CONFIG_PVQ
    3655           0 :   bmode_costs = cpi->y_mode_costs[A][L];
    3656             : 
    3657             : #if CONFIG_EXT_INTRA
    3658           0 :   mbmi->angle_delta[0] = 0;
    3659             : #if CONFIG_HIGHBITDEPTH
    3660           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    3661           0 :     highbd_angle_estimation(src, src_stride, rows, cols, bsize,
    3662             :                             directional_mode_skip_mask);
    3663             :   else
    3664             : #endif  // CONFIG_HIGHBITDEPTH
    3665           0 :     angle_estimation(src, src_stride, rows, cols, bsize,
    3666             :                      directional_mode_skip_mask);
    3667             : #endif  // CONFIG_EXT_INTRA
    3668             : #if CONFIG_FILTER_INTRA
    3669             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
    3670             : #endif  // CONFIG_FILTER_INTRA
    3671             : #if CONFIG_PALETTE
    3672           0 :   pmi->palette_size[0] = 0;
    3673           0 :   if (above_mi)
    3674           0 :     palette_y_mode_ctx +=
    3675           0 :         (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    3676           0 :   if (left_mi)
    3677           0 :     palette_y_mode_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    3678             : #endif  // CONFIG_PALETTE
    3679             : 
    3680           0 :   if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
    3681           0 :     x->use_default_intra_tx_type = 1;
    3682             :   else
    3683           0 :     x->use_default_intra_tx_type = 0;
    3684             : 
    3685             :   /* Y Search for intra prediction mode */
    3686           0 :   for (mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
    3687             :     RD_STATS this_rd_stats;
    3688             :     int this_rate, this_rate_tokenonly, s;
    3689             :     int64_t this_distortion, this_rd, this_model_rd;
    3690           0 :     if (mode_idx == FINAL_MODE_SEARCH) {
    3691           0 :       if (x->use_default_intra_tx_type == 0) break;
    3692           0 :       mbmi->mode = best_mbmi.mode;
    3693           0 :       x->use_default_intra_tx_type = 0;
    3694             :     } else {
    3695           0 :       mbmi->mode = mode_idx;
    3696             :     }
    3697             : #if CONFIG_PVQ
    3698             :     od_encode_rollback(&x->daala_enc, &pre_buf);
    3699             : #endif  // CONFIG_PVQ
    3700             : #if CONFIG_EXT_INTRA
    3701           0 :     mbmi->angle_delta[0] = 0;
    3702             : #endif  // CONFIG_EXT_INTRA
    3703           0 :     this_model_rd = intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode]);
    3704           0 :     if (best_model_rd != INT64_MAX &&
    3705           0 :         this_model_rd > best_model_rd + (best_model_rd >> 1))
    3706           0 :       continue;
    3707           0 :     if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
    3708             : #if CONFIG_EXT_INTRA
    3709           0 :     is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
    3710           0 :     if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
    3711           0 :     if (is_directional_mode) {
    3712           0 :       this_rd_stats.rate = INT_MAX;
    3713           0 :       rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
    3714           0 :                               bmode_costs[mbmi->mode], best_rd, &best_model_rd);
    3715             :     } else {
    3716           0 :       super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
    3717             :     }
    3718             : #else
    3719             :     super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
    3720             : #endif  // CONFIG_EXT_INTRA
    3721           0 :     this_rate_tokenonly = this_rd_stats.rate;
    3722           0 :     this_distortion = this_rd_stats.dist;
    3723           0 :     s = this_rd_stats.skip;
    3724             : 
    3725           0 :     if (this_rate_tokenonly == INT_MAX) continue;
    3726             : 
    3727           0 :     this_rate = this_rate_tokenonly + bmode_costs[mbmi->mode];
    3728             : 
    3729           0 :     if (!xd->lossless[mbmi->segment_id] && mbmi->sb_type >= BLOCK_8X8) {
    3730             :       // super_block_yrd above includes the cost of the tx_size in the
    3731             :       // tokenonly rate, but for intra blocks, tx_size is always coded
    3732             :       // (prediction granularity), so we account for it in the full rate,
    3733             :       // not the tokenonly rate.
    3734           0 :       this_rate_tokenonly -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
    3735             :     }
    3736             : #if CONFIG_PALETTE
    3737           0 :     if (try_palette && mbmi->mode == DC_PRED) {
    3738           0 :       this_rate +=
    3739           0 :           av1_cost_bit(av1_default_palette_y_mode_prob[bsize - BLOCK_8X8]
    3740             :                                                       [palette_y_mode_ctx],
    3741             :                        0);
    3742             :     }
    3743             : #endif  // CONFIG_PALETTE
    3744             : #if CONFIG_FILTER_INTRA
    3745             :     if (mbmi->mode == DC_PRED)
    3746             :       this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[0], 0);
    3747             : #endif  // CONFIG_FILTER_INTRA
    3748             : #if CONFIG_EXT_INTRA
    3749           0 :     if (is_directional_mode) {
    3750             : #if CONFIG_INTRA_INTERP
    3751             :       const int p_angle =
    3752             :           mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
    3753             :       if (av1_is_intra_filter_switchable(p_angle))
    3754             :         this_rate +=
    3755             :             cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
    3756             : #endif  // CONFIG_INTRA_INTERP
    3757           0 :       this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
    3758           0 :                                       MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
    3759             :     }
    3760             : #endif  // CONFIG_EXT_INTRA
    3761           0 :     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
    3762             : #if CONFIG_FILTER_INTRA
    3763             :     if (best_rd == INT64_MAX || this_rd - best_rd < (best_rd >> 4)) {
    3764             :       filter_intra_mode_skip_mask ^= (1 << mbmi->mode);
    3765             :     }
    3766             : #endif  // CONFIG_FILTER_INTRA
    3767             : 
    3768           0 :     if (this_rd < best_rd) {
    3769           0 :       best_mbmi = *mbmi;
    3770           0 :       best_rd = this_rd;
    3771             : #if CONFIG_FILTER_INTRA
    3772             :       beat_best_rd = 1;
    3773             : #endif  // CONFIG_FILTER_INTRA
    3774           0 :       *rate = this_rate;
    3775           0 :       *rate_tokenonly = this_rate_tokenonly;
    3776           0 :       *distortion = this_distortion;
    3777           0 :       *skippable = s;
    3778             : #if CONFIG_PVQ
    3779             :       od_encode_checkpoint(&x->daala_enc, &post_buf);
    3780             : #endif  // CONFIG_PVQ
    3781             :     }
    3782             :   }
    3783             : 
    3784             : #if CONFIG_PVQ
    3785             :   od_encode_rollback(&x->daala_enc, &post_buf);
    3786             : #endif  // CONFIG_PVQ
    3787             : 
    3788             : #if CONFIG_CFL
    3789             :   // Perform one extra txfm_rd_in_plane() call, this time with the best value so
    3790             :   // we can store reconstructed luma values
    3791             :   RD_STATS this_rd_stats;
    3792             :   x->cfl_store_y = 1;
    3793             :   txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize,
    3794             :                    mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing);
    3795             :   x->cfl_store_y = 0;
    3796             : #endif
    3797             : 
    3798             : #if CONFIG_PALETTE
    3799           0 :   if (try_palette) {
    3800           0 :     rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
    3801             :                               bmode_costs[DC_PRED], &best_mbmi,
    3802             :                               best_palette_color_map, &best_rd, &best_model_rd,
    3803             :                               rate, rate_tokenonly, distortion, skippable);
    3804             :   }
    3805             : #endif  // CONFIG_PALETTE
    3806             : 
    3807             : #if CONFIG_FILTER_INTRA
    3808             :   if (beat_best_rd) {
    3809             :     if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
    3810             :                                  skippable, bsize, bmode_costs[DC_PRED],
    3811             :                                  &best_rd, &best_model_rd,
    3812             :                                  filter_intra_mode_skip_mask)) {
    3813             :       best_mbmi = *mbmi;
    3814             :     }
    3815             :   }
    3816             : #endif  // CONFIG_FILTER_INTRA
    3817             : 
    3818           0 :   *mbmi = best_mbmi;
    3819           0 :   return best_rd;
    3820             : }
    3821             : // Accumulates transform RD stats for planes 1..MAX_MB_PLANE-1 into *rd_stats, giving up once the cost exceeds ref_best_rd.
    3822             : // Return value 0: early termination triggered, no valid rd cost available;
    3823             : //              1: rd cost values are valid.
    3824           0 : static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
    3825             :                             RD_STATS *rd_stats, BLOCK_SIZE bsize,
    3826             :                             int64_t ref_best_rd) {
    3827           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    3828           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    3829           0 :   const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
    3830             :   int plane;
    3831           0 :   int is_cost_valid = 1;
    3832           0 :   av1_init_rd_stats(rd_stats);
    3833             : // A negative budget can never be met, so the result is flagged invalid up front.
    3834           0 :   if (ref_best_rd < 0) is_cost_valid = 0;
    3835             : 
    3836             : #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
    3837           0 :   if (x->skip_chroma_rd) return is_cost_valid;  // nothing to cost; rd_stats stays as initialised above
    3838             : 
    3839           0 :   bsize = scale_chroma_bsize(bsize, xd->plane[1].subsampling_x,
    3840             :                              xd->plane[1].subsampling_y);
    3841             : #endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
    3842             : // For inter blocks, run av1_subtract_plane on each of these planes before the transform search below.
    3843             : #if !CONFIG_PVQ
    3844           0 :   if (is_inter_block(mbmi) && is_cost_valid) {
    3845           0 :     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
    3846           0 :       av1_subtract_plane(x, bsize, plane);
    3847             :   }
    3848             : #endif  // !CONFIG_PVQ
    3849             : // Cost each plane in turn with the shared uv_tx_size, merging into rd_stats and stopping at the first failure.
    3850           0 :   if (is_cost_valid) {
    3851           0 :     for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    3852             :       RD_STATS pn_rd_stats;
    3853           0 :       txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize,
    3854             :                        uv_tx_size, cpi->sf.use_fast_coef_costing);
    3855           0 :       if (pn_rd_stats.rate == INT_MAX) {  // INT_MAX rate is treated as "no valid stats for this plane"
    3856           0 :         is_cost_valid = 0;
    3857           0 :         break;
    3858             :       }
    3859           0 :       av1_merge_rd_stats(rd_stats, &pn_rd_stats);
    3860           0 :       if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >
    3861           0 :               ref_best_rd &&
    3862           0 :           RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse) > ref_best_rd) {
    3863           0 :         is_cost_valid = 0;  // both the (rate, dist) cost and the zero-rate SSE cost exceed ref_best_rd
    3864           0 :         break;
    3865             :       }
    3866             :     }
    3867             :   }
    3868             : 
    3869           0 :   if (!is_cost_valid) {
    3870             :     // reset cost value
    3871           0 :     av1_invalid_rd_stats(rd_stats);
    3872             :   }
    3873             : 
    3874           0 :   return is_cost_valid;
    3875             : }
    3876             : 
    3877             : #if CONFIG_VAR_TX
    3878             : // FIXME crop these calls. Wrapper around aom_sum_squares_2d_i16() for the tx_size_wide[tx_size] x tx_size_high[tx_size] region at diff.
    3879           0 : static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
    3880             :                                TX_SIZE tx_size) {
    3881           0 :   return aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size],
    3882             :                                 tx_size_high[tx_size]);
    3883             : }
    3884             : // Transforms, quantizes and optimizes one transform block, then adds its SSE, distortion, coefficient rate and skip flag into *rd_stats.
    3885           0 : void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
    3886             :                        int blk_row, int blk_col, int plane, int block,
    3887             :                        int plane_bsize, const ENTROPY_CONTEXT *a,
    3888             :                        const ENTROPY_CONTEXT *l, RD_STATS *rd_stats) {
    3889           0 :   const AV1_COMMON *const cm = &cpi->common;
    3890           0 :   MACROBLOCKD *xd = &x->e_mbd;
    3891           0 :   const struct macroblock_plane *const p = &x->plane[plane];
    3892           0 :   struct macroblockd_plane *const pd = &xd->plane[plane];
    3893             :   int64_t tmp;
    3894           0 :   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    3895           0 :   PLANE_TYPE plane_type = get_plane_type(plane);
    3896           0 :   TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
    3897           0 :   const SCAN_ORDER *const scan_order =
    3898           0 :       get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
    3899           0 :   BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
    3900           0 :   int bh = block_size_high[txm_bsize];
    3901           0 :   int bw = block_size_wide[txm_bsize];
    3902           0 :   int txb_h = tx_size_high_unit[tx_size];
    3903           0 :   int txb_w = tx_size_wide_unit[tx_size];
    3904             : // blk_row/blk_col are scaled by (1 << tx_size_wide_log2[0]) to index the src, dst and src_diff buffers.
    3905           0 :   int src_stride = p->src.stride;
    3906           0 :   uint8_t *src =
    3907           0 :       &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
    3908           0 :   uint8_t *dst =
    3909             :       &pd->dst
    3910           0 :            .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
    3911             : #if CONFIG_HIGHBITDEPTH
    3912             :   DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
    3913             :   uint8_t *rec_buffer;
    3914             : #else
    3915             :   DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
    3916             : #endif  // CONFIG_HIGHBITDEPTH
    3917           0 :   int max_blocks_high = block_size_high[plane_bsize];
    3918           0 :   int max_blocks_wide = block_size_wide[plane_bsize];
    3919           0 :   const int diff_stride = max_blocks_wide;
    3920           0 :   const int16_t *diff =
    3921           0 :       &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
    3922             :   int txb_coeff_cost;
    3923             : 
    3924           0 :   assert(tx_size < TX_SIZES_ALL);
    3925             : // A negative mb_to_bottom_edge / mb_to_right_edge shrinks the usable extent (NOTE(review): the >> 3 suggests 1/8-sample units -- confirm).
    3926           0 :   if (xd->mb_to_bottom_edge < 0)
    3927           0 :     max_blocks_high += xd->mb_to_bottom_edge >> (3 + pd->subsampling_y);
    3928           0 :   if (xd->mb_to_right_edge < 0)
    3929           0 :     max_blocks_wide += xd->mb_to_right_edge >> (3 + pd->subsampling_x);
    3930             : // Convert the clamped extents from samples to units of (1 << tx_size_wide_log2[0]) samples, matching blk_row/blk_col.
    3931           0 :   max_blocks_high >>= tx_size_wide_log2[0];
    3932           0 :   max_blocks_wide >>= tx_size_wide_log2[0];
    3933             : 
    3934           0 :   int coeff_ctx = get_entropy_context(tx_size, a, l);
    3935             : 
    3936           0 :   av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
    3937             :                   coeff_ctx, AV1_XFORM_QUANT_FP);
    3938             : 
    3939           0 :   av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l);
    3940             : // Copy the bw x bh region at dst into rec_buffer (stride MAX_TX_SIZE); the inverse transform below is applied to that copy.
    3941             : // TODO(any): Use av1_dist_block to compute distortion
    3942             : #if CONFIG_HIGHBITDEPTH
    3943           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    3944           0 :     rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
    3945           0 :     aom_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL,
    3946             :                              0, NULL, 0, bw, bh, xd->bd);
    3947             :   } else {
    3948           0 :     rec_buffer = (uint8_t *)rec_buffer16;
    3949           0 :     aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0,
    3950             :                       NULL, 0, bw, bh);
    3951             :   }
    3952             : #else
    3953             :   aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL,
    3954             :                     0, bw, bh);
    3955             : #endif  // CONFIG_HIGHBITDEPTH
    3956             : // SSE of src_diff: when the block extends past max_blocks_high/wide, sum only the in-bounds units one at a time (tx_size index 0).
    3957           0 :   if (blk_row + txb_h > max_blocks_high || blk_col + txb_w > max_blocks_wide) {
    3958             :     int idx, idy;
    3959           0 :     int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
    3960           0 :     int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
    3961           0 :     tmp = 0;
    3962           0 :     for (idy = 0; idy < blocks_height; ++idy) {
    3963           0 :       for (idx = 0; idx < blocks_width; ++idx) {
    3964           0 :         const int16_t *d =
    3965           0 :             diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]);
    3966           0 :         tmp += sum_squares_2d(d, diff_stride, 0);
    3967             :       }
    3968             :     }
    3969             :   } else {
    3970           0 :     tmp = sum_squares_2d(diff, diff_stride, tx_size);
    3971             :   }
    3972             : // For high-bitdepth buffers, scale the SSE down by 2^(2 * (bd - 8)) with rounding.
    3973             : #if CONFIG_HIGHBITDEPTH
    3974           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    3975           0 :     tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
    3976             : #endif  // CONFIG_HIGHBITDEPTH
    3977           0 :   rd_stats->sse += tmp * 16;  // same x16 scaling as the dist accumulation below
    3978           0 :   const int eob = p->eobs[block];
    3979             : 
    3980           0 :   av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, rec_buffer,
    3981             :                               MAX_TX_SIZE, eob);
    3982           0 :   if (eob > 0) {  // when eob == 0, tmp keeps the SSE from above, so this block's dist equals its sse
    3983           0 :     if (txb_w + blk_col > max_blocks_wide ||
    3984           0 :         txb_h + blk_row > max_blocks_high) {
    3985             :       int idx, idy;
    3986             :       unsigned int this_dist;
    3987           0 :       int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
    3988           0 :       int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
    3989           0 :       tmp = 0;
    3990           0 :       for (idy = 0; idy < blocks_height; ++idy) {
    3991           0 :         for (idx = 0; idx < blocks_width; ++idx) {
    3992           0 :           uint8_t *const s =
    3993           0 :               src + ((idy * src_stride + idx) << tx_size_wide_log2[0]);
    3994           0 :           uint8_t *const r =
    3995           0 :               rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]);
    3996           0 :           cpi->fn_ptr[0].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);  // NOTE(review): presumably .vf writes the unit's SSE to this_dist; its return value is ignored
    3997           0 :           tmp += this_dist;
    3998             :         }
    3999             :       }
    4000             :     } else {
    4001             :       uint32_t this_dist;
    4002           0 :       cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, MAX_TX_SIZE,
    4003             :                                 &this_dist);
    4004           0 :       tmp = this_dist;
    4005             :     }
    4006             :   }
    4007           0 :   rd_stats->dist += tmp * 16;
    4008           0 :   txb_coeff_cost =
    4009           0 :       av1_cost_coeffs(cpi, x, plane, block, tx_size, scan_order, a, l, 0);
    4010           0 :   rd_stats->rate += txb_coeff_cost;
    4011           0 :   rd_stats->skip &= (eob == 0);  // skip stays set only while every accumulated block has eob == 0
    4012             : 
    4013             : #if CONFIG_RD_DEBUG
    4014             :   av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
    4015             :                             txb_coeff_cost);
    4016             : #endif  // CONFIG_RD_DEBUG
    4017           0 : }
    4018             : // Recursively chooses between coding the block at (blk_row, blk_col) whole at tx_size and splitting it four ways; the cheaper option's stats go to *rd_stats, and *is_cost_valid is cleared when no valid cost exists.
    4019           0 : static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
    4020             :                             int blk_col, int plane, int block, int block32,
    4021             :                             TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize,
    4022             :                             ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
    4023             :                             TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
    4024             :                             RD_STATS *rd_stats, int64_t ref_best_rd,
    4025             :                             int *is_cost_valid, RD_STATS *rd_stats_stack) {
    4026           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    4027           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    4028           0 :   struct macroblock_plane *const p = &x->plane[plane];
    4029           0 :   struct macroblockd_plane *const pd = &xd->plane[plane];
    4030           0 :   const int tx_row = blk_row >> (1 - pd->subsampling_y);
    4031           0 :   const int tx_col = blk_col >> (1 - pd->subsampling_x);
    4032           0 :   TX_SIZE(*const inter_tx_size)
    4033             :   [MAX_MIB_SIZE] =
    4034             :       (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
    4035           0 :   const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
    4036           0 :   const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
    4037           0 :   const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    4038           0 :   int64_t this_rd = INT64_MAX;
    4039           0 :   ENTROPY_CONTEXT *pta = ta + blk_col;
    4040           0 :   ENTROPY_CONTEXT *ptl = tl + blk_row;
    4041             :   int coeff_ctx, i;
    4042           0 :   int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
    4043           0 :                                    mbmi->sb_type, tx_size);
    4044           0 :   int64_t sum_rd = INT64_MAX;
    4045           0 :   int tmp_eob = 0;
    4046             :   int zero_blk_rate;
    4047             :   RD_STATS sum_rd_stats;
    4048           0 :   const int tx_size_ctx = txsize_sqr_map[tx_size];
    4049             : 
    4050           0 :   av1_init_rd_stats(&sum_rd_stats);
    4051             : 
    4052           0 :   assert(tx_size < TX_SIZES_ALL);
    4053             : // A negative budget cannot be met: report failure without touching *rd_stats.
    4054           0 :   if (ref_best_rd < 0) {
    4055           0 :     *is_cost_valid = 0;
    4056           0 :     return;
    4057             :   }
    4058             : 
    4059           0 :   coeff_ctx = get_entropy_context(tx_size, pta, ptl);
    4060             : 
    4061           0 :   av1_init_rd_stats(rd_stats);
    4062             : // Blocks starting at or beyond max_blocks_high/wide contribute nothing; *rd_stats stays at its initial value.
    4063           0 :   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
    4064             : // Rate charged for an all-zero block: the EOB_TOKEN cost in this coefficient context.
    4065           0 :   zero_blk_rate = x->token_costs[tx_size_ctx][pd->plane_type][1][0][0]
    4066           0 :                                 [coeff_ctx][EOB_TOKEN];
    4067             : // Option 1 (only under TX_MODE_SELECT, or at TX_4X4): cost the block coded whole at tx_size.
    4068           0 :   if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    4069           0 :     inter_tx_size[0][0] = tx_size;
    4070             : // Reuse the cached 32x32 stats for non-DCT_DCT tx types when the cache entry is valid; otherwise evaluate, and cache 32x32 results.
    4071           0 :     if (tx_size == TX_32X32 && mbmi->tx_type != DCT_DCT &&
    4072           0 :         rd_stats_stack[block32].rate != INT_MAX) {
    4073           0 :       *rd_stats = rd_stats_stack[block32];
    4074           0 :       p->eobs[block] = !rd_stats->skip;
    4075           0 :       x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
    4076             :     } else {
    4077           0 :       av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
    4078             :                         plane_bsize, pta, ptl, rd_stats);
    4079           0 :       if (tx_size == TX_32X32) {
    4080           0 :         rd_stats_stack[block32] = *rd_stats;
    4081             :       }
    4082             :     }
    4083             : // Outside lossless segments, force skip when zeroing the block costs no more in RD terms, or when it is already skip.
    4084           0 :     if ((RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist) >=
    4085           0 :              RDCOST(x->rdmult, x->rddiv, zero_blk_rate, rd_stats->sse) ||
    4086           0 :          rd_stats->skip == 1) &&
    4087           0 :         !xd->lossless[mbmi->segment_id]) {
    4088             : #if CONFIG_RD_DEBUG
    4089             :       av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
    4090             :                                 zero_blk_rate - rd_stats->rate);
    4091             : #endif  // CONFIG_RD_DEBUG
    4092           0 :       rd_stats->rate = zero_blk_rate;
    4093           0 :       rd_stats->dist = rd_stats->sse;
    4094           0 :       rd_stats->skip = 1;
    4095           0 :       x->blk_skip[plane][blk_row * bw + blk_col] = 1;
    4096           0 :       p->eobs[block] = 0;
    4097             :     } else {
    4098           0 :       x->blk_skip[plane][blk_row * bw + blk_col] = 0;
    4099           0 :       rd_stats->skip = 0;
    4100             :     }
    4101             : // When a split was possible, add the cost of signalling "no split" (partition bit 0).
    4102           0 :     if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
    4103           0 :       rd_stats->rate +=
    4104           0 :           av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
    4105           0 :     this_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
    4106           0 :     tmp_eob = p->eobs[block];
    4107             :   }
    4108             : // Option 2: split into four sub_tx_size_map[tx_size] blocks and recurse, starting from the cost of the split bit (1).
    4109           0 :   if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) {
    4110           0 :     const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    4111           0 :     const int bsl = tx_size_wide_unit[sub_txs];
    4112           0 :     int sub_step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
    4113             :     RD_STATS this_rd_stats;
    4114           0 :     int this_cost_valid = 1;
    4115           0 :     int64_t tmp_rd = 0;
    4116             : 
    4117           0 :     sum_rd_stats.rate =
    4118           0 :         av1_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
    4119             : 
    4120           0 :     assert(tx_size < TX_SIZES_ALL);
    4121             : // Visit the four quadrants in raster order; quadrants starting beyond max_blocks_high/wide are skipped.
    4122           0 :     for (i = 0; i < 4 && this_cost_valid; ++i) {
    4123           0 :       int offsetr = blk_row + (i >> 1) * bsl;
    4124           0 :       int offsetc = blk_col + (i & 0x01) * bsl;
    4125             : 
    4126           0 :       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
    4127             : // Each sub-block gets the budget that remains after the quadrants already costed.
    4128           0 :       select_tx_block(cpi, x, offsetr, offsetc, plane, block, block32, sub_txs,
    4129             :                       depth + 1, plane_bsize, ta, tl, tx_above, tx_left,
    4130             :                       &this_rd_stats, ref_best_rd - tmp_rd, &this_cost_valid,
    4131             :                       rd_stats_stack);
    4132             : 
    4133           0 :       av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);
    4134             : // Stop early once the partial split cost already exceeds the no-split cost.
    4135           0 :       tmp_rd =
    4136           0 :           RDCOST(x->rdmult, x->rddiv, sum_rd_stats.rate, sum_rd_stats.dist);
    4137           0 :       if (this_rd < tmp_rd) break;
    4138           0 :       block += sub_step;
    4139             :     }
    4140           0 :     if (this_cost_valid) sum_rd = tmp_rd;  // otherwise sum_rd stays INT64_MAX and the split cannot win
    4141             :   }
    4142             : // Commit the winner: no-split updates the entropy/txfm contexts and records tx_size; split just reports the summed stats.
    4143           0 :   if (this_rd < sum_rd) {
    4144             :     int idx, idy;
    4145           0 :     for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0);
    4146           0 :     for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0);
    4147           0 :     txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
    4148             :                           tx_size);
    4149           0 :     inter_tx_size[0][0] = tx_size;
    4150           0 :     for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
    4151           0 :       for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
    4152           0 :         inter_tx_size[idy][idx] = tx_size;
    4153           0 :     mbmi->tx_size = tx_size;
    4154           0 :     if (this_rd == INT64_MAX) *is_cost_valid = 0;
    4155           0 :     x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
    4156             :   } else {
    4157           0 :     *rd_stats = sum_rd_stats;
    4158           0 :     if (sum_rd == INT64_MAX) *is_cost_valid = 0;
    4159             :   }
    4160             : }
    4161             : // Plane-0 RD with per-block transform size search: tiles the block in max_tx_size steps, runs select_tx_block on each tile and merges into *rd_stats, which is invalidated on failure.
    4162           0 : static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
    4163             :                             RD_STATS *rd_stats, BLOCK_SIZE bsize,
    4164             :                             int64_t ref_best_rd, RD_STATS *rd_stats_stack) {
    4165           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    4166           0 :   int is_cost_valid = 1;
    4167           0 :   int64_t this_rd = 0;
    4168             : // A negative budget can never be met.
    4169           0 :   if (ref_best_rd < 0) is_cost_valid = 0;
    4170             : 
    4171           0 :   av1_init_rd_stats(rd_stats);
    4172             : 
    4173           0 :   if (is_cost_valid) {
    4174           0 :     const struct macroblockd_plane *const pd = &xd->plane[0];
    4175           0 :     const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    4176           0 :     const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    4177           0 :     const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    4178           0 :     const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
    4179           0 :     const int bh = tx_size_high_unit[max_tx_size];
    4180           0 :     const int bw = tx_size_wide_unit[max_tx_size];
    4181             :     int idx, idy;
    4182           0 :     int block = 0;
    4183           0 :     int block32 = 0;
    4184           0 :     int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
    4185             :     ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
    4186             :     ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
    4187             :     TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
    4188             :     TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
    4189             : 
    4190             :     RD_STATS pn_rd_stats;
    4191           0 :     av1_init_rd_stats(&pn_rd_stats);
    4192             : // The search runs on local copies of the entropy and txfm contexts, so xd's own txfm contexts are only read here.
    4193           0 :     av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
    4194           0 :     memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
    4195           0 :     memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
    4196             : // Visit each max_tx_size tile; each call gets the budget left after the tiles already costed.
    4197           0 :     for (idy = 0; idy < mi_height; idy += bh) {
    4198           0 :       for (idx = 0; idx < mi_width; idx += bw) {
    4199           0 :         select_tx_block(cpi, x, idy, idx, 0, block, block32, max_tx_size,
    4200             :                         mi_height != mi_width, plane_bsize, ctxa, ctxl,  // starting depth: 1 when the unit counts differ, else 0
    4201             :                         tx_above, tx_left, &pn_rd_stats, ref_best_rd - this_rd,
    4202             :                         &is_cost_valid, rd_stats_stack);
    4203           0 :         av1_merge_rd_stats(rd_stats, &pn_rd_stats);
    4204           0 :         this_rd += AOMMIN(
    4205             :             RDCOST(x->rdmult, x->rddiv, pn_rd_stats.rate, pn_rd_stats.dist),
    4206             :             RDCOST(x->rdmult, x->rddiv, 0, pn_rd_stats.sse));
    4207           0 :         block += step;
    4208           0 :         ++block32;
    4209             :       }
    4210             :     }
    4211             :   }
    4212             : // Final check on the merged stats: invalid if even the cheaper of the (rate, dist) and (0, sse) costs exceeds ref_best_rd.
    4213           0 :   this_rd = AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
    4214             :                    RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
    4215           0 :   if (this_rd > ref_best_rd) is_cost_valid = 0;
    4216             : 
    4217           0 :   if (!is_cost_valid) {
    4218             :     // reset cost value
    4219           0 :     av1_invalid_rd_stats(rd_stats);
    4220             :   }
    4221           0 : }
    4222             : // Runs inter_block_yrd with mbmi->tx_type fixed to tx_type, adds the tx-type signalling rate, and returns the RD cost including the skip flag (INT64_MAX when no valid stats were produced).
    4223           0 : static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
    4224             :                                        RD_STATS *rd_stats, BLOCK_SIZE bsize,
    4225             :                                        int64_t ref_best_rd, TX_TYPE tx_type,
    4226             :                                        RD_STATS *rd_stats_stack) {
    4227           0 :   const AV1_COMMON *const cm = &cpi->common;
    4228           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    4229           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    4230           0 :   const int is_inter = is_inter_block(mbmi);
    4231           0 :   aom_prob skip_prob = av1_get_skip_prob(cm, xd);
    4232           0 :   int s0 = av1_cost_bit(skip_prob, 0);  // cost of coding the skip flag as 0
    4233           0 :   int s1 = av1_cost_bit(skip_prob, 1);  // cost of coding the skip flag as 1
    4234             :   int64_t rd;
    4235             :   int row, col;
    4236           0 :   const int max_blocks_high = max_block_high(xd, bsize, 0);
    4237           0 :   const int max_blocks_wide = max_block_wide(xd, bsize, 0);
    4238             : 
    4239           0 :   mbmi->tx_type = tx_type;
    4240           0 :   inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, rd_stats_stack);
    4241           0 :   mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]);
    4242             : 
    4243           0 :   if (rd_stats->rate == INT_MAX) return INT64_MAX;
    4244             : // Reduce min_tx_size to the smallest value found over the visited inter_tx_size entries.
    4245           0 :   for (row = 0; row < max_blocks_high / 2; ++row)
    4246           0 :     for (col = 0; col < max_blocks_wide / 2; ++col)
    4247           0 :       mbmi->min_tx_size = AOMMIN(
    4248             :           mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col]));
    4249             : // Charge the rate of signalling tx_type; nothing is added for lossless segments.
    4250             : #if CONFIG_EXT_TX
    4251           0 :   if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
    4252           0 :                        cm->reduced_tx_set_used) > 1 &&
    4253           0 :       !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    4254           0 :     const int ext_tx_set = get_ext_tx_set(mbmi->min_tx_size, bsize, is_inter,
    4255             :                                           cm->reduced_tx_set_used);
    4256           0 :     if (is_inter) {
    4257           0 :       if (ext_tx_set > 0)
    4258           0 :         rd_stats->rate +=
    4259             :             cpi->inter_tx_type_costs[ext_tx_set]
    4260           0 :                                     [txsize_sqr_map[mbmi->min_tx_size]]
    4261           0 :                                     [mbmi->tx_type];
    4262             :     } else {
    4263           0 :       if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
    4264           0 :         rd_stats->rate +=
    4265           0 :             cpi->intra_tx_type_costs[ext_tx_set][mbmi->min_tx_size][mbmi->mode]
    4266           0 :                                     [mbmi->tx_type];
    4267             :     }
    4268             :   }
    4269             : #else   // CONFIG_EXT_TX
    4270             :   if (mbmi->min_tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id])
    4271             :     rd_stats->rate +=
    4272             :         cpi->inter_tx_type_costs[mbmi->min_tx_size][mbmi->tx_type];
    4273             : #endif  // CONFIG_EXT_TX
    4274             : // A skipped block pays only s1 with the SSE as distortion; otherwise it pays its rate plus s0 with the measured distortion.
    4275           0 :   if (rd_stats->skip)
    4276           0 :     rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
    4277             :   else
    4278           0 :     rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate + s0, rd_stats->dist);
    4279             : // A non-skipped, non-lossless inter block may still be cheaper as skip; return the lower of the two costs.
    4280           0 :   if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
    4281           0 :       !(rd_stats->skip))
    4282           0 :     rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
    4283             : 
    4284           0 :   return rd;
    4285             : }
    4286             : 
    4287           0 : static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
    4288             :                                RD_STATS *rd_stats, BLOCK_SIZE bsize,
    4289             :                                int64_t ref_best_rd) {
    4290           0 :   const AV1_COMMON *cm = &cpi->common;
    4291           0 :   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
    4292           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    4293           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    4294           0 :   int64_t rd = INT64_MAX;
    4295           0 :   int64_t best_rd = INT64_MAX;
    4296           0 :   TX_TYPE tx_type, best_tx_type = DCT_DCT;
    4297           0 :   const int is_inter = is_inter_block(mbmi);
    4298             :   TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
    4299           0 :   TX_SIZE best_tx = max_txsize_lookup[bsize];
    4300           0 :   TX_SIZE best_min_tx_size = TX_SIZES_ALL;
    4301             :   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
    4302           0 :   const int n4 = bsize_to_num_blk(bsize);
    4303             :   int idx, idy;
    4304           0 :   int prune = 0;
    4305           0 :   const int count32 =
    4306           0 :       1 << (2 * (cm->mib_size_log2 - mi_width_log2_lookup[BLOCK_32X32]));
    4307             : #if CONFIG_EXT_PARTITION
    4308             :   RD_STATS rd_stats_stack[16];
    4309             : #else
    4310             :   RD_STATS rd_stats_stack[4];
    4311             : #endif  // CONFIG_EXT_PARTITION
    4312             : #if CONFIG_EXT_TX
    4313           0 :   const int ext_tx_set =
    4314           0 :       get_ext_tx_set(max_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
    4315             : #endif  // CONFIG_EXT_TX
    4316             : 
    4317           0 :   if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
    4318             : #if CONFIG_EXT_TX
    4319           0 :     prune = prune_tx_types(cpi, bsize, x, xd, ext_tx_set);
    4320             : #else
    4321             :     prune = prune_tx_types(cpi, bsize, x, xd, 0);
    4322             : #endif  // CONFIG_EXT_TX
    4323             : 
    4324           0 :   av1_invalid_rd_stats(rd_stats);
    4325             : 
    4326           0 :   for (idx = 0; idx < count32; ++idx)
    4327           0 :     av1_invalid_rd_stats(&rd_stats_stack[idx]);
    4328             : 
    4329           0 :   for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
    4330             :     RD_STATS this_rd_stats;
    4331           0 :     av1_init_rd_stats(&this_rd_stats);
    4332             : #if CONFIG_EXT_TX
    4333           0 :     if (is_inter) {
    4334           0 :       if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
    4335           0 :       if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
    4336           0 :         if (!do_tx_type_search(tx_type, prune)) continue;
    4337             :       }
    4338             :     } else {
    4339             :       if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
    4340             :         if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
    4341             :       }
    4342           0 :       if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
    4343             :     }
    4344             : #else   // CONFIG_EXT_TX
    4345             :     if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
    4346             :         !do_tx_type_search(tx_type, prune))
    4347             :       continue;
    4348             : #endif  // CONFIG_EXT_TX
    4349           0 :     if (is_inter && x->use_default_inter_tx_type &&
    4350           0 :         tx_type != get_default_tx_type(0, xd, 0, max_tx_size))
    4351           0 :       continue;
    4352             : 
    4353           0 :     if (xd->lossless[mbmi->segment_id])
    4354           0 :       if (tx_type != DCT_DCT) continue;
    4355             : 
    4356           0 :     rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd,
    4357             :                                  tx_type, rd_stats_stack);
    4358             : 
    4359           0 :     if (rd < best_rd) {
    4360           0 :       best_rd = rd;
    4361           0 :       *rd_stats = this_rd_stats;
    4362           0 :       best_tx_type = mbmi->tx_type;
    4363           0 :       best_tx = mbmi->tx_size;
    4364           0 :       best_min_tx_size = mbmi->min_tx_size;
    4365           0 :       memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
    4366           0 :       for (idy = 0; idy < xd->n8_h; ++idy)
    4367           0 :         for (idx = 0; idx < xd->n8_w; ++idx)
    4368           0 :           best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
    4369             :     }
    4370             :   }
    4371             : 
    4372           0 :   mbmi->tx_type = best_tx_type;
    4373           0 :   for (idy = 0; idy < xd->n8_h; ++idy)
    4374           0 :     for (idx = 0; idx < xd->n8_w; ++idx)
    4375           0 :       mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
    4376           0 :   mbmi->tx_size = best_tx;
    4377           0 :   mbmi->min_tx_size = best_min_tx_size;
    4378           0 :   memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
    4379           0 : }
    4380             : 
    4381           0 : static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
    4382             :                         int blk_col, int plane, int block, TX_SIZE tx_size,
    4383             :                         BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
    4384             :                         ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats) {
    4385           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    4386           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    4387           0 :   struct macroblock_plane *const p = &x->plane[plane];
    4388           0 :   struct macroblockd_plane *const pd = &xd->plane[plane];
    4389           0 :   BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
    4390           0 :   const int tx_row = blk_row >> (1 - pd->subsampling_y);
    4391           0 :   const int tx_col = blk_col >> (1 - pd->subsampling_x);
    4392             :   TX_SIZE plane_tx_size;
    4393           0 :   const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
    4394           0 :   const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
    4395             : 
    4396           0 :   assert(tx_size < TX_SIZES_ALL);
    4397             : 
    4398           0 :   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
    4399             : 
    4400           0 :   plane_tx_size =
    4401           0 :       plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
    4402           0 :             : mbmi->inter_tx_size[tx_row][tx_col];
    4403             : 
    4404           0 :   if (tx_size == plane_tx_size) {
    4405             :     int i;
    4406           0 :     ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    4407           0 :     ENTROPY_CONTEXT *tl = left_ctx + blk_row;
    4408           0 :     av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
    4409             :                       plane_bsize, ta, tl, rd_stats);
    4410             : 
    4411           0 :     for (i = 0; i < tx_size_wide_unit[tx_size]; ++i)
    4412           0 :       ta[i] = !(p->eobs[block] == 0);
    4413           0 :     for (i = 0; i < tx_size_high_unit[tx_size]; ++i)
    4414           0 :       tl[i] = !(p->eobs[block] == 0);
    4415             :   } else {
    4416           0 :     const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
    4417           0 :     const int bsl = tx_size_wide_unit[sub_txs];
    4418           0 :     int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs];
    4419             :     int i;
    4420             : 
    4421           0 :     assert(bsl > 0);
    4422             : 
    4423           0 :     for (i = 0; i < 4; ++i) {
    4424           0 :       int offsetr = blk_row + (i >> 1) * bsl;
    4425           0 :       int offsetc = blk_col + (i & 0x01) * bsl;
    4426             : 
    4427           0 :       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
    4428             : 
    4429           0 :       tx_block_rd(cpi, x, offsetr, offsetc, plane, block, sub_txs, plane_bsize,
    4430             :                   above_ctx, left_ctx, rd_stats);
    4431           0 :       block += step;
    4432             :     }
    4433             :   }
    4434             : }
    4435             : 
    4436             : // Return value 0: early termination triggered, no valid rd cost available;
    4437             : //              1: rd cost values are valid.
    4438           0 : static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
    4439             :                             RD_STATS *rd_stats, BLOCK_SIZE bsize,
    4440             :                             int64_t ref_best_rd) {
    4441           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    4442           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    4443             :   int plane;
    4444           0 :   int is_cost_valid = 1;
    4445             :   int64_t this_rd;
    4446             : 
    4447           0 :   if (ref_best_rd < 0) is_cost_valid = 0;
    4448             : 
    4449           0 :   av1_init_rd_stats(rd_stats);
    4450             : 
    4451             : #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
    4452           0 :   if (x->skip_chroma_rd) return is_cost_valid;
    4453           0 :   bsize = scale_chroma_bsize(mbmi->sb_type, xd->plane[1].subsampling_x,
    4454             :                              xd->plane[1].subsampling_y);
    4455             : #endif  // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
    4456             : 
    4457             : #if CONFIG_EXT_TX && CONFIG_RECT_TX
    4458           0 :   if (is_rect_tx(mbmi->tx_size)) {
    4459           0 :     return super_block_uvrd(cpi, x, rd_stats, bsize, ref_best_rd);
    4460             :   }
    4461             : #endif  // CONFIG_EXT_TX && CONFIG_RECT_TX
    4462             : 
    4463           0 :   if (is_inter_block(mbmi) && is_cost_valid) {
    4464           0 :     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
    4465           0 :       av1_subtract_plane(x, bsize, plane);
    4466             :   }
    4467             : 
    4468           0 :   for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
    4469           0 :     const struct macroblockd_plane *const pd = &xd->plane[plane];
    4470           0 :     const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    4471           0 :     const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    4472           0 :     const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    4473           0 :     const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
    4474           0 :     const int bh = tx_size_high_unit[max_tx_size];
    4475           0 :     const int bw = tx_size_wide_unit[max_tx_size];
    4476             :     int idx, idy;
    4477           0 :     int block = 0;
    4478           0 :     const int step = bh * bw;
    4479             :     ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
    4480             :     ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
    4481             :     RD_STATS pn_rd_stats;
    4482           0 :     av1_init_rd_stats(&pn_rd_stats);
    4483             : 
    4484           0 :     av1_get_entropy_contexts(bsize, 0, pd, ta, tl);
    4485             : 
    4486           0 :     for (idy = 0; idy < mi_height; idy += bh) {
    4487           0 :       for (idx = 0; idx < mi_width; idx += bw) {
    4488           0 :         tx_block_rd(cpi, x, idy, idx, plane, block, max_tx_size, plane_bsize,
    4489             :                     ta, tl, &pn_rd_stats);
    4490           0 :         block += step;
    4491             :       }
    4492             :     }
    4493             : 
    4494           0 :     if (pn_rd_stats.rate == INT_MAX) {
    4495           0 :       is_cost_valid = 0;
    4496           0 :       break;
    4497             :     }
    4498             : 
    4499           0 :     av1_merge_rd_stats(rd_stats, &pn_rd_stats);
    4500             : 
    4501           0 :     this_rd =
    4502           0 :         AOMMIN(RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist),
    4503             :                RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
    4504             : 
    4505           0 :     if (this_rd > ref_best_rd) {
    4506           0 :       is_cost_valid = 0;
    4507           0 :       break;
    4508             :     }
    4509             :   }
    4510             : 
    4511           0 :   if (!is_cost_valid) {
    4512             :     // reset cost value
    4513           0 :     av1_invalid_rd_stats(rd_stats);
    4514             :   }
    4515             : 
    4516           0 :   return is_cost_valid;
    4517             : }
    4518             : #endif  // CONFIG_VAR_TX
    4519             : 
    4520             : #if CONFIG_PALETTE
    4521           0 : static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
    4522             :                                        int dc_mode_cost,
    4523             :                                        uint8_t *best_palette_color_map,
    4524             :                                        MB_MODE_INFO *const best_mbmi,
    4525             :                                        int64_t *best_rd, int *rate,
    4526             :                                        int *rate_tokenonly, int64_t *distortion,
    4527             :                                        int *skippable) {
    4528           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    4529           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    4530           0 :   assert(!is_inter_block(mbmi));
    4531           0 :   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
    4532           0 :   const BLOCK_SIZE bsize = mbmi->sb_type;
    4533             :   int this_rate;
    4534             :   int64_t this_rd;
    4535             :   int colors_u, colors_v, colors;
    4536           0 :   const int src_stride = x->plane[1].src.stride;
    4537           0 :   const uint8_t *const src_u = x->plane[1].src.buf;
    4538           0 :   const uint8_t *const src_v = x->plane[2].src.buf;
    4539           0 :   uint8_t *const color_map = xd->plane[1].color_index_map;
    4540             :   RD_STATS tokenonly_rd_stats;
    4541             :   int plane_block_width, plane_block_height, rows, cols;
    4542           0 :   av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
    4543             :                            &plane_block_height, &rows, &cols);
    4544           0 :   if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return;
    4545             : 
    4546           0 :   mbmi->uv_mode = DC_PRED;
    4547             : #if CONFIG_FILTER_INTRA
    4548             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
    4549             : #endif  // CONFIG_FILTER_INTRA
    4550             : 
    4551             : #if CONFIG_HIGHBITDEPTH
    4552           0 :   if (cpi->common.use_highbitdepth) {
    4553           0 :     colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
    4554           0 :                                        cpi->common.bit_depth);
    4555           0 :     colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
    4556           0 :                                        cpi->common.bit_depth);
    4557             :   } else {
    4558             : #endif  // CONFIG_HIGHBITDEPTH
    4559           0 :     colors_u = av1_count_colors(src_u, src_stride, rows, cols);
    4560           0 :     colors_v = av1_count_colors(src_v, src_stride, rows, cols);
    4561             : #if CONFIG_HIGHBITDEPTH
    4562             :   }
    4563             : #endif  // CONFIG_HIGHBITDEPTH
    4564             : 
    4565             : #if CONFIG_PALETTE_DELTA_ENCODING
    4566             :   const MODE_INFO *above_mi = xd->above_mi;
    4567             :   const MODE_INFO *left_mi = xd->left_mi;
    4568             :   uint16_t color_cache[2 * PALETTE_MAX_SIZE];
    4569             :   const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache);
    4570             : #endif  // CONFIG_PALETTE_DELTA_ENCODING
    4571             : 
    4572           0 :   colors = colors_u > colors_v ? colors_u : colors_v;
    4573           0 :   if (colors > 1 && colors <= 64) {
    4574             :     int r, c, n, i, j;
    4575           0 :     const int max_itr = 50;
    4576             :     uint8_t color_order[PALETTE_MAX_SIZE];
    4577             :     float lb_u, ub_u, val_u;
    4578             :     float lb_v, ub_v, val_v;
    4579           0 :     float *const data = x->palette_buffer->kmeans_data_buf;
    4580             :     float centroids[2 * PALETTE_MAX_SIZE];
    4581             : 
    4582             : #if CONFIG_HIGHBITDEPTH
    4583           0 :     uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
    4584           0 :     uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
    4585           0 :     if (cpi->common.use_highbitdepth) {
    4586           0 :       lb_u = src_u16[0];
    4587           0 :       ub_u = src_u16[0];
    4588           0 :       lb_v = src_v16[0];
    4589           0 :       ub_v = src_v16[0];
    4590             :     } else {
    4591             : #endif  // CONFIG_HIGHBITDEPTH
    4592           0 :       lb_u = src_u[0];
    4593           0 :       ub_u = src_u[0];
    4594           0 :       lb_v = src_v[0];
    4595           0 :       ub_v = src_v[0];
    4596             : #if CONFIG_HIGHBITDEPTH
    4597             :     }
    4598             : #endif  // CONFIG_HIGHBITDEPTH
    4599             : 
    4600           0 :     for (r = 0; r < rows; ++r) {
    4601           0 :       for (c = 0; c < cols; ++c) {
    4602             : #if CONFIG_HIGHBITDEPTH
    4603           0 :         if (cpi->common.use_highbitdepth) {
    4604           0 :           val_u = src_u16[r * src_stride + c];
    4605           0 :           val_v = src_v16[r * src_stride + c];
    4606           0 :           data[(r * cols + c) * 2] = val_u;
    4607           0 :           data[(r * cols + c) * 2 + 1] = val_v;
    4608             :         } else {
    4609             : #endif  // CONFIG_HIGHBITDEPTH
    4610           0 :           val_u = src_u[r * src_stride + c];
    4611           0 :           val_v = src_v[r * src_stride + c];
    4612           0 :           data[(r * cols + c) * 2] = val_u;
    4613           0 :           data[(r * cols + c) * 2 + 1] = val_v;
    4614             : #if CONFIG_HIGHBITDEPTH
    4615             :         }
    4616             : #endif  // CONFIG_HIGHBITDEPTH
    4617           0 :         if (val_u < lb_u)
    4618           0 :           lb_u = val_u;
    4619           0 :         else if (val_u > ub_u)
    4620           0 :           ub_u = val_u;
    4621           0 :         if (val_v < lb_v)
    4622           0 :           lb_v = val_v;
    4623           0 :         else if (val_v > ub_v)
    4624           0 :           ub_v = val_v;
    4625             :       }
    4626             :     }
    4627             : 
    4628           0 :     for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
    4629           0 :          --n) {
    4630           0 :       for (i = 0; i < n; ++i) {
    4631           0 :         centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
    4632           0 :         centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
    4633             :       }
    4634           0 :       av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
    4635             : #if CONFIG_PALETTE_DELTA_ENCODING
    4636             :       optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
    4637             :       // Sort the U channel colors in ascending order.
    4638             :       for (i = 0; i < 2 * (n - 1); i += 2) {
    4639             :         int min_idx = i;
    4640             :         float min_val = centroids[i];
    4641             :         for (j = i + 2; j < 2 * n; j += 2)
    4642             :           if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
    4643             :         if (min_idx != i) {
    4644             :           float temp_u = centroids[i], temp_v = centroids[i + 1];
    4645             :           centroids[i] = centroids[min_idx];
    4646             :           centroids[i + 1] = centroids[min_idx + 1];
    4647             :           centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
    4648             :         }
    4649             :       }
    4650             :       av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
    4651             : #endif  // CONFIG_PALETTE_DELTA_ENCODING
    4652           0 :       extend_palette_color_map(color_map, cols, rows, plane_block_width,
    4653             :                                plane_block_height);
    4654           0 :       pmi->palette_size[1] = n;
    4655           0 :       for (i = 1; i < 3; ++i) {
    4656           0 :         for (j = 0; j < n; ++j) {
    4657             : #if CONFIG_HIGHBITDEPTH
    4658           0 :           if (cpi->common.use_highbitdepth)
    4659           0 :             pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
    4660           0 :                 (int)centroids[j * 2 + i - 1], cpi->common.bit_depth);
    4661             :           else
    4662             : #endif  // CONFIG_HIGHBITDEPTH
    4663           0 :             pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
    4664           0 :                 clip_pixel((int)centroids[j * 2 + i - 1]);
    4665             :         }
    4666             :       }
    4667             : 
    4668           0 :       super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
    4669           0 :       if (tokenonly_rd_stats.rate == INT_MAX) continue;
    4670           0 :       this_rate =
    4671           0 :           tokenonly_rd_stats.rate + dc_mode_cost +
    4672           0 :           cpi->palette_uv_size_cost[bsize - BLOCK_8X8][n - PALETTE_MIN_SIZE] +
    4673           0 :           write_uniform_cost(n, color_map[0]) +
    4674           0 :           av1_cost_bit(
    4675             :               av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 1);
    4676           0 :       this_rate += av1_palette_color_cost_uv(pmi,
    4677             : #if CONFIG_PALETTE_DELTA_ENCODING
    4678             :                                              color_cache, n_cache,
    4679             : #endif  // CONFIG_PALETTE_DELTA_ENCODING
    4680           0 :                                              cpi->common.bit_depth);
    4681           0 :       for (i = 0; i < rows; ++i) {
    4682           0 :         for (j = (i == 0 ? 1 : 0); j < cols; ++j) {
    4683             :           int color_idx;
    4684           0 :           const int color_ctx = av1_get_palette_color_index_context(
    4685             :               color_map, plane_block_width, i, j, n, color_order, &color_idx);
    4686           0 :           assert(color_idx >= 0 && color_idx < n);
    4687           0 :           this_rate += cpi->palette_uv_color_cost[n - PALETTE_MIN_SIZE]
    4688           0 :                                                  [color_ctx][color_idx];
    4689             :         }
    4690             :       }
    4691             : 
    4692           0 :       this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
    4693           0 :       if (this_rd < *best_rd) {
    4694           0 :         *best_rd = this_rd;
    4695           0 :         *best_mbmi = *mbmi;
    4696           0 :         memcpy(best_palette_color_map, color_map,
    4697           0 :                plane_block_width * plane_block_height *
    4698             :                    sizeof(best_palette_color_map[0]));
    4699           0 :         *rate = this_rate;
    4700           0 :         *distortion = tokenonly_rd_stats.dist;
    4701           0 :         *rate_tokenonly = tokenonly_rd_stats.rate;
    4702           0 :         *skippable = tokenonly_rd_stats.skip;
    4703             :       }
    4704             :     }
    4705             :   }
    4706           0 :   if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
    4707           0 :     memcpy(color_map, best_palette_color_map,
    4708           0 :            rows * cols * sizeof(best_palette_color_map[0]));
    4709             :   }
    4710             : }
    4711             : #endif  // CONFIG_PALETTE
    4712             : 
    4713             : #if CONFIG_FILTER_INTRA
    4714             : // Return 1 if a filter intra mode is selected; return 0 otherwise.
    4715             : static int rd_pick_filter_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
    4716             :                                      int *rate, int *rate_tokenonly,
    4717             :                                      int64_t *distortion, int *skippable,
    4718             :                                      BLOCK_SIZE bsize, int64_t *best_rd) {
    4719             :   MACROBLOCKD *const xd = &x->e_mbd;
    4720             :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    4721             :   int filter_intra_selected_flag = 0;
    4722             :   int this_rate;
    4723             :   int64_t this_rd;
    4724             :   FILTER_INTRA_MODE mode;
    4725             :   FILTER_INTRA_MODE_INFO filter_intra_mode_info;
    4726             :   RD_STATS tokenonly_rd_stats;
    4727             : 
    4728             :   av1_zero(filter_intra_mode_info);
    4729             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 1;
    4730             :   mbmi->uv_mode = DC_PRED;
    4731             : #if CONFIG_PALETTE
    4732             :   mbmi->palette_mode_info.palette_size[1] = 0;
    4733             : #endif  // CONFIG_PALETTE
    4734             : 
    4735             :   for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
    4736             :     mbmi->filter_intra_mode_info.filter_intra_mode[1] = mode;
    4737             :     if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd))
    4738             :       continue;
    4739             : 
    4740             :     this_rate = tokenonly_rd_stats.rate +
    4741             :                 av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 1) +
    4742             :                 cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode] +
    4743             :                 write_uniform_cost(FILTER_INTRA_MODES, mode);
    4744             :     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
    4745             :     if (this_rd < *best_rd) {
    4746             :       *best_rd = this_rd;
    4747             :       *rate = this_rate;
    4748             :       *rate_tokenonly = tokenonly_rd_stats.rate;
    4749             :       *distortion = tokenonly_rd_stats.dist;
    4750             :       *skippable = tokenonly_rd_stats.skip;
    4751             :       filter_intra_mode_info = mbmi->filter_intra_mode_info;
    4752             :       filter_intra_selected_flag = 1;
    4753             :     }
    4754             :   }
    4755             : 
    4756             :   if (filter_intra_selected_flag) {
    4757             :     mbmi->uv_mode = DC_PRED;
    4758             :     mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
    4759             :         filter_intra_mode_info.use_filter_intra_mode[1];
    4760             :     mbmi->filter_intra_mode_info.filter_intra_mode[1] =
    4761             :         filter_intra_mode_info.filter_intra_mode[1];
    4762             :     return 1;
    4763             :   } else {
    4764             :     return 0;
    4765             :   }
    4766             : }
    4767             : #endif  // CONFIG_FILTER_INTRA
    4768             : 
    4769             : #if CONFIG_EXT_INTRA
    4770             : // Run RD calculation with given chroma intra prediction angle, and return
    4771             : // the RD cost. Update the best mode info if the RD cost is the best so far.
    4772           0 : static int64_t pick_intra_angle_routine_sbuv(
    4773             :     const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    4774             :     int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
    4775             :     int *best_angle_delta, int64_t *best_rd) {
    4776           0 :   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
    4777           0 :   assert(!is_inter_block(mbmi));
    4778             :   int this_rate;
    4779             :   int64_t this_rd;
    4780             :   RD_STATS tokenonly_rd_stats;
    4781             : 
    4782           0 :   if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
    4783           0 :     return INT64_MAX;
    4784           0 :   this_rate = tokenonly_rd_stats.rate + rate_overhead;
    4785           0 :   this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
    4786           0 :   if (this_rd < *best_rd) {
    4787           0 :     *best_rd = this_rd;
    4788           0 :     *best_angle_delta = mbmi->angle_delta[1];
    4789           0 :     *rate = this_rate;
    4790           0 :     rd_stats->rate = tokenonly_rd_stats.rate;
    4791           0 :     rd_stats->dist = tokenonly_rd_stats.dist;
    4792           0 :     rd_stats->skip = tokenonly_rd_stats.skip;
    4793             :   }
    4794           0 :   return this_rd;
    4795             : }
    4796             : 
    4797             : // With given chroma directional intra prediction mode, pick the best angle
    4798             : // delta. Return true if an RD cost smaller than the input one is found.
    4799           0 : static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
    4800             :                                     BLOCK_SIZE bsize, int rate_overhead,
    4801             :                                     int64_t best_rd, int *rate,
    4802             :                                     RD_STATS *rd_stats) {
    4803           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    4804           0 :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    4805           0 :   assert(!is_inter_block(mbmi));
    4806           0 :   int i, angle_delta, best_angle_delta = 0;
    4807             :   int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
    4808             : 
    4809           0 :   rd_stats->rate = INT_MAX;
    4810           0 :   rd_stats->skip = 0;
    4811           0 :   rd_stats->dist = INT64_MAX;
    4812           0 :   for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
    4813             : 
    4814           0 :   for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    4815           0 :     for (i = 0; i < 2; ++i) {
    4816           0 :       best_rd_in = (best_rd == INT64_MAX)
    4817             :                        ? INT64_MAX
    4818           0 :                        : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
    4819           0 :       mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
    4820           0 :       this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
    4821             :                                               best_rd_in, rate, rd_stats,
    4822             :                                               &best_angle_delta, &best_rd);
    4823           0 :       rd_cost[2 * angle_delta + i] = this_rd;
    4824           0 :       if (angle_delta == 0) {
    4825           0 :         if (this_rd == INT64_MAX) return 0;
    4826           0 :         rd_cost[1] = this_rd;
    4827           0 :         break;
    4828             :       }
    4829             :     }
    4830             :   }
    4831             : 
    4832           0 :   assert(best_rd != INT64_MAX);
    4833           0 :   for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    4834             :     int64_t rd_thresh;
    4835           0 :     for (i = 0; i < 2; ++i) {
    4836           0 :       int skip_search = 0;
    4837           0 :       rd_thresh = best_rd + (best_rd >> 5);
    4838           0 :       if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
    4839           0 :           rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
    4840           0 :         skip_search = 1;
    4841           0 :       if (!skip_search) {
    4842           0 :         mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
    4843           0 :         pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
    4844             :                                       rate, rd_stats, &best_angle_delta,
    4845             :                                       &best_rd);
    4846             :       }
    4847             :     }
    4848             :   }
    4849             : 
    4850           0 :   mbmi->angle_delta[1] = best_angle_delta;
    4851           0 :   return rd_stats->rate != INT_MAX;
    4852             : }
    4853             : #endif  // CONFIG_EXT_INTRA
    4854             : 
    4855           0 : static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {  // Resets the chroma mode fields of mbmi before a chroma mode search.
    4856           0 :   mbmi->uv_mode = DC_PRED;
    4857             : #if CONFIG_PALETTE
    4858           0 :   mbmi->palette_mode_info.palette_size[1] = 0;  // index 1 is presumably the chroma entry — TODO confirm
    4859             : #endif  // CONFIG_PALETTE
    4860             : #if CONFIG_FILTER_INTRA
    4861             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;  // same index-1 entry cleared for filter-intra
    4862             : #endif  // CONFIG_FILTER_INTRA
    4863           0 : }
    4864             : 
    4865           0 : static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,  // Tries chroma intra modes DC_PRED..TM_PRED and returns the best RD cost.
    4866             :                                        int *rate, int *rate_tokenonly,
    4867             :                                        int64_t *distortion, int *skippable,  // the four outputs are written in the mode loop only when a mode improves best_rd
    4868             :                                        BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
    4869           0 :   MACROBLOCKD *xd = &x->e_mbd;
    4870           0 :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    4871           0 :   assert(!is_inter_block(mbmi));
    4872           0 :   MB_MODE_INFO best_mbmi = *mbmi;  // snapshot of the best candidate; copied back into *mbmi at the end
    4873             :   PREDICTION_MODE mode;
    4874           0 :   int64_t best_rd = INT64_MAX, this_rd;
    4875             :   int this_rate;
    4876             :   RD_STATS tokenonly_rd_stats;
    4877             : #if CONFIG_PVQ
    4878             :   od_rollback_buffer buf;
    4879             :   od_encode_checkpoint(&x->daala_enc, &buf);
    4880             : #endif  // CONFIG_PVQ
    4881             : #if CONFIG_PALETTE
    4882           0 :   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
    4883           0 :   uint8_t *best_palette_color_map = NULL;
    4884             : #endif  // CONFIG_PALETTE
    4885             : 
    4886           0 :   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    4887             : #if CONFIG_EXT_INTRA
    4888           0 :     const int is_directional_mode =
    4889           0 :         av1_is_directional_mode(mode, mbmi->sb_type);
    4890             : #endif  // CONFIG_EXT_INTRA
    4891           0 :     if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &  // skip modes whose bit is clear in the
    4892             :           (1 << mode)))                                                 // speed-feature mask for this transform size
    4893           0 :       continue;
    4894             : 
    4895           0 :     mbmi->uv_mode = mode;
    4896             : #if CONFIG_EXT_INTRA
    4897           0 :     mbmi->angle_delta[1] = 0;
    4898           0 :     if (is_directional_mode) {
    4899           0 :       const int rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode] +
    4900           0 :                                 write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);  // overhead uses the cost of symbol 0 for the angle delta
    4901           0 :       if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
    4902             :                                     &this_rate, &tokenonly_rd_stats))
    4903           0 :         continue;  // no angle delta improved on best_rd
    4904             :     } else {
    4905             : #endif  // CONFIG_EXT_INTRA
    4906           0 :       if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
    4907             : #if CONFIG_PVQ
    4908             :         od_encode_rollback(&x->daala_enc, &buf);
    4909             : #endif  // CONFIG_PVQ
    4910           0 :         continue;  // super_block_uvrd() returned 0 for this mode
    4911             :       }
    4912             : #if CONFIG_EXT_INTRA
    4913             :     }
    4914             : #endif  // CONFIG_EXT_INTRA
    4915           0 :     this_rate =  // recomputed for every mode; overwrites the value written by rd_pick_intra_angle_sbuv()
    4916           0 :         tokenonly_rd_stats.rate + cpi->intra_uv_mode_cost[mbmi->mode][mode];
    4917             : 
    4918             : #if CONFIG_EXT_INTRA
    4919           0 :     if (is_directional_mode) {
    4920           0 :       this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,  // cost of the angle delta left in mbmi by the search
    4921           0 :                                       MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
    4922             :     }
    4923             : #endif  // CONFIG_EXT_INTRA
    4924             : #if CONFIG_FILTER_INTRA
    4925             :     if (mbmi->sb_type >= BLOCK_8X8 && mode == DC_PRED)
    4926             :       this_rate += av1_cost_bit(cpi->common.fc->filter_intra_probs[1], 0);  // cost of a 0 bit under filter_intra_probs[1]
    4927             : #endif  // CONFIG_FILTER_INTRA
    4928             : #if CONFIG_PALETTE
    4929           0 :     if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8 &&
    4930             :         mode == DC_PRED)
    4931           0 :       this_rate += av1_cost_bit(  // cost of a 0 bit; probability indexed by whether palette_size[0] > 0
    4932             :           av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
    4933             : #endif  // CONFIG_PALETTE
    4934             : 
    4935             : #if CONFIG_PVQ
    4936             :     od_encode_rollback(&x->daala_enc, &buf);
    4937             : #endif  // CONFIG_PVQ
    4938           0 :     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, tokenonly_rd_stats.dist);
    4939             : 
    4940           0 :     if (this_rd < best_rd) {  // strict improvement: remember this mode and publish its stats
    4941           0 :       best_mbmi = *mbmi;
    4942           0 :       best_rd = this_rd;
    4943           0 :       *rate = this_rate;
    4944           0 :       *rate_tokenonly = tokenonly_rd_stats.rate;
    4945           0 :       *distortion = tokenonly_rd_stats.dist;
    4946           0 :       *skippable = tokenonly_rd_stats.skip;
    4947             :     }
    4948             :   }
    4949             : 
    4950             : #if CONFIG_PALETTE
    4951           0 :   if (cpi->common.allow_screen_content_tools && mbmi->sb_type >= BLOCK_8X8) {
    4952           0 :     best_palette_color_map = x->palette_buffer->best_palette_color_map;
    4953           0 :     rd_pick_palette_intra_sbuv(cpi, x,  // receives best_mbmi, best_rd and the four outputs by pointer
    4954           0 :                                cpi->intra_uv_mode_cost[mbmi->mode][DC_PRED],
    4955             :                                best_palette_color_map, &best_mbmi, &best_rd,
    4956             :                                rate, rate_tokenonly, distortion, skippable);
    4957             :   }
    4958             : #endif  // CONFIG_PALETTE
    4959             : 
    4960             : #if CONFIG_FILTER_INTRA
    4961             :   if (mbmi->sb_type >= BLOCK_8X8) {
    4962             :     if (rd_pick_filter_intra_sbuv(cpi, x, rate, rate_tokenonly, distortion,
    4963             :                                   skippable, bsize, &best_rd))
    4964             :       best_mbmi = *mbmi;  // nonzero return: take the current *mbmi as the best candidate
    4965             :   }
    4966             : #endif  // CONFIG_FILTER_INTRA
    4967             : 
    4968           0 :   *mbmi = best_mbmi;  // restore the winning mode info
    4969             :   // Make sure we actually chose a mode
    4970           0 :   assert(best_rd < INT64_MAX);
    4971           0 :   return best_rd;
    4972             : }
    4973             : 
    4974           0 : static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,  // Resets chroma mode state, runs the chroma intra search, reports the mode in *mode_uv.
    4975             :                                  PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
    4976             :                                  TX_SIZE max_tx_size, int *rate_uv,
    4977             :                                  int *rate_uv_tokenonly, int64_t *dist_uv,
    4978             :                                  int *skip_uv, PREDICTION_MODE *mode_uv) {
    4979             :   // NOTE(review): this comment used to describe using an estimated rd based on
    4980             :   // DC_PRED when a speed flag is set; no such check is visible in this function.
    4981             :   (void)ctx;  // ctx is not used in this function
    4982           0 :   init_sbuv_mode(&x->e_mbd.mi[0]->mbmi);
    4983             : #if CONFIG_CB4X4
    4984             : #if CONFIG_CHROMA_2X2
    4985             :   rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
    4986             :                           bsize, max_tx_size);
    4987             : #else
    4988           0 :   if (x->skip_chroma_rd) {  // chroma RD is skipped: report zero cost, skip set, and DC_PRED
    4989           0 :     *rate_uv = 0;
    4990           0 :     *rate_uv_tokenonly = 0;
    4991           0 :     *dist_uv = 0;
    4992           0 :     *skip_uv = 1;
    4993           0 :     *mode_uv = DC_PRED;
    4994           0 :     return;
    4995             :   }
    4996           0 :   BLOCK_SIZE bs = scale_chroma_bsize(bsize, x->e_mbd.plane[1].subsampling_x,  // block size derived from bsize and plane 1's subsampling
    4997             :                                      x->e_mbd.plane[1].subsampling_y);
    4998           0 :   rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
    4999             :                           bs, max_tx_size);
    5000             : #endif  // CONFIG_CHROMA_2X2
    5001             : #else
    5002             :   rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
    5003             :                           bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);  // block size raised to at least BLOCK_8X8
    5004             : #endif  // CONFIG_CB4X4
    5005           0 :   *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;  // mode left in mbmi by the search
    5006             : }
    5007             : 
    5008           0 : static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode,  // Returns the table cost of inter mode 'mode' under the packed context 'mode_context'.
    5009             :                        int16_t mode_context) {
    5010             : #if CONFIG_EXT_INTER
    5011           0 :   if (is_inter_compound_mode(mode)) {  // compound modes use their own cost table, indexed by the raw context
    5012             :     return cpi
    5013           0 :         ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
    5014             :   }
    5015             : #endif
    5016             : 
    5017           0 :   int mode_cost = 0;
    5018           0 :   int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;  // NEWMV sub-context extracted from the packed value
    5019           0 :   int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);
    5020             : 
    5021           0 :   assert(is_inter_mode(mode));
    5022             : 
    5023           0 :   if (mode == NEWMV) {
    5024           0 :     mode_cost = cpi->newmv_mode_cost[mode_ctx][0];  // index 0: mode is NEWMV
    5025           0 :     return mode_cost;
    5026             :   } else {
    5027           0 :     mode_cost = cpi->newmv_mode_cost[mode_ctx][1];  // index 1: mode is not NEWMV
    5028           0 :     mode_ctx = (mode_context >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK;
    5029             : 
    5030           0 :     if (is_all_zero_mv) return mode_cost;  // all-zero flag set: no further terms are added
    5031             : 
    5032           0 :     if (mode == ZEROMV) {
    5033           0 :       mode_cost += cpi->zeromv_mode_cost[mode_ctx][0];  // index 0: mode is ZEROMV
    5034           0 :       return mode_cost;
    5035             :     } else {
    5036           0 :       mode_cost += cpi->zeromv_mode_cost[mode_ctx][1];  // index 1: mode is not ZEROMV
    5037           0 :       mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
    5038             : 
    5039           0 :       if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;  // each set flag overrides the context;
    5040           0 :       if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;     // when several are set the last
    5041           0 :       if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;  // assignment wins
    5042             : 
    5043           0 :       mode_cost += cpi->refmv_mode_cost[mode_ctx][mode != NEARESTMV];  // index 0 for NEARESTMV, 1 otherwise
    5044           0 :       return mode_cost;
    5045             :     }
    5046             :   }
    5047             : }
    5048             : 
    5049             : #if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
    5050           0 : static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,  // Returns a per-compound-type bit count.
    5051             :                                              COMPOUND_TYPE comp_type) {
    5052             :   (void)bsize;  // bsize is only read in the CONFIG_WEDGE case below
    5053           0 :   switch (comp_type) {
    5054           0 :     case COMPOUND_AVERAGE: return 0;
    5055             : #if CONFIG_WEDGE
    5056           0 :     case COMPOUND_WEDGE: return get_interinter_wedge_bits(bsize);  // size-dependent; delegated to the wedge helper
    5057             : #endif  // CONFIG_WEDGE
    5058             : #if CONFIG_COMPOUND_SEGMENT
    5059           0 :     case COMPOUND_SEG: return 1;
    5060             : #endif  // CONFIG_COMPOUND_SEGMENT
    5061           0 :     default: assert(0); return 0;  // unexpected type: asserts when assertions are enabled, otherwise returns 0
    5062             :   }
    5063             : }
    5064             : #endif  // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT)
    5065             : 
    5066             : typedef struct {  // One RD-statistics record; BEST_SEG_INFO holds a 2-D array of these.
    5067             :   int eobs;
    5068             :   int brate;
    5069             :   int byrate;
    5070             :   int64_t bdist;
    5071             :   int64_t bsse;
    5072             :   int64_t brdcost;  // NOTE(review): field names suggest rate / distortion / SSE / RD cost; their use is not visible here
    5073             :   int_mv mvs[2];  // two entries; presumably one per reference frame — TODO confirm
    5074             :   int_mv pred_mv[2];
    5075             : #if CONFIG_EXT_INTER
    5076             :   int_mv ref_mv[2];
    5077             : #endif  // CONFIG_EXT_INTER
    5078             : 
    5079             : #if CONFIG_CHROMA_2X2
    5080             :   ENTROPY_CONTEXT ta[4];  // four entries per array with CONFIG_CHROMA_2X2
    5081             :   ENTROPY_CONTEXT tl[4];
    5082             : #else
    5083             :   ENTROPY_CONTEXT ta[2];  // two entries per array otherwise
    5084             :   ENTROPY_CONTEXT tl[2];
    5085             : #endif  // CONFIG_CHROMA_2X2
    5086             : } SEG_RDSTAT;
    5087             : 
    5088             : typedef struct {  // Aggregate search state holding four modes and a 4 x N table of SEG_RDSTAT records.
    5089             :   int_mv *ref_mv[2];
    5090             :   int_mv mvp;
    5091             : 
    5092             :   int64_t segment_rd;
    5093             :   int r;
    5094             :   int64_t d;
    5095             :   int64_t sse;  // NOTE(review): r / d / sse are presumably rate, distortion and SSE totals — confirm against users
    5096             :   int segment_yrate;
    5097             :   PREDICTION_MODE modes[4];  // four entries, matching the first dimension of rdstat
    5098             : #if CONFIG_EXT_INTER
    5099             :   SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];  // second dimension also covers compound modes
    5100             : #else
    5101             :   SEG_RDSTAT rdstat[4][INTER_MODES];
    5102             : #endif  // CONFIG_EXT_INTER
    5103             :   int mvthresh;
    5104             : } BEST_SEG_INFO;
    5105             : 
    5106           0 : static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {  // Returns nonzero when mv, after >> 3, is outside mv_limits.
    5107           0 :   return (mv->row >> 3) < mv_limits->row_min ||  // NOTE(review): >> 3 presumably converts 1/8-pel to full-pel units — confirm
    5108           0 :          (mv->row >> 3) > mv_limits->row_max ||
    5109           0 :          (mv->col >> 3) < mv_limits->col_min ||
    5110           0 :          (mv->col >> 3) > mv_limits->col_max;  // limits are inclusive: equal to min or max counts as in bounds
    5111             : }
    5112             : 
    5113             : // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
    5114             : // TODO(aconverse): Find out if this is still productive then clean up or remove
    5115           0 : static int check_best_zero_mv(  // Returns 0 when this_mode's MV equals the zero MV but another mode signals it more cheaply; 1 otherwise.
    5116             :     const AV1_COMP *const cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME],
    5117             : #if CONFIG_EXT_INTER
    5118             :     const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
    5119             : #endif  // CONFIG_EXT_INTER
    5120             :     int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
    5121             :     const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
    5122             :     int mi_row, int mi_col) {
    5123             :   int_mv zeromv[2];
    5124           0 :   int comp_pred_mode = ref_frames[1] > INTRA_FRAME;  // nonzero when a second reference frame is in use
    5125             :   int cur_frm;
    5126             :   (void)mi_row;  // mi_row / mi_col are only read under CONFIG_GLOBAL_MOTION
    5127             :   (void)mi_col;
    5128           0 :   for (cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {  // build the reference "zero" MV for each used frame
    5129             : #if CONFIG_GLOBAL_MOTION
    5130           0 :     if (this_mode == ZEROMV
    5131             : #if CONFIG_EXT_INTER
    5132           0 :         || this_mode == ZERO_ZEROMV
    5133             : #endif  // CONFIG_EXT_INTER
    5134             :         )
    5135           0 :       zeromv[cur_frm].as_int =  // for the zero modes, the "zero" MV is the global-motion vector
    5136           0 :           gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
    5137             :                                cpi->common.allow_high_precision_mv, bsize,
    5138             :                                mi_col, mi_row, block)
    5139           0 :               .as_int;
    5140             :     else
    5141             : #endif  // CONFIG_GLOBAL_MOTION
    5142           0 :       zeromv[cur_frm].as_int = 0;
    5143             :   }
    5144             : #if !CONFIG_EXT_INTER
    5145             :   assert(ref_frames[1] != INTRA_FRAME);  // Just sanity check
    5146             : #endif                                   // !CONFIG_EXT_INTER
    5147           0 :   if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&  // single-mode branch: every used
    5148           0 :       frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&           // reference's MV for this_mode
    5149           0 :       (ref_frames[1] <= INTRA_FRAME ||                                           // must equal its zeromv entry
    5150           0 :        frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
    5151           0 :     int16_t rfc =
    5152           0 :         av1_mode_context_analyzer(mode_context, ref_frames, bsize, block);
    5153           0 :     int c1 = cost_mv_ref(cpi, NEARMV, rfc);     // c1: NEARMV cost
    5154           0 :     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);  // c2: NEARESTMV cost
    5155           0 :     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);     // c3: ZEROMV cost
    5156             : 
    5157           0 :     if (this_mode == NEARMV) {
    5158           0 :       if (c1 > c3) return 0;  // ZEROMV is strictly cheaper than NEARMV
    5159           0 :     } else if (this_mode == NEARESTMV) {
    5160           0 :       if (c2 > c3) return 0;  // ZEROMV is strictly cheaper than NEARESTMV
    5161             :     } else {
    5162           0 :       assert(this_mode == ZEROMV);
    5163           0 :       if (ref_frames[1] <= INTRA_FRAME) {  // single reference
    5164           0 :         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||  // a NEAREST/NEAR MV of exactly 0 that costs
    5165           0 :             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))       // no more than ZEROMV makes ZEROMV redundant
    5166           0 :           return 0;
    5167             :       } else {  // compound: both references' MVs must be 0
    5168           0 :         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
    5169           0 :              frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
    5170           0 :             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
    5171           0 :              frame_mv[NEARMV][ref_frames[1]].as_int == 0))
    5172           0 :           return 0;
    5173             :       }
    5174             :     }
    5175             :   }
    5176             : #if CONFIG_EXT_INTER
    5177           0 :   else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||  // compound-mode branch: same test
    5178           0 :             this_mode == ZERO_ZEROMV) &&                                   // using the compound cost context
    5179           0 :            frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
    5180           0 :            frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
    5181           0 :     int16_t rfc = compound_mode_context[ref_frames[0]];
    5182           0 :     int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, rfc);
    5183           0 :     int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, rfc);
    5184           0 :     int c5 = cost_mv_ref(cpi, NEAR_NEARMV, rfc);
    5185             : 
    5186           0 :     if (this_mode == NEAREST_NEARESTMV) {
    5187           0 :       if (c2 > c3) return 0;  // ZERO_ZEROMV is strictly cheaper
    5188           0 :     } else if (this_mode == NEAR_NEARMV) {
    5189           0 :       if (c5 > c3) return 0;  // ZERO_ZEROMV is strictly cheaper
    5190             :     } else {
    5191           0 :       assert(this_mode == ZERO_ZEROMV);
    5192           0 :       if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
    5193           0 :            frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
    5194           0 :           (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
    5195           0 :            frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0))
    5196           0 :         return 0;
    5197             :     }
    5198             :   }
    5199             : #endif  // CONFIG_EXT_INTER
    5200           0 :   return 1;  // no cheaper equivalent was found
    5201             : }
    5202             : 
    5203           0 : static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
    5204             :                                 BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
    5205             :                                 int mi_col,
    5206             : #if CONFIG_EXT_INTER
    5207             :                                 int_mv *ref_mv_sub8x8[2], const uint8_t *mask,
    5208             :                                 int mask_stride,
    5209             : #endif  // CONFIG_EXT_INTER
    5210             :                                 int *rate_mv, const int block) {
    5211           0 :   const AV1_COMMON *const cm = &cpi->common;
    5212           0 :   const int pw = block_size_wide[bsize];
    5213           0 :   const int ph = block_size_high[bsize];
    5214           0 :   MACROBLOCKD *xd = &x->e_mbd;
    5215           0 :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    5216             :   // This function should only ever be called for compound modes
    5217           0 :   assert(has_second_ref(mbmi));
    5218           0 :   const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
    5219             :   int_mv ref_mv[2];
    5220             :   int ite, ref;
    5221             : #if CONFIG_DUAL_FILTER
    5222           0 :   InterpFilter interp_filter[4] = {
    5223           0 :     mbmi->interp_filter[0], mbmi->interp_filter[1], mbmi->interp_filter[2],
    5224           0 :     mbmi->interp_filter[3],
    5225             :   };
    5226             : #else
    5227             :   const InterpFilter interp_filter = mbmi->interp_filter;
    5228             : #endif  // CONFIG_DUAL_FILTER
    5229             :   struct scale_factors sf;
    5230           0 :   struct macroblockd_plane *const pd = &xd->plane[0];
    5231             : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5232             :   // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
    5233           0 :   const int ic = block & 1;
    5234           0 :   const int ir = (block - ic) >> 1;
    5235           0 :   const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
    5236           0 :   const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
    5237             : #if CONFIG_GLOBAL_MOTION
    5238             :   int is_global[2];
    5239           0 :   for (ref = 0; ref < 2; ++ref) {
    5240           0 :     WarpedMotionParams *const wm =
    5241           0 :         &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
    5242           0 :     is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
    5243             :   }
    5244             : #endif  // CONFIG_GLOBAL_MOTION
    5245             : #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5246             : 
    5247             :   // Do joint motion search in compound mode to get more accurate mv.
    5248             :   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
    5249           0 :   int last_besterr[2] = { INT_MAX, INT_MAX };
    5250           0 :   const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    5251           0 :     av1_get_scaled_ref_frame(cpi, refs[0]),
    5252           0 :     av1_get_scaled_ref_frame(cpi, refs[1])
    5253             :   };
    5254             : 
    5255             : // Prediction buffer from second frame.
    5256             : #if CONFIG_HIGHBITDEPTH
    5257             :   DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
    5258             :   uint8_t *second_pred;
    5259             : #else
    5260             :   DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
    5261             : #endif  // CONFIG_HIGHBITDEPTH
    5262             : 
    5263             : #if CONFIG_EXT_INTER && CONFIG_CB4X4
    5264             :   (void)ref_mv_sub8x8;
    5265             : #endif  // CONFIG_EXT_INTER && CONFIG_CB4X4
    5266             : 
    5267           0 :   for (ref = 0; ref < 2; ++ref) {
    5268             : #if CONFIG_EXT_INTER && !CONFIG_CB4X4
    5269             :     if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
    5270             :       ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
    5271             :     else
    5272             : #endif  // CONFIG_EXT_INTER && !CONFIG_CB4X4
    5273           0 :       ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
    5274             : 
    5275           0 :     if (scaled_ref_frame[ref]) {
    5276             :       int i;
    5277             :       // Swap out the reference frame for a version that's been scaled to
    5278             :       // match the resolution of the current frame, allowing the existing
    5279             :       // motion search code to be used without additional modifications.
    5280           0 :       for (i = 0; i < MAX_MB_PLANE; i++)
    5281           0 :         backup_yv12[ref][i] = xd->plane[i].pre[ref];
    5282           0 :       av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
    5283             :                            NULL);
    5284             :     }
    5285             :   }
    5286             : 
    5287             : // Since we have scaled the reference frames to match the size of the current
    5288             : // frame we must use a unit scaling factor during mode selection.
    5289             : #if CONFIG_HIGHBITDEPTH
    5290           0 :   av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
    5291             :                                     cm->height, cm->use_highbitdepth);
    5292             : #else
    5293             :   av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
    5294             :                                     cm->height);
    5295             : #endif  // CONFIG_HIGHBITDEPTH
    5296             : 
    5297             :   // Allow joint search multiple times iteratively for each reference frame
    5298             :   // and break out of the search loop if it couldn't find a better mv.
    5299           0 :   for (ite = 0; ite < 4; ite++) {
    5300             :     struct buf_2d ref_yv12[2];
    5301           0 :     int bestsme = INT_MAX;
    5302           0 :     int sadpb = x->sadperbit16;
    5303           0 :     MV *const best_mv = &x->best_mv.as_mv;
    5304           0 :     int search_range = 3;
    5305             : 
    5306           0 :     MvLimits tmp_mv_limits = x->mv_limits;
    5307           0 :     int id = ite % 2;  // Even iterations search in the first reference frame,
    5308             :                        // odd iterations search in the second. The predictor
    5309             :                        // found for the 'other' reference frame is factored in.
    5310           0 :     const int plane = 0;
    5311           0 :     ConvolveParams conv_params = get_conv_params(0, plane);
    5312             : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5313             :     WarpTypesAllowed warp_types;
    5314             : #if CONFIG_GLOBAL_MOTION
    5315           0 :     warp_types.global_warp_allowed = is_global[!id];
    5316             : #endif  // CONFIG_GLOBAL_MOTION
    5317             : #if CONFIG_WARPED_MOTION
    5318           0 :     warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
    5319             : #endif  // CONFIG_WARPED_MOTION
    5320             : #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5321             : 
    5322             :     // Initialized here because of compiler problem in Visual Studio.
    5323           0 :     ref_yv12[0] = xd->plane[plane].pre[0];
    5324           0 :     ref_yv12[1] = xd->plane[plane].pre[1];
    5325             : 
    5326             : #if CONFIG_DUAL_FILTER
    5327             :     // reload the filter types
    5328           0 :     interp_filter[0] =
    5329           0 :         (id == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0];
    5330           0 :     interp_filter[1] =
    5331           0 :         (id == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1];
    5332             : #endif  // CONFIG_DUAL_FILTER
    5333             : 
    5334             : // Get the prediction block from the 'other' reference frame.
    5335             : #if CONFIG_HIGHBITDEPTH
    5336           0 :     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    5337           0 :       second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
    5338           0 :       av1_highbd_build_inter_predictor(
    5339           0 :           ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
    5340           0 :           &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, interp_filter,
    5341             : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5342             :           &warp_types, p_col, p_row,
    5343             : #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5344             :           plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
    5345             :     } else {
    5346           0 :       second_pred = (uint8_t *)second_pred_alloc_16;
    5347             : #endif  // CONFIG_HIGHBITDEPTH
    5348           0 :       av1_build_inter_predictor(
    5349           0 :           ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
    5350           0 :           &frame_mv[refs[!id]].as_mv, &sf, pw, ph, &conv_params, interp_filter,
    5351             : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5352             :           &warp_types, p_col, p_row, plane, !id,
    5353             : #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5354             :           MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
    5355             : #if CONFIG_HIGHBITDEPTH
    5356             :     }
    5357             : #endif  // CONFIG_HIGHBITDEPTH
    5358             : 
    5359             :     // Do compound motion search on the current reference frame.
    5360           0 :     if (id) xd->plane[plane].pre[0] = ref_yv12[id];
    5361           0 :     av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
    5362             : 
    5363             :     // Use the mv result from the single mode as mv predictor.
    5364           0 :     *best_mv = frame_mv[refs[id]].as_mv;
    5365             : 
    5366           0 :     best_mv->col >>= 3;
    5367           0 :     best_mv->row >>= 3;
    5368             : 
    5369           0 :     av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
    5370             : 
    5371             :     // Small-range full-pixel motion search.
    5372           0 :     bestsme =
    5373           0 :         av1_refining_search_8p_c(x, sadpb, search_range, &cpi->fn_ptr[bsize],
    5374             : #if CONFIG_EXT_INTER
    5375             :                                  mask, mask_stride, id,
    5376             : #endif
    5377           0 :                                  &ref_mv[id].as_mv, second_pred);
    5378           0 :     if (bestsme < INT_MAX) {
    5379             : #if CONFIG_EXT_INTER
    5380           0 :       if (mask)
    5381           0 :         bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
    5382             :                                           second_pred, mask, mask_stride, id,
    5383           0 :                                           &cpi->fn_ptr[bsize], 1);
    5384             :       else
    5385             : #endif
    5386           0 :         bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
    5387           0 :                                         second_pred, &cpi->fn_ptr[bsize], 1);
    5388             :     }
    5389             : 
    5390           0 :     x->mv_limits = tmp_mv_limits;
    5391             : 
    5392           0 :     if (bestsme < INT_MAX) {
    5393             :       int dis; /* TODO: use dis in distortion calculation later. */
    5394             :       unsigned int sse;
    5395           0 :       if (cpi->sf.use_upsampled_references) {
    5396             :         // Use up-sampled reference frames.
    5397           0 :         struct buf_2d backup_pred = pd->pre[0];
    5398           0 :         const YV12_BUFFER_CONFIG *upsampled_ref =
    5399           0 :             get_upsampled_ref(cpi, refs[id]);
    5400             : 
    5401             :         // Set pred for Y plane
    5402           0 :         setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
    5403             :                          upsampled_ref->y_crop_width,
    5404             :                          upsampled_ref->y_crop_height, upsampled_ref->y_stride,
    5405             :                          (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
    5406             :                          pd->subsampling_y);
    5407             : 
    5408             : // If bsize < BLOCK_8X8, adjust pred pointer for this block
    5409             : #if !CONFIG_CB4X4
    5410             :         if (bsize < BLOCK_8X8)
    5411             :           pd->pre[0].buf =
    5412             :               &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
    5413             :                                                        pd->pre[0].stride))
    5414             :                               << 3];
    5415             : #endif  // !CONFIG_CB4X4
    5416             : 
    5417           0 :         bestsme = cpi->find_fractional_mv_step(
    5418           0 :             x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
    5419           0 :             x->errorperbit, &cpi->fn_ptr[bsize], 0,
    5420             :             cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
    5421             :             &dis, &sse, second_pred,
    5422             : #if CONFIG_EXT_INTER
    5423             :             mask, mask_stride, id,
    5424             : #endif
    5425             :             pw, ph, 1);
    5426             : 
    5427             :         // Restore the reference frames.
    5428           0 :         pd->pre[0] = backup_pred;
    5429             :       } else {
    5430             :         (void)block;
    5431           0 :         bestsme = cpi->find_fractional_mv_step(
    5432           0 :             x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
    5433           0 :             x->errorperbit, &cpi->fn_ptr[bsize], 0,
    5434             :             cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
    5435             :             &dis, &sse, second_pred,
    5436             : #if CONFIG_EXT_INTER
    5437             :             mask, mask_stride, id,
    5438             : #endif
    5439             :             pw, ph, 0);
    5440             :       }
    5441             :     }
    5442             : 
    5443             :     // Restore the pointer to the first (possibly scaled) prediction buffer.
    5444           0 :     if (id) xd->plane[plane].pre[0] = ref_yv12[0];
    5445             : 
    5446           0 :     if (bestsme < last_besterr[id]) {
    5447           0 :       frame_mv[refs[id]].as_mv = *best_mv;
    5448           0 :       last_besterr[id] = bestsme;
    5449             :     } else {
    5450           0 :       break;
    5451             :     }
    5452             :   }
    5453             : 
    5454           0 :   *rate_mv = 0;
    5455             : 
    5456           0 :   for (ref = 0; ref < 2; ++ref) {
    5457           0 :     if (scaled_ref_frame[ref]) {
    5458             :       // Restore the prediction frame pointers to their unscaled versions.
    5459             :       int i;
    5460           0 :       for (i = 0; i < MAX_MB_PLANE; i++)
    5461           0 :         xd->plane[i].pre[ref] = backup_yv12[ref][i];
    5462             :     }
    5463           0 :     av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
    5464             : #if CONFIG_EXT_INTER && !CONFIG_CB4X4
    5465             :     if (bsize >= BLOCK_8X8)
    5466             : #endif  // CONFIG_EXT_INTER && !CONFIG_CB4X4
    5467           0 :       *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
    5468           0 :                                   &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
    5469           0 :                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
    5470             : #if CONFIG_EXT_INTER && !CONFIG_CB4X4
    5471             :     else
    5472             :       *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
    5473             :                                   &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
    5474             :                                   x->mvcost, MV_COST_WEIGHT);
    5475             : #endif  // CONFIG_EXT_INTER && !CONFIG_CB4X4
    5476             :   }
    5477           0 : }
    5478             : 
    5479           0 : static void estimate_ref_frame_costs(const AV1_COMMON *cm,
    5480             :                                      const MACROBLOCKD *xd, int segment_id,
    5481             :                                      unsigned int *ref_costs_single,
    5482             :                                      unsigned int *ref_costs_comp,
    5483             :                                      aom_prob *comp_mode_p) {
                         // Fills ref_costs_single[] and ref_costs_comp[] (both indexed by
                         // reference frame) with costs built from av1_cost_bit() values, and
                         // stores the reference-mode probability in *comp_mode_p.
                         //
                         // cm, xd          - contexts passed to the av1_get_*_prob() helpers.
                         // segment_id      - segment checked for SEG_LVL_REF_FRAME.
                         // ref_costs_single, ref_costs_comp
                         //                 - output arrays; the memset() calls below write
                         //                   TOTAL_REFS_PER_FRAME entries, so each must hold at
                         //                   least that many.
                         // comp_mode_p     - output; always written.
    5484           0 :   int seg_ref_active =
    5485           0 :       segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
    5486           0 :   if (seg_ref_active) {
                           // SEG_LVL_REF_FRAME is active for this segment: every cost is zeroed
                           // and 128 is reported as the mode probability.
    5487           0 :     memset(ref_costs_single, 0,
    5488             :            TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
    5489           0 :     memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
    5490           0 :     *comp_mode_p = 128;
    5491             :   } else {
    5492           0 :     aom_prob intra_inter_p = av1_get_intra_inter_prob(cm, xd);
    5493           0 :     aom_prob comp_inter_p = 128;
    5494             : 
                           // The reference-mode probability is only queried when the frame uses
                           // REFERENCE_MODE_SELECT; otherwise 128 is reported.
    5495           0 :     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
    5496           0 :       comp_inter_p = av1_get_reference_mode_prob(cm, xd);
    5497           0 :       *comp_mode_p = comp_inter_p;
    5498             :     } else {
    5499           0 :       *comp_mode_p = 128;
    5500             :     }
    5501             : 
                           // INTRA_FRAME slot: cost of coding bit 0 with intra_inter_p.
    5502           0 :     ref_costs_single[INTRA_FRAME] = av1_cost_bit(intra_inter_p, 0);
    5503             : 
                           // Single-reference costs, computed unless the frame is
                           // COMPOUND_REFERENCE only.
    5504           0 :     if (cm->reference_mode != COMPOUND_REFERENCE) {
    5505           0 :       aom_prob ref_single_p1 = av1_get_pred_prob_single_ref_p1(cm, xd);
    5506           0 :       aom_prob ref_single_p2 = av1_get_pred_prob_single_ref_p2(cm, xd);
    5507             : #if CONFIG_EXT_REFS
    5508           0 :       aom_prob ref_single_p3 = av1_get_pred_prob_single_ref_p3(cm, xd);
    5509           0 :       aom_prob ref_single_p4 = av1_get_pred_prob_single_ref_p4(cm, xd);
    5510           0 :       aom_prob ref_single_p5 = av1_get_pred_prob_single_ref_p5(cm, xd);
    5511             : #endif  // CONFIG_EXT_REFS
    5512             : 
                             // Every inter reference starts from the cost of coding bit 1 with
                             // intra_inter_p.
    5513           0 :       unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
    5514             : 
    5515           0 :       ref_costs_single[LAST_FRAME] =
    5516             : #if CONFIG_EXT_REFS
    5517           0 :           ref_costs_single[LAST2_FRAME] = ref_costs_single[LAST3_FRAME] =
    5518           0 :               ref_costs_single[BWDREF_FRAME] =
    5519             : #endif  // CONFIG_EXT_REFS
    5520           0 :                   ref_costs_single[GOLDEN_FRAME] =
    5521           0 :                       ref_costs_single[ALTREF_FRAME] = base_cost;
    5522             : 
    5523             : #if CONFIG_EXT_REFS
                             // p1: bit 0 for {LAST, LAST2, LAST3, GOLDEN}, bit 1 for
                             // {BWDREF, ALTREF}.
    5524           0 :       ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
    5525           0 :       ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p1, 0);
    5526           0 :       ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
    5527           0 :       ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
    5528           0 :       ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
    5529           0 :       ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
    5530             : 
                             // p3: bit 0 for {LAST, LAST2}, bit 1 for {LAST3, GOLDEN}.
    5531           0 :       ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
    5532           0 :       ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p3, 0);
    5533           0 :       ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p3, 1);
    5534           0 :       ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);
    5535             : 
                             // p2: bit 0 for BWDREF, bit 1 for ALTREF.
    5536           0 :       ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
    5537           0 :       ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
    5538             : 
                             // p4: bit 0 for LAST, bit 1 for LAST2.
    5539           0 :       ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
    5540           0 :       ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p4, 1);
    5541             : 
                             // p5: bit 0 for LAST3, bit 1 for GOLDEN.
    5542           0 :       ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
    5543           0 :       ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);
    5544             : #else
                             // p1: bit 0 for LAST, bit 1 for {GOLDEN, ALTREF}.
    5545             :       ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
    5546             :       ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 1);
    5547             :       ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
    5548             : 
                             // p2: bit 0 for GOLDEN, bit 1 for ALTREF.
    5549             :       ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p2, 0);
    5550             :       ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);
    5551             : #endif  // CONFIG_EXT_REFS
    5552             :     } else {
                             // Frame is COMPOUND_REFERENCE only: every single-reference slot gets
                             // the constant 512.
                             // NOTE(review): the meaning/units of 512 are not visible here.
    5553           0 :       ref_costs_single[LAST_FRAME] = 512;
    5554             : #if CONFIG_EXT_REFS
    5555           0 :       ref_costs_single[LAST2_FRAME] = 512;
    5556           0 :       ref_costs_single[LAST3_FRAME] = 512;
    5557           0 :       ref_costs_single[BWDREF_FRAME] = 512;
    5558             : #endif  // CONFIG_EXT_REFS
    5559           0 :       ref_costs_single[GOLDEN_FRAME] = 512;
    5560           0 :       ref_costs_single[ALTREF_FRAME] = 512;
    5561             :     }
    5562             : 
                           // Compound-reference costs, computed unless the frame is
                           // SINGLE_REFERENCE only.
    5563           0 :     if (cm->reference_mode != SINGLE_REFERENCE) {
    5564           0 :       aom_prob ref_comp_p = av1_get_pred_prob_comp_ref_p(cm, xd);
    5565             : #if CONFIG_EXT_REFS
    5566           0 :       aom_prob ref_comp_p1 = av1_get_pred_prob_comp_ref_p1(cm, xd);
    5567           0 :       aom_prob ref_comp_p2 = av1_get_pred_prob_comp_ref_p2(cm, xd);
    5568           0 :       aom_prob bwdref_comp_p = av1_get_pred_prob_comp_bwdref_p(cm, xd);
    5569             : #endif  // CONFIG_EXT_REFS
    5570             : 
    5571           0 :       unsigned int base_cost = av1_cost_bit(intra_inter_p, 1);
    5572             : 
                             // Only LAST/GOLDEN (plus LAST2/LAST3 with CONFIG_EXT_REFS) start from
                             // base_cost. Without CONFIG_EXT_REFS the ALTREF_FRAME slot is not
                             // written anywhere in this non-segment path.
    5573           0 :       ref_costs_comp[LAST_FRAME] =
    5574             : #if CONFIG_EXT_REFS
    5575           0 :           ref_costs_comp[LAST2_FRAME] = ref_costs_comp[LAST3_FRAME] =
    5576             : #endif  // CONFIG_EXT_REFS
    5577           0 :               ref_costs_comp[GOLDEN_FRAME] = base_cost;
    5578             : 
    5579             : #if CONFIG_EXT_REFS
                             // BWDREF/ALTREF start from 0 rather than base_cost.
    5580           0 :       ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF_FRAME] = 0;
    5581             : #endif  // CONFIG_EXT_REFS
    5582             : 
    5583             : #if CONFIG_EXT_REFS
                             // ref_comp_p: bit 0 for {LAST, LAST2}, bit 1 for {LAST3, GOLDEN}.
    5584           0 :       ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
    5585           0 :       ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
    5586           0 :       ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
    5587           0 :       ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
    5588             : 
                             // ref_comp_p1: bit 1 for LAST, bit 0 for LAST2.
                             // NOTE(review): this polarity is the opposite of ref_single_p4 above
                             // (bit 0 for LAST); confirm it is intentional.
    5589           0 :       ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 1);
    5590           0 :       ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 0);
    5591             : 
                             // ref_comp_p2: bit 0 for LAST3, bit 1 for GOLDEN.
    5592           0 :       ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
    5593           0 :       ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);
    5594             : 
    5595             :       // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
    5596             :       //               more bit.
    5597           0 :       ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
    5598           0 :       ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);
    5599             : #else
                             // ref_comp_p: bit 0 for LAST, bit 1 for GOLDEN.
    5600             :       ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
    5601             :       ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);
    5602             : #endif  // CONFIG_EXT_REFS
    5603             :     } else {
                             // Frame is SINGLE_REFERENCE only: the compound slots get the
                             // constant 512 (same unexplained constant as the single-reference
                             // fallback).
    5604           0 :       ref_costs_comp[LAST_FRAME] = 512;
    5605             : #if CONFIG_EXT_REFS
    5606           0 :       ref_costs_comp[LAST2_FRAME] = 512;
    5607           0 :       ref_costs_comp[LAST3_FRAME] = 512;
    5608           0 :       ref_costs_comp[BWDREF_FRAME] = 512;
    5609           0 :       ref_costs_comp[ALTREF_FRAME] = 512;
    5610             : #endif  // CONFIG_EXT_REFS
    5611           0 :       ref_costs_comp[GOLDEN_FRAME] = 512;
    5612             :     }
    5613             :   }
    5614           0 : }
    5615             : 
    5616           0 : static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
    5617             :                                  int mode_index,
    5618             :                                  int64_t comp_pred_diff[REFERENCE_MODES],
    5619             :                                  int skippable) {
                         // Records the current mode decision in *ctx.
                         //
                         // x              - source of the skip flag, the mode info (via
                         //                  x->e_mbd.mi[0]) and the extended mode info.
                         // ctx            - destination snapshot; overwritten field by field.
                         // mode_index     - stored as ctx->best_mode_index.
                         // comp_pred_diff - per-reference-mode differences, indexed by
                         //                  SINGLE_REFERENCE / COMPOUND_REFERENCE /
                         //                  REFERENCE_MODE_SELECT.
                         // skippable      - stored as ctx->skippable.
    5620           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    5621             : 
    5622             :   // Take a snapshot of the coding context so it can be
    5623             :   // restored if we decide to encode this way
    5624           0 :   ctx->skip = x->skip;
    5625           0 :   ctx->skippable = skippable;
    5626           0 :   ctx->best_mode_index = mode_index;
                         // Both assignments below copy the pointed-to objects by value, so the
                         // snapshot does not alias xd->mi[0] or x->mbmi_ext.
    5627           0 :   ctx->mic = *xd->mi[0];
    5628           0 :   ctx->mbmi_ext = *x->mbmi_ext;
                         // NOTE(review): the int64_t differences are narrowed with (int) casts;
                         // confirm callers keep them within int range.
    5629           0 :   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
    5630           0 :   ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
    5631           0 :   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
    5632           0 : }
    5633             : 
    5634           0 : static void setup_buffer_inter(
    5635             :     const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    5636             :     BLOCK_SIZE block_size, int mi_row, int mi_col,
    5637             :     int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
    5638             :     int_mv frame_near_mv[TOTAL_REFS_PER_FRAME],
    5639             :     struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
                         // Prepares inter prediction for one reference frame: sets up the
                         // prediction planes in yv12_mb[ref_frame], builds the candidate MV list
                         // in x->mbmi_ext, writes frame_nearest_mv[ref_frame] and
                         // frame_near_mv[ref_frame], and runs av1_mv_pred() on the luma buffer.
                         //
                         // ref_frame        - reference to set up. It indexes cm->frame_refs with
                         //                    (ref_frame - 1), so it must be >= 1.
                         //                    NOTE(review): presumably never INTRA_FRAME; confirm
                         //                    against callers.
                         // block_size       - passed to av1_mv_pred(); also gates that call when
                         //                    CONFIG_CB4X4 is off.
                         // mi_row, mi_col   - block position, forwarded to the setup helpers.
                         // frame_nearest_mv, frame_near_mv, yv12_mb
                         //                  - outputs; only the [ref_frame] entry is written.
    5640           0 :   const AV1_COMMON *cm = &cpi->common;
    5641           0 :   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
    5642           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    5643           0 :   MODE_INFO *const mi = xd->mi[0];
    5644           0 :   int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
    5645           0 :   const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
    5646           0 :   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
    5647             : 
                         // Only checked in builds with assertions enabled.
    5648           0 :   assert(yv12 != NULL);
    5649             : 
    5650             :   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
    5651             :   // use the UV scaling factors.
                         // The same scale factors are passed for both scale-factor arguments.
    5652           0 :   av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
    5653             : 
    5654             :   // Gets an initial list of candidate vectors from neighbours and orders them
    5655           0 :   av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
    5656           0 :                    mbmi_ext->ref_mv_stack[ref_frame],
    5657             : #if CONFIG_EXT_INTER
    5658           0 :                    mbmi_ext->compound_mode_context,
    5659             : #endif  // CONFIG_EXT_INTER
    5660             :                    candidates, mi_row, mi_col, NULL, NULL,
    5661           0 :                    mbmi_ext->mode_context);
    5662             : 
    5663             :   // Candidate refinement carried out at encoder and decoder
    5664           0 :   av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
    5665           0 :                         &frame_nearest_mv[ref_frame],
    5666           0 :                         &frame_near_mv[ref_frame]);
    5667             : 
    5668             : // Further refinement that is encode side only to test the top few candidates
    5669             : // in full and choose the best as the centre point for subsequent searches.
    5670             : // The current implementation doesn't support scaling.
                       // NOTE(review): with CONFIG_CB4X4 the call below is unconditional; only
                       // the non-CB4X4 path skips it for scaled references (and for blocks
                       // smaller than BLOCK_8X8). Confirm the CB4X4 path is safe when
                       // av1_is_scaled(sf) is true.
    5671             : #if CONFIG_CB4X4
    5672           0 :   av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
    5673             :               block_size);
    5674             : #else
    5675             :   if (!av1_is_scaled(sf) && block_size >= BLOCK_8X8)
    5676             :     av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
    5677             :                 block_size);
    5678             : #endif  // CONFIG_CB4X4
    5679           0 : }
    5680             : 
    5681           0 : static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
    5682             :                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
    5683             : #if CONFIG_EXT_INTER
    5684             :                                  int ref_idx,
    5685             : #endif  // CONFIG_EXT_INTER
    5686             :                                  int *rate_mv) {
    5687           0 :   MACROBLOCKD *xd = &x->e_mbd;
    5688           0 :   const AV1_COMMON *cm = &cpi->common;
    5689           0 :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    5690           0 :   struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
    5691           0 :   int bestsme = INT_MAX;
    5692             :   int step_param;
    5693           0 :   int sadpb = x->sadperbit16;
    5694             :   MV mvp_full;
    5695             : #if CONFIG_EXT_INTER
    5696           0 :   int ref = mbmi->ref_frame[ref_idx];
    5697             : #else
    5698             :   int ref = mbmi->ref_frame[0];
    5699             :   int ref_idx = 0;
    5700             : #endif  // CONFIG_EXT_INTER
    5701           0 :   MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
    5702             : 
    5703           0 :   MvLimits tmp_mv_limits = x->mv_limits;
    5704             :   int cost_list[5];
    5705             : 
    5706           0 :   const YV12_BUFFER_CONFIG *scaled_ref_frame =
    5707             :       av1_get_scaled_ref_frame(cpi, ref);
    5708             : 
    5709             :   MV pred_mv[3];
    5710           0 :   pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
    5711           0 :   pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
    5712           0 :   pred_mv[2] = x->pred_mv[ref];
    5713             : 
    5714           0 :   if (scaled_ref_frame) {
    5715             :     int i;
    5716             :     // Swap out the reference frame for a version that's been scaled to
    5717             :     // match the resolution of the current frame, allowing the existing
    5718             :     // motion search code to be used without additional modifications.
    5719           0 :     for (i = 0; i < MAX_MB_PLANE; i++)
    5720           0 :       backup_yv12[i] = xd->plane[i].pre[ref_idx];
    5721             : 
    5722           0 :     av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
    5723             :   }
    5724             : 
    5725           0 :   av1_set_mv_search_range(&x->mv_limits, &ref_mv);
    5726             : 
    5727           0 :   av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
    5728             : 
    5729             :   // Work out the size of the first step in the mv step search.
    5730             :   // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
    5731           0 :   if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    5732             :     // Take the equal-weight average of two step_params: one based on the
    5733             :     // last frame's max mv magnitude and one based on the best ref mvs of
    5734             :     // the current block for the given reference.
    5735           0 :     step_param =
    5736           0 :         (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
    5737             :         2;
    5738             :   } else {
    5739           0 :     step_param = cpi->mv_step_param;
    5740             :   }
    5741             : 
    5742           0 :   if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
    5743           0 :     int boffset =
    5744           0 :         2 * (b_width_log2_lookup[cm->sb_size] -
    5745           0 :              AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    5746           0 :     step_param = AOMMAX(step_param, boffset);
    5747             :   }
    5748             : 
    5749           0 :   if (cpi->sf.adaptive_motion_search) {
    5750           0 :     int bwl = b_width_log2_lookup[bsize];
    5751           0 :     int bhl = b_height_log2_lookup[bsize];
    5752           0 :     int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
    5753             : 
    5754           0 :     if (tlevel < 5) step_param += 2;
    5755             : 
    5756             :     // pred_mv_sad is not set up for dynamically scaled frames.
    5757           0 :     if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
    5758             :       int i;
    5759           0 :       for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
    5760           0 :         if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
    5761           0 :           x->pred_mv[ref].row = 0;
    5762           0 :           x->pred_mv[ref].col = 0;
    5763           0 :           x->best_mv.as_int = INVALID_MV;
    5764             : 
    5765           0 :           if (scaled_ref_frame) {
    5766             :             int j;
    5767           0 :             for (j = 0; j < MAX_MB_PLANE; ++j)
    5768           0 :               xd->plane[j].pre[ref_idx] = backup_yv12[j];
    5769             :           }
    5770           0 :           return;
    5771             :         }
    5772             :       }
    5773             :     }
    5774             :   }
    5775             : 
    5776           0 :   av1_set_mv_search_range(&x->mv_limits, &ref_mv);
    5777             : 
    5778             : #if CONFIG_MOTION_VAR
    5779           0 :   if (mbmi->motion_mode != SIMPLE_TRANSLATION)
    5780           0 :     mvp_full = mbmi->mv[0].as_mv;
    5781             :   else
    5782             : #endif  // CONFIG_MOTION_VAR
    5783           0 :     mvp_full = pred_mv[x->mv_best_ref_index[ref]];
    5784             : 
    5785           0 :   mvp_full.col >>= 3;
    5786           0 :   mvp_full.row >>= 3;
    5787             : 
    5788           0 :   x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
    5789             : 
    5790             : #if CONFIG_MOTION_VAR
    5791           0 :   switch (mbmi->motion_mode) {
    5792             :     case SIMPLE_TRANSLATION:
    5793             : #endif  // CONFIG_MOTION_VAR
    5794           0 :       bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
    5795             :                                       sadpb, cond_cost_list(cpi, cost_list),
    5796             :                                       &ref_mv, INT_MAX, 1);
    5797             : #if CONFIG_MOTION_VAR
    5798           0 :       break;
    5799             :     case OBMC_CAUSAL:
    5800           0 :       bestsme = av1_obmc_full_pixel_diamond(
    5801             :           cpi, x, &mvp_full, step_param, sadpb,
    5802           0 :           MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
    5803             :           &(x->best_mv.as_mv), 0);
    5804           0 :       break;
    5805             :     default: assert("Invalid motion mode!\n");
    5806             :   }
    5807             : #endif  // CONFIG_MOTION_VAR
    5808             : 
    5809           0 :   x->mv_limits = tmp_mv_limits;
    5810             : 
    5811           0 :   if (bestsme < INT_MAX) {
    5812             :     int dis; /* TODO: use dis in distortion calculation later. */
    5813             : #if CONFIG_MOTION_VAR
    5814           0 :     switch (mbmi->motion_mode) {
    5815             :       case SIMPLE_TRANSLATION:
    5816             : #endif  // CONFIG_MOTION_VAR
    5817           0 :         if (cpi->sf.use_upsampled_references) {
    5818             :           int best_mv_var;
    5819           0 :           const int try_second = x->second_best_mv.as_int != INVALID_MV &&
    5820           0 :                                  x->second_best_mv.as_int != x->best_mv.as_int;
    5821           0 :           const int pw = block_size_wide[bsize];
    5822           0 :           const int ph = block_size_high[bsize];
    5823             :           // Use up-sampled reference frames.
    5824           0 :           struct macroblockd_plane *const pd = &xd->plane[0];
    5825           0 :           struct buf_2d backup_pred = pd->pre[ref_idx];
    5826           0 :           const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
    5827             : 
    5828             :           // Set pred for Y plane
    5829           0 :           setup_pred_plane(
    5830             :               &pd->pre[ref_idx], bsize, upsampled_ref->y_buffer,
    5831             :               upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
    5832             :               upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
    5833             :               pd->subsampling_x, pd->subsampling_y);
    5834             : 
    5835           0 :           best_mv_var = cpi->find_fractional_mv_step(
    5836             :               x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
    5837           0 :               &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
    5838             :               cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
    5839             :               x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
    5840             : #if CONFIG_EXT_INTER
    5841             :               NULL, 0, 0,
    5842             : #endif
    5843             :               pw, ph, 1);
    5844             : 
    5845           0 :           if (try_second) {
    5846           0 :             const int minc =
    5847           0 :                 AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
    5848           0 :             const int maxc =
    5849           0 :                 AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
    5850           0 :             const int minr =
    5851           0 :                 AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
    5852           0 :             const int maxr =
    5853           0 :                 AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
    5854             :             int this_var;
    5855           0 :             MV best_mv = x->best_mv.as_mv;
    5856             : 
    5857           0 :             x->best_mv = x->second_best_mv;
    5858           0 :             if (x->best_mv.as_mv.row * 8 <= maxr &&
    5859           0 :                 x->best_mv.as_mv.row * 8 >= minr &&
    5860           0 :                 x->best_mv.as_mv.col * 8 <= maxc &&
    5861           0 :                 x->best_mv.as_mv.col * 8 >= minc) {
    5862           0 :               this_var = cpi->find_fractional_mv_step(
    5863             :                   x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
    5864           0 :                   &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
    5865             :                   cpi->sf.mv.subpel_iters_per_step,
    5866             :                   cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
    5867             :                   &dis, &x->pred_sse[ref], NULL,
    5868             : #if CONFIG_EXT_INTER
    5869             :                   NULL, 0, 0,
    5870             : #endif
    5871             :                   pw, ph, 1);
    5872           0 :               if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
    5873           0 :               x->best_mv.as_mv = best_mv;
    5874             :             }
    5875             :           }
    5876             : 
    5877             :           // Restore the reference frames.
    5878           0 :           pd->pre[ref_idx] = backup_pred;
    5879             :         } else {
    5880           0 :           cpi->find_fractional_mv_step(
    5881             :               x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
    5882           0 :               &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
    5883             :               cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
    5884             :               x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL,
    5885             : #if CONFIG_EXT_INTER
    5886             :               NULL, 0, 0,
    5887             : #endif
    5888             :               0, 0, 0);
    5889             :         }
    5890             : #if CONFIG_MOTION_VAR
    5891           0 :         break;
    5892             :       case OBMC_CAUSAL:
    5893           0 :         av1_find_best_obmc_sub_pixel_tree_up(
    5894             :             cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
    5895           0 :             cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
    5896             :             cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
    5897             :             x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
    5898             :             cpi->sf.use_upsampled_references);
    5899           0 :         break;
    5900             :       default: assert("Invalid motion mode!\n");
    5901             :     }
    5902             : #endif  // CONFIG_MOTION_VAR
    5903             :   }
    5904           0 :   *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
    5905             :                              x->mvcost, MV_COST_WEIGHT);
    5906             : 
    5907             : #if CONFIG_MOTION_VAR
    5908           0 :   if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
    5909             : #else
    5910             :   if (cpi->sf.adaptive_motion_search)
    5911             : #endif  // CONFIG_MOTION_VAR
    5912           0 :     x->pred_mv[ref] = x->best_mv.as_mv;
    5913             : 
    5914           0 :   if (scaled_ref_frame) {
    5915             :     int i;
    5916           0 :     for (i = 0; i < MAX_MB_PLANE; i++)
    5917           0 :       xd->plane[i].pre[ref_idx] = backup_yv12[i];
    5918             :   }
    5919             : }
    5920             : 
    5921           0 : static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) {
    5922             :   int i;
    5923           0 :   for (i = 0; i < MAX_MB_PLANE; i++) {
    5924           0 :     xd->plane[i].dst.buf = dst.plane[i];
    5925           0 :     xd->plane[i].dst.stride = dst.stride[i];
    5926             :   }
    5927           0 : }
    5928             : 
    5929             : #if CONFIG_EXT_INTER
    5930           0 : static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
    5931             :                                     BLOCK_SIZE bsize, const MV *other_mv,
    5932             :                                     int mi_row, int mi_col, const int block,
    5933             :                                     int ref_idx, uint8_t *second_pred) {
    5934           0 :   const AV1_COMMON *const cm = &cpi->common;
    5935           0 :   const int pw = block_size_wide[bsize];
    5936           0 :   const int ph = block_size_high[bsize];
    5937           0 :   MACROBLOCKD *xd = &x->e_mbd;
    5938           0 :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    5939           0 :   const int other_ref = mbmi->ref_frame[!ref_idx];
    5940             : #if CONFIG_DUAL_FILTER
    5941           0 :   InterpFilter interp_filter[2] = {
    5942           0 :     (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0],
    5943           0 :     (ref_idx == 0) ? mbmi->interp_filter[3] : mbmi->interp_filter[1]
    5944             :   };
    5945             : #else
    5946             :   const InterpFilter interp_filter = mbmi->interp_filter;
    5947             : #endif  // CONFIG_DUAL_FILTER
    5948             :   struct scale_factors sf;
    5949             : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5950           0 :   struct macroblockd_plane *const pd = &xd->plane[0];
    5951             :   // ic and ir are the 4x4 coordiantes of the sub8x8 at index "block"
    5952           0 :   const int ic = block & 1;
    5953           0 :   const int ir = (block - ic) >> 1;
    5954           0 :   const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
    5955           0 :   const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
    5956             : #if CONFIG_GLOBAL_MOTION
    5957           0 :   WarpedMotionParams *const wm = &xd->global_motion[other_ref];
    5958           0 :   int is_global = is_global_mv_block(xd->mi[0], block, wm->wmtype);
    5959             : #endif  // CONFIG_GLOBAL_MOTION
    5960             : #else
    5961             :   (void)block;
    5962             : #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5963             : 
    5964             :   // This function should only ever be called for compound modes
    5965           0 :   assert(has_second_ref(mbmi));
    5966             : 
    5967             :   struct buf_2d backup_yv12[MAX_MB_PLANE];
    5968           0 :   const YV12_BUFFER_CONFIG *const scaled_ref_frame =
    5969             :       av1_get_scaled_ref_frame(cpi, other_ref);
    5970             : 
    5971           0 :   if (scaled_ref_frame) {
    5972             :     int i;
    5973             :     // Swap out the reference frame for a version that's been scaled to
    5974             :     // match the resolution of the current frame, allowing the existing
    5975             :     // motion search code to be used without additional modifications.
    5976           0 :     for (i = 0; i < MAX_MB_PLANE; i++)
    5977           0 :       backup_yv12[i] = xd->plane[i].pre[!ref_idx];
    5978           0 :     av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
    5979             :   }
    5980             : 
    5981             : // Since we have scaled the reference frames to match the size of the current
    5982             : // frame we must use a unit scaling factor during mode selection.
    5983             : #if CONFIG_HIGHBITDEPTH
    5984           0 :   av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
    5985             :                                     cm->height, cm->use_highbitdepth);
    5986             : #else
    5987             :   av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
    5988             :                                     cm->height);
    5989             : #endif  // CONFIG_HIGHBITDEPTH
    5990             : 
    5991             :   struct buf_2d ref_yv12;
    5992             : 
    5993           0 :   const int plane = 0;
    5994           0 :   ConvolveParams conv_params = get_conv_params(0, plane);
    5995             : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    5996             :   WarpTypesAllowed warp_types;
    5997             : #if CONFIG_GLOBAL_MOTION
    5998           0 :   warp_types.global_warp_allowed = is_global;
    5999             : #endif  // CONFIG_GLOBAL_MOTION
    6000             : #if CONFIG_WARPED_MOTION
    6001           0 :   warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
    6002             : #endif  // CONFIG_WARPED_MOTION
    6003             : #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    6004             : 
    6005             :   // Initialized here because of compiler problem in Visual Studio.
    6006           0 :   ref_yv12 = xd->plane[plane].pre[!ref_idx];
    6007             : 
    6008             : // Get the prediction block from the 'other' reference frame.
    6009             : #if CONFIG_HIGHBITDEPTH
    6010           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    6011           0 :     av1_highbd_build_inter_predictor(
    6012           0 :         ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
    6013             :         0, interp_filter,
    6014             : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    6015             :         &warp_types, p_col, p_row,
    6016             : #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    6017             :         plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
    6018             :   } else {
    6019             : #endif  // CONFIG_HIGHBITDEPTH
    6020           0 :     av1_build_inter_predictor(
    6021           0 :         ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
    6022             :         &conv_params, interp_filter,
    6023             : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    6024             :         &warp_types, p_col, p_row, plane, !ref_idx,
    6025             : #endif  // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
    6026             :         MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
    6027             : #if CONFIG_HIGHBITDEPTH
    6028             :   }
    6029             : #endif  // CONFIG_HIGHBITDEPTH
    6030             : 
    6031           0 :   if (scaled_ref_frame) {
    6032             :     // Restore the prediction frame pointers to their unscaled versions.
    6033             :     int i;
    6034           0 :     for (i = 0; i < MAX_MB_PLANE; i++)
    6035           0 :       xd->plane[i].pre[!ref_idx] = backup_yv12[i];
    6036             :   }
    6037           0 : }
    6038             : 
    6039             : // Search for the best mv for one component of a compound,
    6040             : // given that the other component is fixed.
    6041           0 : static void compound_single_motion_search(
    6042             :     const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv,
    6043             :     int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask,
    6044             :     int mask_stride, int *rate_mv, const int block, int ref_idx) {
    6045           0 :   const int pw = block_size_wide[bsize];
    6046           0 :   const int ph = block_size_high[bsize];
    6047           0 :   MACROBLOCKD *xd = &x->e_mbd;
    6048           0 :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    6049           0 :   const int ref = mbmi->ref_frame[ref_idx];
    6050           0 :   int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
    6051           0 :   struct macroblockd_plane *const pd = &xd->plane[0];
    6052             : 
    6053             :   struct buf_2d backup_yv12[MAX_MB_PLANE];
    6054           0 :   const YV12_BUFFER_CONFIG *const scaled_ref_frame =
    6055             :       av1_get_scaled_ref_frame(cpi, ref);
    6056             : 
    6057             :   // Check that this is either an interinter or an interintra block
    6058           0 :   assert(has_second_ref(mbmi) ||
    6059             :          (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));
    6060             : 
    6061           0 :   if (scaled_ref_frame) {
    6062             :     int i;
    6063             :     // Swap out the reference frame for a version that's been scaled to
    6064             :     // match the resolution of the current frame, allowing the existing
    6065             :     // motion search code to be used without additional modifications.
    6066           0 :     for (i = 0; i < MAX_MB_PLANE; i++)
    6067           0 :       backup_yv12[i] = xd->plane[i].pre[ref_idx];
    6068           0 :     av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
    6069             :   }
    6070             : 
    6071             :   struct buf_2d orig_yv12;
    6072           0 :   int bestsme = INT_MAX;
    6073           0 :   int sadpb = x->sadperbit16;
    6074           0 :   MV *const best_mv = &x->best_mv.as_mv;
    6075           0 :   int search_range = 3;
    6076             : 
    6077           0 :   MvLimits tmp_mv_limits = x->mv_limits;
    6078             : 
    6079             :   // Initialized here because of compiler problem in Visual Studio.
    6080           0 :   if (ref_idx) {
    6081           0 :     orig_yv12 = pd->pre[0];
    6082           0 :     pd->pre[0] = pd->pre[ref_idx];
    6083             :   }
    6084             : 
    6085             :   // Do compound motion search on the current reference frame.
    6086           0 :   av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);
    6087             : 
    6088             :   // Use the mv result from the single mode as mv predictor.
    6089           0 :   *best_mv = *this_mv;
    6090             : 
    6091           0 :   best_mv->col >>= 3;
    6092           0 :   best_mv->row >>= 3;
    6093             : 
    6094           0 :   av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
    6095             : 
    6096             :   // Small-range full-pixel motion search.
    6097           0 :   bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
    6098           0 :                                      &cpi->fn_ptr[bsize], mask, mask_stride,
    6099             :                                      ref_idx, &ref_mv.as_mv, second_pred);
    6100           0 :   if (bestsme < INT_MAX) {
    6101           0 :     if (mask)
    6102           0 :       bestsme =
    6103           0 :           av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
    6104           0 :                                   mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
    6105             :     else
    6106           0 :       bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
    6107           0 :                                       &cpi->fn_ptr[bsize], 1);
    6108             :   }
    6109             : 
    6110           0 :   x->mv_limits = tmp_mv_limits;
    6111             : 
    6112           0 :   if (bestsme < INT_MAX) {
    6113             :     int dis; /* TODO: use dis in distortion calculation later. */
    6114             :     unsigned int sse;
    6115           0 :     if (cpi->sf.use_upsampled_references) {
    6116             :       // Use up-sampled reference frames.
    6117           0 :       struct buf_2d backup_pred = pd->pre[0];
    6118           0 :       const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
    6119             : 
    6120             :       // Set pred for Y plane
    6121           0 :       setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
    6122             :                        upsampled_ref->y_crop_width,
    6123             :                        upsampled_ref->y_crop_height, upsampled_ref->y_stride,
    6124             :                        (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
    6125             :                        pd->subsampling_y);
    6126             : 
    6127             : // If bsize < BLOCK_8X8, adjust pred pointer for this block
    6128             : #if !CONFIG_CB4X4
    6129             :       if (bsize < BLOCK_8X8)
    6130             :         pd->pre[0].buf =
    6131             :             &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
    6132             :                                                      pd->pre[0].stride))
    6133             :                             << 3];
    6134             : #endif  // !CONFIG_CB4X4
    6135             : 
    6136           0 :       bestsme = cpi->find_fractional_mv_step(
    6137             :           x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
    6138           0 :           &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
    6139             :           x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
    6140             :           mask_stride, ref_idx, pw, ph, 1);
    6141             : 
    6142             :       // Restore the reference frames.
    6143           0 :       pd->pre[0] = backup_pred;
    6144             :     } else {
    6145             :       (void)block;
    6146           0 :       bestsme = cpi->find_fractional_mv_step(
    6147             :           x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
    6148           0 :           &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
    6149             :           x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
    6150             :           mask_stride, ref_idx, pw, ph, 0);
    6151             :     }
    6152             :   }
    6153             : 
    6154             :   // Restore the pointer to the first (possibly scaled) prediction buffer.
    6155           0 :   if (ref_idx) pd->pre[0] = orig_yv12;
    6156             : 
    6157           0 :   if (bestsme < INT_MAX) *this_mv = *best_mv;
    6158             : 
    6159           0 :   *rate_mv = 0;
    6160             : 
    6161           0 :   if (scaled_ref_frame) {
    6162             :     // Restore the prediction frame pointers to their unscaled versions.
    6163             :     int i;
    6164           0 :     for (i = 0; i < MAX_MB_PLANE; i++)
    6165           0 :       xd->plane[i].pre[ref_idx] = backup_yv12[i];
    6166             :   }
    6167             : 
    6168           0 :   av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
    6169           0 :   *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
    6170             :                               x->mvcost, MV_COST_WEIGHT);
    6171           0 : }
    6172             : 
    6173             : // Wrapper for compound_single_motion_search, for the common case
    6174             : // where the second prediction is also an inter mode.
    6175           0 : static void compound_single_motion_search_interinter(
    6176             :     const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
    6177             :     int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
    6178             :     const int block, int ref_idx) {
    6179           0 :   MACROBLOCKD *xd = &x->e_mbd;
    6180           0 :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    6181             : 
    6182             :   // This function should only ever be called for compound modes
    6183           0 :   assert(has_second_ref(mbmi));
    6184             : 
    6185             : // Prediction buffer from second frame.
    6186             : #if CONFIG_HIGHBITDEPTH
    6187             :   DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
    6188             :   uint8_t *second_pred;
    6189           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    6190           0 :     second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
    6191             :   else
    6192           0 :     second_pred = (uint8_t *)second_pred_alloc_16;
    6193             : #else
    6194             :   DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
    6195             : #endif  // CONFIG_HIGHBITDEPTH
    6196             : 
    6197           0 :   MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
    6198           0 :   const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;
    6199             : 
    6200           0 :   build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
    6201             :                           ref_idx, second_pred);
    6202             : 
    6203           0 :   compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
    6204             :                                 second_pred, mask, mask_stride, rate_mv, block,
    6205             :                                 ref_idx);
    6206           0 : }
    6207             : 
    6208             : #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
    6209           0 : static void do_masked_motion_search_indexed(
    6210             :     const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    6211             :     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
    6212             :     int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
    6213             :   // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both
    6214           0 :   MACROBLOCKD *xd = &x->e_mbd;
    6215           0 :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    6216           0 :   BLOCK_SIZE sb_type = mbmi->sb_type;
    6217             :   const uint8_t *mask;
    6218           0 :   const int mask_stride = block_size_wide[bsize];
    6219             : 
    6220           0 :   mask = av1_get_compound_type_mask(comp_data, sb_type);
    6221             : 
    6222             :   int_mv frame_mv[TOTAL_REFS_PER_FRAME];
    6223           0 :   MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
    6224             :   assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4);
    6225             : 
    6226           0 :   frame_mv[rf[0]].as_int = cur_mv[0].as_int;
    6227           0 :   frame_mv[rf[1]].as_int = cur_mv[1].as_int;
    6228           0 :   if (which == 0 || which == 1) {
    6229           0 :     compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
    6230             :                                              mi_col, mask, mask_stride, rate_mv,
    6231             :                                              0, which);
    6232           0 :   } else if (which == 2) {
    6233           0 :     joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
    6234             :                         mask_stride, rate_mv, 0);
    6235             :   }
    6236           0 :   tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
    6237           0 :   tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
    6238           0 : }
    6239             : #endif  // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
    6240             : #endif  // CONFIG_EXT_INTER
    6241             : 
    6242             : // In some situations we want to discount tha pparent cost of a new motion
    6243             : // vector. Where there is a subtle motion field and especially where there is
    6244             : // low spatial complexity then it can be hard to cover the cost of a new motion
    6245             : // vector in a single block, even if that motion vector reduces distortion.
    6246             : // However, once established that vector may be usable through the nearest and
    6247             : // near mv modes to reduce distortion in subsequent blocks and also improve
    6248             : // visual quality.
    6249           0 : static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
    6250             :                                int_mv this_mv,
    6251             :                                int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
    6252             :                                int ref_frame) {
    6253           0 :   return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
    6254           0 :           (this_mv.as_int != 0) &&
    6255           0 :           ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
    6256           0 :            (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
    6257           0 :           ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
    6258           0 :            (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
    6259             : }
    6260             : 
    6261             : #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
    6262             : #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
    6263             : 
    6264             : // TODO(jingning): this mv clamping function should be block size dependent.
    6265           0 : static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
    6266           0 :   clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
    6267           0 :            xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
    6268           0 :            xd->mb_to_top_edge - LEFT_TOP_MARGIN,
    6269           0 :            xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
    6270           0 : }
    6271             : 
    6272             : #if CONFIG_EXT_INTER
    6273             : #if CONFIG_WEDGE
    6274           0 : static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
    6275             :                                const BLOCK_SIZE bsize, const uint8_t *pred0,
    6276             :                                int stride0, const uint8_t *pred1, int stride1) {
    6277           0 :   const struct macroblock_plane *const p = &x->plane[0];
    6278           0 :   const uint8_t *src = p->src.buf;
    6279           0 :   int src_stride = p->src.stride;
    6280           0 :   const int f_index = bsize - BLOCK_8X8;
    6281           0 :   const int bw = block_size_wide[bsize];
    6282           0 :   const int bh = block_size_high[bsize];
    6283             :   uint32_t esq[2][4];
    6284             :   int64_t tl, br;
    6285             : 
    6286             : #if CONFIG_HIGHBITDEPTH
    6287           0 :   if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    6288           0 :     pred0 = CONVERT_TO_BYTEPTR(pred0);
    6289           0 :     pred1 = CONVERT_TO_BYTEPTR(pred1);
    6290             :   }
    6291             : #endif  // CONFIG_HIGHBITDEPTH
    6292             : 
    6293           0 :   cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
    6294           0 :   cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
    6295             :                           &esq[0][1]);
    6296           0 :   cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
    6297           0 :                           pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
    6298           0 :   cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
    6299           0 :                           pred0 + bh / 2 * stride0 + bw / 2, stride0,
    6300             :                           &esq[0][3]);
    6301           0 :   cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
    6302           0 :   cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
    6303             :                           &esq[1][1]);
    6304           0 :   cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
    6305           0 :                           pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
    6306           0 :   cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
    6307           0 :                           pred1 + bh / 2 * stride1 + bw / 2, stride0,
    6308             :                           &esq[1][3]);
    6309             : 
    6310           0 :   tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
    6311           0 :        (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
    6312           0 :   br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
    6313           0 :        (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
    6314           0 :   return (tl + br > 0);
    6315             : }
    6316             : #endif  // CONFIG_WEDGE
    6317             : #endif  // CONFIG_EXT_INTER
    6318             : 
    6319             : #if !CONFIG_DUAL_FILTER
    6320             : static InterpFilter predict_interp_filter(
    6321             :     const AV1_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize,
    6322             :     const int mi_row, const int mi_col,
    6323             :     InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME]) {
    6324             :   InterpFilter best_filter = SWITCHABLE;
    6325             :   const AV1_COMMON *cm = &cpi->common;
    6326             :   const MACROBLOCKD *xd = &x->e_mbd;
    6327             :   int bsl = mi_width_log2_lookup[bsize];
    6328             :   int pred_filter_search =
    6329             :       cpi->sf.cb_pred_filter_search
    6330             :           ? (((mi_row + mi_col) >> bsl) +
    6331             :              get_chessboard_index(cm->current_video_frame)) &
    6332             :                 0x1
    6333             :           : 0;
    6334             :   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
    6335             :   const int is_comp_pred = has_second_ref(mbmi);
    6336             :   const int this_mode = mbmi->mode;
    6337             :   int refs[2] = { mbmi->ref_frame[0],
    6338             :                   (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
    6339             :   if (pred_filter_search) {
    6340             :     InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
    6341             :     if (xd->up_available) af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
    6342             :     if (xd->left_available) lf = xd->mi[-1]->mbmi.interp_filter;
    6343             : 
    6344             : #if CONFIG_EXT_INTER
    6345             :     if ((this_mode != NEWMV && this_mode != NEW_NEWMV) || (af == lf))
    6346             : #else
    6347             :     if ((this_mode != NEWMV) || (af == lf))
    6348             : #endif  // CONFIG_EXT_INTER
    6349             :       best_filter = af;
    6350             :   }
    6351             :   if (is_comp_pred) {
    6352             :     if (cpi->sf.adaptive_mode_search) {
    6353             : #if CONFIG_EXT_INTER
    6354             :       switch (this_mode) {
    6355             :         case NEAREST_NEARESTMV:
    6356             :           if (single_filter[NEARESTMV][refs[0]] ==
    6357             :               single_filter[NEARESTMV][refs[1]])
    6358             :             best_filter = single_filter[NEARESTMV][refs[0]];
    6359             :           break;
    6360             :         case NEAR_NEARMV:
    6361             :           if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]])
    6362             :             best_filter = single_filter[NEARMV][refs[0]];
    6363             :           break;
    6364             :         case ZERO_ZEROMV:
    6365             :           if (single_filter[ZEROMV][refs[0]] == single_filter[ZEROMV][refs[1]])
    6366             :             best_filter = single_filter[ZEROMV][refs[0]];
    6367             :           break;
    6368             :         case NEW_NEWMV:
    6369             :           if (single_filter[NEWMV][refs[0]] == single_filter[NEWMV][refs[1]])
    6370             :             best_filter = single_filter[NEWMV][refs[0]];
    6371             :           break;
    6372             :         case NEAREST_NEWMV:
    6373             :           if (single_filter[NEARESTMV][refs[0]] ==
    6374             :               single_filter[NEWMV][refs[1]])
    6375             :             best_filter = single_filter[NEARESTMV][refs[0]];
    6376             :           break;
    6377             :         case NEAR_NEWMV:
    6378             :           if (single_filter[NEARMV][refs[0]] == single_filter[NEWMV][refs[1]])
    6379             :             best_filter = single_filter[NEARMV][refs[0]];
    6380             :           break;
    6381             :         case NEW_NEARESTMV:
    6382             :           if (single_filter[NEWMV][refs[0]] ==
    6383             :               single_filter[NEARESTMV][refs[1]])
    6384             :             best_filter = single_filter[NEWMV][refs[0]];
    6385             :           break;
    6386             :         case NEW_NEARMV:
    6387             :           if (single_filter[NEWMV][refs[0]] == single_filter[NEARMV][refs[1]])
    6388             :             best_filter = single_filter[NEWMV][refs[0]];
    6389             :           break;
    6390             :         default:
    6391             :           if (single_filter[this_mode][refs[0]] ==
    6392             :               single_filter[this_mode][refs[1]])
    6393             :             best_filter = single_filter[this_mode][refs[0]];
    6394             :           break;
    6395             :       }
    6396             : #else
    6397             :       if (single_filter[this_mode][refs[0]] ==
    6398             :           single_filter[this_mode][refs[1]])
    6399             :         best_filter = single_filter[this_mode][refs[0]];
    6400             : #endif  // CONFIG_EXT_INTER
    6401             :     }
    6402             :   }
    6403             :   if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
    6404             :     best_filter = EIGHTTAP_REGULAR;
    6405             :   }
    6406             :   return best_filter;
    6407             : }
    6408             : #endif  // !CONFIG_DUAL_FILTER
    6409             : 
    6410             : #if CONFIG_EXT_INTER
    6411             : // Choose the best wedge index and sign
    6412             : #if CONFIG_WEDGE
    6413           0 : static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
    6414             :                           const BLOCK_SIZE bsize, const uint8_t *const p0,
    6415             :                           const uint8_t *const p1, int *const best_wedge_sign,
    6416             :                           int *const best_wedge_index) {
                         // Exhaustive wedge search: for every wedge index available for bsize,
                         // derive a sign from the residuals, compute the masked SSE, turn it into
                         // a modelled rate/distortion pair and keep the candidate with the lowest
                         // RDCOST.
                         //   p0, p1           - the two predictor blocks; both are handed to the
                         //                      subtract helpers with bw as the accompanying
                         //                      stride argument.
                         //   best_wedge_sign  - out: sign of the winning candidate.
                         //   best_wedge_index - out: index of the winning candidate.
                         // Returns the best modelled RD cost. The outputs are written only when a
                         // candidate beats the running best, which starts at INT64_MAX.
    6417           0 :   const MACROBLOCKD *const xd = &x->e_mbd;
    6418           0 :   const struct buf_2d *const src = &x->plane[0].src;
    6419           0 :   const int bw = block_size_wide[bsize];
    6420           0 :   const int bh = block_size_high[bsize];
    6421           0 :   const int N = bw * bh;
    6422             :   int rate;
    6423             :   int64_t dist;
    6424           0 :   int64_t rd, best_rd = INT64_MAX;
    6425             :   int wedge_index;
    6426             :   int wedge_sign;
    6427           0 :   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    6428             :   const uint8_t *mask;
    6429             :   uint64_t sse;
                           // bd_round is the right-shift applied to every SSE below; it is non-zero
                           // only for high-bitdepth buffers (presumably to bring the SSE back to an
                           // 8-bit scale - TODO confirm).
    6430             : #if CONFIG_HIGHBITDEPTH
    6431           0 :   const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
    6432           0 :   const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
    6433             : #else
    6434             :   const int bd_round = 0;
    6435             : #endif  // CONFIG_HIGHBITDEPTH
    6436             : 
                           // r0 / r1: difference blocks of the source against p0 / p1.
                           // d10: difference block of p1 against p0.
                           // ds: filled by av1_wedge_compute_delta_squares from r0 and r1.
                           // (Sign convention of the differences is that of aom_subtract_block.)
    6437             :   DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
    6438             :   DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
    6439             :   DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
    6440             :   DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
    6441             : 
    6442             :   int64_t sign_limit;
    6443             : 
    6444             : #if CONFIG_HIGHBITDEPTH
    6445           0 :   if (hbd) {
    6446           0 :     aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
    6447           0 :                               CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    6448           0 :     aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
    6449           0 :                               CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    6450           0 :     aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
    6451           0 :                               CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    6452             :   } else  // NOLINT
    6453             : #endif    // CONFIG_HIGHBITDEPTH
    6454             :   {
    6455           0 :     aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    6456           0 :     aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    6457           0 :     aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
    6458             :   }
    6459             : 
                           // Threshold handed to the sign estimator: difference of the two residual
                           // energies, scaled by half of (1 << WEDGE_WEIGHT_BITS).
    6460           0 :   sign_limit = ((int64_t)aom_sum_squares_i16(r0, N) -
    6461           0 :                 (int64_t)aom_sum_squares_i16(r1, N)) *
    6462             :                (1 << WEDGE_WEIGHT_BITS) / 2;
    6463             : 
                           // Blocks with fewer than 64 samples always use the plain C kernels (see
                           // the TODO inside the loop about the sse2 versions).
    6464           0 :   if (N < 64)
    6465           0 :     av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
    6466             :   else
    6467           0 :     av1_wedge_compute_delta_squares(ds, r0, r1, N);
    6468             : 
    6469           0 :   for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
                             // The sign is estimated against the sign-0 mask of this wedge.
    6470           0 :     mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);
    6471             : 
    6472             :     // TODO(jingning): Make sse2 functions support N = 16 case
    6473           0 :     if (N < 64)
    6474           0 :       wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
    6475             :     else
    6476           0 :       wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
    6477             : 
                             // Re-fetch the mask for the sign just chosen and measure its SSE.
    6478           0 :     mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    6479           0 :     if (N < 64)
    6480           0 :       sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    6481             :     else
    6482           0 :       sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    6483           0 :     sse = ROUND_POWER_OF_TWO(sse, bd_round);
    6484             : 
    6485           0 :     model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    6486           0 :     rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
    6487             : 
    6488           0 :     if (rd < best_rd) {
    6489           0 :       *best_wedge_index = wedge_index;
    6490           0 :       *best_wedge_sign = wedge_sign;
    6491           0 :       best_rd = rd;
    6492             :     }
    6493             :   }
    6494             : 
    6495           0 :   return best_rd;
    6496             : }
    6497             : 
    6498             : // Choose the best wedge index for the specified sign
    6499           0 : static int64_t pick_wedge_fixed_sign(
    6500             :     const AV1_COMP *const cpi, const MACROBLOCK *const x,
    6501             :     const BLOCK_SIZE bsize, const uint8_t *const p0, const uint8_t *const p1,
    6502             :     const int wedge_sign, int *const best_wedge_index) {
                         // Wedge search with the sign supplied by the caller: evaluates every wedge
                         // index available for bsize using wedge_sign and keeps the one with the
                         // lowest modelled RDCOST.
                         //   p0, p1           - the two predictor blocks, used with bw as stride.
                         //   wedge_sign       - sign applied to every candidate mask.
                         //   best_wedge_index - out: index of the winning candidate; written only
                         //                      when a candidate beats the initial INT64_MAX.
                         // Returns the best modelled RD cost.
    6503           0 :   const MACROBLOCKD *const xd = &x->e_mbd;
    6504           0 :   const struct buf_2d *const src = &x->plane[0].src;
    6505           0 :   const int bw = block_size_wide[bsize];
    6506           0 :   const int bh = block_size_high[bsize];
    6507           0 :   const int N = bw * bh;
    6508             :   int rate;
    6509             :   int64_t dist;
    6510           0 :   int64_t rd, best_rd = INT64_MAX;
    6511             :   int wedge_index;
    6512           0 :   int wedge_types = (1 << get_wedge_bits_lookup(bsize));
    6513             :   const uint8_t *mask;
    6514             :   uint64_t sse;
                           // bd_round: right-shift applied to the SSE; non-zero only for
                           // high-bitdepth buffers.
    6515             : #if CONFIG_HIGHBITDEPTH
    6516           0 :   const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
    6517           0 :   const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
    6518             : #else
    6519             :   const int bd_round = 0;
    6520             : #endif  // CONFIG_HIGHBITDEPTH
    6521             : 
                           // r1: difference block of the source against p1.
                           // d10: difference block of p1 against p0.
    6522             :   DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
    6523             :   DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
    6524             : 
    6525             : #if CONFIG_HIGHBITDEPTH
    6526           0 :   if (hbd) {
    6527           0 :     aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
    6528           0 :                               CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    6529           0 :     aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
    6530           0 :                               CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    6531             :   } else  // NOLINT
    6532             : #endif    // CONFIG_HIGHBITDEPTH
    6533             :   {
    6534           0 :     aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    6535           0 :     aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
    6536             :   }
    6537             : 
    6538           0 :   for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    6539           0 :     mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
                             // Blocks with fewer than 64 samples use the plain C kernel.
    6540           0 :     if (N < 64)
    6541           0 :       sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    6542             :     else
    6543           0 :       sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    6544           0 :     sse = ROUND_POWER_OF_TWO(sse, bd_round);
    6545             : 
    6546           0 :     model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    6547           0 :     rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
    6548             : 
    6549           0 :     if (rd < best_rd) {
    6550           0 :       *best_wedge_index = wedge_index;
    6551           0 :       best_rd = rd;
    6552             :     }
    6553             :   }
    6554             : 
    6555           0 :   return best_rd;
    6556             : }
    6557             : 
    6558           0 : static int64_t pick_interinter_wedge(const AV1_COMP *const cpi,
    6559             :                                      MACROBLOCK *const x,
    6560             :                                      const BLOCK_SIZE bsize,
    6561             :                                      const uint8_t *const p0,
    6562             :                                      const uint8_t *const p1) {
                         // Selects the wedge index and sign for an inter-inter wedge compound and
                         // stores them in the current block's mode info (mbmi->wedge_index,
                         // mbmi->wedge_sign). Returns the modelled RD cost reported by the search
                         // helper that was used.
    6563           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    6564           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    6565           0 :   const int bw = block_size_wide[bsize];
    6566             : 
    6567             :   int64_t rd;
                           // -1 is what ends up in mbmi->wedge_index if the search helper never
                           // writes its output.
    6568           0 :   int wedge_index = -1;
    6569           0 :   int wedge_sign = 0;
    6570             : 
    6571           0 :   assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
    6572           0 :   assert(cpi->common.allow_masked_compound);
    6573             : 
                           // With the speed feature on, the sign is estimated once up front and
                           // only the index is searched; otherwise index and sign are searched
                           // together.
    6574           0 :   if (cpi->sf.fast_wedge_sign_estimate) {
    6575           0 :     wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
    6576           0 :     rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, wedge_sign, &wedge_index);
    6577             :   } else {
    6578           0 :     rd = pick_wedge(cpi, x, bsize, p0, p1, &wedge_sign, &wedge_index);
    6579             :   }
    6580             : 
    6581           0 :   mbmi->wedge_sign = wedge_sign;
    6582           0 :   mbmi->wedge_index = wedge_index;
    6583           0 :   return rd;
    6584             : }
    6585             : #endif  // CONFIG_WEDGE
    6586             : 
    6587             : #if CONFIG_COMPOUND_SEGMENT
    6588           0 : static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
    6589             :                                    MACROBLOCK *const x, const BLOCK_SIZE bsize,
    6590             :                                    const uint8_t *const p0,
    6591             :                                    const uint8_t *const p1) {
                         // Selects the segmentation mask type for a COMPOUND_SEG block: builds the
                         // mask for every SEG_MASK_TYPE into xd->seg_mask, scores it with a
                         // modelled RDCOST, records the winner in mbmi->mask_type and finally
                         // rebuilds xd->seg_mask for that winner.
                         //   p0, p1 - the two predictor blocks, used with bw as stride.
                         // Returns the best modelled RD cost.
    6592           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    6593           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    6594           0 :   const struct buf_2d *const src = &x->plane[0].src;
    6595           0 :   const int bw = block_size_wide[bsize];
    6596           0 :   const int bh = block_size_high[bsize];
    6597           0 :   const int N = bw * bh;
    6598             :   int rate;
    6599             :   uint64_t sse;
    6600             :   int64_t dist;
    6601             :   int64_t rd0;
    6602             :   SEG_MASK_TYPE cur_mask_type;
    6603           0 :   int64_t best_rd = INT64_MAX;
    6604           0 :   SEG_MASK_TYPE best_mask_type = 0;
                           // bd_round: right-shift applied to the SSE; non-zero only for
                           // high-bitdepth buffers.
    6605             : #if CONFIG_HIGHBITDEPTH
    6606           0 :   const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
    6607           0 :   const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
    6608             : #else
    6609             :   const int bd_round = 0;
    6610             : #endif  // CONFIG_HIGHBITDEPTH
                           // NOTE(review): r0 is filled below but never read afterwards in this
                           // function - confirm whether it can be dropped.
    6611             :   DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
    6612             :   DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
    6613             :   DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
    6614             : 
    6615             : #if CONFIG_HIGHBITDEPTH
    6616           0 :   if (hbd) {
    6617           0 :     aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
    6618           0 :                               CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    6619           0 :     aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
    6620           0 :                               CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    6621           0 :     aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
    6622           0 :                               CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    6623             :   } else  // NOLINT
    6624             : #endif    // CONFIG_HIGHBITDEPTH
    6625             :   {
    6626           0 :     aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    6627           0 :     aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    6628           0 :     aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
    6629             :   }
    6630             : 
    6631             :   // try each mask type and its inverse
    6632           0 :   for (cur_mask_type = 0; cur_mask_type < SEG_MASK_TYPES; cur_mask_type++) {
    6633             : // build mask and inverse
    6634             : #if CONFIG_HIGHBITDEPTH
    6635           0 :     if (hbd)
    6636           0 :       build_compound_seg_mask_highbd(
    6637           0 :           xd->seg_mask, cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
    6638           0 :           CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
    6639             :     else
    6640             : #endif  // CONFIG_HIGHBITDEPTH
    6641           0 :       build_compound_seg_mask(xd->seg_mask, cur_mask_type, p0, bw, p1, bw,
    6642             :                               bsize, bh, bw);
    6643             : 
    6644             :     // compute rd for mask
                             // NOTE(review): unlike pick_wedge, there is no N < 64 fallback to the
                             // _c kernel here - confirm this path is never reached with such blocks.
    6645           0 :     sse = av1_wedge_sse_from_residuals(r1, d10, xd->seg_mask, N);
    6646           0 :     sse = ROUND_POWER_OF_TWO(sse, bd_round);
    6647             : 
    6648           0 :     model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    6649           0 :     rd0 = RDCOST(x->rdmult, x->rddiv, rate, dist);
    6650             : 
    6651           0 :     if (rd0 < best_rd) {
    6652           0 :       best_mask_type = cur_mask_type;
    6653           0 :       best_rd = rd0;
    6654             :     }
    6655             :   }
    6656             : 
    6657             :   // make final mask
                           // The loop above reuses xd->seg_mask for every candidate, so the mask is
                           // built once more for the type that won.
    6658           0 :   mbmi->mask_type = best_mask_type;
    6659             : #if CONFIG_HIGHBITDEPTH
    6660           0 :   if (hbd)
    6661           0 :     build_compound_seg_mask_highbd(
    6662           0 :         xd->seg_mask, mbmi->mask_type, CONVERT_TO_BYTEPTR(p0), bw,
    6663           0 :         CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
    6664             :   else
    6665             : #endif  // CONFIG_HIGHBITDEPTH
    6666           0 :     build_compound_seg_mask(xd->seg_mask, mbmi->mask_type, p0, bw, p1, bw,
    6667             :                             bsize, bh, bw);
    6668             : 
    6669           0 :   return best_rd;
    6670             : }
    6671             : #endif  // CONFIG_COMPOUND_SEGMENT
    6672             : 
    6673             : #if CONFIG_WEDGE && CONFIG_INTERINTRA
    6674           0 : static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
    6675             :                                      const MACROBLOCK *const x,
    6676             :                                      const BLOCK_SIZE bsize,
    6677             :                                      const uint8_t *const p0,
    6678             :                                      const uint8_t *const p1) {
                         // Selects the wedge index for an inter-intra wedge block. The sign is
                         // fixed to 0, so only the index is searched. The result is stored in
                         // mbmi->interintra_wedge_index / mbmi->interintra_wedge_sign and the
                         // modelled RD cost from pick_wedge_fixed_sign is returned.
    6679           0 :   const MACROBLOCKD *const xd = &x->e_mbd;
                           // xd is const, but the mode info reached through xd->mi[0] is written
                           // below.
    6680           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    6681             : 
    6682             :   int64_t rd;
    6683           0 :   int wedge_index = -1;
    6684             : 
    6685           0 :   assert(is_interintra_wedge_used(bsize));
    6686           0 :   assert(cpi->common.allow_interintra_compound);
    6687             : 
    6688           0 :   rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index);
    6689             : 
    6690           0 :   mbmi->interintra_wedge_sign = 0;
    6691           0 :   mbmi->interintra_wedge_index = wedge_index;
    6692           0 :   return rd;
    6693             : }
    6694             : #endif  // CONFIG_WEDGE && CONFIG_INTERINTRA
    6695             : 
    6696             : #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
    6697           0 : static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
    6698             :                                     const BLOCK_SIZE bsize,
    6699             :                                     const uint8_t *const p0,
    6700             :                                     const uint8_t *const p1) {
                         // Dispatches to the mask search that matches the current block's
                         // interinter_compound_type and returns that search's RD cost. Any type
                         // without a case compiled in hits assert(0) and, when asserts are
                         // disabled, returns 0.
    6701           0 :   const COMPOUND_TYPE compound_type =
    6702           0 :       x->e_mbd.mi[0]->mbmi.interinter_compound_type;
    6703           0 :   switch (compound_type) {
    6704             : #if CONFIG_WEDGE
    6705           0 :     case COMPOUND_WEDGE: return pick_interinter_wedge(cpi, x, bsize, p0, p1);
    6706             : #endif  // CONFIG_WEDGE
    6707             : #if CONFIG_COMPOUND_SEGMENT
    6708           0 :     case COMPOUND_SEG: return pick_interinter_seg(cpi, x, bsize, p0, p1);
    6709             : #endif  // CONFIG_COMPOUND_SEGMENT
    6710           0 :     default: assert(0); return 0;
    6711             :   }
    6712             : }
    6713             : 
    6714           0 : static int interinter_compound_motion_search(
    6715             :     const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    6716             :     const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) {
                         // Runs the masked motion search for whichever motion vector(s) of the
                         // compound mode are "new" and writes the refined vector(s) into mbmi->mv.
                         // Returns the rate reported by the search through tmp_rate_mv, or 0 when
                         // this_mode matches none of the branches below.
    6717           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    6718           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    6719             :   int_mv tmp_mv[2];
    6720           0 :   int tmp_rate_mv = 0;
                           // Snapshot of the block's current mask parameters. The initializer is
                           // positional, so which fields are present depends on the CONFIG_ macros.
    6721           0 :   const INTERINTER_COMPOUND_DATA compound_data = {
    6722             : #if CONFIG_WEDGE
    6723           0 :     mbmi->wedge_index,
    6724           0 :     mbmi->wedge_sign,
    6725             : #endif  // CONFIG_WEDGE
    6726             : #if CONFIG_COMPOUND_SEGMENT
    6727           0 :     mbmi->mask_type,
    6728           0 :     xd->seg_mask,
    6729             : #endif  // CONFIG_COMPOUND_SEGMENT
    6730           0 :     mbmi->interinter_compound_type
    6731             :   };
                           // The last argument of do_masked_motion_search_indexed is 2, 0 or 1; the
                           // branches then copy back both vectors, only mv[0], or only mv[1].
    6732           0 :   if (this_mode == NEW_NEWMV) {
    6733           0 :     do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
    6734             :                                     mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
    6735           0 :     mbmi->mv[0].as_int = tmp_mv[0].as_int;
    6736           0 :     mbmi->mv[1].as_int = tmp_mv[1].as_int;
    6737           0 :   } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
    6738           0 :     do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
    6739             :                                     mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
    6740           0 :     mbmi->mv[0].as_int = tmp_mv[0].as_int;
    6741           0 :   } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
    6742           0 :     do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
    6743             :                                     mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
    6744           0 :     mbmi->mv[1].as_int = tmp_mv[1].as_int;
    6745             :   }
    6746           0 :   return tmp_rate_mv;
    6747             : }
    6748             : 
    6749           0 : static int64_t build_and_cost_compound_type(
    6750             :     const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    6751             :     const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv,
    6752             :     BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1,
    6753             :     int *strides, int mi_row, int mi_col) {
                         // Picks the mask for the block's current interinter_compound_type, builds
                         // the masked predictor and returns its estimated luma RD cost.
                         //   cur_mv         - motion vectors in effect before any masked search;
                         //                    restored into mbmi->mv if the search does not help.
                         //   rs2            - rate term added to every cost computed here.
                         //   rate_mv        - motion vector rate that goes with cur_mv.
                         //   out_rate_mv    - out: written only on the masked-search path; holds
                         //                    the searched rate, or rate_mv if the search result
                         //                    was rejected.
                         //   preds0, preds1 - per-reference predictor buffers; entry 0 of each is
                         //                    used for the mask search.
                         //   strides        - strides passed along with both preds0 and preds1.
                         // Returns the RDCOST built from estimate_yrd_for_sb, or INT64_MAX when
                         // that estimate itself returns INT64_MAX.
    6754           0 :   const AV1_COMMON *const cm = &cpi->common;
    6755           0 :   MACROBLOCKD *xd = &x->e_mbd;
    6756           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    6757             :   int rate_sum;
    6758             :   int64_t dist_sum;
    6759           0 :   int64_t best_rd_cur = INT64_MAX;
    6760           0 :   int64_t rd = INT64_MAX;
    6761             :   int tmp_skip_txfm_sb;
    6762             :   int64_t tmp_skip_sse_sb;
    6763           0 :   const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;
    6764             : 
                           // Modelled cost of the best mask with the unrefined vectors. It is only
                           // used as the bar the masked motion search has to beat; both branches
                           // below overwrite best_rd_cur before returning.
    6765           0 :   best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
    6766           0 :   best_rd_cur += RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv, 0);
    6767             : 
    6768           0 :   if (have_newmv_in_inter_mode(this_mode) &&
    6769           0 :       use_masked_motion_search(compound_type)) {
                             // Refine the new motion vector(s) under the mask, rebuild the luma
                             // predictor and model its cost.
    6770           0 :     *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
    6771             :                                                      this_mode, mi_row, mi_col);
    6772           0 :     av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
    6773           0 :     model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
    6774             :                     &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
    6775           0 :     rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
                             // The search did not improve on the modelled cost: fall back to the
                             // original vectors and rebuild the predictor from the supplied buffers.
    6776           0 :     if (rd >= best_rd_cur) {
    6777           0 :       mbmi->mv[0].as_int = cur_mv[0].as_int;
    6778           0 :       mbmi->mv[1].as_int = cur_mv[1].as_int;
    6779           0 :       *out_rate_mv = rate_mv;
    6780           0 :       av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
    6781             : #if CONFIG_SUPERTX
    6782             :                                                0, 0,
    6783             : #endif  // CONFIG_SUPERTX
    6784             :                                                preds0, strides, preds1,
    6785             :                                                strides);
    6786             :     }
                             // Replace the modelled cost with the luma RD estimate of whichever
                             // predictor is now in place.
    6787           0 :     av1_subtract_plane(x, bsize, 0);
    6788           0 :     rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
    6789             :                              &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    6790           0 :     if (rd != INT64_MAX)
    6791           0 :       rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum);
    6792           0 :     best_rd_cur = rd;
    6793             : 
    6794             :   } else {
                             // No masked motion search for this mode/type: build the predictor
                             // from the supplied buffers and estimate its luma RD cost with rate_mv.
    6795           0 :     av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
    6796             : #if CONFIG_SUPERTX
    6797             :                                              0, 0,
    6798             : #endif  // CONFIG_SUPERTX
    6799             :                                              preds0, strides, preds1, strides);
    6800           0 :     av1_subtract_plane(x, bsize, 0);
    6801           0 :     rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
    6802             :                              &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    6803           0 :     if (rd != INT64_MAX)
    6804           0 :       rd = RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
    6805           0 :     best_rd_cur = rd;
    6806             :   }
    6807           0 :   return best_rd_cur;
    6808             : }
    6809             : #endif  // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
    6810             : #endif  // CONFIG_EXT_INTER
    6811             : 
    6812             : typedef struct {
                         // Bundle of buffers and per-reference search results handed to the
                         // inter-mode helpers (handle_newmv receives it as `args`). Which members
                         // exist depends on CONFIG_MOTION_VAR and CONFIG_EXT_INTER.
    6813             : #if CONFIG_MOTION_VAR
    6814             :   // Inter prediction buffers and respective strides
    6815             :   uint8_t *above_pred_buf[MAX_MB_PLANE];
    6816             :   int above_pred_stride[MAX_MB_PLANE];
    6817             :   uint8_t *left_pred_buf[MAX_MB_PLANE];
    6818             :   int left_pred_stride[MAX_MB_PLANE];
    6819             : #endif  // CONFIG_MOTION_VAR
                           // Motion vectors from single-reference NEWMV search; handle_newmv indexes
                           // this by reference frame.
    6820             :   int_mv *single_newmv;
    6821             : #if CONFIG_EXT_INTER
    6822             :   // Pointer to array of motion vectors to use for each ref and their rates
    6823             :   // Should point to first of 2 arrays in 2D array
    6824             :   int *single_newmv_rate;
    6825             :   // Pointer to array of predicted rate-distortion
    6826             :   // Should point to first of 2 arrays in 2D array
    6827             :   int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
    6828             : #endif  // CONFIG_EXT_INTER
                           // Interpolation filter table with one entry per (prediction mode,
                           // reference frame) pair.
    6829             :   InterpFilter single_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
    6830             : } HandleInterModeArgs;
    6831             : 
    6832           0 : static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
    6833             :                             const BLOCK_SIZE bsize,
    6834             :                             int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
    6835             :                             const int mi_row, const int mi_col,
    6836             :                             int *const rate_mv, int_mv *const single_newmv,
    6837             :                             HandleInterModeArgs *const args) {
    6838           0 :   const MACROBLOCKD *const xd = &x->e_mbd;
    6839           0 :   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    6840           0 :   const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
    6841           0 :   const int is_comp_pred = has_second_ref(mbmi);
    6842           0 :   const PREDICTION_MODE this_mode = mbmi->mode;
    6843             : #if CONFIG_EXT_INTER
    6844           0 :   const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
    6845             : #endif  // CONFIG_EXT_INTER
    6846           0 :   int_mv *const frame_mv = mode_mv[this_mode];
    6847           0 :   const int refs[2] = { mbmi->ref_frame[0],
    6848           0 :                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
    6849             :   int i;
    6850             : 
    6851             :   (void)args;
    6852             : 
    6853           0 :   if (is_comp_pred) {
    6854             : #if CONFIG_EXT_INTER
    6855           0 :     for (i = 0; i < 2; ++i) {
    6856           0 :       single_newmv[refs[i]].as_int = args->single_newmv[refs[i]].as_int;
    6857             :     }
    6858             : 
    6859           0 :     if (this_mode == NEW_NEWMV) {
    6860           0 :       frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
    6861           0 :       frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
    6862             : 
    6863           0 :       if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
    6864           0 :         joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
    6865             :                             0, rate_mv, 0);
    6866             :       } else {
    6867           0 :         *rate_mv = 0;
    6868           0 :         for (i = 0; i < 2; ++i) {
    6869           0 :           av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
    6870           0 :           *rate_mv += av1_mv_bit_cost(
    6871           0 :               &frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
    6872           0 :               x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
    6873             :         }
    6874             :       }
    6875           0 :     } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
    6876           0 :       frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
    6877           0 :       if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
    6878           0 :         frame_mv[refs[0]].as_int =
    6879           0 :             mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
    6880           0 :         compound_single_motion_search_interinter(
    6881             :             cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
    6882             :       } else {
    6883           0 :         av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
    6884           0 :         *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
    6885           0 :                                    &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
    6886           0 :                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
    6887             :       }
    6888             :     } else {
    6889           0 :       assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
    6890           0 :       frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
    6891           0 :       if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
    6892           0 :         frame_mv[refs[1]].as_int =
    6893           0 :             mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
    6894           0 :         compound_single_motion_search_interinter(
    6895             :             cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
    6896             :       } else {
    6897           0 :         av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
    6898           0 :         *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
    6899           0 :                                    &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
    6900           0 :                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
    6901             :       }
    6902             :     }
    6903             : #else
    6904             :     // Initialize mv using single prediction mode result.
    6905             :     frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
    6906             :     frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
    6907             : 
    6908             :     if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
    6909             :       joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, rate_mv, 0);
    6910             :     } else {
    6911             :       *rate_mv = 0;
    6912             :       for (i = 0; i < 2; ++i) {
    6913             :         av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
    6914             :         *rate_mv += av1_mv_bit_cost(&frame_mv[refs[i]].as_mv,
    6915             :                                     &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
    6916             :                                     x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
    6917             :       }
    6918             :     }
    6919             : #endif  // CONFIG_EXT_INTER
    6920             :   } else {
    6921             : #if CONFIG_EXT_INTER
    6922           0 :     if (is_comp_interintra_pred) {
    6923           0 :       x->best_mv = args->single_newmv[refs[0]];
    6924           0 :       *rate_mv = args->single_newmv_rate[refs[0]];
    6925             :     } else {
    6926           0 :       single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
    6927           0 :       args->single_newmv[refs[0]] = x->best_mv;
    6928           0 :       args->single_newmv_rate[refs[0]] = *rate_mv;
    6929             :     }
    6930             : #else
    6931             :     single_motion_search(cpi, x, bsize, mi_row, mi_col, rate_mv);
    6932             :     single_newmv[refs[0]] = x->best_mv;
    6933             : #endif  // CONFIG_EXT_INTER
    6934             : 
    6935           0 :     if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;
    6936             : 
    6937           0 :     frame_mv[refs[0]] = x->best_mv;
    6938           0 :     xd->mi[0]->bmi[0].as_mv[0] = x->best_mv;
    6939             : 
    6940             :     // Estimate the rate implications of a new mv but discount this
    6941             :     // under certain circumstances where we want to help initiate a weak
    6942             :     // motion field, where the distortion gain for a single block may not
    6943             :     // be enough to overcome the cost of a new mv.
    6944           0 :     if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0])) {
    6945           0 :       *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
    6946             :     }
    6947             :   }
    6948             : 
    6949           0 :   return 0;
    6950             : }
    6951             : // Selects the interpolation filter(s) for the current block, stores them in mbmi->interp_filter, and reports the winner's modeled RD cost through the output pointers. Always returns 0.
    6952           0 : int64_t interpolation_filter_search(
    6953             :     MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    6954             :     int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
    6955             :     BUFFER_SET *const orig_dst,
    6956             :     InterpFilter (*const single_filter)[TOTAL_REFS_PER_FRAME],
    6957             :     int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
    6958             :     int64_t *const skip_sse_sb) {
    6959           0 :   const AV1_COMMON *cm = &cpi->common;
    6960           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    6961           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    6962             :   int i;
    6963             :   int tmp_rate;
    6964             :   int64_t tmp_dist;
    6965             :   // single_filter is only read in the !CONFIG_DUAL_FILTER build below; the cast keeps the other build free of unused-parameter warnings.
    6966             :   (void)single_filter;
    6967             : 
    6968           0 :   InterpFilter assign_filter = SWITCHABLE;
    6969             :   // Choose the starting filter: a fixed frame-level filter is used as-is; with a switchable frame filter, assign_filter stays SWITCHABLE unless the !CONFIG_DUAL_FILTER path replaces it with the result of predict_interp_filter().
    6970           0 :   if (cm->interp_filter == SWITCHABLE) {
    6971             : #if !CONFIG_DUAL_FILTER
    6972             :     assign_filter = av1_is_interp_needed(xd)
    6973             :                         ? predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
    6974             :                                                 single_filter)
    6975             :                         : cm->interp_filter;
    6976             : #endif  // !CONFIG_DUAL_FILTER
    6977             :   } else {
    6978           0 :     assign_filter = cm->interp_filter;
    6979             :   }
    6980             :   // Seed mbmi's filter fields from assign_filter (helper not visible here; the asserts at the end of this function expect EIGHTTAP_REGULAR when assign_filter is SWITCHABLE and no search runs).
    6981           0 :   set_default_interp_filters(mbmi, assign_filter);
    6982             :   // Baseline: rate from av1_get_switchable_rate() plus the modeled rate/distortion of the prediction built with the seeded filter(s).
    6983           0 :   *switchable_rate = av1_get_switchable_rate(cpi, xd);
    6984           0 :   av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
    6985           0 :   model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
    6986             :                   skip_txfm_sb, skip_sse_sb);
    6987           0 :   *rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist);
    6988             :   // Only a SWITCHABLE assignment leaves anything to search; otherwise the baseline above is the final answer.
    6989           0 :   if (assign_filter == SWITCHABLE) {
    6990             :     // Try the remaining filter candidates, but only when both helper checks report that a search is needed.
    6991           0 :     if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd)) {
    6992             : #if CONFIG_DUAL_FILTER
    6993           0 :       const int filter_set_size = DUAL_FILTER_SET_SIZE;
    6994             : #else
    6995             :       const int filter_set_size = SWITCHABLE_FILTERS;
    6996             : #endif  // CONFIG_DUAL_FILTER
    6997           0 :       int best_in_temp = 0;  // toggled on every improvement; selects which buffer is restored below
    6998             : #if CONFIG_DUAL_FILTER
    6999             :       InterpFilter best_filter[4];
    7000           0 :       av1_copy(best_filter, mbmi->interp_filter);
    7001             : #else
    7002             :       InterpFilter best_filter = mbmi->interp_filter;
    7003             : #endif  // CONFIG_DUAL_FILTER
    7004           0 :       restore_dst_buf(xd, *tmp_dst);  // NOTE(review): presumably redirects xd's destination to tmp_dst so candidates do not overwrite the baseline prediction - confirm
    7005             :       // Candidate 0 (EIGHTTAP_REGULAR) was already evaluated as the baseline above, so the loop starts at 1.
    7006           0 :       for (i = 1; i < filter_set_size; ++i) {
    7007           0 :         int tmp_skip_sb = 0;
    7008           0 :         int64_t tmp_skip_sse = INT64_MAX;
    7009             :         int tmp_rs;
    7010             :         int64_t tmp_rd;
    7011             : #if CONFIG_DUAL_FILTER
    7012           0 :         mbmi->interp_filter[0] = filter_sets[i][0];
    7013           0 :         mbmi->interp_filter[1] = filter_sets[i][1];
    7014           0 :         mbmi->interp_filter[2] = filter_sets[i][0];
    7015           0 :         mbmi->interp_filter[3] = filter_sets[i][1];
    7016             : #else
    7017             :         mbmi->interp_filter = (InterpFilter)i;
    7018             : #endif  // CONFIG_DUAL_FILTER
    7019           0 :         tmp_rs = av1_get_switchable_rate(cpi, xd);
    7020           0 :         av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
    7021           0 :         model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
    7022             :                         &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
    7023           0 :         tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist);
    7024             :         // Keep this candidate only if it strictly lowers the RD cost recorded so far.
    7025           0 :         if (tmp_rd < *rd) {
    7026           0 :           *rd = tmp_rd;
    7027           0 :           *switchable_rate = av1_get_switchable_rate(cpi, xd);  // NOTE(review): same call as tmp_rs above with no visible filter change in between - looks redundant, confirm
    7028             : #if CONFIG_DUAL_FILTER
    7029           0 :           av1_copy(best_filter, mbmi->interp_filter);
    7030             : #else
    7031             :           best_filter = mbmi->interp_filter;
    7032             : #endif  // CONFIG_DUAL_FILTER
    7033           0 :           *skip_txfm_sb = tmp_skip_sb;
    7034           0 :           *skip_sse_sb = tmp_skip_sse;
    7035           0 :           best_in_temp = !best_in_temp;  // presumably: the new best sits in the buffer just used, so switch buffers for the next candidate
    7036           0 :           if (best_in_temp) {
    7037           0 :             restore_dst_buf(xd, *orig_dst);
    7038             :           } else {
    7039           0 :             restore_dst_buf(xd, *tmp_dst);
    7040             :           }
    7041             :         }
    7042             :       }
    7043           0 :       if (best_in_temp) {  // after the loop, restore the buffer that the toggle says holds the winning prediction (assumed - confirm against restore_dst_buf)
    7044           0 :         restore_dst_buf(xd, *tmp_dst);
    7045             :       } else {
    7046           0 :         restore_dst_buf(xd, *orig_dst);
    7047             :       }
    7048             : #if CONFIG_DUAL_FILTER
    7049           0 :       av1_copy(mbmi->interp_filter, best_filter);  // commit the winning filter set to mbmi
    7050             : #else
    7051             :       mbmi->interp_filter = best_filter;  // commit the winning filter to mbmi
    7052             : #endif  // CONFIG_DUAL_FILTER
    7053             :     } else {  // no search performed: the seeded filters are expected to still be EIGHTTAP_REGULAR
    7054             : #if CONFIG_DUAL_FILTER
    7055           0 :       for (i = 0; i < 4; ++i)
    7056           0 :         assert(mbmi->interp_filter[i] == EIGHTTAP_REGULAR);
    7057             : #else
    7058             :       assert(mbmi->interp_filter == EIGHTTAP_REGULAR);
    7059             : #endif  // CONFIG_DUAL_FILTER
    7060             :     }
    7061             :   }
    7062             :   // Always returns 0; results are delivered through rd, switchable_rate, skip_txfm_sb, skip_sse_sb and mbmi->interp_filter.
    7063           0 :   return 0;
    7064             : }
    7065             : 
    7066             : // TODO(afergs): Refactor the MBMI references in here - there are four
    7067             : // TODO(afergs): Refactor optional args - add them to a struct or remove
    7068           0 : static int64_t motion_mode_rd(
    7069             :     const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
    7070             :     RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
    7071             :     int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
    7072             :     int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
    7073             :     const int *refs, int rate_mv,
    7074             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7075             :     int_mv *const single_newmv,
    7076             : #if CONFIG_EXT_INTER
    7077             :     int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi,
    7078             : #if CONFIG_MOTION_VAR
    7079             :     int rate_mv_bmc,
    7080             : #endif  // CONFIG_MOTION_VAR
    7081             : #endif  // CONFIG_EXT_INTER
    7082             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7083             :     int rs, int *skip_txfm_sb, int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
    7084           0 :   const AV1_COMMON *const cm = &cpi->common;
    7085           0 :   MACROBLOCKD *xd = &x->e_mbd;
    7086           0 :   MODE_INFO *mi = xd->mi[0];
    7087           0 :   MB_MODE_INFO *mbmi = &mi->mbmi;
    7088           0 :   const int is_comp_pred = has_second_ref(mbmi);
    7089           0 :   const PREDICTION_MODE this_mode = mbmi->mode;
    7090             : 
    7091             :   (void)mode_mv;
    7092             :   (void)mi_row;
    7093             :   (void)mi_col;
    7094             :   (void)args;
    7095             :   (void)refs;
    7096             :   (void)rate_mv;
    7097             :   (void)is_comp_pred;
    7098             :   (void)this_mode;
    7099             : 
    7100             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7101             :   MOTION_MODE motion_mode, last_motion_mode_allowed;
    7102           0 :   int rate2_nocoeff = 0, best_xskip, best_disable_skip = 0;
    7103             :   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
    7104             :   MB_MODE_INFO base_mbmi, best_mbmi;
    7105             : #if CONFIG_VAR_TX
    7106             :   uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
    7107             : #endif  // CONFIG_VAR_TX
    7108             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7109             : 
    7110             : #if CONFIG_WARPED_MOTION
    7111             :   int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    7112             : #endif  // CONFIG_WARPED_MOTION
    7113             : 
    7114             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7115           0 :   av1_invalid_rd_stats(&best_rd_stats);
    7116             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7117             : 
    7118           0 :   if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
    7119             : #if CONFIG_WARPED_MOTION
    7120           0 :   aom_clear_system_state();
    7121           0 :   mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
    7122             : #if CONFIG_EXT_INTER
    7123           0 :   best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
    7124             : #endif  // CONFIG_EXT_INTER
    7125             : #endif  // CONFIG_WARPED_MOTION
    7126             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7127           0 :   rate2_nocoeff = rd_stats->rate;
    7128           0 :   last_motion_mode_allowed = motion_mode_allowed(
    7129             : #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
    7130           0 :       0, xd->global_motion,
    7131             : #endif  // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
    7132             :       mi);
    7133           0 :   base_mbmi = *mbmi;
    7134             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7135             : 
    7136             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7137           0 :   int64_t best_rd = INT64_MAX;
    7138           0 :   for (motion_mode = SIMPLE_TRANSLATION;
    7139           0 :        motion_mode <= last_motion_mode_allowed; motion_mode++) {
    7140           0 :     int64_t tmp_rd = INT64_MAX;
    7141             :     int tmp_rate;
    7142             :     int64_t tmp_dist;
    7143             : #if CONFIG_EXT_INTER
    7144           0 :     int tmp_rate2 =
    7145           0 :         motion_mode != SIMPLE_TRANSLATION ? rate2_bmc_nocoeff : rate2_nocoeff;
    7146             : #else
    7147             :     int tmp_rate2 = rate2_nocoeff;
    7148             : #endif  // CONFIG_EXT_INTER
    7149             : 
    7150           0 :     *mbmi = base_mbmi;
    7151           0 :     mbmi->motion_mode = motion_mode;
    7152             : #if CONFIG_MOTION_VAR
    7153           0 :     if (mbmi->motion_mode == OBMC_CAUSAL) {
    7154             : #if CONFIG_EXT_INTER
    7155           0 :       *mbmi = *best_bmc_mbmi;
    7156           0 :       mbmi->motion_mode = OBMC_CAUSAL;
    7157             : #endif  // CONFIG_EXT_INTER
    7158           0 :       if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
    7159           0 :         int tmp_rate_mv = 0;
    7160             : 
    7161           0 :         single_motion_search(cpi, x, bsize, mi_row, mi_col,
    7162             : #if CONFIG_EXT_INTER
    7163             :                              0,
    7164             : #endif  // CONFIG_EXT_INTER
    7165             :                              &tmp_rate_mv);
    7166           0 :         mbmi->mv[0].as_int = x->best_mv.as_int;
    7167           0 :         if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
    7168             :                                 refs[0])) {
    7169           0 :           tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
    7170             :         }
    7171             : #if CONFIG_EXT_INTER
    7172           0 :         tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
    7173             : #else
    7174             :         tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
    7175             : #endif  // CONFIG_EXT_INTER
    7176             : #if CONFIG_DUAL_FILTER
    7177           0 :         if (!has_subpel_mv_component(xd->mi[0], xd, 0))
    7178           0 :           mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
    7179           0 :         if (!has_subpel_mv_component(xd->mi[0], xd, 1))
    7180           0 :           mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
    7181             : #endif  // CONFIG_DUAL_FILTER
    7182           0 :         av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
    7183             : #if CONFIG_EXT_INTER
    7184             :       } else {
    7185           0 :         av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
    7186             : #endif  // CONFIG_EXT_INTER
    7187             :       }
    7188           0 :       av1_build_obmc_inter_prediction(
    7189           0 :           cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
    7190           0 :           args->left_pred_buf, args->left_pred_stride);
    7191           0 :       model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
    7192             :                       &tmp_dist, skip_txfm_sb, skip_sse_sb);
    7193             :     }
    7194             : #endif  // CONFIG_MOTION_VAR
    7195             : 
    7196             : #if CONFIG_WARPED_MOTION
    7197           0 :     if (mbmi->motion_mode == WARPED_CAUSAL) {
    7198             : #if CONFIG_EXT_INTER
    7199           0 :       *mbmi = *best_bmc_mbmi;
    7200           0 :       mbmi->motion_mode = WARPED_CAUSAL;
    7201             : #endif  // CONFIG_EXT_INTER
    7202           0 :       mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
    7203             : #if CONFIG_DUAL_FILTER
    7204           0 :       for (int dir = 0; dir < 4; ++dir)
    7205           0 :         mbmi->interp_filter[dir] = cm->interp_filter == SWITCHABLE
    7206             :                                        ? EIGHTTAP_REGULAR
    7207           0 :                                        : cm->interp_filter;
    7208             : #else
    7209             :       mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
    7210             :                                                             : cm->interp_filter;
    7211             : #endif  // CONFIG_DUAL_FILTER
    7212             : 
    7213           0 :       if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
    7214           0 :                            mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
    7215             :                            &mbmi->wm_params[0], mi_row, mi_col)) {
    7216             :         // Refine MV for NEWMV mode
    7217           0 :         if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
    7218           0 :           int tmp_rate_mv = 0;
    7219           0 :           const int_mv mv0 = mbmi->mv[0];
    7220           0 :           WarpedMotionParams wm_params0 = mbmi->wm_params[0];
    7221             : 
    7222             :           // Refine MV in a small range.
    7223           0 :           av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
    7224             : 
    7225             :           // Keep the refined MV and WM parameters.
    7226           0 :           if (mv0.as_int != mbmi->mv[0].as_int) {
    7227           0 :             const int ref = refs[0];
    7228           0 :             const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
    7229             : 
    7230           0 :             tmp_rate_mv =
    7231           0 :                 av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv, x->nmvjointcost,
    7232             :                                 x->mvcost, MV_COST_WEIGHT);
    7233             : 
    7234           0 :             if (cpi->sf.adaptive_motion_search)
    7235           0 :               x->pred_mv[ref] = mbmi->mv[0].as_mv;
    7236             : 
    7237           0 :             single_newmv[ref] = mbmi->mv[0];
    7238             : 
    7239           0 :             if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
    7240             :                                     refs[0])) {
    7241           0 :               tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
    7242             :             }
    7243             : #if CONFIG_EXT_INTER
    7244           0 :             tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
    7245             : #else
    7246             :             tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
    7247             : #endif  // CONFIG_EXT_INTER
    7248             : #if CONFIG_DUAL_FILTER
    7249           0 :             if (!has_subpel_mv_component(xd->mi[0], xd, 0))
    7250           0 :               mbmi->interp_filter[0] = EIGHTTAP_REGULAR;
    7251           0 :             if (!has_subpel_mv_component(xd->mi[0], xd, 1))
    7252           0 :               mbmi->interp_filter[1] = EIGHTTAP_REGULAR;
    7253             : #endif  // CONFIG_DUAL_FILTER
    7254             :           } else {
    7255             :             // Restore the old MV and WM parameters.
    7256           0 :             mbmi->mv[0] = mv0;
    7257           0 :             mbmi->wm_params[0] = wm_params0;
    7258             :           }
    7259             :         }
    7260             : 
    7261           0 :         av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
    7262           0 :         model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
    7263             :                         &tmp_dist, skip_txfm_sb, skip_sse_sb);
    7264             :       } else {
    7265           0 :         continue;
    7266             :       }
    7267             :     }
    7268             : #endif  // CONFIG_WARPED_MOTION
    7269           0 :     x->skip = 0;
    7270             : 
    7271           0 :     rd_stats->dist = 0;
    7272           0 :     rd_stats->sse = 0;
    7273           0 :     rd_stats->skip = 1;
    7274           0 :     rd_stats->rate = tmp_rate2;
    7275           0 :     if (last_motion_mode_allowed > SIMPLE_TRANSLATION) {
    7276             : #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
    7277           0 :       if (last_motion_mode_allowed == WARPED_CAUSAL)
    7278             : #endif  // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
    7279           0 :         rd_stats->rate += cpi->motion_mode_cost[bsize][mbmi->motion_mode];
    7280             : #if CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
    7281             :       else
    7282           0 :         rd_stats->rate += cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
    7283             : #endif  // CONFIG_WARPED_MOTION && CONFIG_MOTION_VAR
    7284             :     }
    7285             : #if CONFIG_WARPED_MOTION
    7286           0 :     if (mbmi->motion_mode == WARPED_CAUSAL) {
    7287           0 :       rd_stats->rate -= rs;
    7288             :     }
    7289             : #endif  // CONFIG_WARPED_MOTION
    7290             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7291           0 :     if (!*skip_txfm_sb) {
    7292           0 :       int64_t rdcosty = INT64_MAX;
    7293           0 :       int is_cost_valid_uv = 0;
    7294             : 
    7295             :       // cost and distortion
    7296           0 :       av1_subtract_plane(x, bsize, 0);
    7297             : #if CONFIG_VAR_TX
    7298           0 :       if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
    7299           0 :         select_tx_type_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
    7300             :       } else {
    7301             :         int idx, idy;
    7302           0 :         super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
    7303           0 :         for (idy = 0; idy < xd->n8_h; ++idy)
    7304           0 :           for (idx = 0; idx < xd->n8_w; ++idx)
    7305           0 :             mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
    7306           0 :         memset(x->blk_skip[0], rd_stats_y->skip,
    7307           0 :                sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
    7308             :       }
    7309             : #else
    7310             :     /* clang-format off */
    7311             :       super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
    7312             : /* clang-format on */
    7313             : #endif  // CONFIG_VAR_TX
    7314             : 
    7315           0 :       if (rd_stats_y->rate == INT_MAX) {
    7316           0 :         av1_invalid_rd_stats(rd_stats);
    7317             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7318           0 :         if (mbmi->motion_mode != SIMPLE_TRANSLATION) {
    7319           0 :           continue;
    7320             :         } else {
    7321             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7322           0 :           restore_dst_buf(xd, *orig_dst);
    7323           0 :           return INT64_MAX;
    7324             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7325             :         }
    7326             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7327             :       }
    7328             : 
    7329           0 :       av1_merge_rd_stats(rd_stats, rd_stats_y);
    7330             : 
    7331           0 :       rdcosty = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
    7332           0 :       rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, rd_stats->sse));
    7333             : /* clang-format off */
    7334             : #if CONFIG_VAR_TX
    7335           0 :       is_cost_valid_uv =
    7336           0 :           inter_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty);
    7337             : #else
    7338             :       is_cost_valid_uv =
    7339             :           super_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty);
    7340             : #endif  // CONFIG_VAR_TX
    7341           0 :       if (!is_cost_valid_uv) {
    7342             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7343           0 :         continue;
    7344             : #else
    7345             :         restore_dst_buf(xd, *orig_dst);
    7346             :         return INT64_MAX;
    7347             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7348             :       }
    7349             :       /* clang-format on */
    7350           0 :       av1_merge_rd_stats(rd_stats, rd_stats_uv);
    7351             : #if CONFIG_RD_DEBUG
    7352             :       // record transform block coefficient cost
    7353             :       // TODO(angiebird): So far rd_debug tool only detects discrepancy of
    7354             :       // coefficient cost. Therefore, it is fine to copy rd_stats into mbmi
    7355             :       // here because we already collect the coefficient cost. Move this part to
    7356             :       // other place when we need to compare non-coefficient cost.
    7357             :       mbmi->rd_stats = *rd_stats;
    7358             : #endif  // CONFIG_RD_DEBUG
    7359             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7360           0 :       if (rd_stats->skip) {
    7361           0 :         rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
    7362           0 :         rd_stats_y->rate = 0;
    7363           0 :         rd_stats_uv->rate = 0;
    7364           0 :         rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
    7365           0 :         mbmi->skip = 0;
    7366             :         // here mbmi->skip temporarily plays a role as what this_skip2 does
    7367           0 :       } else if (!xd->lossless[mbmi->segment_id] &&
    7368           0 :                  (RDCOST(x->rdmult, x->rddiv,
    7369             :                          rd_stats_y->rate + rd_stats_uv->rate +
    7370             :                              av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
    7371             :                          rd_stats->dist) >=
    7372           0 :                   RDCOST(x->rdmult, x->rddiv,
    7373             :                          av1_cost_bit(av1_get_skip_prob(cm, xd), 1),
    7374             :                          rd_stats->sse))) {
    7375           0 :         rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
    7376           0 :         rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
    7377           0 :         rd_stats->dist = rd_stats->sse;
    7378           0 :         rd_stats_y->rate = 0;
    7379           0 :         rd_stats_uv->rate = 0;
    7380           0 :         mbmi->skip = 1;
    7381             :       } else {
    7382           0 :         rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
    7383           0 :         mbmi->skip = 0;
    7384             :       }
    7385           0 :       *disable_skip = 0;
    7386             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7387             :     } else {
    7388           0 :       x->skip = 1;
    7389           0 :       *disable_skip = 1;
    7390           0 :       mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
    7391             : 
    7392             : // The cost of skip bit needs to be added.
    7393             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7394           0 :       mbmi->skip = 0;
    7395             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7396           0 :       rd_stats->rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
    7397             : 
    7398           0 :       rd_stats->dist = *skip_sse_sb;
    7399           0 :       rd_stats->sse = *skip_sse_sb;
    7400           0 :       rd_stats_y->rate = 0;
    7401           0 :       rd_stats_uv->rate = 0;
    7402           0 :       rd_stats->skip = 1;
    7403             :     }
    7404             : 
    7405             : #if CONFIG_GLOBAL_MOTION
    7406           0 :     if (this_mode == ZEROMV
    7407             : #if CONFIG_EXT_INTER
    7408           0 :         || this_mode == ZERO_ZEROMV
    7409             : #endif  // CONFIG_EXT_INTER
    7410             :         ) {
    7411           0 :       if (is_nontrans_global_motion(xd)) {
    7412           0 :         rd_stats->rate -= rs;
    7413             : #if CONFIG_DUAL_FILTER
    7414           0 :         mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
    7415             :                                      ? EIGHTTAP_REGULAR
    7416           0 :                                      : cm->interp_filter;
    7417           0 :         mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
    7418             :                                      ? EIGHTTAP_REGULAR
    7419           0 :                                      : cm->interp_filter;
    7420             : #else
    7421             :         mbmi->interp_filter = cm->interp_filter == SWITCHABLE
    7422             :                                   ? EIGHTTAP_REGULAR
    7423             :                                   : cm->interp_filter;
    7424             : #endif  // CONFIG_DUAL_FILTER
    7425             :       }
    7426             :     }
    7427             : #endif  // CONFIG_GLOBAL_MOTION
    7428             : 
    7429             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7430           0 :     tmp_rd = RDCOST(x->rdmult, x->rddiv, rd_stats->rate, rd_stats->dist);
    7431           0 :     if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) {
    7432           0 :       best_mbmi = *mbmi;
    7433           0 :       best_rd = tmp_rd;
    7434           0 :       best_rd_stats = *rd_stats;
    7435           0 :       best_rd_stats_y = *rd_stats_y;
    7436           0 :       best_rd_stats_uv = *rd_stats_uv;
    7437             : #if CONFIG_VAR_TX
    7438           0 :       for (int i = 0; i < MAX_MB_PLANE; ++i)
    7439           0 :         memcpy(best_blk_skip[i], x->blk_skip[i],
    7440           0 :                sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
    7441             : #endif  // CONFIG_VAR_TX
    7442           0 :       best_xskip = x->skip;
    7443           0 :       best_disable_skip = *disable_skip;
    7444             :     }
    7445             :   }
    7446             : 
    7447           0 :   if (best_rd == INT64_MAX) {
    7448           0 :     av1_invalid_rd_stats(rd_stats);
    7449           0 :     restore_dst_buf(xd, *orig_dst);
    7450           0 :     return INT64_MAX;
    7451             :   }
    7452           0 :   *mbmi = best_mbmi;
    7453           0 :   *rd_stats = best_rd_stats;
    7454           0 :   *rd_stats_y = best_rd_stats_y;
    7455           0 :   *rd_stats_uv = best_rd_stats_uv;
    7456             : #if CONFIG_VAR_TX
    7457           0 :   for (int i = 0; i < MAX_MB_PLANE; ++i)
    7458           0 :     memcpy(x->blk_skip[i], best_blk_skip[i],
    7459           0 :            sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
    7460             : #endif  // CONFIG_VAR_TX
    7461           0 :   x->skip = best_xskip;
    7462           0 :   *disable_skip = best_disable_skip;
    7463             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7464             : 
    7465           0 :   restore_dst_buf(xd, *orig_dst);
    7466           0 :   return 0;
    7467             : }
    7468             : 
    7469           0 : static int64_t handle_inter_mode(
    7470             :     const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    7471             :     RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
    7472             :     int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
    7473             :     int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) {
    7474           0 :   const AV1_COMMON *cm = &cpi->common;
    7475             :   (void)cm;
    7476           0 :   MACROBLOCKD *xd = &x->e_mbd;
    7477           0 :   MODE_INFO *mi = xd->mi[0];
    7478           0 :   MB_MODE_INFO *mbmi = &mi->mbmi;
    7479           0 :   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
    7480           0 :   const int is_comp_pred = has_second_ref(mbmi);
    7481           0 :   const int this_mode = mbmi->mode;
    7482           0 :   int_mv *frame_mv = mode_mv[this_mode];
    7483             :   int i;
    7484           0 :   int refs[2] = { mbmi->ref_frame[0],
    7485           0 :                   (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
    7486             :   int_mv cur_mv[2];
    7487           0 :   int rate_mv = 0;
    7488             : #if CONFIG_EXT_INTER
    7489           0 :   int pred_exists = 1;
    7490             : #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
    7491           0 :   const int bw = block_size_wide[bsize];
    7492             : #endif  // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
    7493             :   int_mv single_newmv[TOTAL_REFS_PER_FRAME];
    7494             : #if CONFIG_INTERINTRA
    7495           0 :   const unsigned int *const interintra_mode_cost =
    7496           0 :       cpi->interintra_mode_cost[size_group_lookup[bsize]];
    7497             : #endif  // CONFIG_INTERINTRA
    7498           0 :   const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
    7499           0 :   uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
    7500             : #else
    7501             :   int_mv *const single_newmv = args->single_newmv;
    7502             : #endif  // CONFIG_EXT_INTER
    7503             : #if CONFIG_HIGHBITDEPTH
    7504             :   DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
    7505             : #else
    7506             :   DECLARE_ALIGNED(16, uint8_t, tmp_buf_[MAX_MB_PLANE * MAX_SB_SQUARE]);
    7507             : #endif  // CONFIG_HIGHBITDEPTH
    7508             :   uint8_t *tmp_buf;
    7509             : 
    7510             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7511             : #if CONFIG_EXT_INTER
    7512             :   int rate2_bmc_nocoeff;
    7513             :   MB_MODE_INFO best_bmc_mbmi;
    7514             : #if CONFIG_MOTION_VAR
    7515             :   int rate_mv_bmc;
    7516             : #endif  // CONFIG_MOTION_VAR
    7517             : #endif  // CONFIG_EXT_INTER
    7518             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7519           0 :   int64_t rd = INT64_MAX;
    7520             :   BUFFER_SET orig_dst, tmp_dst;
    7521           0 :   int rs = 0;
    7522             : 
    7523           0 :   int skip_txfm_sb = 0;
    7524           0 :   int64_t skip_sse_sb = INT64_MAX;
    7525             :   int16_t mode_ctx;
    7526             : 
    7527             : #if CONFIG_EXT_INTER
    7528             : #if CONFIG_INTERINTRA
    7529           0 :   int compmode_interintra_cost = 0;
    7530           0 :   mbmi->use_wedge_interintra = 0;
    7531             : #endif
    7532             : #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
    7533           0 :   int compmode_interinter_cost = 0;
    7534           0 :   mbmi->interinter_compound_type = COMPOUND_AVERAGE;
    7535             : #endif
    7536             : 
    7537             : #if CONFIG_INTERINTRA
    7538           0 :   if (!cm->allow_interintra_compound && is_comp_interintra_pred)
    7539           0 :     return INT64_MAX;
    7540             : #endif  // CONFIG_INTERINTRA
    7541             : 
    7542             :   // is_comp_interintra_pred implies !is_comp_pred
    7543           0 :   assert(!is_comp_interintra_pred || (!is_comp_pred));
    7544             :   // is_comp_interintra_pred implies is_interintra_allowed(mbmi->sb_type)
    7545           0 :   assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
    7546             : #endif  // CONFIG_EXT_INTER
    7547             : 
    7548             : #if CONFIG_EXT_INTER
    7549           0 :   if (is_comp_pred)
    7550           0 :     mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
    7551             :   else
    7552             : #endif  // CONFIG_EXT_INTER
    7553           0 :     mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
    7554           0 :                                          mbmi->ref_frame, bsize, -1);
    7555             : 
    7556             : #if CONFIG_HIGHBITDEPTH
    7557           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    7558           0 :     tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf_);
    7559             :   else
    7560             : #endif  // CONFIG_HIGHBITDEPTH
    7561           0 :     tmp_buf = tmp_buf_;
    7562             :   // Make sure that we didn't leave the plane destination buffers set
    7563             :   // to tmp_buf at the end of the last iteration
    7564           0 :   assert(xd->plane[0].dst.buf != tmp_buf);
    7565             : 
    7566             : #if CONFIG_WARPED_MOTION
    7567           0 :   mbmi->num_proj_ref[0] = 0;
    7568           0 :   mbmi->num_proj_ref[1] = 0;
    7569             : #endif  // CONFIG_WARPED_MOTION
    7570             : 
    7571           0 :   if (is_comp_pred) {
    7572           0 :     if (frame_mv[refs[0]].as_int == INVALID_MV ||
    7573           0 :         frame_mv[refs[1]].as_int == INVALID_MV)
    7574           0 :       return INT64_MAX;
    7575             :   }
    7576             : 
    7577           0 :   mbmi->motion_mode = SIMPLE_TRANSLATION;
    7578           0 :   if (have_newmv_in_inter_mode(this_mode)) {
    7579           0 :     const int64_t ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col,
    7580             :                                          &rate_mv, single_newmv, args);
    7581           0 :     if (ret_val != 0)
    7582           0 :       return ret_val;
    7583             :     else
    7584           0 :       rd_stats->rate += rate_mv;
    7585             :   }
    7586           0 :   for (i = 0; i < is_comp_pred + 1; ++i) {
    7587           0 :     cur_mv[i] = frame_mv[refs[i]];
    7588             :     // Clip "next_nearest" so that it does not extend too far out of image
    7589           0 :     if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);
    7590           0 :     if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
    7591           0 :     mbmi->mv[i].as_int = cur_mv[i].as_int;
    7592             :   }
    7593             : 
    7594             : #if CONFIG_EXT_INTER
    7595           0 :   if (this_mode == NEAREST_NEARESTMV)
    7596             : #else
    7597             :   if (this_mode == NEARESTMV && is_comp_pred)
    7598             : #endif  // CONFIG_EXT_INTER
    7599             :   {
    7600             : #if !CONFIG_EXT_INTER
    7601             :     uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
    7602             : #endif  // !CONFIG_EXT_INTER
    7603           0 :     if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
    7604           0 :       cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
    7605           0 :       cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
    7606             : 
    7607           0 :       for (i = 0; i < 2; ++i) {
    7608           0 :         clamp_mv2(&cur_mv[i].as_mv, xd);
    7609           0 :         if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
    7610           0 :         mbmi->mv[i].as_int = cur_mv[i].as_int;
    7611             :       }
    7612             :     }
    7613             :   }
    7614             : 
    7615             : #if CONFIG_EXT_INTER
    7616           0 :   if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
    7617           0 :     if (this_mode == NEAREST_NEWMV) {
    7618           0 :       cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
    7619             : 
    7620           0 :       lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
    7621           0 :       clamp_mv2(&cur_mv[0].as_mv, xd);
    7622           0 :       if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
    7623           0 :       mbmi->mv[0].as_int = cur_mv[0].as_int;
    7624             :     }
    7625             : 
    7626           0 :     if (this_mode == NEW_NEARESTMV) {
    7627           0 :       cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
    7628             : 
    7629           0 :       lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
    7630           0 :       clamp_mv2(&cur_mv[1].as_mv, xd);
    7631           0 :       if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
    7632           0 :       mbmi->mv[1].as_int = cur_mv[1].as_int;
    7633             :     }
    7634             :   }
    7635             : 
    7636           0 :   if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
    7637           0 :     int ref_mv_idx = mbmi->ref_mv_idx + 1;
    7638           0 :     if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) {
    7639           0 :       cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
    7640             : 
    7641           0 :       lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
    7642           0 :       clamp_mv2(&cur_mv[0].as_mv, xd);
    7643           0 :       if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
    7644           0 :       mbmi->mv[0].as_int = cur_mv[0].as_int;
    7645             :     }
    7646             : 
    7647           0 :     if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) {
    7648           0 :       cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
    7649             : 
    7650           0 :       lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
    7651           0 :       clamp_mv2(&cur_mv[1].as_mv, xd);
    7652           0 :       if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
    7653           0 :       mbmi->mv[1].as_int = cur_mv[1].as_int;
    7654             :     }
    7655             :   }
    7656             : #else
    7657             :   if (this_mode == NEARMV && is_comp_pred) {
    7658             :     uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
    7659             :     if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
    7660             :       int ref_mv_idx = mbmi->ref_mv_idx + 1;
    7661             :       cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
    7662             :       cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
    7663             : 
    7664             :       for (i = 0; i < 2; ++i) {
    7665             :         clamp_mv2(&cur_mv[i].as_mv, xd);
    7666             :         if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
    7667             :         mbmi->mv[i].as_int = cur_mv[i].as_int;
    7668             :       }
    7669             :     }
    7670             :   }
    7671             : #endif  // CONFIG_EXT_INTER
    7672             : 
    7673             :   // do first prediction into the destination buffer. Do the next
    7674             :   // prediction into a temporary buffer. Then keep track of which one
    7675             :   // of these currently holds the best predictor, and use the other
    7676             :   // one for future predictions. In the end, copy from tmp_buf to
    7677             :   // dst if necessary.
    7678           0 :   for (i = 0; i < MAX_MB_PLANE; i++) {
    7679           0 :     tmp_dst.plane[i] = tmp_buf + i * MAX_SB_SQUARE;
    7680           0 :     tmp_dst.stride[i] = MAX_SB_SIZE;
    7681             :   }
    7682           0 :   for (i = 0; i < MAX_MB_PLANE; i++) {
    7683           0 :     orig_dst.plane[i] = xd->plane[i].dst.buf;
    7684           0 :     orig_dst.stride[i] = xd->plane[i].dst.stride;
    7685             :   }
    7686             : 
    7687             :   // We don't include the cost of the second reference here, because there
    7688             :   // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
    7689             :   // words if you present them in that order, the second one is always known
    7690             :   // if the first is known.
    7691             :   //
    7692             :   // Under some circumstances we discount the cost of new mv mode to encourage
    7693             :   // initiation of a motion field.
    7694           0 :   if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
    7695             :                           refs[0])) {
    7696             : #if CONFIG_EXT_INTER
    7697           0 :     rd_stats->rate +=
    7698           0 :         AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
    7699             :                cost_mv_ref(cpi, is_comp_pred ? NEAREST_NEARESTMV : NEARESTMV,
    7700             :                            mode_ctx));
    7701             : #else
    7702             :     rd_stats->rate += AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx),
    7703             :                              cost_mv_ref(cpi, NEARESTMV, mode_ctx));
    7704             : #endif  // CONFIG_EXT_INTER
    7705             :   } else {
    7706           0 :     rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx);
    7707             :   }
    7708             : 
    7709           0 :   if (RDCOST(x->rdmult, x->rddiv, rd_stats->rate, 0) > ref_best_rd &&
    7710             : #if CONFIG_EXT_INTER
    7711           0 :       mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV
    7712             : #else
    7713             :       mbmi->mode != NEARESTMV
    7714             : #endif  // CONFIG_EXT_INTER
    7715             :       )
    7716           0 :     return INT64_MAX;
    7717             : 
    7718           0 :   int64_t ret_val = interpolation_filter_search(
    7719           0 :       x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst, args->single_filter,
    7720             :       &rd, &rs, &skip_txfm_sb, &skip_sse_sb);
    7721           0 :   if (ret_val != 0) return ret_val;
    7722             : 
    7723             : #if CONFIG_EXT_INTER
    7724             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7725           0 :   best_bmc_mbmi = *mbmi;
    7726           0 :   rate2_bmc_nocoeff = rd_stats->rate;
    7727           0 :   if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs;
    7728             : #if CONFIG_MOTION_VAR
    7729           0 :   rate_mv_bmc = rate_mv;
    7730             : #endif  // CONFIG_MOTION_VAR
    7731             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    7732             : 
    7733             : #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
    7734           0 :   if (is_comp_pred) {
    7735             :     int rate_sum, rs2;
    7736             :     int64_t dist_sum;
    7737           0 :     int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX;
    7738             :     INTERINTER_COMPOUND_DATA best_compound_data;
    7739             :     int_mv best_mv[2];
    7740           0 :     int best_tmp_rate_mv = rate_mv;
    7741             :     int tmp_skip_txfm_sb;
    7742             :     int64_t tmp_skip_sse_sb;
    7743             :     int compound_type_cost[COMPOUND_TYPES];
    7744             :     uint8_t pred0[2 * MAX_SB_SQUARE];
    7745             :     uint8_t pred1[2 * MAX_SB_SQUARE];
    7746           0 :     uint8_t *preds0[1] = { pred0 };
    7747           0 :     uint8_t *preds1[1] = { pred1 };
    7748           0 :     int strides[1] = { bw };
    7749             :     int tmp_rate_mv;
    7750           0 :     int masked_compound_used = is_any_masked_compound_used(bsize);
    7751             : #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
    7752           0 :     masked_compound_used = masked_compound_used && cm->allow_masked_compound;
    7753             : #endif  // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE
    7754             :     COMPOUND_TYPE cur_type;
    7755             : 
    7756           0 :     best_mv[0].as_int = cur_mv[0].as_int;
    7757           0 :     best_mv[1].as_int = cur_mv[1].as_int;
    7758           0 :     memset(&best_compound_data, 0, sizeof(best_compound_data));
    7759             : #if CONFIG_COMPOUND_SEGMENT
    7760             :     uint8_t tmp_mask_buf[2 * MAX_SB_SQUARE];
    7761           0 :     best_compound_data.seg_mask = tmp_mask_buf;
    7762             : #endif  // CONFIG_COMPOUND_SEGMENT
    7763             : 
    7764           0 :     if (masked_compound_used) {
    7765           0 :       av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize],
    7766             :                       av1_compound_type_tree);
    7767             :       // get inter predictors to use for masked compound modes
    7768           0 :       av1_build_inter_predictors_for_planes_single_buf(
    7769             :           xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
    7770           0 :       av1_build_inter_predictors_for_planes_single_buf(
    7771             :           xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
    7772             :     }
    7773             : 
    7774           0 :     for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
    7775           0 :       if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
    7776           0 :       if (!is_interinter_compound_used(cur_type, bsize)) break;
    7777           0 :       tmp_rate_mv = rate_mv;
    7778           0 :       best_rd_cur = INT64_MAX;
    7779           0 :       mbmi->interinter_compound_type = cur_type;
    7780           0 :       rs2 = av1_cost_literal(get_interinter_compound_type_bits(
    7781             :                 bsize, mbmi->interinter_compound_type)) +
    7782             :             (masked_compound_used
    7783           0 :                  ? compound_type_cost[mbmi->interinter_compound_type]
    7784           0 :                  : 0);
    7785             : 
    7786           0 :       switch (cur_type) {
    7787             :         case COMPOUND_AVERAGE:
    7788           0 :           av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
    7789             :                                          bsize);
    7790           0 :           av1_subtract_plane(x, bsize, 0);
    7791           0 :           rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
    7792             :                                    &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
    7793             :                                    INT64_MAX);
    7794           0 :           if (rd != INT64_MAX)
    7795           0 :             best_rd_cur =
    7796           0 :                 RDCOST(x->rdmult, x->rddiv, rs2 + rate_mv + rate_sum, dist_sum);
    7797           0 :           best_rd_compound = best_rd_cur;
    7798           0 :           break;
    7799             : #if CONFIG_WEDGE
    7800             :         case COMPOUND_WEDGE:
    7801           0 :           if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
    7802           0 :               best_rd_compound / 3 < ref_best_rd) {
    7803           0 :             best_rd_cur = build_and_cost_compound_type(
    7804             :                 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
    7805             :                 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
    7806             :           }
    7807           0 :           break;
    7808             : #endif  // CONFIG_WEDGE
    7809             : #if CONFIG_COMPOUND_SEGMENT
    7810             :         case COMPOUND_SEG:
    7811           0 :           if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
    7812           0 :               best_rd_compound / 3 < ref_best_rd) {
    7813           0 :             best_rd_cur = build_and_cost_compound_type(
    7814             :                 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
    7815             :                 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
    7816             :           }
    7817           0 :           break;
    7818             : #endif  // CONFIG_COMPOUND_SEGMENT
    7819           0 :         default: assert(0); return 0;
    7820             :       }
    7821             : 
    7822           0 :       if (best_rd_cur < best_rd_compound) {
    7823           0 :         best_rd_compound = best_rd_cur;
    7824             : #if CONFIG_WEDGE
    7825           0 :         best_compound_data.wedge_index = mbmi->wedge_index;
    7826           0 :         best_compound_data.wedge_sign = mbmi->wedge_sign;
    7827             : #endif  // CONFIG_WEDGE
    7828             : #if CONFIG_COMPOUND_SEGMENT
    7829           0 :         best_compound_data.mask_type = mbmi->mask_type;
    7830           0 :         memcpy(best_compound_data.seg_mask, xd->seg_mask,
    7831             :                2 * MAX_SB_SQUARE * sizeof(uint8_t));
    7832             : #endif  // CONFIG_COMPOUND_SEGMENT
    7833           0 :         best_compound_data.interinter_compound_type =
    7834           0 :             mbmi->interinter_compound_type;
    7835           0 :         if (have_newmv_in_inter_mode(this_mode)) {
    7836           0 :           if (use_masked_motion_search(cur_type)) {
    7837           0 :             best_tmp_rate_mv = tmp_rate_mv;
    7838           0 :             best_mv[0].as_int = mbmi->mv[0].as_int;
    7839           0 :             best_mv[1].as_int = mbmi->mv[1].as_int;
    7840             :           } else {
    7841           0 :             best_mv[0].as_int = cur_mv[0].as_int;
    7842           0 :             best_mv[1].as_int = cur_mv[1].as_int;
    7843             :           }
    7844             :         }
    7845             :       }
    7846             :       // reset to original mvs for next iteration
    7847           0 :       mbmi->mv[0].as_int = cur_mv[0].as_int;
    7848           0 :       mbmi->mv[1].as_int = cur_mv[1].as_int;
    7849             :     }
    7850             : #if CONFIG_WEDGE
    7851           0 :     mbmi->wedge_index = best_compound_data.wedge_index;
    7852           0 :     mbmi->wedge_sign = best_compound_data.wedge_sign;
    7853             : #endif  // CONFIG_WEDGE
    7854             : #if CONFIG_COMPOUND_SEGMENT
    7855           0 :     mbmi->mask_type = best_compound_data.mask_type;
    7856           0 :     memcpy(xd->seg_mask, best_compound_data.seg_mask,
    7857             :            2 * MAX_SB_SQUARE * sizeof(uint8_t));
    7858             : #endif  // CONFIG_COMPOUND_SEGMENT
    7859           0 :     mbmi->interinter_compound_type =
    7860           0 :         best_compound_data.interinter_compound_type;
    7861           0 :     if (have_newmv_in_inter_mode(this_mode)) {
    7862           0 :       mbmi->mv[0].as_int = best_mv[0].as_int;
    7863           0 :       mbmi->mv[1].as_int = best_mv[1].as_int;
    7864           0 :       xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
    7865           0 :       xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
    7866           0 :       if (use_masked_motion_search(mbmi->interinter_compound_type)) {
    7867           0 :         rd_stats->rate += best_tmp_rate_mv - rate_mv;
    7868           0 :         rate_mv = best_tmp_rate_mv;
    7869             :       }
    7870             :     }
    7871             : 
    7872           0 :     if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) {
    7873           0 :       restore_dst_buf(xd, orig_dst);
    7874           0 :       return INT64_MAX;
    7875             :     }
    7876             : 
    7877           0 :     pred_exists = 0;
    7878             : 
    7879           0 :     compmode_interinter_cost =
    7880           0 :         av1_cost_literal(get_interinter_compound_type_bits(
    7881             :             bsize, mbmi->interinter_compound_type)) +
    7882             :         (masked_compound_used
    7883           0 :              ? compound_type_cost[mbmi->interinter_compound_type]
    7884           0 :              : 0);
    7885             :   }
    7886             : #endif  // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
    7887             : 
    7888             : #if CONFIG_INTERINTRA
    7889           0 :   if (is_comp_interintra_pred) {
    7890           0 :     INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
    7891           0 :     int64_t best_interintra_rd = INT64_MAX;
    7892             :     int rmode, rate_sum;
    7893             :     int64_t dist_sum;
    7894             :     int j;
    7895           0 :     int tmp_rate_mv = 0;
    7896             :     int tmp_skip_txfm_sb;
    7897             :     int64_t tmp_skip_sse_sb;
    7898             :     DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_SB_SQUARE]);
    7899             :     uint8_t *intrapred;
    7900             : 
    7901             : #if CONFIG_HIGHBITDEPTH
    7902           0 :     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    7903           0 :       intrapred = CONVERT_TO_BYTEPTR(intrapred_);
    7904             :     else
    7905             : #endif  // CONFIG_HIGHBITDEPTH
    7906           0 :       intrapred = intrapred_;
    7907             : 
    7908           0 :     mbmi->ref_frame[1] = NONE_FRAME;
    7909           0 :     for (j = 0; j < MAX_MB_PLANE; j++) {
    7910           0 :       xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
    7911           0 :       xd->plane[j].dst.stride = bw;
    7912             :     }
    7913           0 :     av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize);
    7914           0 :     restore_dst_buf(xd, orig_dst);
    7915           0 :     mbmi->ref_frame[1] = INTRA_FRAME;
    7916           0 :     mbmi->use_wedge_interintra = 0;
    7917             : 
    7918           0 :     for (j = 0; j < INTERINTRA_MODES; ++j) {
    7919           0 :       mbmi->interintra_mode = (INTERINTRA_MODE)j;
    7920           0 :       rmode = interintra_mode_cost[mbmi->interintra_mode];
    7921           0 :       av1_build_intra_predictors_for_interintra(xd, bsize, 0, &orig_dst,
    7922             :                                                 intrapred, bw);
    7923           0 :       av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
    7924           0 :       model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
    7925             :                       &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
    7926           0 :       rd =
    7927           0 :           RDCOST(x->rdmult, x->rddiv, tmp_rate_mv + rate_sum + rmode, dist_sum);
    7928           0 :       if (rd < best_interintra_rd) {
    7929           0 :         best_interintra_rd = rd;
    7930           0 :         best_interintra_mode = mbmi->interintra_mode;
    7931             :       }
    7932             :     }
    7933           0 :     mbmi->interintra_mode = best_interintra_mode;
    7934           0 :     rmode = interintra_mode_cost[mbmi->interintra_mode];
    7935           0 :     av1_build_intra_predictors_for_interintra(xd, bsize, 0, &orig_dst,
    7936             :                                               intrapred, bw);
    7937           0 :     av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
    7938           0 :     av1_subtract_plane(x, bsize, 0);
    7939           0 :     rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
    7940             :                              &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    7941           0 :     if (rd != INT64_MAX)
    7942           0 :       rd = RDCOST(x->rdmult, x->rddiv, rate_mv + rmode + rate_sum, dist_sum);
    7943           0 :     best_interintra_rd = rd;
    7944             : 
    7945           0 :     if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) {
    7946             :       // Don't need to call restore_dst_buf here
    7947           0 :       return INT64_MAX;
    7948             :     }
    7949             : #if CONFIG_WEDGE
    7950           0 :     if (is_interintra_wedge_used(bsize)) {
    7951           0 :       int64_t best_interintra_rd_nowedge = INT64_MAX;
    7952           0 :       int64_t best_interintra_rd_wedge = INT64_MAX;
    7953             :       int_mv tmp_mv;
    7954           0 :       int rwedge = av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 0);
    7955           0 :       if (rd != INT64_MAX)
    7956           0 :         rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum,
    7957             :                     dist_sum);
    7958           0 :       best_interintra_rd_nowedge = best_interintra_rd;
    7959             : 
    7960             :       // Disable wedge search if source variance is small
    7961           0 :       if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
    7962           0 :         mbmi->use_wedge_interintra = 1;
    7963             : 
    7964           0 :         rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) +
    7965           0 :                  av1_cost_bit(cm->fc->wedge_interintra_prob[bsize], 1);
    7966             : 
    7967           0 :         best_interintra_rd_wedge =
    7968           0 :             pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
    7969             : 
    7970           0 :         best_interintra_rd_wedge +=
    7971           0 :             RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge, 0);
    7972             :         // Refine motion vector.
    7973           0 :         if (have_newmv_in_inter_mode(this_mode)) {
    7974             :           // get negative of mask
    7975           0 :           const uint8_t *mask = av1_get_contiguous_soft_mask(
    7976             :               mbmi->interintra_wedge_index, 1, bsize);
    7977           0 :           tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
    7978           0 :           compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
    7979             :                                         mi_col, intrapred, mask, bw,
    7980             :                                         &tmp_rate_mv, 0, 0);
    7981           0 :           mbmi->mv[0].as_int = tmp_mv.as_int;
    7982           0 :           av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
    7983             :                                          bsize);
    7984           0 :           model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
    7985             :                           &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
    7986           0 :           rd = RDCOST(x->rdmult, x->rddiv,
    7987             :                       rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
    7988           0 :           if (rd >= best_interintra_rd_wedge) {
    7989           0 :             tmp_mv.as_int = cur_mv[0].as_int;
    7990           0 :             tmp_rate_mv = rate_mv;
    7991             :           }
    7992             :         } else {
    7993           0 :           tmp_mv.as_int = cur_mv[0].as_int;
    7994           0 :           tmp_rate_mv = rate_mv;
    7995           0 :           av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
    7996             :         }
    7997             :         // Evaluate closer to true rd
    7998           0 :         av1_subtract_plane(x, bsize, 0);
    7999           0 :         rd =
    8000           0 :             estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
    8001             :                                 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    8002           0 :         if (rd != INT64_MAX)
    8003           0 :           rd = RDCOST(x->rdmult, x->rddiv,
    8004             :                       rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum);
    8005           0 :         best_interintra_rd_wedge = rd;
    8006           0 :         if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
    8007           0 :           mbmi->use_wedge_interintra = 1;
    8008           0 :           mbmi->mv[0].as_int = tmp_mv.as_int;
    8009           0 :           rd_stats->rate += tmp_rate_mv - rate_mv;
    8010           0 :           rate_mv = tmp_rate_mv;
    8011             :         } else {
    8012           0 :           mbmi->use_wedge_interintra = 0;
    8013           0 :           mbmi->mv[0].as_int = cur_mv[0].as_int;
    8014             :         }
    8015             :       } else {
    8016           0 :         mbmi->use_wedge_interintra = 0;
    8017             :       }
    8018             :     }
    8019             : #endif  // CONFIG_WEDGE
    8020             : 
    8021           0 :     pred_exists = 0;
    8022           0 :     compmode_interintra_cost =
    8023           0 :         av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1) +
    8024           0 :         interintra_mode_cost[mbmi->interintra_mode];
    8025           0 :     if (is_interintra_wedge_used(bsize)) {
    8026           0 :       compmode_interintra_cost += av1_cost_bit(
    8027             :           cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra);
    8028           0 :       if (mbmi->use_wedge_interintra) {
    8029           0 :         compmode_interintra_cost +=
    8030           0 :             av1_cost_literal(get_interintra_wedge_bits(bsize));
    8031             :       }
    8032             :     }
    8033           0 :   } else if (is_interintra_allowed(mbmi)) {
    8034           0 :     compmode_interintra_cost =
    8035           0 :         av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 0);
    8036             :   }
    8037             : #endif  // CONFIG_INTERINTRA
    8038             : 
    8039           0 :   if (pred_exists == 0) {
    8040             :     int tmp_rate;
    8041             :     int64_t tmp_dist;
    8042           0 :     av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
    8043           0 :     model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
    8044             :                     &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
    8045           0 :     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
    8046             :   }
    8047             : #endif  // CONFIG_EXT_INTER
    8048             : 
    8049           0 :   if (!is_comp_pred)
    8050             : #if CONFIG_DUAL_FILTER
    8051           0 :     args->single_filter[this_mode][refs[0]] = mbmi->interp_filter[0];
    8052             : #else
    8053             :     args->single_filter[this_mode][refs[0]] = mbmi->interp_filter;
    8054             : #endif  // CONFIG_DUAL_FILTER
    8055             : 
    8056             : #if CONFIG_EXT_INTER
    8057           0 :   if (args->modelled_rd != NULL) {
    8058           0 :     if (is_comp_pred) {
    8059           0 :       const int mode0 = compound_ref0_mode(this_mode);
    8060           0 :       const int mode1 = compound_ref1_mode(this_mode);
    8061           0 :       const int64_t mrd = AOMMIN(args->modelled_rd[mode0][refs[0]],
    8062             :                                  args->modelled_rd[mode1][refs[1]]);
    8063           0 :       if (rd / 4 * 3 > mrd && ref_best_rd < INT64_MAX) {
    8064           0 :         restore_dst_buf(xd, orig_dst);
    8065           0 :         return INT64_MAX;
    8066             :       }
    8067           0 :     } else if (!is_comp_interintra_pred) {
    8068           0 :       args->modelled_rd[this_mode][refs[0]] = rd;
    8069             :     }
    8070             :   }
    8071             : #endif  // CONFIG_EXT_INTER
    8072             : 
    8073           0 :   if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
    8074             :     // if current pred_error modeled rd is substantially more than the best
    8075             :     // so far, do not bother doing full rd
    8076           0 :     if (rd / 2 > ref_best_rd) {
    8077           0 :       restore_dst_buf(xd, orig_dst);
    8078           0 :       return INT64_MAX;
    8079             :     }
    8080             :   }
    8081             : 
    8082             : #if CONFIG_EXT_INTER
    8083             : #if CONFIG_INTERINTRA
    8084           0 :   rd_stats->rate += compmode_interintra_cost;
    8085             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    8086           0 :   rate2_bmc_nocoeff += compmode_interintra_cost;
    8087             : #endif
    8088             : #endif
    8089             : #if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT
    8090           0 :   rd_stats->rate += compmode_interinter_cost;
    8091             : #endif
    8092             : #endif
    8093             : 
    8094           0 :   ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
    8095             :                            disable_skip, mode_mv, mi_row, mi_col, args,
    8096             :                            ref_best_rd, refs, rate_mv,
    8097             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    8098             :                            single_newmv,
    8099             : #if CONFIG_EXT_INTER
    8100             :                            rate2_bmc_nocoeff, &best_bmc_mbmi,
    8101             : #if CONFIG_MOTION_VAR
    8102             :                            rate_mv_bmc,
    8103             : #endif  // CONFIG_MOTION_VAR
    8104             : #endif  // CONFIG_EXT_INTER
    8105             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    8106             :                            rs, &skip_txfm_sb, &skip_sse_sb, &orig_dst);
    8107           0 :   if (ret_val != 0) return ret_val;
    8108             : 
    8109           0 :   return 0;  // The rate-distortion cost will be re-calculated by caller.
    8110             : }
    8111             : 
    8112             : #if CONFIG_INTRABC
                       // Rate-distortion search for intra block copy (mbmi->use_intrabc = 1) on the
                       // current block.
                       //
                       // A displacement vector (dv) is searched with av1_full_pixel_search() inside
                       // two restricted windows of the current frame (IBC_MOTION_ABOVE and
                       // IBC_MOTION_LEFT). For every dv that passes the validity checks, a
                       // coded-residual candidate and a skip candidate are costed.
                       //
                       // On return, *rd_cost, the block's MB_MODE_INFO and x->skip hold the best
                       // candidate whose rdcost was below best_rd; if none was, they keep the values
                       // they had on entry.
                       //
                       // Returns INT64_MAX immediately when bsize < BLOCK_8X8 or screen content tools
                       // are disabled; otherwise returns the smaller of the incoming best_rd and the
                       // best IntraBC rdcost found.
    8113             : static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
    8114             :                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
    8115             :                                        int64_t best_rd) {
    8116             :   const AV1_COMMON *const cm = &cpi->common;
    8117             :   if (bsize < BLOCK_8X8 || !cm->allow_screen_content_tools) return INT64_MAX;
    8118             : 
    8119             :   MACROBLOCKD *const xd = &x->e_mbd;
    8120             :   const TileInfo *tile = &xd->tile;
    8121             : #if CONFIG_EC_ADAPT
    8122             :   FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
    8123             : #else
    8124             :   FRAME_CONTEXT *const ec_ctx = cm->fc;
    8125             : #endif  // CONFIG_EC_ADAPT
    8126             :   MODE_INFO *const mi = xd->mi[0];
                         // Block position in mi units, derived from the distances to the top/left
                         // frame edges (assumes mb_to_*_edge are in 1/8-pel units -- TODO confirm).
    8127             :   const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
    8128             :   const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
    8129             :   const int w = block_size_wide[bsize];
    8130             :   const int h = block_size_high[bsize];
    8131             :   const int sb_row = mi_row / MAX_MIB_SIZE;
    8132             :   const int sb_col = mi_col / MAX_MIB_SIZE;
    8133             : 
                         // Collect dv candidates, using INTRA_FRAME as the reference-frame slot.
    8134             :   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
    8135             :   MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
    8136             :   int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
    8137             :   av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
    8138             :                    mbmi_ext->ref_mv_stack[ref_frame],
    8139             : #if CONFIG_EXT_INTER
    8140             :                    mbmi_ext->compound_mode_context,
    8141             : #endif  // CONFIG_EXT_INTER
    8142             :                    candidates, mi_row, mi_col, NULL, NULL,
    8143             :                    mbmi_ext->mode_context);
    8144             : 
    8145             :   int_mv nearestmv, nearmv;
    8146             :   av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);
    8147             : 
                         // Reference dv: nearest if non-zero, otherwise near; if that is zero too,
                         // let av1_find_ref_dv() supply one.
    8148             :   int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
    8149             :   if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, mi_row, mi_col);
    8150             :   mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref;
    8151             : 
                         // Point every plane's prediction source at the current frame's own
                         // buffer (xd->cur_buf).
    8152             :   struct buf_2d yv12_mb[MAX_MB_PLANE];
    8153             :   av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL);
    8154             :   for (int i = 0; i < MAX_MB_PLANE; ++i) {
    8155             :     xd->plane[i].pre[0] = yv12_mb[i];
    8156             :   }
    8157             : 
    8158             :   enum IntrabcMotionDirection {
    8159             :     IBC_MOTION_ABOVE,
    8160             :     IBC_MOTION_LEFT,
    8161             :     IBC_MOTION_DIRECTIONS
    8162             :   };
    8163             : 
                         // Snapshot the entry state; it is what gets written back if no IntraBC
                         // candidate beats best_rd.
    8164             :   MB_MODE_INFO *mbmi = &mi->mbmi;
    8165             :   MB_MODE_INFO best_mbmi = *mbmi;
    8166             :   RD_STATS best_rdcost = *rd_cost;
    8167             :   int best_skip = x->skip;
    8168             : 
    8169             :   for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
    8170             :        dir < IBC_MOTION_DIRECTIONS; ++dir) {
                           // Narrow the search window for this direction; the saved limits are
                           // restored on every path out of the search below.
    8171             :     const MvLimits tmp_mv_limits = x->mv_limits;
    8172             :     switch (dir) {
    8173             :       case IBC_MOTION_ABOVE:
                               // Columns: anywhere inside the tile. Rows: from the tile top down to
                               // where a block of height h still ends at the top of the current
                               // superblock row (sb_row * MAX_MIB_SIZE).
    8174             :         x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
    8175             :         x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
    8176             :         x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
    8177             :         x->mv_limits.row_max = (sb_row * MAX_MIB_SIZE - mi_row) * MI_SIZE - h;
    8178             :         break;
    8179             :       case IBC_MOTION_LEFT:
                               // Columns: from the tile's left edge to where a block of width w still
                               // ends at the left edge of the current superblock column. Rows: down
                               // to the bottom of this superblock row, clamped to the tile end.
    8180             :         x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
    8181             :         x->mv_limits.col_max = (sb_col * MAX_MIB_SIZE - mi_col) * MI_SIZE - w;
    8182             :         // TODO(aconverse@google.com): Minimize the overlap between above and
    8183             :         // left areas.
    8184             :         x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
    8185             :         int bottom_coded_mi_edge =
    8186             :             AOMMIN((sb_row + 1) * MAX_MIB_SIZE, tile->mi_row_end);
    8187             :         x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
    8188             :         break;
    8189             :       default: assert(0);
    8190             :     }
                           // The direction-specific window must lie inside the limits on entry.
    8191             :     assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
    8192             :     assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
    8193             :     assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
    8194             :     assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
    8195             :     av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);
    8196             : 
                           // Nothing to search when the resulting window is empty.
    8197             :     if (x->mv_limits.col_max < x->mv_limits.col_min ||
    8198             :         x->mv_limits.row_max < x->mv_limits.row_min) {
    8199             :       x->mv_limits = tmp_mv_limits;
    8200             :       continue;
    8201             :     }
    8202             : 
                           // The search starts at dv_ref >> 3 (presumably 1/8-pel to full-pel
                           // conversion -- TODO confirm).
    8203             :     int step_param = cpi->mv_step_param;
    8204             :     MV mvp_full = dv_ref.as_mv;
    8205             :     mvp_full.col >>= 3;
    8206             :     mvp_full.row >>= 3;
    8207             :     int sadpb = x->sadperbit16;
    8208             :     int cost_list[5];
    8209             :     int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
    8210             :                                         sadpb, cond_cost_list(cpi, cost_list),
    8211             :                                         &dv_ref.as_mv, INT_MAX, 1);
    8212             : 
    8213             :     x->mv_limits = tmp_mv_limits;
    8214             :     if (bestsme == INT_MAX) continue;
                           // Scale the search result (x->best_mv) back up by 8 to form the dv.
    8215             :     mvp_full = x->best_mv.as_mv;
    8216             :     MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
                           // x->mv_limits was restored just above, so this bounds check runs
                           // against the limits on entry, not the narrowed window.
    8217             :     if (mv_check_bounds(&x->mv_limits, &dv)) continue;
    8218             :     if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) continue;
    8219             : 
                           // Describe the block as IntraBC: DC_PRED for both mode fields, dv stored
                           // in mv[0], bilinear interpolation filter, residual coded (skip = 0).
    8220             : #if CONFIG_PALETTE
    8221             :     memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    8222             : #endif
    8223             :     mbmi->use_intrabc = 1;
    8224             :     mbmi->mode = DC_PRED;
    8225             :     mbmi->uv_mode = DC_PRED;
    8226             :     mbmi->mv[0].as_mv = dv;
    8227             : #if CONFIG_DUAL_FILTER
    8228             :     for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR;
    8229             : #else
    8230             :     mbmi->interp_filter = BILINEAR;
    8231             : #endif
    8232             :     mbmi->skip = 0;
    8233             :     x->skip = 0;
    8234             :     av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
    8235             : 
                           // Side-information rate: dv cost relative to dv_ref, plus the DC_PRED
                           // luma mode cost (given above/left modes) and the intrabc flag set to 1.
    8236             :     int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost,
    8237             :                                   x->mvcost, MV_COST_WEIGHT);
    8238             :     const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0);
    8239             :     const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0);
    8240             :     const int rate_mode = cpi->y_mode_costs[A][L][DC_PRED] +
    8241             :                           av1_cost_bit(ec_ctx->intrabc_prob, 1);
    8242             : 
                           // NOTE(review): rd_stats / rd_stats_uv are used below without checking
                           // whether the searches produced valid stats -- confirm that cannot
                           // happen with an INT64_MAX budget.
    8243             :     RD_STATS rd_stats, rd_stats_uv;
    8244             :     av1_subtract_plane(x, bsize, 0);
    8245             :     super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
    8246             :     super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    8247             :     av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
    8248             : #if CONFIG_RD_DEBUG
    8249             :     mbmi->rd_stats = rd_stats;
    8250             : #endif
    8251             : 
    8252             : #if CONFIG_VAR_TX
    8253             :     // TODO(aconverse@google.com): Evaluate allowing VAR TX on intrabc blocks
                           // Until then, every inter_tx_size entry is set to the single tx_size
                           // chosen for the block.
    8254             :     const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
    8255             :     const int height = block_size_high[bsize] >> tx_size_high_log2[0];
    8256             :     int idx, idy;
    8257             :     for (idy = 0; idy < height; ++idy)
    8258             :       for (idx = 0; idx < width; ++idx)
    8259             :         mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
    8260             :     mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
    8261             : #endif  // CONFIG_VAR_TX
    8262             : 
    8263             :     const aom_prob skip_prob = av1_get_skip_prob(cm, xd);
    8264             : 
                           // Candidate 1: residual coded (skip flag costed as 0).
    8265             :     RD_STATS rdc_noskip;
    8266             :     av1_init_rd_stats(&rdc_noskip);
    8267             :     rdc_noskip.rate =
    8268             :         rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0);
    8269             :     rdc_noskip.dist = rd_stats.dist;
    8270             :     rdc_noskip.rdcost =
    8271             :         RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist);
    8272             :     if (rdc_noskip.rdcost < best_rd) {
    8273             :       best_rd = rdc_noskip.rdcost;
    8274             :       best_mbmi = *mbmi;
    8275             :       best_skip = x->skip;
    8276             :       best_rdcost = rdc_noskip;
    8277             :     }
    8278             : 
                           // Candidate 2: residual skipped (skip flag costed as 1); no coefficient
                           // rate is paid and the distortion is rd_stats.sse.
    8279             :     x->skip = 1;
    8280             :     mbmi->skip = 1;
    8281             :     RD_STATS rdc_skip;
    8282             :     av1_init_rd_stats(&rdc_skip);
    8283             :     rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1);
    8284             :     rdc_skip.dist = rd_stats.sse;
    8285             :     rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist);
    8286             :     if (rdc_skip.rdcost < best_rd) {
    8287             :       best_rd = rdc_skip.rdcost;
    8288             :       best_mbmi = *mbmi;
    8289             :       best_skip = x->skip;
    8290             :       best_rdcost = rdc_skip;
    8291             :     }
    8292             :   }
                         // Commit the best candidate, or the untouched entry state if none won.
    8293             :   *mbmi = best_mbmi;
    8294             :   *rd_cost = best_rdcost;
    8295             :   x->skip = best_skip;
    8296             :   return best_rd;
    8297             : }
    8298             : #endif  // CONFIG_INTRABC
    8299             : 
                       // Picks the intra prediction modes (luma first, then chroma) for the block
                       // and fills *rd_cost with the combined rate, distortion and rdcost.
                       //
                       // If the luma search does not beat best_rd, rd_cost->rate is set to INT_MAX;
                       // rd_cost->dist and rd_cost->rdcost are not written on that path. With
                       // CONFIG_INTRABC, intra block copy is tried afterwards in either case and may
                       // replace the result.
                       //
                       // When the final rate is not INT_MAX, the block's mode info and x->mbmi_ext
                       // are copied into ctx; otherwise ctx is only touched through ctx->skip.
    8300           0 : void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
    8301             :                                RD_STATS *rd_cost, BLOCK_SIZE bsize,
    8302             :                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
    8303           0 :   const AV1_COMMON *const cm = &cpi->common;
    8304           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    8305           0 :   struct macroblockd_plane *const pd = xd->plane;
    8306           0 :   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
    8307           0 :   int y_skip = 0, uv_skip = 0;
    8308           0 :   int64_t dist_y = 0, dist_uv = 0;
    8309             :   TX_SIZE max_uv_tx_size;
    8310           0 :   const int unify_bsize = CONFIG_CB4X4;
    8311             : 
                         // Mark the block as intra-coded (single INTRA_FRAME reference) before
                         // searching.
    8312           0 :   ctx->skip = 0;
    8313           0 :   xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
    8314           0 :   xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME;
    8315             : #if CONFIG_INTRABC
    8316             :   xd->mi[0]->mbmi.use_intrabc = 0;
    8317             :   xd->mi[0]->mbmi.mv[0].as_int = 0;
    8318             : #endif  // CONFIG_INTRABC
    8319             : 
                         // The sub-8x8 luma search is only used when CONFIG_CB4X4 is off and
                         // bsize < BLOCK_8X8.
    8320           0 :   const int64_t intra_yrd =
    8321           0 :       (bsize >= BLOCK_8X8 || unify_bsize)
    8322           0 :           ? rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
    8323             :                                    &y_skip, bsize, best_rd)
    8324           0 :           : rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
    8325             :                                          &dist_y, &y_skip, best_rd);
    8326             : 
    8327           0 :   if (intra_yrd < best_rd) {
                           // Chroma transform size is looked up from the block size, the luma
                           // transform size just chosen, and plane 1's subsampling.
    8328           0 :     max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size]
    8329           0 :                                      [pd[1].subsampling_x][pd[1].subsampling_y];
    8330           0 :     init_sbuv_mode(&xd->mi[0]->mbmi);
    8331             : #if CONFIG_CB4X4
    8332           0 :     if (!x->skip_chroma_rd)
    8333           0 :       rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
    8334             :                               &uv_skip, bsize, max_uv_tx_size);
    8335             : #else
    8336             :     rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
    8337             :                             &uv_skip, AOMMAX(BLOCK_8X8, bsize), max_uv_tx_size);
    8338             : #endif  // CONFIG_CB4X4
    8339             : 
                           // If both luma and chroma are skippable, remove the token-only rates
                           // and pay for the skip flag set to 1; otherwise pay for it set to 0.
                           // The distortion is the same sum in both cases.
    8340           0 :     if (y_skip && uv_skip) {
    8341           0 :       rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
    8342           0 :                       av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
    8343           0 :       rd_cost->dist = dist_y + dist_uv;
    8344             :     } else {
    8345           0 :       rd_cost->rate =
    8346           0 :           rate_y + rate_uv + av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
    8347           0 :       rd_cost->dist = dist_y + dist_uv;
    8348             :     }
    8349           0 :     rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
    8350             : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
    8351             :     rd_cost->dist_y = dist_y;
    8352             : #endif
    8353             :   } else {
                           // Luma alone already failed to beat best_rd: flag "no valid mode".
    8354           0 :     rd_cost->rate = INT_MAX;
    8355             :   }
    8356             : 
    8357             : #if CONFIG_INTRABC
                         // Let IntraBC compete: tighten best_rd to the intra result (if there is
                         // one), then accept IntraBC only if it returns a strictly lower cost.
    8358             :   if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    8359             :     best_rd = rd_cost->rdcost;
    8360             :   if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
    8361             :     ctx->skip = x->skip;  // FIXME where is the proper place to set this?!
    8362             :     assert(rd_cost->rate != INT_MAX);
    8363             :     rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
    8364             :   }
    8365             : #endif
                         // No mode beat best_rd: leave the saved mode info in ctx untouched.
    8366           0 :   if (rd_cost->rate == INT_MAX) return;
    8367             : 
    8368           0 :   ctx->mic = *xd->mi[0];
    8369           0 :   ctx->mbmi_ext = *x->mbmi_ext;
    8370             : }
    8371             : 
    8372             : // Do we have an internal image edge (e.g. formatting bars)?
                       // Returns nonzero only when cpi->oxcf.pass == 2 and the current frame's
                       // two-pass stats report at least one inactive zone row or column.
    8373           0 : int av1_internal_image_edge(const AV1_COMP *cpi) {
    8374           0 :   return (cpi->oxcf.pass == 2) &&
    8375           0 :          ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
    8376           0 :           (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
    8377             : }
    8378             : 
    8379             : // Checks to see if a super block is on a horizontal image edge.
    8380             : // In most cases this is the "real" edge unless there are formatting
    8381             : // bars embedded in the stream.
                       // Returns 1 if the top or the bottom active edge row lies in the half-open
                       // range [mi_row, mi_row + mi_step), and 0 otherwise.
    8382           0 : int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
                         // Defaults: the active area spans rows 0 .. cpi->common.mi_rows.
    8383           0 :   int top_edge = 0;
    8384           0 :   int bottom_edge = cpi->common.mi_rows;
    8385           0 :   int is_active_h_edge = 0;
    8386             : 
    8387             :   // For two pass account for any formatting bars detected.
    8388           0 :   if (cpi->oxcf.pass == 2) {
    8389           0 :     const TWO_PASS *const twopass = &cpi->twopass;
    8390             : 
    8391             :     // The inactive region is specified in MBs not mi units.
    8392             :     // The image edge is in the following MB row.
                           // (The * 2 presumably converts MB rows to mi rows -- TODO confirm.)
    8393           0 :     top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
    8394             : 
    8395           0 :     bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
                           // Do not let the bottom edge move above the top edge.
    8396           0 :     bottom_edge = AOMMAX(top_edge, bottom_edge);
    8397             :   }
    8398             : 
    8399           0 :   if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
    8400           0 :       ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
    8401           0 :     is_active_h_edge = 1;
    8402             :   }
    8403           0 :   return is_active_h_edge;
    8404             : }
    8405             : 
    8406             : // Checks to see if a super block is on a vertical image edge.
    8407             : // In most cases this is the "real" edge unless there are formatting
    8408             : // bars embedded in the stream.
                       // Returns 1 if the left or the right active edge column lies in the
                       // half-open range [mi_col, mi_col + mi_step), and 0 otherwise.
    8409           0 : int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
                         // Defaults: the active area spans columns 0 .. cpi->common.mi_cols.
    8410           0 :   int left_edge = 0;
    8411           0 :   int right_edge = cpi->common.mi_cols;
    8412           0 :   int is_active_v_edge = 0;
    8413             : 
    8414             :   // For two pass account for any formatting bars detected.
    8415           0 :   if (cpi->oxcf.pass == 2) {
    8416           0 :     const TWO_PASS *const twopass = &cpi->twopass;
    8417             : 
    8418             :     // The inactive region is specified in MBs not mi units.
    8419             :     // The image edge is in the following MB column.
                           // (The * 2 presumably converts MB columns to mi columns -- TODO confirm.)
    8420           0 :     left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
    8421             : 
    8422           0 :     right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
                           // Do not let the right edge move left of the left edge.
    8423           0 :     right_edge = AOMMAX(left_edge, right_edge);
    8424             :   }
    8425             : 
    8426           0 :   if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
    8427           0 :       ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
    8428           0 :     is_active_v_edge = 1;
    8429             :   }
    8430           0 :   return is_active_v_edge;
    8431             : }
    8432             : 
    8433             : // Checks to see if a super block is at the edge of the active image.
    8434             : // In most cases this is the "real" edge unless there are formatting
    8435             : // bars embedded in the stream.
                       // Returns nonzero if av1_active_h_edge() or av1_active_v_edge() reports an
                       // edge for the span of cpi->common.mib_size starting at mi_row / mi_col.
    8436           0 : int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
    8437           0 :   return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
    8438           0 :          av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
    8439             : }
    8440             : 
    8441             : #if CONFIG_PALETTE
                       // Recomputes the chroma color index map (xd->plane[1].color_index_map) of
                       // the current block from the source U/V samples and the palette stored in
                       // mbmi->palette_mode_info.
                       //
                       // Each pixel contributes an interleaved (U, V) pair to the k-means data
                       // buffer, the palette colors are interleaved the same way into centroids,
                       // and av1_calc_indices() then fills color_map for the rows * cols pixels.
    8442           0 : static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
    8443           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    8444           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    8445           0 :   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
    8446           0 :   const BLOCK_SIZE bsize = mbmi->sb_type;
                         // The plane 1 stride is used below to index both the U and the V source.
    8447           0 :   int src_stride = x->plane[1].src.stride;
    8448           0 :   const uint8_t *const src_u = x->plane[1].src.buf;
    8449           0 :   const uint8_t *const src_v = x->plane[2].src.buf;
    8450           0 :   float *const data = x->palette_buffer->kmeans_data_buf;
    8451             :   float centroids[2 * PALETTE_MAX_SIZE];
    8452           0 :   uint8_t *const color_map = xd->plane[1].color_index_map;
    8453             :   int r, c;
    8454             : #if CONFIG_HIGHBITDEPTH
    8455           0 :   const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
    8456           0 :   const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
    8457             : #endif  // CONFIG_HIGHBITDEPTH
    8458             :   int plane_block_width, plane_block_height, rows, cols;
    8459           0 :   av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
    8460             :                            &plane_block_height, &rows, &cols);
                         // cpi is only referenced inside the CONFIG_HIGHBITDEPTH code below.
    8461             :   (void)cpi;
    8462             : 
                         // Gather the source samples as interleaved (U, V) pairs, one pair per
                         // pixel, in row-major order over rows x cols.
    8463           0 :   for (r = 0; r < rows; ++r) {
    8464           0 :     for (c = 0; c < cols; ++c) {
    8465             : #if CONFIG_HIGHBITDEPTH
    8466           0 :       if (cpi->common.use_highbitdepth) {
    8467           0 :         data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
    8468           0 :         data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
    8469             :       } else {
    8470             : #endif  // CONFIG_HIGHBITDEPTH
    8471           0 :         data[(r * cols + c) * 2] = src_u[r * src_stride + c];
    8472           0 :         data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
    8473             : #if CONFIG_HIGHBITDEPTH
    8474             :       }
    8475             : #endif  // CONFIG_HIGHBITDEPTH
    8476             :     }
    8477             :   }
    8478             : 
                         // Interleave the palette the same way: r = 1 fills the slot that lines up
                         // with the U samples, r = 2 the slot that lines up with the V samples.
                         // palette_colors is indexed in strides of PALETTE_MAX_SIZE per value of r.
    8479           0 :   for (r = 1; r < 3; ++r) {
    8480           0 :     for (c = 0; c < pmi->palette_size[1]; ++c) {
    8481           0 :       centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
    8482             :     }
    8483             :   }
    8484             : 
                         // Assign an index to each of the rows * cols pixels (the trailing 2 is
                         // presumably the number of values per data point -- TODO confirm), then
                         // extend the map from cols x rows to the plane block dimensions.
    8485           0 :   av1_calc_indices(data, centroids, color_map, rows * cols,
    8486           0 :                    pmi->palette_size[1], 2);
    8487           0 :   extend_palette_color_map(color_map, cols, rows, plane_block_width,
    8488             :                            plane_block_height);
    8489           0 : }
    8490             : #endif  // CONFIG_PALETTE
    8491             : 
    8492             : #if CONFIG_FILTER_INTRA
    8493             : static void pick_filter_intra_interframe(  // RD-tests the intra DC_PRED + luma filter-intra candidate; on a win it updates *best_rd, *best_mbmode, *rd_cost and the other best_* outputs.
    8494             :     const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
    8495             :     BLOCK_SIZE bsize, int mi_row, int mi_col, int *rate_uv_intra,
    8496             :     int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv,
    8497             :     PREDICTION_MODE *mode_uv, FILTER_INTRA_MODE_INFO *filter_intra_mode_info_uv,  // rate_uv_intra .. filter_intra_mode_info_uv (and uv_angle_delta / pmi_uv below) are arrays indexed by uv_tx
    8498             : #if CONFIG_EXT_INTRA
    8499             :     int8_t *uv_angle_delta,
    8500             : #endif  // CONFIG_EXT_INTRA
    8501             : #if CONFIG_PALETTE
    8502             :     PALETTE_MODE_INFO *pmi_uv, int palette_ctx,
    8503             : #endif  // CONFIG_PALETTE
    8504             :     int skip_mask, unsigned int *ref_costs_single, int64_t *best_rd,
    8505             :     int64_t *best_intra_rd, PREDICTION_MODE *best_intra_mode,
    8506             :     int *best_mode_index, int *best_skip2, int *best_mode_skippable,
    8507             : #if CONFIG_SUPERTX
    8508             :     int *returnrate_nocoef,
    8509             : #endif  // CONFIG_SUPERTX
    8510             :     int64_t *best_pred_rd, MB_MODE_INFO *best_mbmode, RD_STATS *rd_cost) {
    8511             :   const AV1_COMMON *const cm = &cpi->common;
    8512             :   MACROBLOCKD *const xd = &x->e_mbd;
    8513             :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    8514             : #if CONFIG_PALETTE
    8515             :   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
    8516             : #endif  // CONFIG_PALETTE
    8517             :   int rate2 = 0, rate_y = INT_MAX, skippable = 0, rate_uv, rate_dummy, i;
    8518             :   int dc_mode_index;
    8519             :   const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
    8520             :   int64_t distortion2 = 0, distortion_y = 0, this_rd = *best_rd;
    8521             :   int64_t distortion_uv, model_rd = INT64_MAX;
    8522             :   TX_SIZE uv_tx;
    8523             :   // Find the av1_mode_order entry for DC_PRED on INTRA_FRAME; it is written to *best_mode_index if this candidate wins.
    8524             :   for (i = 0; i < MAX_MODES; ++i)
    8525             :     if (av1_mode_order[i].mode == DC_PRED &&
    8526             :         av1_mode_order[i].ref_frame[0] == INTRA_FRAME)
    8527             :       break;
    8528             :   dc_mode_index = i;
    8529             :   assert(i < MAX_MODES);
    8530             : 
    8531             :   // TODO(huisu): use skip_mask for further speedup.
    8532             :   (void)skip_mask;
    8533             :   mbmi->mode = DC_PRED;  // candidate under test: intra block, DC_PRED for both luma and (initially) chroma
    8534             :   mbmi->uv_mode = DC_PRED;
    8535             :   mbmi->ref_frame[0] = INTRA_FRAME;
    8536             :   mbmi->ref_frame[1] = NONE_FRAME;
    8537             :   if (!rd_pick_filter_intra_sby(cpi, x, &rate_dummy, &rate_y, &distortion_y,  // luma filter-intra search, bounded by this_rd (initialised from *best_rd)
    8538             :                                 &skippable, bsize, intra_mode_cost[mbmi->mode],
    8539             :                                 &this_rd, &model_rd, 0)) {
    8540             :     return;  // the search returned 0: leave every best_* output untouched
    8541             :   }
    8542             :   if (rate_y == INT_MAX) return;  // rate_y still holds its initial sentinel value
    8543             :   // Chroma: derive the uv transform size, then run the chroma mode search only if the cache slot for that size still holds INT_MAX.
    8544             :   uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
    8545             :                           [xd->plane[1].subsampling_y];
    8546             :   if (rate_uv_intra[uv_tx] == INT_MAX) {
    8547             :     choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
    8548             :                          &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
    8549             :                          &skip_uv[uv_tx], &mode_uv[uv_tx]);
    8550             : #if CONFIG_PALETTE
    8551             :     if (cm->allow_screen_content_tools) pmi_uv[uv_tx] = *pmi;
    8552             : #endif  // CONFIG_PALETTE
    8553             :     filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
    8554             : #if CONFIG_EXT_INTRA
    8555             :     uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
    8556             : #endif  // CONFIG_EXT_INTRA
    8557             :   }
    8558             :   // Load the cached chroma decision for uv_tx into the local rate/distortion terms and back into mbmi.
    8559             :   rate_uv = rate_uv_tokenonly[uv_tx];
    8560             :   distortion_uv = dist_uv[uv_tx];
    8561             :   skippable = skippable && skip_uv[uv_tx];
    8562             :   mbmi->uv_mode = mode_uv[uv_tx];
    8563             : #if CONFIG_PALETTE
    8564             :   if (cm->allow_screen_content_tools) {
    8565             :     pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
    8566             :     memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,  // copies the two PALETTE_MAX_SIZE-entry colour rows that follow the first row
    8567             :            pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
    8568             :            2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
    8569             :   }
    8570             : #endif  // CONFIG_PALETTE
    8571             : #if CONFIG_EXT_INTRA
    8572             :   mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
    8573             : #endif  // CONFIG_EXT_INTRA
    8574             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
    8575             :       filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
    8576             :   if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
    8577             :     mbmi->filter_intra_mode_info.filter_intra_mode[1] =
    8578             :         filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
    8579             :   }
    8580             :   // Start the total rate: luma rate + luma mode cost + chroma token rate + chroma mode cost (conditioned on the luma mode).
    8581             :   rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
    8582             :           cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
    8583             : #if CONFIG_PALETTE
    8584             :   if (cpi->common.allow_screen_content_tools && mbmi->mode == DC_PRED &&
    8585             :       bsize >= BLOCK_8X8)
    8586             :     rate2 += av1_cost_bit(  // cost of a 0 bit for the luma palette flag, using the default probability table
    8587             :         av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
    8588             : #endif  // CONFIG_PALETTE
    8589             :   // rate2 above was built from the unreduced rate_y; only rate_y itself is reduced below.
    8590             :   if (!xd->lossless[mbmi->segment_id]) {
    8591             :     // super_block_yrd above includes the cost of the tx_size in the
    8592             :     // tokenonly rate, but for intra blocks, tx_size is always coded
    8593             :     // (prediction granularity), so we account for it in the full rate,
    8594             :     // not the tokenonly rate.
    8595             :     rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
    8596             :   }
    8597             :   // Cost of the luma filter-intra flag and of the selected luma filter-intra mode.
    8598             :   rate2 += av1_cost_bit(cm->fc->filter_intra_probs[0],
    8599             :                         mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
    8600             :   rate2 += write_uniform_cost(
    8601             :       FILTER_INTRA_MODES, mbmi->filter_intra_mode_info.filter_intra_mode[0]);
    8602             : #if CONFIG_EXT_INTRA
    8603             :   if (av1_is_directional_mode(mbmi->uv_mode, bsize)) {
    8604             :     rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
    8605             :                                 MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
    8606             :   }
    8607             : #endif  // CONFIG_EXT_INTRA
    8608             :   if (mbmi->mode == DC_PRED) {  // NOTE(review): this tests the luma mode (set to DC_PRED above) while costing the chroma [1] filter-intra flag; confirm mbmi->uv_mode was not intended
    8609             :     rate2 +=
    8610             :         av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
    8611             :                      mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
    8612             :     if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
    8613             :       rate2 +=
    8614             :           write_uniform_cost(FILTER_INTRA_MODES,
    8615             :                              mbmi->filter_intra_mode_info.filter_intra_mode[1]);
    8616             :   }
    8617             :   distortion2 = distortion_y + distortion_uv;
    8618             :   av1_encode_intra_block_plane((AV1_COMMON *)cm, x, bsize, 0, 0, mi_row,  // note: the cast drops the const qualifier from cm
    8619             :                                mi_col);
    8620             : 
    8621             :   rate2 += ref_costs_single[INTRA_FRAME];
    8622             :   // Skip flag: when skippable, remove both token rates and pay for skip=1; otherwise pay for skip=0.
    8623             :   if (skippable) {
    8624             :     rate2 -= (rate_y + rate_uv);
    8625             :     rate_y = 0;
    8626             :     rate_uv = 0;
    8627             :     rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
    8628             :   } else {
    8629             :     rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
    8630             :   }
    8631             :   this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    8632             :   // Track the best intra RD, and fold this RD into the running minimum for every reference mode.
    8633             :   if (this_rd < *best_intra_rd) {
    8634             :     *best_intra_rd = this_rd;
    8635             :     *best_intra_mode = mbmi->mode;
    8636             :   }
    8637             :   for (i = 0; i < REFERENCE_MODES; ++i)
    8638             :     best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
    8639             :   // New overall best: publish rate/dist/rdcost and snapshot the current mbmi into *best_mbmode.
    8640             :   if (this_rd < *best_rd) {
    8641             :     *best_mode_index = dc_mode_index;
    8642             :     mbmi->mv[0].as_int = 0;
    8643             :     rd_cost->rate = rate2;
    8644             : #if CONFIG_SUPERTX
    8645             :     if (x->skip)
    8646             :       *returnrate_nocoef = rate2;
    8647             :     else
    8648             :       *returnrate_nocoef = rate2 - rate_y - rate_uv;
    8649             :     *returnrate_nocoef -= av1_cost_bit(av1_get_skip_prob(cm, xd), skippable);
    8650             :     *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
    8651             :                                        mbmi->ref_frame[0] != INTRA_FRAME);
    8652             : #endif  // CONFIG_SUPERTX
    8653             :     rd_cost->dist = distortion2;
    8654             :     rd_cost->rdcost = this_rd;
    8655             :     *best_rd = this_rd;
    8656             :     *best_mbmode = *mbmi;
    8657             :     *best_skip2 = 0;
    8658             :     *best_mode_skippable = skippable;
    8659             :   }
    8660             : }
    8661             : #endif  // CONFIG_FILTER_INTRA
    8662             : 
    8663             : #if CONFIG_MOTION_VAR
    8664             : static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
    8665             :                                       const MACROBLOCKD *xd, int mi_row,
    8666             :                                       int mi_col, const uint8_t *above,
    8667             :                                       int above_stride, const uint8_t *left,
    8668             :                                       int left_stride);  // forward declaration: av1_rd_pick_inter_mode_sb below calls this with the plane-0 above/left prediction buffers and their strides
    8669             : #endif  // CONFIG_MOTION_VAR
    8670             : 
    8671           0 : void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
    8672             :                                MACROBLOCK *x, int mi_row, int mi_col,
    8673             :                                RD_STATS *rd_cost,
    8674             : #if CONFIG_SUPERTX
    8675             :                                int *returnrate_nocoef,
    8676             : #endif  // CONFIG_SUPERTX
    8677             :                                BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    8678             :                                int64_t best_rd_so_far) {
    8679           0 :   const AV1_COMMON *const cm = &cpi->common;
    8680           0 :   const RD_OPT *const rd_opt = &cpi->rd;
    8681           0 :   const SPEED_FEATURES *const sf = &cpi->sf;
    8682           0 :   MACROBLOCKD *const xd = &x->e_mbd;
    8683           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
    8684             : #if CONFIG_PALETTE
    8685           0 :   const int try_palette =
    8686           0 :       cpi->common.allow_screen_content_tools && bsize >= BLOCK_8X8;
    8687           0 :   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
    8688             : #endif  // CONFIG_PALETTE
    8689           0 :   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
    8690           0 :   const struct segmentation *const seg = &cm->seg;
    8691             :   PREDICTION_MODE this_mode;
    8692             :   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
    8693           0 :   unsigned char segment_id = mbmi->segment_id;
    8694             :   int comp_pred, i, k;
    8695             :   int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
    8696             :   struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
    8697           0 :   int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
    8698             : #if CONFIG_EXT_INTER
    8699           0 :   int single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
    8700             :   int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
    8701             : #endif  // CONFIG_EXT_INTER
    8702             :   static const int flag_list[TOTAL_REFS_PER_FRAME] = {
    8703             :     0,
    8704             :     AOM_LAST_FLAG,
    8705             : #if CONFIG_EXT_REFS
    8706             :     AOM_LAST2_FLAG,
    8707             :     AOM_LAST3_FLAG,
    8708             : #endif  // CONFIG_EXT_REFS
    8709             :     AOM_GOLD_FLAG,
    8710             : #if CONFIG_EXT_REFS
    8711             :     AOM_BWD_FLAG,
    8712             : #endif  // CONFIG_EXT_REFS
    8713             :     AOM_ALT_FLAG
    8714             :   };
    8715           0 :   int64_t best_rd = best_rd_so_far;
    8716           0 :   int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
    8717             :   int64_t best_pred_diff[REFERENCE_MODES];
    8718             :   int64_t best_pred_rd[REFERENCE_MODES];
    8719             :   MB_MODE_INFO best_mbmode;
    8720           0 :   int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
    8721           0 :   int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
    8722           0 :   int best_mode_skippable = 0;
    8723           0 :   int midx, best_mode_index = -1;
    8724             :   unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
    8725             :   unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
    8726             :   aom_prob comp_mode_p;
    8727           0 :   int64_t best_intra_rd = INT64_MAX;
    8728           0 :   unsigned int best_pred_sse = UINT_MAX;
    8729           0 :   PREDICTION_MODE best_intra_mode = DC_PRED;
    8730             :   int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
    8731             :   int64_t dist_uvs[TX_SIZES_ALL];
    8732             :   int skip_uvs[TX_SIZES_ALL];
    8733             :   PREDICTION_MODE mode_uv[TX_SIZES_ALL];
    8734             : #if CONFIG_PALETTE
    8735             :   PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
    8736             : #endif  // CONFIG_PALETTE
    8737             : #if CONFIG_EXT_INTRA
    8738             :   int8_t uv_angle_delta[TX_SIZES_ALL];
    8739           0 :   int is_directional_mode, angle_stats_ready = 0;
    8740             :   uint8_t directional_mode_skip_mask[INTRA_MODES];
    8741             : #endif  // CONFIG_EXT_INTRA
    8742             : #if CONFIG_FILTER_INTRA
    8743             :   int8_t dc_skipped = 1;
    8744             :   FILTER_INTRA_MODE_INFO filter_intra_mode_info_uv[TX_SIZES_ALL];
    8745             : #endif  // CONFIG_FILTER_INTRA
    8746           0 :   const int intra_cost_penalty = av1_get_intra_cost_penalty(
    8747             :       cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
    8748           0 :   const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]];
    8749           0 :   int best_skip2 = 0;
    8750           0 :   uint8_t ref_frame_skip_mask[2] = { 0 };
    8751           0 :   uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
    8752             : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
    8753           0 :   MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
    8754           0 :   int64_t best_single_inter_rd = INT64_MAX;
    8755             : #endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
    8756           0 :   int mode_skip_start = sf->mode_skip_start + 1;
    8757           0 :   const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
    8758           0 :   const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
    8759             :   int64_t mode_threshold[MAX_MODES];
    8760           0 :   int *mode_map = tile_data->mode_map[bsize];
    8761           0 :   const int mode_search_skip_flags = sf->mode_search_skip_flags;
    8762             : #if CONFIG_PVQ
    8763             :   od_rollback_buffer pre_buf;
    8764             : #endif  // CONFIG_PVQ
    8765             : 
    8766           0 :   HandleInterModeArgs args = {
    8767             : #if CONFIG_MOTION_VAR
    8768             :     { NULL },
    8769             :     { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
    8770             :     { NULL },
    8771             :     { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
    8772             : #endif  // CONFIG_MOTION_VAR
    8773             : #if CONFIG_EXT_INTER
    8774             :     NULL,
    8775             :     NULL,
    8776             :     NULL,
    8777             : #else   // CONFIG_EXT_INTER
    8778             :     NULL,
    8779             : #endif  // CONFIG_EXT_INTER
    8780             :     { { 0 } },
    8781             :   };
    8782             : 
    8783             : #if CONFIG_PALETTE || CONFIG_EXT_INTRA
    8784           0 :   const int rows = block_size_high[bsize];
    8785           0 :   const int cols = block_size_wide[bsize];
    8786             : #endif  // CONFIG_PALETTE || CONFIG_EXT_INTRA
    8787             : #if CONFIG_PALETTE
    8788           0 :   int palette_ctx = 0;
    8789           0 :   const MODE_INFO *above_mi = xd->above_mi;
    8790           0 :   const MODE_INFO *left_mi = xd->left_mi;
    8791             : #endif  // CONFIG_PALETTE
    8792             : #if CONFIG_MOTION_VAR
    8793           0 :   int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
    8794           0 :   int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
    8795           0 :   int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
    8796           0 :   int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
    8797             : 
    8798             : #if CONFIG_HIGHBITDEPTH
    8799           0 :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    8800           0 :     int len = sizeof(uint16_t);
    8801           0 :     args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
    8802           0 :     args.above_pred_buf[1] =
    8803           0 :         CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
    8804           0 :     args.above_pred_buf[2] =
    8805           0 :         CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len);
    8806           0 :     args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
    8807           0 :     args.left_pred_buf[1] =
    8808           0 :         CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
    8809           0 :     args.left_pred_buf[2] =
    8810           0 :         CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len);
    8811             :   } else {
    8812             : #endif  // CONFIG_HIGHBITDEPTH
    8813           0 :     args.above_pred_buf[0] = x->above_pred_buf;
    8814           0 :     args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE;
    8815           0 :     args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE;
    8816           0 :     args.left_pred_buf[0] = x->left_pred_buf;
    8817           0 :     args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE;
    8818           0 :     args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE;
    8819             : #if CONFIG_HIGHBITDEPTH
    8820             :   }
    8821             : #endif  // CONFIG_HIGHBITDEPTH
    8822             : #endif  // CONFIG_MOTION_VAR
    8823             : 
    8824           0 :   av1_zero(best_mbmode);
    8825             : 
    8826             : #if CONFIG_PALETTE
    8827           0 :   av1_zero(pmi_uv);
    8828           0 :   if (try_palette) {
    8829           0 :     if (above_mi)
    8830           0 :       palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    8831           0 :     if (left_mi)
    8832           0 :       palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    8833             :   }
    8834             : #endif  // CONFIG_PALETTE
    8835             : 
    8836           0 :   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
    8837             :                            &comp_mode_p);
    8838             : 
    8839           0 :   for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
    8840           0 :   for (i = 0; i < TX_SIZES_ALL; i++) rate_uv_intra[i] = INT_MAX;
    8841           0 :   for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
    8842           0 :   for (i = 0; i < MB_MODE_COUNT; ++i) {
    8843           0 :     for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
    8844           0 :       args.single_filter[i][k] = SWITCHABLE;
    8845             :     }
    8846             :   }
    8847             : 
    8848           0 :   rd_cost->rate = INT_MAX;
    8849             : #if CONFIG_SUPERTX
    8850             :   *returnrate_nocoef = INT_MAX;
    8851             : #endif  // CONFIG_SUPERTX
    8852             : 
    8853           0 :   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    8854           0 :     x->pred_mv_sad[ref_frame] = INT_MAX;
    8855           0 :     x->mbmi_ext->mode_context[ref_frame] = 0;
    8856             : #if CONFIG_EXT_INTER
    8857           0 :     x->mbmi_ext->compound_mode_context[ref_frame] = 0;
    8858             : #endif  // CONFIG_EXT_INTER
    8859           0 :     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
    8860           0 :       assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
    8861           0 :       setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
    8862             :                          frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
    8863             :     }
    8864           0 :     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    8865             : #if CONFIG_GLOBAL_MOTION
    8866           0 :     frame_mv[ZEROMV][ref_frame].as_int =
    8867           0 :         gm_get_motion_vector(&cm->global_motion[ref_frame],
    8868             :                              cm->allow_high_precision_mv, bsize, mi_col, mi_row,
    8869             :                              0)
    8870           0 :             .as_int;
    8871             : #else   // CONFIG_GLOBAL_MOTION
    8872             :     frame_mv[ZEROMV][ref_frame].as_int = 0;
    8873             : #endif  // CONFIG_GLOBAL_MOTION
    8874             : #if CONFIG_EXT_INTER
    8875           0 :     frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
    8876             : #if CONFIG_GLOBAL_MOTION
    8877           0 :     frame_mv[ZERO_ZEROMV][ref_frame].as_int =
    8878           0 :         gm_get_motion_vector(&cm->global_motion[ref_frame],
    8879             :                              cm->allow_high_precision_mv, bsize, mi_col, mi_row,
    8880             :                              0)
    8881           0 :             .as_int;
    8882             : #else   // CONFIG_GLOBAL_MOTION
    8883             :     frame_mv[ZERO_ZEROMV][ref_frame].as_int = 0;
    8884             : #endif  // CONFIG_GLOBAL_MOTION
    8885             : #endif  // CONFIG_EXT_INTER
    8886             :   }
    8887             : 
    8888           0 :   for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
    8889           0 :     MODE_INFO *const mi = xd->mi[0];
    8890           0 :     int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
    8891           0 :     x->mbmi_ext->mode_context[ref_frame] = 0;
    8892           0 :     av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
    8893           0 :                      mbmi_ext->ref_mv_stack[ref_frame],
    8894             : #if CONFIG_EXT_INTER
    8895           0 :                      mbmi_ext->compound_mode_context,
    8896             : #endif  // CONFIG_EXT_INTER
    8897             :                      candidates, mi_row, mi_col, NULL, NULL,
    8898           0 :                      mbmi_ext->mode_context);
    8899           0 :     if (mbmi_ext->ref_mv_count[ref_frame] < 2) {
    8900             :       MV_REFERENCE_FRAME rf[2];
    8901           0 :       av1_set_ref_frame(rf, ref_frame);
    8902           0 :       if (mbmi_ext->ref_mvs[rf[0]][0].as_int !=
    8903           0 :               frame_mv[ZEROMV][rf[0]].as_int ||
    8904           0 :           mbmi_ext->ref_mvs[rf[0]][1].as_int !=
    8905           0 :               frame_mv[ZEROMV][rf[0]].as_int ||
    8906           0 :           mbmi_ext->ref_mvs[rf[1]][0].as_int !=
    8907           0 :               frame_mv[ZEROMV][rf[1]].as_int ||
    8908           0 :           mbmi_ext->ref_mvs[rf[1]][1].as_int != frame_mv[ZEROMV][rf[1]].as_int)
    8909           0 :         mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
    8910             :     }
    8911             :   }
    8912             : 
    8913             : #if CONFIG_MOTION_VAR
    8914           0 :   av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
    8915             : 
    8916           0 :   if (check_num_overlappable_neighbors(mbmi) &&
    8917           0 :       is_motion_variation_allowed_bsize(bsize)) {
    8918           0 :     av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
    8919             :                                         args.above_pred_buf, dst_width1,
    8920             :                                         dst_height1, args.above_pred_stride);
    8921           0 :     av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
    8922             :                                        args.left_pred_buf, dst_width2,
    8923             :                                        dst_height2, args.left_pred_stride);
    8924           0 :     av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
    8925             :                          mi_col);
    8926           0 :     calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0],
    8927           0 :                               args.above_pred_stride[0], args.left_pred_buf[0],
    8928             :                               args.left_pred_stride[0]);
    8929             :   }
    8930             : #endif  // CONFIG_MOTION_VAR
    8931             : 
    8932           0 :   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    8933           0 :     if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
    8934             : // Skip checking missing references in both single and compound reference
    8935             : // modes. Note that a mode will be skipped iff both reference frames
    8936             : // are masked out.
    8937             : #if CONFIG_EXT_REFS
    8938           0 :       if (ref_frame == BWDREF_FRAME || ref_frame == ALTREF_FRAME) {
    8939           0 :         ref_frame_skip_mask[0] |= (1 << ref_frame);
    8940           0 :         ref_frame_skip_mask[1] |= ((1 << ref_frame) | 0x01);
    8941             :       } else {
    8942             : #endif  // CONFIG_EXT_REFS
    8943           0 :         ref_frame_skip_mask[0] |= (1 << ref_frame);
    8944           0 :         ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    8945             : #if CONFIG_EXT_REFS
    8946             :       }
    8947             : #endif  // CONFIG_EXT_REFS
    8948             :     } else {
    8949           0 :       for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
    8950             :         // Skip fixed mv modes for poor references
    8951           0 :         if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
    8952           0 :           mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
    8953           0 :           break;
    8954             :         }
    8955             :       }
    8956             :     }
    8957             :     // If the segment reference frame feature is enabled....
    8958             :     // then do nothing if the current ref frame is not allowed..
    8959           0 :     if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
    8960           0 :         get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
    8961           0 :       ref_frame_skip_mask[0] |= (1 << ref_frame);
    8962           0 :       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    8963             :     }
    8964             :   }
    8965             : 
    8966             :   // Disable this drop out case if the ref frame
    8967             :   // segment level feature is enabled for this segment. This is to
    8968             :   // prevent the possibility that we end up unable to pick any mode.
    8969           0 :   if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    8970             :     // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
    8971             :     // unless ARNR filtering is enabled in which case we want
    8972             :     // an unfiltered alternative. We allow near/nearest as well
    8973             :     // because they may result in zero-zero MVs but be cheaper.
    8974           0 :     if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
    8975             :       int_mv zeromv;
    8976           0 :       ref_frame_skip_mask[0] = (1 << LAST_FRAME) |
    8977             : #if CONFIG_EXT_REFS
    8978             :                                (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |
    8979             :                                (1 << BWDREF_FRAME) |
    8980             : #endif  // CONFIG_EXT_REFS
    8981             :                                (1 << GOLDEN_FRAME);
    8982           0 :       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
    8983             :       // TODO(zoeliu): To further explore whether following needs to be done for
    8984             :       //               BWDREF_FRAME as well.
    8985           0 :       mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
    8986             : #if CONFIG_GLOBAL_MOTION
    8987           0 :       zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME],
    8988             :                                            cm->allow_high_precision_mv, bsize,
    8989             :                                            mi_col, mi_row, 0)
    8990           0 :                           .as_int;
    8991             : #else
    8992             :       zeromv.as_int = 0;
    8993             : #endif  // CONFIG_GLOBAL_MOTION
    8994           0 :       if (frame_mv[NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
    8995           0 :         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
    8996           0 :       if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
    8997           0 :         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
    8998             : #if CONFIG_EXT_INTER
    8999           0 :       if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
    9000           0 :         mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
    9001           0 :       if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
    9002           0 :         mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
    9003             : #endif  // CONFIG_EXT_INTER
    9004             :     }
    9005             :   }
    9006             : 
    9007           0 :   if (cpi->rc.is_src_frame_alt_ref) {
    9008           0 :     if (sf->alt_ref_search_fp) {
    9009           0 :       assert(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
    9010           0 :       mode_skip_mask[ALTREF_FRAME] = 0;
    9011           0 :       ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
    9012           0 :       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
    9013             :     }
    9014             :   }
    9015             : 
    9016           0 :   if (sf->alt_ref_search_fp)
    9017           0 :     if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
    9018           0 :       if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
    9019           0 :         mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
    9020             : 
    9021           0 :   if (sf->adaptive_mode_search) {
    9022           0 :     if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
    9023           0 :         cpi->rc.frames_since_golden >= 3)
    9024           0 :       if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
    9025           0 :         mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
    9026             :   }
    9027             : 
    9028           0 :   if (bsize > sf->max_intra_bsize) {
    9029           0 :     ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
    9030           0 :     ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
    9031             :   }
    9032             : 
    9033           0 :   mode_skip_mask[INTRA_FRAME] |=
    9034           0 :       ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
    9035             : 
    9036           0 :   for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
    9037           0 :   for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
    9038           0 :     mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
    9039             : 
    9040           0 :   midx = sf->schedule_mode_search ? mode_skip_start : 0;
    9041           0 :   while (midx > 4) {
    9042           0 :     uint8_t end_pos = 0;
    9043           0 :     for (i = 5; i < midx; ++i) {
    9044           0 :       if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
    9045           0 :         uint8_t tmp = mode_map[i];
    9046           0 :         mode_map[i] = mode_map[i - 1];
    9047           0 :         mode_map[i - 1] = tmp;
    9048           0 :         end_pos = i;
    9049             :       }
    9050             :     }
    9051           0 :     midx = end_pos;
    9052             :   }
    9053             : 
    9054           0 :   if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
    9055           0 :     x->use_default_intra_tx_type = 1;
    9056             :   else
    9057           0 :     x->use_default_intra_tx_type = 0;
    9058             : 
    9059           0 :   if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
    9060           0 :     x->use_default_inter_tx_type = 1;
    9061             :   else
    9062           0 :     x->use_default_inter_tx_type = 0;
    9063             : #if CONFIG_PVQ
    9064             :   od_encode_checkpoint(&x->daala_enc, &pre_buf);
    9065             : #endif  // CONFIG_PVQ
    9066             : #if CONFIG_EXT_INTER
    9067           0 :   for (i = 0; i < MB_MODE_COUNT; ++i)
    9068           0 :     for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
    9069           0 :       modelled_rd[i][ref_frame] = INT64_MAX;
    9070             : #endif  // CONFIG_EXT_INTER
    9071             : 
    9072           0 :   for (midx = 0; midx < MAX_MODES; ++midx) {
    9073             :     int mode_index;
    9074           0 :     int mode_excluded = 0;
    9075           0 :     int64_t this_rd = INT64_MAX;
    9076           0 :     int disable_skip = 0;
    9077           0 :     int compmode_cost = 0;
    9078           0 :     int rate2 = 0, rate_y = 0, rate_uv = 0;
    9079           0 :     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
    9080             : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
    9081             :     int64_t distortion2_y = 0;
    9082             :     int64_t total_sse_y = INT64_MAX;
    9083             : #endif
    9084           0 :     int skippable = 0;
    9085           0 :     int this_skip2 = 0;
    9086           0 :     int64_t total_sse = INT64_MAX;
    9087             :     uint8_t ref_frame_type;
    9088             : #if CONFIG_PVQ
    9089             :     od_encode_rollback(&x->daala_enc, &pre_buf);
    9090             : #endif  // CONFIG_PVQ
    9091           0 :     mode_index = mode_map[midx];
    9092           0 :     this_mode = av1_mode_order[mode_index].mode;
    9093           0 :     ref_frame = av1_mode_order[mode_index].ref_frame[0];
    9094           0 :     second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
    9095           0 :     mbmi->ref_mv_idx = 0;
    9096             : 
    9097             : #if CONFIG_EXT_INTER
    9098           0 :     if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
    9099             :       // Mode must be compatible
    9100           0 :       if (!is_interintra_allowed_mode(this_mode)) continue;
    9101           0 :       if (!is_interintra_allowed_bsize(bsize)) continue;
    9102             :     }
    9103             : 
    9104           0 :     if (is_inter_compound_mode(this_mode)) {
    9105           0 :       frame_mv[this_mode][ref_frame].as_int =
    9106           0 :           frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
    9107           0 :       frame_mv[this_mode][second_ref_frame].as_int =
    9108           0 :           frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
    9109             :     }
    9110             : #endif  // CONFIG_EXT_INTER
    9111             : 
    9112             :     // Look at the reference frame of the best mode so far and set the
    9113             :     // skip mask to look at a subset of the remaining modes.
    9114           0 :     if (midx == mode_skip_start && best_mode_index >= 0) {
    9115           0 :       switch (best_mbmode.ref_frame[0]) {
    9116           0 :         case INTRA_FRAME: break;
    9117             :         case LAST_FRAME:
    9118           0 :           ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
    9119           0 :           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    9120           0 :           break;
    9121             : #if CONFIG_EXT_REFS
    9122             :         case LAST2_FRAME:
    9123           0 :           ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
    9124           0 :           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    9125           0 :           break;
    9126             :         case LAST3_FRAME:
    9127           0 :           ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
    9128           0 :           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    9129           0 :           break;
    9130             : #endif  // CONFIG_EXT_REFS
    9131             :         case GOLDEN_FRAME:
    9132           0 :           ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
    9133           0 :           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    9134           0 :           break;
    9135             : #if CONFIG_EXT_REFS
    9136             :         case BWDREF_FRAME:
    9137           0 :           ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
    9138           0 :           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    9139           0 :           break;
    9140             : #endif  // CONFIG_EXT_REFS
    9141           0 :         case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
    9142             : #if CONFIG_EXT_REFS
    9143           0 :           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
    9144             : #endif  // CONFIG_EXT_REFS
    9145           0 :           break;
    9146             :         case NONE_FRAME:
    9147             :         case TOTAL_REFS_PER_FRAME:
    9148           0 :           assert(0 && "Invalid Reference frame");
    9149             :           break;
    9150             :       }
    9151             :     }
    9152             : 
    9153           0 :     if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
    9154           0 :         (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
    9155           0 :       continue;
    9156             : 
    9157           0 :     if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
    9158             : 
    9159             :     // Test best rd so far against threshold for trying this mode.
    9160           0 :     if (best_mode_skippable && sf->schedule_mode_search)
    9161           0 :       mode_threshold[mode_index] <<= 1;
    9162             : 
    9163           0 :     if (best_rd < mode_threshold[mode_index]) continue;
    9164             : 
    9165             :     // This is only used in motion vector unit test.
    9166           0 :     if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
    9167             : 
    9168             : #if CONFIG_ONE_SIDED_COMPOUND  // Changes LL bitstream
    9169             : #if CONFIG_EXT_REFS
    9170           0 :     if (cpi->oxcf.pass == 0) {
    9171             :       // Complexity-compression trade-offs
    9172             :       // if (ref_frame == ALTREF_FRAME) continue;
    9173             :       // if (ref_frame == BWDREF_FRAME) continue;
    9174           0 :       if (second_ref_frame == ALTREF_FRAME) continue;
    9175             :       // if (second_ref_frame == BWDREF_FRAME) continue;
    9176             :     }
    9177             : #endif
    9178             : #endif
    9179           0 :     comp_pred = second_ref_frame > INTRA_FRAME;
    9180           0 :     if (comp_pred) {
    9181           0 :       if (!cpi->allow_comp_inter_inter) continue;
    9182             : 
    9183             :       // Skip compound inter modes if the second ref frame is unavailable.
    9184           0 :       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
    9185             : 
    9186             :       // Do not allow compound prediction if the segment level reference frame
    9187             :       // feature is in use as in this case there can only be one reference.
    9188           0 :       if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
    9189             : 
    9190           0 :       if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
    9191           0 :           best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
    9192           0 :         continue;
    9193             : 
    9194           0 :       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
    9195             :     } else {
    9196           0 :       if (ref_frame != INTRA_FRAME)
    9197           0 :         mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
    9198             :     }
    9199             : 
    9200           0 :     if (ref_frame == INTRA_FRAME) {
    9201           0 :       if (sf->adaptive_mode_search)
    9202           0 :         if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
    9203           0 :           continue;
    9204             : 
    9205           0 :       if (this_mode != DC_PRED) {
    9206             :         // Disable intra modes other than DC_PRED for blocks with low variance
    9207             :         // Threshold for intra skipping based on source variance
    9208             :         // TODO(debargha): Specialize the threshold for super block sizes
    9209           0 :         const unsigned int skip_intra_var_thresh = 64;
    9210           0 :         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
    9211           0 :             x->source_variance < skip_intra_var_thresh)
    9212           0 :           continue;
    9213             :         // Only search the oblique modes if the best so far is
    9214             :         // one of the neighboring directional modes
    9215           0 :         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
    9216           0 :             (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
    9217           0 :           if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
    9218           0 :             continue;
    9219             :         }
    9220           0 :         if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
    9221           0 :           if (conditional_skipintra(this_mode, best_intra_mode)) continue;
    9222             :         }
    9223             :       }
    9224             : #if CONFIG_GLOBAL_MOTION
    9225           0 :     } else if (cm->global_motion[ref_frame].wmtype == IDENTITY &&
    9226           0 :                (!comp_pred ||
    9227           0 :                 cm->global_motion[second_ref_frame].wmtype == IDENTITY)) {
    9228             : #else   // CONFIG_GLOBAL_MOTION
    9229             :     } else {
    9230             : #endif  // CONFIG_GLOBAL_MOTION
    9231           0 :       const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
    9232           0 :       if (!check_best_zero_mv(cpi, mbmi_ext->mode_context,
    9233             : #if CONFIG_EXT_INTER
    9234           0 :                               mbmi_ext->compound_mode_context,
    9235             : #endif  // CONFIG_EXT_INTER
    9236             :                               frame_mv, this_mode, ref_frames, bsize, -1,
    9237             :                               mi_row, mi_col))
    9238           0 :         continue;
    9239             :     }
    9240             : 
    9241           0 :     mbmi->mode = this_mode;
    9242           0 :     mbmi->uv_mode = DC_PRED;
    9243           0 :     mbmi->ref_frame[0] = ref_frame;
    9244           0 :     mbmi->ref_frame[1] = second_ref_frame;
    9245             : #if CONFIG_PALETTE
    9246           0 :     pmi->palette_size[0] = 0;
    9247           0 :     pmi->palette_size[1] = 0;
    9248             : #endif  // CONFIG_PALETTE
    9249             : #if CONFIG_FILTER_INTRA
    9250             :     mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
    9251             :     mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
    9252             : #endif  // CONFIG_FILTER_INTRA
    9253             :         // Evaluate all sub-pel filters irrespective of whether we can use
    9254             :         // them for this frame.
    9255             : 
    9256           0 :     set_default_interp_filters(mbmi, cm->interp_filter);
    9257             : 
    9258           0 :     mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
    9259           0 :     mbmi->motion_mode = SIMPLE_TRANSLATION;
    9260             : 
    9261           0 :     x->skip = 0;
    9262           0 :     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
    9263             : 
    9264             :     // Select prediction reference frames.
    9265           0 :     for (i = 0; i < MAX_MB_PLANE; i++) {
    9266           0 :       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
    9267           0 :       if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    9268             :     }
    9269             : 
    9270             : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
    9271           0 :     mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
    9272             : #endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
    9273             : 
    9274           0 :     if (ref_frame == INTRA_FRAME) {
    9275             :       RD_STATS rd_stats_y;
    9276             :       TX_SIZE uv_tx;
    9277           0 :       struct macroblockd_plane *const pd = &xd->plane[1];
    9278             : #if CONFIG_EXT_INTRA
    9279           0 :       is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
    9280           0 :       if (is_directional_mode) {
    9281             :         int rate_dummy;
    9282           0 :         int64_t model_rd = INT64_MAX;
    9283           0 :         if (!angle_stats_ready) {
    9284           0 :           const int src_stride = x->plane[0].src.stride;
    9285           0 :           const uint8_t *src = x->plane[0].src.buf;
    9286             : #if CONFIG_HIGHBITDEPTH
    9287           0 :           if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    9288           0 :             highbd_angle_estimation(src, src_stride, rows, cols, bsize,
    9289             :                                     directional_mode_skip_mask);
    9290             :           else
    9291             : #endif  // CONFIG_HIGHBITDEPTH
    9292           0 :             angle_estimation(src, src_stride, rows, cols, bsize,
    9293             :                              directional_mode_skip_mask);
    9294           0 :           angle_stats_ready = 1;
    9295             :         }
    9296           0 :         if (directional_mode_skip_mask[mbmi->mode]) continue;
    9297           0 :         rd_stats_y.rate = INT_MAX;
    9298           0 :         rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rd_stats_y, bsize,
    9299           0 :                                 intra_mode_cost[mbmi->mode], best_rd,
    9300             :                                 &model_rd);
    9301             :       } else {
    9302           0 :         mbmi->angle_delta[0] = 0;
    9303           0 :         super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
    9304             :       }
    9305             : #else
    9306             :       super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
    9307             : #endif  // CONFIG_EXT_INTRA
    9308           0 :       rate_y = rd_stats_y.rate;
    9309           0 :       distortion_y = rd_stats_y.dist;
    9310           0 :       skippable = rd_stats_y.skip;
    9311             : 
    9312           0 :       if (rate_y == INT_MAX) continue;
    9313             : 
    9314             : #if CONFIG_FILTER_INTRA
    9315             :       if (mbmi->mode == DC_PRED) dc_skipped = 0;
    9316             : #endif  // CONFIG_FILTER_INTRA
    9317             : 
    9318           0 :       uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][pd->subsampling_x]
    9319           0 :                               [pd->subsampling_y];
    9320           0 :       if (rate_uv_intra[uv_tx] == INT_MAX) {
    9321           0 :         choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
    9322           0 :                              &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
    9323           0 :                              &skip_uvs[uv_tx], &mode_uv[uv_tx]);
    9324             : #if CONFIG_PALETTE
    9325           0 :         if (try_palette) pmi_uv[uv_tx] = *pmi;
    9326             : #endif  // CONFIG_PALETTE
    9327             : 
    9328             : #if CONFIG_EXT_INTRA
    9329           0 :         uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
    9330             : #endif  // CONFIG_EXT_INTRA
    9331             : #if CONFIG_FILTER_INTRA
    9332             :         filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
    9333             : #endif  // CONFIG_FILTER_INTRA
    9334             :       }
    9335             : 
    9336           0 :       rate_uv = rate_uv_tokenonly[uv_tx];
    9337           0 :       distortion_uv = dist_uvs[uv_tx];
    9338           0 :       skippable = skippable && skip_uvs[uv_tx];
    9339           0 :       mbmi->uv_mode = mode_uv[uv_tx];
    9340             : #if CONFIG_PALETTE
    9341           0 :       if (try_palette) {
    9342           0 :         pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
    9343           0 :         memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
    9344           0 :                pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
    9345             :                2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
    9346             :       }
    9347             : #endif  // CONFIG_PALETTE
    9348             : 
    9349             : #if CONFIG_EXT_INTRA
    9350           0 :       mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
    9351             : #endif  // CONFIG_EXT_INTRA
    9352             : #if CONFIG_FILTER_INTRA
    9353             :       mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
    9354             :           filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
    9355             :       if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
    9356             :         mbmi->filter_intra_mode_info.filter_intra_mode[1] =
    9357             :             filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
    9358             :       }
    9359             : #endif  // CONFIG_FILTER_INTRA
    9360             : 
    9361             : #if CONFIG_CB4X4
    9362           0 :       rate2 = rate_y + intra_mode_cost[mbmi->mode];
    9363           0 :       if (!x->skip_chroma_rd)
    9364           0 :         rate2 += rate_uv + cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
    9365             : #else
    9366             :       rate2 = rate_y + intra_mode_cost[mbmi->mode] + rate_uv +
    9367             :               cpi->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
    9368             : #endif  // CONFIG_CB4X4
    9369             : 
    9370             : #if CONFIG_PALETTE
    9371           0 :       if (try_palette && mbmi->mode == DC_PRED) {
    9372           0 :         rate2 += av1_cost_bit(
    9373             :             av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 0);
    9374             :       }
    9375             : #endif  // CONFIG_PALETTE
    9376             : 
    9377           0 :       if (!xd->lossless[mbmi->segment_id] && bsize >= BLOCK_8X8) {
    9378             :         // super_block_yrd above includes the cost of the tx_size in the
    9379             :         // tokenonly rate, but for intra blocks, tx_size is always coded
    9380             :         // (prediction granularity), so we account for it in the full rate,
    9381             :         // not the tokenonly rate.
    9382           0 :         rate_y -= tx_size_cost(cpi, x, bsize, mbmi->tx_size);
    9383             :       }
    9384             : #if CONFIG_EXT_INTRA
    9385           0 :       if (is_directional_mode) {
    9386             : #if CONFIG_INTRA_INTERP
    9387             :         const int intra_filter_ctx = av1_get_pred_context_intra_interp(xd);
    9388             :         const int p_angle =
    9389             :             mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP;
    9390             :         if (av1_is_intra_filter_switchable(p_angle))
    9391             :           rate2 += cpi->intra_filter_cost[intra_filter_ctx][mbmi->intra_filter];
    9392             : #endif  // CONFIG_INTRA_INTERP
    9393           0 :         rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
    9394           0 :                                     MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
    9395             :       }
    9396           0 :       if (mbmi->uv_mode != DC_PRED && mbmi->uv_mode != TM_PRED) {
    9397           0 :         rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
    9398           0 :                                     MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
    9399             :       }
    9400             : #endif  // CONFIG_EXT_INTRA
    9401             : #if CONFIG_FILTER_INTRA
    9402             :       if (mbmi->mode == DC_PRED) {
    9403             :         rate2 +=
    9404             :             av1_cost_bit(cm->fc->filter_intra_probs[0],
    9405             :                          mbmi->filter_intra_mode_info.use_filter_intra_mode[0]);
    9406             :         if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
    9407             :           rate2 += write_uniform_cost(
    9408             :               FILTER_INTRA_MODES,
    9409             :               mbmi->filter_intra_mode_info.filter_intra_mode[0]);
    9410             :         }
    9411             :       }
    9412             :       if (mbmi->uv_mode == DC_PRED) {
    9413             :         rate2 +=
    9414             :             av1_cost_bit(cpi->common.fc->filter_intra_probs[1],
    9415             :                          mbmi->filter_intra_mode_info.use_filter_intra_mode[1]);
    9416             :         if (mbmi->filter_intra_mode_info.use_filter_intra_mode[1])
    9417             :           rate2 += write_uniform_cost(
    9418             :               FILTER_INTRA_MODES,
    9419             :               mbmi->filter_intra_mode_info.filter_intra_mode[1]);
    9420             :       }
    9421             : #endif  // CONFIG_FILTER_INTRA
    9422           0 :       if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
    9423           0 :         rate2 += intra_cost_penalty;
    9424           0 :       distortion2 = distortion_y + distortion_uv;
    9425             : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
    9426             :       if (bsize < BLOCK_8X8) distortion2_y = distortion_y;
    9427             : #endif
    9428             :     } else {
    9429             :       int_mv backup_ref_mv[2];
    9430             : 
    9431             : #if !SUB8X8_COMP_REF
    9432           0 :       if (bsize == BLOCK_4X4 && mbmi->ref_frame[1] > INTRA_FRAME) continue;
    9433             : #endif  // !SUB8X8_COMP_REF
    9434             : 
    9435           0 :       backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
    9436           0 :       if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
    9437             : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
    9438           0 :       if (second_ref_frame == INTRA_FRAME) {
    9439           0 :         if (best_single_inter_ref != ref_frame) continue;
    9440           0 :         mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode];
    9441             : // TODO(debargha|geza.lore):
    9442             : // Should we use ext_intra modes for interintra?
    9443             : #if CONFIG_EXT_INTRA
    9444           0 :         mbmi->angle_delta[0] = 0;
    9445           0 :         mbmi->angle_delta[1] = 0;
    9446             : #if CONFIG_INTRA_INTERP
    9447             :         mbmi->intra_filter = INTRA_FILTER_LINEAR;
    9448             : #endif  // CONFIG_INTRA_INTERP
    9449             : #endif  // CONFIG_EXT_INTRA
    9450             : #if CONFIG_FILTER_INTRA
    9451             :         mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
    9452             :         mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
    9453             : #endif  // CONFIG_FILTER_INTRA
    9454             :       }
    9455             : #endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
    9456           0 :       mbmi->ref_mv_idx = 0;
    9457           0 :       ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
    9458             : 
    9459             : #if CONFIG_EXT_INTER
    9460           0 :       if (comp_pred) {
    9461           0 :         if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
    9462           0 :           int ref_mv_idx = 0;
    9463             :           // Special case: NEAR_NEWMV and NEW_NEARMV modes use
    9464             :           // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
    9465             :           // mbmi->ref_mv_idx (like NEWMV)
    9466           0 :           if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
    9467           0 :             ref_mv_idx = 1;
    9468             : 
    9469           0 :           if (compound_ref0_mode(mbmi->mode) == NEWMV) {
    9470           0 :             int_mv this_mv =
    9471           0 :                 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
    9472           0 :             clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
    9473           0 :                          xd->n8_h << MI_SIZE_LOG2, xd);
    9474           0 :             mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
    9475             :           }
    9476           0 :           if (compound_ref1_mode(mbmi->mode) == NEWMV) {
    9477           0 :             int_mv this_mv =
    9478           0 :                 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
    9479           0 :             clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
    9480           0 :                          xd->n8_h << MI_SIZE_LOG2, xd);
    9481           0 :             mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
    9482             :           }
    9483             :         }
    9484             :       } else {
    9485             : #endif  // CONFIG_EXT_INTER
    9486           0 :         if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
    9487             :           int ref;
    9488           0 :           for (ref = 0; ref < 1 + comp_pred; ++ref) {
    9489           0 :             int_mv this_mv =
    9490           0 :                 (ref == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
    9491           0 :                            : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
    9492           0 :             clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
    9493           0 :                          xd->n8_h << MI_SIZE_LOG2, xd);
    9494           0 :             mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
    9495             :           }
    9496             :         }
    9497             : #if CONFIG_EXT_INTER
    9498             :       }
    9499             : #endif  // CONFIG_EXT_INTER
    9500             :       {
    9501             :         RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
    9502           0 :         av1_init_rd_stats(&rd_stats);
    9503           0 :         rd_stats.rate = rate2;
    9504             : 
    9505             :         // Point to variables that are maintained between loop iterations
    9506           0 :         args.single_newmv = single_newmv;
    9507             : #if CONFIG_EXT_INTER
    9508           0 :         args.single_newmv_rate = single_newmv_rate;
    9509           0 :         args.modelled_rd = modelled_rd;
    9510             : #endif  // CONFIG_EXT_INTER
    9511           0 :         this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
    9512             :                                     &rd_stats_uv, &disable_skip, frame_mv,
    9513             :                                     mi_row, mi_col, &args, best_rd);
    9514             : 
    9515           0 :         rate2 = rd_stats.rate;
    9516           0 :         skippable = rd_stats.skip;
    9517           0 :         distortion2 = rd_stats.dist;
    9518           0 :         total_sse = rd_stats.sse;
    9519           0 :         rate_y = rd_stats_y.rate;
    9520           0 :         rate_uv = rd_stats_uv.rate;
    9521             : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
    9522             :         if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist;
    9523             : #endif
    9524             :       }
    9525             : 
    9526             : // TODO(jingning): This needs some refactoring to improve code quality
    9527             : // and reduce redundant steps.
    9528             : #if CONFIG_EXT_INTER
    9529           0 :       if ((have_nearmv_in_inter_mode(mbmi->mode) &&
    9530           0 :            mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
    9531           0 :           ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
    9532           0 :            mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
    9533             : #else
    9534             :       if ((mbmi->mode == NEARMV &&
    9535             :            mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
    9536             :           (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
    9537             : #endif
    9538           0 :         int_mv backup_mv = frame_mv[NEARMV][ref_frame];
    9539           0 :         MB_MODE_INFO backup_mbmi = *mbmi;
    9540           0 :         int backup_skip = x->skip;
    9541           0 :         int64_t tmp_ref_rd = this_rd;
    9542             :         int ref_idx;
    9543             : 
    9544             : // TODO(jingning): This should be deprecated shortly.
    9545             : #if CONFIG_EXT_INTER
    9546           0 :         int idx_offset = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
    9547             : #else
    9548             :         int idx_offset = (mbmi->mode == NEARMV) ? 1 : 0;
    9549             : #endif  // CONFIG_EXT_INTER
    9550           0 :         int ref_set =
    9551           0 :             AOMMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
    9552             : 
    9553           0 :         uint8_t drl_ctx =
    9554           0 :             av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx_offset);
    9555             :         // Dummy
    9556             :         int_mv backup_fmv[2];
    9557           0 :         backup_fmv[0] = frame_mv[NEWMV][ref_frame];
    9558           0 :         if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
    9559             : 
    9560           0 :         rate2 += (rate2 < INT_MAX ? cpi->drl_mode_cost0[drl_ctx][0] : 0);
    9561             : 
    9562           0 :         if (this_rd < INT64_MAX) {
    9563           0 :           if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
    9564           0 :               RDCOST(x->rdmult, x->rddiv, 0, total_sse))
    9565           0 :             tmp_ref_rd =
    9566           0 :                 RDCOST(x->rdmult, x->rddiv,
    9567             :                        rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
    9568             :                        distortion2);
    9569             :           else
    9570           0 :             tmp_ref_rd =
    9571           0 :                 RDCOST(x->rdmult, x->rddiv,
    9572             :                        rate2 + av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
    9573             :                            rate_y - rate_uv,
    9574             :                        total_sse);
    9575             :         }
    9576             : #if CONFIG_VAR_TX
    9577           0 :         for (i = 0; i < MAX_MB_PLANE; ++i)
    9578           0 :           memcpy(x->blk_skip_drl[i], x->blk_skip[i],
    9579           0 :                  sizeof(uint8_t) * ctx->num_4x4_blk);
    9580             : #endif  // CONFIG_VAR_TX
    9581             : 
    9582           0 :         for (ref_idx = 0; ref_idx < ref_set; ++ref_idx) {
    9583           0 :           int64_t tmp_alt_rd = INT64_MAX;
    9584           0 :           int dummy_disable_skip = 0;
    9585             :           int ref;
    9586             :           int_mv cur_mv;
    9587             :           RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
    9588             : 
    9589           0 :           av1_invalid_rd_stats(&tmp_rd_stats);
    9590           0 :           x->skip = 0;
    9591             : 
    9592           0 :           mbmi->ref_mv_idx = 1 + ref_idx;
    9593             : 
    9594             : #if CONFIG_EXT_INTER
    9595           0 :           if (comp_pred) {
    9596           0 :             int ref_mv_idx = mbmi->ref_mv_idx;
    9597             :             // Special case: NEAR_NEWMV and NEW_NEARMV modes use
    9598             :             // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
    9599             :             // mbmi->ref_mv_idx (like NEWMV)
    9600           0 :             if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
    9601           0 :               ref_mv_idx = 1 + mbmi->ref_mv_idx;
    9602             : 
    9603           0 :             if (compound_ref0_mode(mbmi->mode) == NEWMV) {
    9604           0 :               int_mv this_mv =
    9605           0 :                   mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
    9606           0 :               clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
    9607           0 :                            xd->n8_h << MI_SIZE_LOG2, xd);
    9608           0 :               mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
    9609           0 :             } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV) {
    9610           0 :               int_mv this_mv =
    9611           0 :                   mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
    9612           0 :               clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
    9613           0 :                            xd->n8_h << MI_SIZE_LOG2, xd);
    9614           0 :               mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
    9615             :             }
    9616             : 
    9617           0 :             if (compound_ref1_mode(mbmi->mode) == NEWMV) {
    9618           0 :               int_mv this_mv =
    9619           0 :                   mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
    9620           0 :               clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
    9621           0 :                            xd->n8_h << MI_SIZE_LOG2, xd);
    9622           0 :               mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
    9623           0 :             } else if (compound_ref1_mode(mbmi->mode) == NEARESTMV) {
    9624           0 :               int_mv this_mv =
    9625           0 :                   mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
    9626           0 :               clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
    9627           0 :                            xd->n8_h << MI_SIZE_LOG2, xd);
    9628           0 :               mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
    9629             :             }
    9630             :           } else {
    9631             : #endif  // CONFIG_EXT_INTER
    9632           0 :             for (ref = 0; ref < 1 + comp_pred; ++ref) {
    9633           0 :               int_mv this_mv =
    9634             :                   (ref == 0)
    9635           0 :                       ? mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
    9636           0 :                             .this_mv
    9637           0 :                       : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
    9638           0 :                             .comp_mv;
    9639           0 :               clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
    9640           0 :                            xd->n8_h << MI_SIZE_LOG2, xd);
    9641           0 :               mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
    9642             :             }
    9643             : #if CONFIG_EXT_INTER
    9644             :           }
    9645             : #endif
    9646             : 
    9647           0 :           cur_mv =
    9648           0 :               mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
    9649           0 :                   .this_mv;
    9650           0 :           clamp_mv2(&cur_mv.as_mv, xd);
    9651             : 
    9652           0 :           if (!mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
    9653           0 :             int_mv dummy_single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
    9654             : #if CONFIG_EXT_INTER
    9655           0 :             int dummy_single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
    9656             : #endif  // CONFIG_EXT_INTER
    9657             : 
    9658           0 :             frame_mv[NEARMV][ref_frame] = cur_mv;
    9659           0 :             av1_init_rd_stats(&tmp_rd_stats);
    9660             : 
    9661             :             // Point to variables that are not maintained between iterations
    9662           0 :             args.single_newmv = dummy_single_newmv;
    9663             : #if CONFIG_EXT_INTER
    9664           0 :             args.single_newmv_rate = dummy_single_newmv_rate;
    9665           0 :             args.modelled_rd = NULL;
    9666             : #endif  // CONFIG_EXT_INTER
    9667           0 :             tmp_alt_rd = handle_inter_mode(
    9668             :                 cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv,
    9669             :                 &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
    9670             :             // Prevent pointers from escaping local scope
    9671           0 :             args.single_newmv = NULL;
    9672             : #if CONFIG_EXT_INTER
    9673           0 :             args.single_newmv_rate = NULL;
    9674             : #endif  // CONFIG_EXT_INTER
    9675             :           }
    9676             : 
    9677           0 :           for (i = 0; i < mbmi->ref_mv_idx; ++i) {
    9678           0 :             uint8_t drl1_ctx = 0;
    9679           0 :             drl1_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
    9680             :                                    i + idx_offset);
    9681           0 :             tmp_rd_stats.rate +=
    9682           0 :                 (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][1]
    9683           0 :                                              : 0);
    9684             :           }
    9685             : 
    9686           0 :           if (mbmi_ext->ref_mv_count[ref_frame_type] >
    9687           0 :                   mbmi->ref_mv_idx + idx_offset + 1 &&
    9688           0 :               ref_idx < ref_set - 1) {
    9689           0 :             uint8_t drl1_ctx =
    9690           0 :                 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
    9691           0 :                             mbmi->ref_mv_idx + idx_offset);
    9692           0 :             tmp_rd_stats.rate +=
    9693           0 :                 (tmp_rd_stats.rate < INT_MAX ? cpi->drl_mode_cost0[drl1_ctx][0]
    9694           0 :                                              : 0);
    9695             :           }
    9696             : 
    9697           0 :           if (tmp_alt_rd < INT64_MAX) {
    9698             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    9699           0 :             tmp_alt_rd = RDCOST(x->rdmult, x->rddiv, tmp_rd_stats.rate,
    9700             :                                 tmp_rd_stats.dist);
    9701             : #else
    9702             :             if (RDCOST(x->rdmult, x->rddiv,
    9703             :                        tmp_rd_stats_y.rate + tmp_rd_stats_uv.rate,
    9704             :                        tmp_rd_stats.dist) <
    9705             :                 RDCOST(x->rdmult, x->rddiv, 0, tmp_rd_stats.sse))
    9706             :               tmp_alt_rd =
    9707             :                   RDCOST(x->rdmult, x->rddiv,
    9708             :                          tmp_rd_stats.rate +
    9709             :                              av1_cost_bit(av1_get_skip_prob(cm, xd), 0),
    9710             :                          tmp_rd_stats.dist);
    9711             :             else
    9712             :               tmp_alt_rd =
    9713             :                   RDCOST(x->rdmult, x->rddiv,
    9714             :                          tmp_rd_stats.rate +
    9715             :                              av1_cost_bit(av1_get_skip_prob(cm, xd), 1) -
    9716             :                              tmp_rd_stats_y.rate - tmp_rd_stats_uv.rate,
    9717             :                          tmp_rd_stats.sse);
    9718             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    9719             :           }
    9720             : 
    9721           0 :           if (tmp_ref_rd > tmp_alt_rd) {
    9722           0 :             rate2 = tmp_rd_stats.rate;
    9723           0 :             disable_skip = dummy_disable_skip;
    9724           0 :             distortion2 = tmp_rd_stats.dist;
    9725           0 :             skippable = tmp_rd_stats.skip;
    9726           0 :             rate_y = tmp_rd_stats_y.rate;
    9727           0 :             rate_uv = tmp_rd_stats_uv.rate;
    9728           0 :             total_sse = tmp_rd_stats.sse;
    9729           0 :             this_rd = tmp_alt_rd;
    9730           0 :             tmp_ref_rd = tmp_alt_rd;
    9731           0 :             backup_mbmi = *mbmi;
    9732           0 :             backup_skip = x->skip;
    9733             : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
    9734             :             if (bsize < BLOCK_8X8) {
    9735             :               total_sse_y = tmp_rd_stats_y.sse;
    9736             :               distortion2_y = tmp_rd_stats_y.dist;
    9737             :             }
    9738             : #endif
    9739             : #if CONFIG_VAR_TX
    9740           0 :             for (i = 0; i < MAX_MB_PLANE; ++i)
    9741           0 :               memcpy(x->blk_skip_drl[i], x->blk_skip[i],
    9742           0 :                      sizeof(uint8_t) * ctx->num_4x4_blk);
    9743             : #endif  // CONFIG_VAR_TX
    9744             :           } else {
    9745           0 :             *mbmi = backup_mbmi;
    9746           0 :             x->skip = backup_skip;
    9747             :           }
    9748             :         }
    9749             : 
    9750           0 :         frame_mv[NEARMV][ref_frame] = backup_mv;
    9751           0 :         frame_mv[NEWMV][ref_frame] = backup_fmv[0];
    9752           0 :         if (comp_pred) frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
    9753             : #if CONFIG_VAR_TX
    9754           0 :         for (i = 0; i < MAX_MB_PLANE; ++i)
    9755           0 :           memcpy(x->blk_skip[i], x->blk_skip_drl[i],
    9756           0 :                  sizeof(uint8_t) * ctx->num_4x4_blk);
    9757             : #endif  // CONFIG_VAR_TX
    9758             :       }
    9759           0 :       mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
    9760           0 :       if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
    9761             : 
    9762           0 :       if (this_rd == INT64_MAX) continue;
    9763             : 
    9764             : #if SUB8X8_COMP_REF
    9765             :       compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
    9766             : #else
    9767           0 :       if (mbmi->sb_type != BLOCK_4X4)
    9768           0 :         compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
    9769             : #endif  // SUB8X8_COMP_REF
    9770             : 
    9771           0 :       if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
    9772             :     }
    9773             : 
    9774             :     // Estimate the reference frame signaling cost and add it
    9775             :     // to the rolling cost variable.
    9776           0 :     if (comp_pred) {
    9777           0 :       rate2 += ref_costs_comp[ref_frame];
    9778             : #if CONFIG_EXT_REFS
    9779           0 :       rate2 += ref_costs_comp[second_ref_frame];
    9780             : #endif  // CONFIG_EXT_REFS
    9781             :     } else {
    9782           0 :       rate2 += ref_costs_single[ref_frame];
    9783             :     }
    9784             : 
    9785             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    9786           0 :     if (ref_frame == INTRA_FRAME) {
    9787             : #else
    9788             :     if (!disable_skip) {
    9789             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    9790           0 :       if (skippable) {
    9791             :         // Back out the coefficient coding costs
    9792           0 :         rate2 -= (rate_y + rate_uv);
    9793           0 :         rate_y = 0;
    9794           0 :         rate_uv = 0;
    9795             :         // Cost the skip mb case
    9796           0 :         rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
    9797           0 :       } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
    9798           0 :         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0,
    9799             :                    distortion2) <
    9800           0 :             RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) {
    9801             :           // Add in the cost of the no skip flag.
    9802           0 :           rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
    9803             :         } else {
    9804             :           // FIXME(rbultje) make this work for splitmv also
    9805           0 :           rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
    9806           0 :           distortion2 = total_sse;
    9807           0 :           assert(total_sse >= 0);
    9808           0 :           rate2 -= (rate_y + rate_uv);
    9809           0 :           this_skip2 = 1;
    9810           0 :           rate_y = 0;
    9811           0 :           rate_uv = 0;
    9812             : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
    9813             :           if (bsize < BLOCK_8X8) distortion2_y = total_sse_y;
    9814             : #endif
    9815             :         }
    9816             :       } else {
    9817             :         // Add in the cost of the no skip flag.
    9818           0 :         rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
    9819             :       }
    9820             : 
    9821             :       // Calculate the final RD estimate for this mode.
    9822           0 :       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    9823             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    9824             :     } else {
    9825           0 :       this_skip2 = mbmi->skip;
    9826           0 :       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
    9827           0 :       if (this_skip2) {
    9828           0 :         rate_y = 0;
    9829           0 :         rate_uv = 0;
    9830             :       }
    9831             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    9832             :     }
    9833             : 
    9834           0 :     if (ref_frame == INTRA_FRAME) {
    9835             :       // Keep record of best intra rd
    9836           0 :       if (this_rd < best_intra_rd) {
    9837           0 :         best_intra_rd = this_rd;
    9838           0 :         best_intra_mode = mbmi->mode;
    9839             :       }
    9840             : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
    9841           0 :     } else if (second_ref_frame == NONE_FRAME) {
    9842           0 :       if (this_rd < best_single_inter_rd) {
    9843           0 :         best_single_inter_rd = this_rd;
    9844           0 :         best_single_inter_ref = mbmi->ref_frame[0];
    9845             :       }
    9846             : #endif  // CONFIG_EXT_INTER && CONFIG_INTERINTRA
    9847             :     }
    9848             : 
    9849           0 :     if (!disable_skip && ref_frame == INTRA_FRAME) {
    9850           0 :       for (i = 0; i < REFERENCE_MODES; ++i)
    9851           0 :         best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
    9852             :     }
    9853             : 
    9854             :     // Did this mode help.. i.e. is it the new best mode
    9855           0 :     if (this_rd < best_rd || x->skip) {
    9856           0 :       if (!mode_excluded) {
    9857             :         // Note index of best mode so far
    9858           0 :         best_mode_index = mode_index;
    9859             : 
    9860           0 :         if (ref_frame == INTRA_FRAME) {
    9861             :           /* required for left and above block mv */
    9862           0 :           mbmi->mv[0].as_int = 0;
    9863             :         } else {
    9864           0 :           best_pred_sse = x->pred_sse[ref_frame];
    9865             :         }
    9866             : 
    9867           0 :         rd_cost->rate = rate2;
    9868             : #if CONFIG_SUPERTX
    9869             :         if (x->skip)
    9870             :           *returnrate_nocoef = rate2;
    9871             :         else
    9872             :           *returnrate_nocoef = rate2 - rate_y - rate_uv;
    9873             :         *returnrate_nocoef -= av1_cost_bit(
    9874             :             av1_get_skip_prob(cm, xd), disable_skip || skippable || this_skip2);
    9875             :         *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd),
    9876             :                                            mbmi->ref_frame[0] != INTRA_FRAME);
    9877             : #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    9878             : #if CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
    9879             :         MODE_INFO *const mi = xd->mi[0];
    9880             :         const MOTION_MODE motion_allowed = motion_mode_allowed(
    9881             : #if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
    9882             :             0, xd->global_motion,
    9883             : #endif  // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION
    9884             :             mi);
    9885             :         if (motion_allowed == WARPED_CAUSAL)
    9886             :           *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
    9887             :         else if (motion_allowed == OBMC_CAUSAL)
    9888             :           *returnrate_nocoef -=
    9889             :               cpi->motion_mode_cost1[bsize][mbmi->motion_mode];
    9890             : #else
    9891             :         *returnrate_nocoef -= cpi->motion_mode_cost[bsize][mbmi->motion_mode];
    9892             : #endif  // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION
    9893             : #endif  // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
    9894             : #endif  // CONFIG_SUPERTX
    9895           0 :         rd_cost->dist = distortion2;
    9896           0 :         rd_cost->rdcost = this_rd;
    9897           0 :         best_rd = this_rd;
    9898           0 :         best_mbmode = *mbmi;
    9899           0 :         best_skip2 = this_skip2;
    9900           0 :         best_mode_skippable = skippable;
    9901           0 :         best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
    9902             :                                             this_skip2 || skippable);
    9903           0 :         best_rate_uv = rate_uv;
    9904             : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
    9905             :         if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y;
    9906             : #endif
    9907             : #if CONFIG_VAR_TX
    9908           0 :         for (i = 0; i < MAX_MB_PLANE; ++i)
    9909           0 :           memcpy(ctx->blk_skip[i], x->blk_skip[i],
    9910           0 :                  sizeof(uint8_t) * ctx->num_4x4_blk);
    9911             : #endif  // CONFIG_VAR_TX
    9912             :       }
    9913             :     }
    9914             : 
    9915             :     /* keep record of best compound/single-only prediction */
    9916           0 :     if (!disable_skip && ref_frame != INTRA_FRAME) {
    9917             :       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
    9918             : 
    9919           0 :       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
    9920           0 :         single_rate = rate2 - compmode_cost;
    9921           0 :         hybrid_rate = rate2;
    9922             :       } else {
    9923           0 :         single_rate = rate2;
    9924           0 :         hybrid_rate = rate2 + compmode_cost;
    9925             :       }
    9926             : 
    9927           0 :       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
    9928           0 :       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
    9929             : 
    9930           0 :       if (!comp_pred) {
    9931           0 :         if (single_rd < best_pred_rd[SINGLE_REFERENCE])
    9932           0 :           best_pred_rd[SINGLE_REFERENCE] = single_rd;
    9933             :       } else {
    9934           0 :         if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
    9935           0 :           best_pred_rd[COMPOUND_REFERENCE] = single_rd;
    9936             :       }
    9937           0 :       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
    9938           0 :         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
    9939             :     }
    9940             : 
    9941           0 :     if (x->skip && !comp_pred) break;
    9942             :   }
    9943             : 
    9944           0 :   if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
    9945           0 :       ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
    9946           0 :         is_inter_mode(best_mbmode.mode)) ||
    9947           0 :        (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
    9948           0 :         !is_inter_mode(best_mbmode.mode)))) {
    9949           0 :     int skip_blk = 0;
    9950             :     RD_STATS rd_stats_y, rd_stats_uv;
    9951             : 
    9952           0 :     x->use_default_inter_tx_type = 0;
    9953           0 :     x->use_default_intra_tx_type = 0;
    9954             : 
    9955           0 :     *mbmi = best_mbmode;
    9956             : 
    9957           0 :     set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    9958             : 
    9959             :     // Select prediction reference frames.
    9960           0 :     for (i = 0; i < MAX_MB_PLANE; i++) {
    9961           0 :       xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
    9962           0 :       if (has_second_ref(mbmi))
    9963           0 :         xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    9964             :     }
    9965             : 
    9966           0 :     if (is_inter_mode(mbmi->mode)) {
    9967           0 :       av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
    9968             : #if CONFIG_MOTION_VAR
    9969           0 :       if (mbmi->motion_mode == OBMC_CAUSAL) {
    9970           0 :         av1_build_obmc_inter_prediction(
    9971             :             cm, xd, mi_row, mi_col, args.above_pred_buf, args.above_pred_stride,
    9972             :             args.left_pred_buf, args.left_pred_stride);
    9973             :       }
    9974             : #endif  // CONFIG_MOTION_VAR
    9975           0 :       av1_subtract_plane(x, bsize, 0);
    9976             : #if CONFIG_VAR_TX
    9977           0 :       if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
    9978           0 :         select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
    9979             :       } else {
    9980             :         int idx, idy;
    9981           0 :         super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
    9982           0 :         for (idy = 0; idy < xd->n8_h; ++idy)
    9983           0 :           for (idx = 0; idx < xd->n8_w; ++idx)
    9984           0 :             mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
    9985           0 :         memset(x->blk_skip[0], rd_stats_y.skip,
    9986           0 :                sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
    9987             :       }
    9988             : 
    9989           0 :       inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    9990             : #else
    9991             :       super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
    9992             :       super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    9993             : #endif  // CONFIG_VAR_TX
    9994             :     } else {
    9995           0 :       super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
    9996           0 :       super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    9997             :     }
    9998             : 
    9999           0 :     if (RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
   10000             :                (rd_stats_y.dist + rd_stats_uv.dist)) >
   10001           0 :         RDCOST(x->rdmult, x->rddiv, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
   10002           0 :       skip_blk = 1;
   10003           0 :       rd_stats_y.rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
   10004           0 :       rd_stats_uv.rate = 0;
   10005           0 :       rd_stats_y.dist = rd_stats_y.sse;
   10006           0 :       rd_stats_uv.dist = rd_stats_uv.sse;
   10007             :     } else {
   10008           0 :       skip_blk = 0;
   10009           0 :       rd_stats_y.rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
   10010             :     }
   10011             : 
   10012           0 :     if (RDCOST(x->rdmult, x->rddiv, best_rate_y + best_rate_uv, rd_cost->dist) >
   10013           0 :         RDCOST(x->rdmult, x->rddiv, rd_stats_y.rate + rd_stats_uv.rate,
   10014             :                (rd_stats_y.dist + rd_stats_uv.dist))) {
   10015             : #if CONFIG_VAR_TX
   10016             :       int idx, idy;
   10017             : #endif  // CONFIG_VAR_TX
   10018           0 :       best_mbmode.tx_type = mbmi->tx_type;
   10019           0 :       best_mbmode.tx_size = mbmi->tx_size;
   10020             : #if CONFIG_VAR_TX
   10021           0 :       for (idy = 0; idy < xd->n8_h; ++idy)
   10022           0 :         for (idx = 0; idx < xd->n8_w; ++idx)
   10023           0 :           best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
   10024             : 
   10025           0 :       for (i = 0; i < MAX_MB_PLANE; ++i)
   10026           0 :         memcpy(ctx->blk_skip[i], x->blk_skip[i],
   10027           0 :                sizeof(uint8_t) * ctx->num_4x4_blk);
   10028             : 
   10029           0 :       best_mbmode.min_tx_size = mbmi->min_tx_size;
   10030             : #endif  // CONFIG_VAR_TX
   10031           0 :       rd_cost->rate +=
   10032           0 :           (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
   10033           0 :       rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
   10034             : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
   10035             :       if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist;
   10036             : #endif
   10037           0 :       rd_cost->rdcost =
   10038           0 :           RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
   10039           0 :       best_skip2 = skip_blk;
   10040             :     }
   10041             :   }
   10042             : 
   10043             : #if CONFIG_PALETTE
   10044             :   // Only try palette mode when the best mode so far is an intra mode.
   10045           0 :   if (try_palette && !is_inter_mode(best_mbmode.mode)) {
   10046           0 :     int rate2 = 0;
   10047             : #if CONFIG_SUPERTX
   10048             :     int best_rate_nocoef;
   10049             : #endif  // CONFIG_SUPERTX
   10050           0 :     int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
   10051           0 :             best_model_rd_palette = INT64_MAX;
   10052           0 :     int skippable = 0, rate_overhead_palette = 0;
   10053             :     RD_STATS rd_stats_y;
   10054             :     TX_SIZE uv_tx;
   10055           0 :     uint8_t *const best_palette_color_map =
   10056           0 :         x->palette_buffer->best_palette_color_map;
   10057           0 :     uint8_t *const color_map = xd->plane[0].color_index_map;
   10058           0 :     MB_MODE_INFO best_mbmi_palette = best_mbmode;
   10059             : 
   10060           0 :     mbmi->mode = DC_PRED;
   10061           0 :     mbmi->uv_mode = DC_PRED;
   10062           0 :     mbmi->ref_frame[0] = INTRA_FRAME;
   10063           0 :     mbmi->ref_frame[1] = NONE_FRAME;
   10064           0 :     rate_overhead_palette = rd_pick_palette_intra_sby(
   10065             :         cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
   10066             :         &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
   10067             :         &best_model_rd_palette, NULL, NULL, NULL, NULL);
   10068           0 :     if (pmi->palette_size[0] == 0) goto PALETTE_EXIT;
   10069           0 :     memcpy(color_map, best_palette_color_map,
   10070           0 :            rows * cols * sizeof(best_palette_color_map[0]));
   10071           0 :     super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
   10072           0 :     if (rd_stats_y.rate == INT_MAX) goto PALETTE_EXIT;
   10073           0 :     uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
   10074           0 :                             [xd->plane[1].subsampling_y];
   10075           0 :     if (rate_uv_intra[uv_tx] == INT_MAX) {
   10076           0 :       choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
   10077           0 :                            &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
   10078           0 :                            &skip_uvs[uv_tx], &mode_uv[uv_tx]);
   10079           0 :       pmi_uv[uv_tx] = *pmi;
   10080             : #if CONFIG_EXT_INTRA
   10081           0 :       uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
   10082             : #endif  // CONFIG_EXT_INTRA
   10083             : #if CONFIG_FILTER_INTRA
   10084             :       filter_intra_mode_info_uv[uv_tx] = mbmi->filter_intra_mode_info;
   10085             : #endif  // CONFIG_FILTER_INTRA
   10086             :     }
   10087           0 :     mbmi->uv_mode = mode_uv[uv_tx];
   10088           0 :     pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
   10089           0 :     if (pmi->palette_size[1] > 0) {
   10090           0 :       memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
   10091           0 :              pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
   10092             :              2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
   10093             :     }
   10094             : #if CONFIG_EXT_INTRA
   10095           0 :     mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
   10096             : #endif  // CONFIG_EXT_INTRA
   10097             : #if CONFIG_FILTER_INTRA
   10098             :     mbmi->filter_intra_mode_info.use_filter_intra_mode[1] =
   10099             :         filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1];
   10100             :     if (filter_intra_mode_info_uv[uv_tx].use_filter_intra_mode[1]) {
   10101             :       mbmi->filter_intra_mode_info.filter_intra_mode[1] =
   10102             :           filter_intra_mode_info_uv[uv_tx].filter_intra_mode[1];
   10103             :     }
   10104             : #endif  // CONFIG_FILTER_INTRA
   10105           0 :     skippable = rd_stats_y.skip && skip_uvs[uv_tx];
   10106           0 :     distortion2 = rd_stats_y.dist + dist_uvs[uv_tx];
   10107           0 :     rate2 = rd_stats_y.rate + rate_overhead_palette + rate_uv_intra[uv_tx];
   10108           0 :     rate2 += ref_costs_single[INTRA_FRAME];
   10109             : 
   10110           0 :     if (skippable) {
   10111           0 :       rate2 -= (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
   10112             : #if CONFIG_SUPERTX
   10113             :       best_rate_nocoef = rate2;
   10114             : #endif  // CONFIG_SUPERTX
   10115           0 :       rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1);
   10116             :     } else {
   10117             : #if CONFIG_SUPERTX
   10118             :       best_rate_nocoef = rate2 - (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
   10119             : #endif  // CONFIG_SUPERTX
   10120           0 :       rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0);
   10121             :     }
   10122           0 :     this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
   10123           0 :     if (this_rd < best_rd) {
   10124           0 :       best_mode_index = 3;
   10125           0 :       mbmi->mv[0].as_int = 0;
   10126           0 :       rd_cost->rate = rate2;
   10127             : #if CONFIG_SUPERTX
   10128             :       *returnrate_nocoef = best_rate_nocoef;
   10129             : #endif  // CONFIG_SUPERTX
   10130           0 :       rd_cost->dist = distortion2;
   10131           0 :       rd_cost->rdcost = this_rd;
   10132           0 :       best_rd = this_rd;
   10133           0 :       best_mbmode = *mbmi;
   10134           0 :       best_skip2 = 0;
   10135           0 :       best_mode_skippable = skippable;
   10136             :     }
   10137             :   }
   10138             : PALETTE_EXIT:
   10139             : #endif  // CONFIG_PALETTE
   10140             : 
   10141             : #if CONFIG_FILTER_INTRA
   10142             :   // TODO(huisu): filter-intra is turned off in lossless mode for now to
   10143             :   // avoid a unit test failure
   10144             :   if (!xd->lossless[mbmi->segment_id] &&
   10145             : #if CONFIG_PALETTE
   10146             :       pmi->palette_size[0] == 0 &&
   10147             : #endif  // CONFIG_PALETTE
   10148             :       !dc_skipped && best_mode_index >= 0 &&
   10149             :       best_intra_rd < (best_rd + (best_rd >> 3))) {
   10150             :     pick_filter_intra_interframe(
   10151             :         cpi, x, ctx, bsize, mi_row, mi_col, rate_uv_intra, rate_uv_tokenonly,
   10152             :         dist_uvs, skip_uvs, mode_uv, filter_intra_mode_info_uv,
   10153             : #if CONFIG_EXT_INTRA
   10154             :         uv_angle_delta,
   10155             : #endif  // CONFIG_EXT_INTRA
   10156             : #if CONFIG_PALETTE
   10157             :         pmi_uv, palette_ctx,
   10158             : #endif  // CONFIG_PALETTE
   10159             :         0, ref_costs_single, &best_rd, &best_intra_rd, &best_intra_mode,
   10160             :         &best_mode_index, &best_skip2, &best_mode_skippable,
   10161             : #if CONFIG_SUPERTX
   10162             :         returnrate_nocoef,
   10163             : #endif  // CONFIG_SUPERTX
   10164             :         best_pred_rd, &best_mbmode, rd_cost);
   10165             :   }
   10166             : #endif  // CONFIG_FILTER_INTRA
   10167             : 
   10168             :   // The inter modes' rate costs are not calculated precisely in some cases.
   10169             :   // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
   10170             :   // ZEROMV. Here, checks are added for those cases, and the mode decisions
   10171             :   // are corrected.
   10172           0 :   if (best_mbmode.mode == NEWMV
   10173             : #if CONFIG_EXT_INTER
   10174           0 :       || best_mbmode.mode == NEW_NEWMV
   10175             : #endif  // CONFIG_EXT_INTER
   10176             :       ) {
   10177           0 :     const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
   10178           0 :                                          best_mbmode.ref_frame[1] };
   10179           0 :     int comp_pred_mode = refs[1] > INTRA_FRAME;
   10180             :     int_mv zeromv[2];
   10181           0 :     const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
   10182             : #if CONFIG_GLOBAL_MOTION
   10183           0 :     zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
   10184             :                                             cm->allow_high_precision_mv, bsize,
   10185             :                                             mi_col, mi_row, 0)
   10186           0 :                            .as_int;
   10187           0 :     zeromv[1].as_int = comp_pred_mode
   10188           0 :                            ? gm_get_motion_vector(&cm->global_motion[refs[1]],
   10189             :                                                   cm->allow_high_precision_mv,
   10190             :                                                   bsize, mi_col, mi_row, 0)
   10191             :                                  .as_int
   10192           0 :                            : 0;
   10193             : #else
   10194             :     zeromv[0].as_int = 0;
   10195             :     zeromv[1].as_int = 0;
   10196             : #endif  // CONFIG_GLOBAL_MOTION
   10197           0 :     if (!comp_pred_mode) {
   10198           0 :       int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
   10199           0 :                         ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
   10200           0 :                         : INT_MAX;
   10201             : 
   10202           0 :       for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
   10203           0 :         int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
   10204           0 :         if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
   10205           0 :           best_mbmode.mode = NEARMV;
   10206           0 :           best_mbmode.ref_mv_idx = i;
   10207             :         }
   10208             :       }
   10209             : 
   10210           0 :       if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
   10211           0 :         best_mbmode.mode = NEARESTMV;
   10212           0 :       else if (best_mbmode.mv[0].as_int == zeromv[0].as_int)
   10213           0 :         best_mbmode.mode = ZEROMV;
   10214             :     } else {
   10215             :       int_mv nearestmv[2];
   10216             :       int_mv nearmv[2];
   10217             : 
   10218             : #if CONFIG_EXT_INTER
   10219           0 :       if (mbmi_ext->ref_mv_count[rf_type] > 1) {
   10220           0 :         nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
   10221           0 :         nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
   10222             :       } else {
   10223           0 :         nearmv[0] = frame_mv[NEARMV][refs[0]];
   10224           0 :         nearmv[1] = frame_mv[NEARMV][refs[1]];
   10225             :       }
   10226             : #else
   10227             :       int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
   10228             :                         ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
   10229             :                         : INT_MAX;
   10230             : 
   10231             :       for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
   10232             :         nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
   10233             :         nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
   10234             : 
   10235             :         if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
   10236             :             nearmv[1].as_int == best_mbmode.mv[1].as_int) {
   10237             :           best_mbmode.mode = NEARMV;
   10238             :           best_mbmode.ref_mv_idx = i;
   10239             :         }
   10240             :       }
   10241             : #endif  // CONFIG_EXT_INTER
   10242           0 :       if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
   10243           0 :         nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
   10244           0 :         nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
   10245             :       } else {
   10246           0 :         nearestmv[0] = frame_mv[NEARESTMV][refs[0]];
   10247           0 :         nearestmv[1] = frame_mv[NEARESTMV][refs[1]];
   10248             :       }
   10249             : 
   10250           0 :       if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
   10251           0 :           nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
   10252             : #if CONFIG_EXT_INTER
   10253           0 :         best_mbmode.mode = NEAREST_NEARESTMV;
   10254             :       } else {
   10255           0 :         int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
   10256           0 :                           ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
   10257           0 :                           : INT_MAX;
   10258             : 
   10259           0 :         for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
   10260           0 :           nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
   10261           0 :           nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
   10262             : 
   10263             :           // Try switching to the NEAR_NEARMV mode
   10264           0 :           if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
   10265           0 :               nearmv[1].as_int == best_mbmode.mv[1].as_int) {
   10266           0 :             best_mbmode.mode = NEAR_NEARMV;
   10267           0 :             best_mbmode.ref_mv_idx = i;
   10268             :           }
   10269             :         }
   10270             : 
   10271           0 :         if (best_mbmode.mode == NEW_NEWMV &&
   10272           0 :             best_mbmode.mv[0].as_int == zeromv[0].as_int &&
   10273           0 :             best_mbmode.mv[1].as_int == zeromv[1].as_int)
   10274           0 :           best_mbmode.mode = ZERO_ZEROMV;
   10275             :       }
   10276             : #else
   10277             :         best_mbmode.mode = NEARESTMV;
   10278             :       } else if (best_mbmode.mv[0].as_int == zeromv[0].as_int &&
   10279             :                  best_mbmode.mv[1].as_int == zeromv[1].as_int) {
   10280             :         best_mbmode.mode = ZEROMV;
   10281             :       }
   10282             : #endif  // CONFIG_EXT_INTER
   10283             :     }
   10284             :   }
   10285             : 
   10286             :   // Make sure that the ref_mv_idx is only nonzero when we're
   10287             :   // using a mode which can support ref_mv_idx
   10288           0 :   if (best_mbmode.ref_mv_idx != 0 &&
   10289             : #if CONFIG_EXT_INTER
   10290           0 :       !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
   10291           0 :         have_nearmv_in_inter_mode(best_mbmode.mode))) {
   10292             : #else
   10293             :       !(best_mbmode.mode == NEARMV || best_mbmode.mode == NEWMV)) {
   10294             : #endif
   10295           0 :     best_mbmode.ref_mv_idx = 0;
   10296             :   }
   10297             : 
   10298             :   {
   10299           0 :     int8_t ref_frame_type = av1_ref_frame_type(best_mbmode.ref_frame);
   10300           0 :     int16_t mode_ctx = mbmi_ext->mode_context[ref_frame_type];
   10301           0 :     if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
   10302             :       int_mv zeromv[2];
   10303             : #if CONFIG_GLOBAL_MOTION
   10304           0 :       const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
   10305           0 :                                            best_mbmode.ref_frame[1] };
   10306           0 :       zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
   10307             :                                               cm->allow_high_precision_mv,
   10308             :                                               bsize, mi_col, mi_row, 0)
   10309           0 :                              .as_int;
   10310           0 :       zeromv[1].as_int = (refs[1] != NONE_FRAME)
   10311           0 :                              ? gm_get_motion_vector(&cm->global_motion[refs[1]],
   10312             :                                                     cm->allow_high_precision_mv,
   10313             :                                                     bsize, mi_col, mi_row, 0)
   10314             :                                    .as_int
   10315           0 :                              : 0;
   10316           0 :       lower_mv_precision(&zeromv[0].as_mv, cm->allow_high_precision_mv);
   10317           0 :       lower_mv_precision(&zeromv[1].as_mv, cm->allow_high_precision_mv);
   10318             : #else
   10319             :       zeromv[0].as_int = zeromv[1].as_int = 0;
   10320             : #endif  // CONFIG_GLOBAL_MOTION
   10321           0 :       if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
   10322           0 :           best_mbmode.mv[0].as_int == zeromv[0].as_int &&
   10323             : #if CONFIG_EXT_INTER
   10324           0 :           (best_mbmode.ref_frame[1] <= INTRA_FRAME)
   10325             : #else
   10326             :           (best_mbmode.ref_frame[1] == NONE_FRAME ||
   10327             :            best_mbmode.mv[1].as_int == zeromv[1].as_int)
   10328             : #endif  // CONFIG_EXT_INTER
   10329             :               ) {
   10330           0 :         best_mbmode.mode = ZEROMV;
   10331             :       }
   10332             :     }
   10333             :   }
   10334             : 
   10335           0 :   if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
   10336           0 :     rd_cost->rate = INT_MAX;
   10337           0 :     rd_cost->rdcost = INT64_MAX;
   10338           0 :     return;
   10339             :   }
   10340             : 
   10341             : #if CONFIG_DUAL_FILTER
   10342           0 :   assert((cm->interp_filter == SWITCHABLE) ||
   10343             :          (cm->interp_filter == best_mbmode.interp_filter[0]) ||
   10344             :          !is_inter_block(&best_mbmode));
   10345           0 :   assert((cm->interp_filter == SWITCHABLE) ||
   10346             :          (cm->interp_filter == best_mbmode.interp_filter[1]) ||
   10347             :          !is_inter_block(&best_mbmode));
   10348           0 :   if (best_mbmode.ref_frame[1] > INTRA_FRAME) {
   10349           0 :     assert((cm->interp_filter == SWITCHABLE) ||
   10350             :            (cm->interp_filter == best_mbmode.interp_filter[2]) ||
   10351             :            !is_inter_block(&best_mbmode));
   10352           0 :     assert((cm->interp_filter == SWITCHABLE) ||
   10353             :            (cm->interp_filter == best_mbmode.interp_filter[3]) ||
   10354             :            !is_inter_block(&best_mbmode));
   10355             :   }
   10356             : #else
   10357             :   assert((cm->interp_filter == SWITCHABLE) ||
   10358             :          (cm->interp_filter == best_mbmode.interp_filter) ||
   10359             :          !is_inter_block(&best_mbmode));
   10360             : #endif  // CONFIG_DUAL_FILTER
   10361             : 
   10362           0 :   if (!cpi->rc.is_src_frame_alt_ref)
   10363           0 :     av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
   10364             :                               sf->adaptive_rd_thresh, bsize, best_mode_index);
   10365             : 
   10366             :   // macroblock modes
   10367           0 :   *mbmi = best_mbmode;
   10368           0 :   x->skip |= best_skip2;
   10369             : 
   10370             : // Note: this section is needed since the mode may have been forced to
   10371             : // ZEROMV by the all-zero mode handling of ref-mv.
   10372             : #if CONFIG_GLOBAL_MOTION
   10373           0 :   if (mbmi->mode == ZEROMV
   10374             : #if CONFIG_EXT_INTER
   10375           0 :       || mbmi->mode == ZERO_ZEROMV
   10376             : #endif  // CONFIG_EXT_INTER
   10377             :       ) {
   10378             : #if CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
   10379             :     // Correct the motion mode for ZEROMV
   10380           0 :     const MOTION_MODE last_motion_mode_allowed = motion_mode_allowed(
   10381             : #if SEPARATE_GLOBAL_MOTION
   10382           0 :         0, xd->global_motion,
   10383             : #endif  // SEPARATE_GLOBAL_MOTION
   10384           0 :         xd->mi[0]);
   10385           0 :     if (mbmi->motion_mode > last_motion_mode_allowed)
   10386           0 :       mbmi->motion_mode = last_motion_mode_allowed;
   10387             : #endif  // CONFIG_WARPED_MOTION || CONFIG_MOTION_VAR
   10388             : 
   10389             :     // Correct the interpolation filter for ZEROMV
   10390           0 :     if (is_nontrans_global_motion(xd)) {
   10391             : #if CONFIG_DUAL_FILTER
   10392           0 :       mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE
   10393             :                                    ? EIGHTTAP_REGULAR
   10394           0 :                                    : cm->interp_filter;
   10395           0 :       mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE
   10396             :                                    ? EIGHTTAP_REGULAR
   10397           0 :                                    : cm->interp_filter;
   10398             : #else
   10399             :       mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR
   10400             :                                                             : cm->interp_filter;
   10401             : #endif  // CONFIG_DUAL_FILTER
   10402             :     }
   10403             :   }
   10404             : #endif  // CONFIG_GLOBAL_MOTION
   10405             : 
   10406           0 :   for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
   10407           0 :     if (mbmi->mode != NEWMV)
   10408           0 :       mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
   10409             :     else
   10410           0 :       mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
   10411             :   }
   10412             : 
   10413           0 :   for (i = 0; i < REFERENCE_MODES; ++i) {
   10414           0 :     if (best_pred_rd[i] == INT64_MAX)
   10415           0 :       best_pred_diff[i] = INT_MIN;
   10416             :     else
   10417           0 :       best_pred_diff[i] = best_rd - best_pred_rd[i];
   10418             :   }
   10419             : 
   10420           0 :   x->skip |= best_mode_skippable;
   10421             : 
   10422           0 :   assert(best_mode_index >= 0);
   10423             : 
   10424           0 :   store_coding_context(x, ctx, best_mode_index, best_pred_diff,
   10425             :                        best_mode_skippable);
   10426             : 
   10427             : #if CONFIG_PALETTE
   10428           0 :   if (cm->allow_screen_content_tools && pmi->palette_size[1] > 0) {
   10429           0 :     restore_uv_color_map(cpi, x);
   10430             :   }
   10431             : #endif  // CONFIG_PALETTE
   10432             : }
   10433             : 
   10434           0 : void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,  // Forces a ZEROMV / LAST_FRAME mode for a block whose segment has SEG_LVL_SKIP active and reports its RD cost (computed with zero distortion).
   10435             :                                         TileDataEnc *tile_data, MACROBLOCK *x,  // tile_data: provides thresh_freq_fact for the threshold update; x: x->skip is set to 1 here.
   10436             :                                         int mi_row, int mi_col,  // Only referenced inside the CONFIG_GLOBAL_MOTION / CONFIG_MOTION_VAR / CONFIG_WARPED_MOTION sections.
   10437             :                                         RD_STATS *rd_cost, BLOCK_SIZE bsize,  // rd_cost: output; on rejection rate = INT_MAX and rdcost = INT64_MAX.
   10438             :                                         PICK_MODE_CONTEXT *ctx,  // ctx: filled through store_coding_context() when the mode is accepted.
   10439             :                                         int64_t best_rd_so_far) {  // The mode is rejected unless its RD cost is strictly below this value.
   10440           0 :   const AV1_COMMON *const cm = &cpi->common;
   10441           0 :   MACROBLOCKD *const xd = &x->e_mbd;
   10442           0 :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   10443           0 :   unsigned char segment_id = mbmi->segment_id;
   10444           0 :   const int comp_pred = 0;  // Compound prediction is not used on this path.
   10445             :   int i;
   10446             :   int64_t best_pred_diff[REFERENCE_MODES];
   10447             :   unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
   10448             :   unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
   10449             :   aom_prob comp_mode_p;
   10450           0 :   InterpFilter best_filter = SWITCHABLE;
   10451           0 :   int64_t this_rd = INT64_MAX;
   10452           0 :   int rate2 = 0;
   10453           0 :   const int64_t distortion2 = 0;  // Distortion is fixed at zero for this mode.
   10454             :   (void)mi_row;  // mi_row/mi_col are unreferenced unless one of the CONFIG_* sections below is compiled in.
   10455             :   (void)mi_col;
   10456             :   // Reference-frame signaling costs; only ref_costs_single[LAST_FRAME] and comp_mode_p are read below.
   10457           0 :   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
   10458             :                            &comp_mode_p);
   10459             :   // Reset x->pred_sse[] (all entries) and x->pred_mv_sad[] (from LAST_FRAME on) to INT_MAX.
   10460           0 :   for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
   10461           0 :   for (i = LAST_FRAME; i < TOTAL_REFS_PER_FRAME; ++i)
   10462           0 :     x->pred_mv_sad[i] = INT_MAX;
   10463             :   // Default to an INT_MAX rate; replaced by rate2 once the cost has been computed.
   10464           0 :   rd_cost->rate = INT_MAX;
   10465             :   // This function must only be called for segments with SEG_LVL_SKIP active.
   10466           0 :   assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
   10467             :   // Clear the palette sizes and filter-intra flags (when those tools are compiled in).
   10468             : #if CONFIG_PALETTE
   10469           0 :   mbmi->palette_mode_info.palette_size[0] = 0;
   10470           0 :   mbmi->palette_mode_info.palette_size[1] = 0;
   10471             : #endif  // CONFIG_PALETTE
   10472             : 
   10473             : #if CONFIG_FILTER_INTRA
   10474             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
   10475             :   mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
   10476             : #endif  // CONFIG_FILTER_INTRA
   10477           0 :   mbmi->mode = ZEROMV;  // Fixed choice: ZEROMV, simple translation, single LAST_FRAME reference.
   10478           0 :   mbmi->motion_mode = SIMPLE_TRANSLATION;
   10479           0 :   mbmi->uv_mode = DC_PRED;
   10480           0 :   mbmi->ref_frame[0] = LAST_FRAME;
   10481           0 :   mbmi->ref_frame[1] = NONE_FRAME;
   10482             : #if CONFIG_GLOBAL_MOTION
   10483           0 :   mbmi->mv[0].as_int =  // With global motion, the "zero" MV is whatever gm_get_motion_vector() yields for LAST_FRAME.
   10484           0 :       gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
   10485             :                            cm->allow_high_precision_mv, bsize, mi_col, mi_row,
   10486             :                            0)
   10487           0 :           .as_int;
   10488             : #else   // CONFIG_GLOBAL_MOTION
   10489             :   mbmi->mv[0].as_int = 0;
   10490             : #endif  // CONFIG_GLOBAL_MOTION
   10491           0 :   mbmi->tx_size = max_txsize_lookup[bsize];
   10492           0 :   x->skip = 1;
   10493             : 
   10494           0 :   mbmi->ref_mv_idx = 0;
   10495           0 :   mbmi->pred_mv[0].as_int = 0;
   10496             : 
   10497           0 :   mbmi->motion_mode = SIMPLE_TRANSLATION;  // NOTE(review): repeats the assignment already made earlier in this function.
   10498             : #if CONFIG_MOTION_VAR
   10499           0 :   av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
   10500             : #endif
   10501             : #if CONFIG_WARPED_MOTION
   10502           0 :   if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
   10503             :     int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
   10504           0 :     mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);  // Only the return value is kept; pts/pts_inref are not read afterwards.
   10505             :   }
   10506             : #endif
   10507             :   // Interpolation filter: start from the defaults, then keep the frame-level filter or pick the switchable one with the lowest rate.
   10508           0 :   set_default_interp_filters(mbmi, cm->interp_filter);
   10509             : 
   10510           0 :   if (cm->interp_filter != SWITCHABLE) {
   10511           0 :     best_filter = cm->interp_filter;
   10512             :   } else {
   10513           0 :     best_filter = EIGHTTAP_REGULAR;  // Used as-is when the search below is not run.
   10514           0 :     if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
   10515           0 :         x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
   10516             :       int rs;
   10517           0 :       int best_rs = INT_MAX;
   10518           0 :       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {  // Try each filter index and compare av1_get_switchable_rate() results.
   10519             : #if CONFIG_DUAL_FILTER
   10520             :         int k;
   10521           0 :         for (k = 0; k < 4; ++k) mbmi->interp_filter[k] = i;
   10522             : #else
   10523             :         mbmi->interp_filter = i;
   10524             : #endif  // CONFIG_DUAL_FILTER
   10525           0 :         rs = av1_get_switchable_rate(cpi, xd);
   10526           0 :         if (rs < best_rs) {  // Strict '<': on ties the lowest filter index is kept.
   10527           0 :           best_rs = rs;
   10528             : #if CONFIG_DUAL_FILTER
   10529           0 :           best_filter = mbmi->interp_filter[0];  // All four entries were set to i above.
   10530             : #else
   10531             :           best_filter = mbmi->interp_filter;
   10532             : #endif  // CONFIG_DUAL_FILTER
   10533             :         }
   10534             :       }
   10535             :     }
   10536             :   }
   10537             : // Set the appropriate filter
   10538             : #if CONFIG_DUAL_FILTER
   10539           0 :   for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = best_filter;
   10540             : #else
   10541             :   mbmi->interp_filter = best_filter;
   10542             : #endif  // CONFIG_DUAL_FILTER
   10543           0 :   rate2 += av1_get_switchable_rate(cpi, xd);  // Presumably the signaling rate of the filter just stored -- confirm in av1_get_switchable_rate().
   10544             :   // With REFERENCE_MODE_SELECT, add the cost of coding comp_pred (0) under comp_mode_p.
   10545           0 :   if (cm->reference_mode == REFERENCE_MODE_SELECT)
   10546           0 :     rate2 += av1_cost_bit(comp_mode_p, comp_pred);
   10547             : 
   10548             :   // Estimate the reference frame signaling cost and add it
   10549             :   // to the rolling cost variable.
   10550           0 :   rate2 += ref_costs_single[LAST_FRAME];
   10551           0 :   this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
   10552             :   // Publish the cost; the distortion is the constant zero defined above.
   10553           0 :   rd_cost->rate = rate2;
   10554           0 :   rd_cost->dist = distortion2;
   10555           0 :   rd_cost->rdcost = this_rd;
   10556             : #if CONFIG_DAALA_DIST && CONFIG_CB4X4
   10557             :   if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
   10558             : #endif
   10559           0 :   if (this_rd >= best_rd_so_far) {  // Not strictly better than the caller's best: mark the result as rejected.
   10560           0 :     rd_cost->rate = INT_MAX;
   10561           0 :     rd_cost->rdcost = INT64_MAX;
   10562           0 :     return;  // Note: mbmi and x->skip have already been overwritten above; ctx is left untouched.
   10563             :   }
   10564             : 
   10565             : #if CONFIG_DUAL_FILTER
   10566           0 :   assert((cm->interp_filter == SWITCHABLE) ||
   10567             :          (cm->interp_filter == mbmi->interp_filter[0]));
   10568             : #else
   10569             :   assert((cm->interp_filter == SWITCHABLE) ||
   10570             :          (cm->interp_filter == mbmi->interp_filter));
   10571             : #endif  // CONFIG_DUAL_FILTER
   10572             :   // Accepted: update the RD threshold factors using mode index THR_ZEROMV.
   10573           0 :   av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
   10574             :                             cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
   10575             :   // best_pred_diff is stored as all zeros for this mode.
   10576           0 :   av1_zero(best_pred_diff);
   10577             :   // Record the choice in ctx with mode index THR_ZEROMV and the skippable argument set to 0.
   10578           0 :   store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0);
   10579             : }
   10580             : 
   10581             : #if CONFIG_MOTION_VAR
   10582             : // This function has a structure similar to av1_build_obmc_inter_prediction
   10583             : //
   10584             : // The OBMC predictor is computed as:
   10585             : //
   10586             : //  PObmc(x,y) =
   10587             : //    AOM_BLEND_A64(Mh(x),
   10588             : //                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
   10589             : //                  PLeft(x, y))
   10590             : //
   10591             : // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
   10592             : // rounding, this can be written as:
   10593             : //
   10594             : //  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * PObmc(x,y) =
   10595             : //    Mh(x) * Mv(y) * P(x,y) +
   10596             : //      Mh(x) * Cv(y) * PAbove(x,y) +
   10597             : //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
   10598             : //
   10599             : // Where :
   10600             : //
   10601             : //  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
   10602             : //  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
   10603             : //
   10604             : // This function computes 'wsrc' and 'mask' as:
   10605             : //
   10606             : //  wsrc(x, y) =
   10607             : //    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
   10608             : //      Mh(x) * Cv(y) * PAbove(x,y) -
   10609             : //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
   10610             : //
   10611             : //  mask(x, y) = Mh(x) * Mv(y)
   10612             : //
   10613             : // These can then be used to efficiently approximate the error for any
   10614             : // predictor P in the context of the provided neighbouring predictors by
   10615             : // computing:
   10616             : //
   10617             : //  error(x, y) =
   10618             : //    (wsrc(x, y) - mask(x, y) * P(x, y)) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
   10619             : //
   10620           0 : static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
   10621             :                                       const MACROBLOCKD *xd, int mi_row,
   10622             :                                       int mi_col, const uint8_t *above,
   10623             :                                       int above_stride, const uint8_t *left,
   10624             :                                       int left_stride) {  // Fills x->wsrc_buf and x->mask_buf for the current block (plane 0 only).
   10625           0 :   const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   10626             :   int row, col, i;
   10627           0 :   const int bw = xd->n8_w << MI_SIZE_LOG2;  // block width: mi units scaled by MI_SIZE
   10628           0 :   const int bh = xd->n8_h << MI_SIZE_LOG2;  // block height: mi units scaled by MI_SIZE
   10629           0 :   int32_t *mask_buf = x->mask_buf;
   10630           0 :   int32_t *wsrc_buf = x->wsrc_buf;
   10631           0 :   const int wsrc_stride = bw;  // both buffers are packed: one row is bw entries
   10632           0 :   const int mask_stride = bw;
   10633           0 :   const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;  // scale applied to the source pixels in the final pass
   10634             : #if CONFIG_HIGHBITDEPTH
   10635           0 :   const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
   10636             : #else
   10637             :   const int is_hbd = 0;
   10638             : #endif  // CONFIG_HIGHBITDEPTH
   10639             : 
   10640             :   // plane 0 should not be subsampled
   10641           0 :   assert(xd->plane[0].subsampling_x == 0);
   10642           0 :   assert(xd->plane[0].subsampling_y == 0);
   10643             : 
   10644           0 :   av1_zero_array(wsrc_buf, bw * bh);  // default: no neighbour contribution
   10645           0 :   for (i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;  // default: full weight on P
   10646             : 
   10647             :   // handle above row: store Cv(y) * PAbove in wsrc and Mv(y) in mask for the top 'overlap' rows
   10648           0 :   if (xd->up_available) {
   10649           0 :     const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;  // number of rows blended; presumably half the block height - TODO confirm
   10650           0 :     const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);  // do not walk past cm->mi_cols
   10651           0 :     const int mi_row_offset = -1;
   10652           0 :     const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
   10653           0 :     const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
   10654           0 :     int neighbor_count = 0;
   10655             : 
   10656           0 :     assert(miw > 0);
   10657             : 
   10658           0 :     i = 0;
   10659             :     do {  // for each mi in the above row
   10660           0 :       const int mi_col_offset = i;
   10661           0 :       const MB_MODE_INFO *above_mbmi =
   10662           0 :           &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
   10663             : #if CONFIG_CHROMA_SUB8X8
   10664           0 :       if (above_mbmi->sb_type < BLOCK_8X8)  // sub-8x8 neighbour: use the mode info one mi to the right instead
   10665           0 :         above_mbmi =
   10666           0 :             &xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi;
   10667             : #endif
   10668           0 :       const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
   10669           0 :       const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);  // never step further than the current block's width
   10670           0 :       const int neighbor_bw = mi_step * MI_SIZE;
   10671             : 
   10672           0 :       if (is_neighbor_overlappable(above_mbmi)) {
   10673             :         if (!CONFIG_CB4X4 && (a_bsize == BLOCK_4X4 || a_bsize == BLOCK_4X8))
   10674             :           neighbor_count += 2;  // without CB4X4 these narrow sizes count as two neighbours
   10675             :         else
   10676           0 :           neighbor_count++;
   10677           0 :         if (neighbor_count > neighbor_limit) break;  // remaining above neighbours are ignored
   10678             : 
   10679           0 :         const int tmp_stride = above_stride;
   10680           0 :         int32_t *wsrc = wsrc_buf + (i * MI_SIZE);
   10681           0 :         int32_t *mask = mask_buf + (i * MI_SIZE);
   10682             : 
   10683           0 :         if (!is_hbd) {
   10684           0 :           const uint8_t *tmp = above;
   10685             : 
   10686           0 :           for (row = 0; row < overlap; ++row) {
   10687           0 :             const uint8_t m0 = mask1d[row];  // Mv(y)
   10688           0 :             const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;  // Cv(y)
   10689           0 :             for (col = 0; col < neighbor_bw; ++col) {
   10690           0 :               wsrc[col] = m1 * tmp[col];
   10691           0 :               mask[col] = m0;
   10692             :             }
   10693           0 :             wsrc += wsrc_stride;
   10694           0 :             mask += mask_stride;
   10695           0 :             tmp += tmp_stride;
   10696             :           }
   10697             : #if CONFIG_HIGHBITDEPTH
   10698             :         } else {
   10699           0 :           const uint16_t *tmp = CONVERT_TO_SHORTPTR(above);  // same loop as above, reading 16-bit samples
   10700             : 
   10701           0 :           for (row = 0; row < overlap; ++row) {
   10702           0 :             const uint8_t m0 = mask1d[row];
   10703           0 :             const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
   10704           0 :             for (col = 0; col < neighbor_bw; ++col) {
   10705           0 :               wsrc[col] = m1 * tmp[col];
   10706           0 :               mask[col] = m0;
   10707             :             }
   10708           0 :             wsrc += wsrc_stride;
   10709           0 :             mask += mask_stride;
   10710           0 :             tmp += tmp_stride;
   10711             :           }
   10712             : #endif  // CONFIG_HIGHBITDEPTH
   10713             :         }
   10714             :       }
   10715             : 
   10716           0 :       above += neighbor_bw;  // advance even when this neighbour was not overlappable
   10717           0 :       i += mi_step;
   10718           0 :     } while (i < miw);
   10719             :   }
   10720             : 
   10721           0 :   for (i = 0; i < bw * bh; ++i) {  // bring every entry to the MAX_ALPHA ** 2 scale, whether or not the left pass touches it
   10722           0 :     wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
   10723           0 :     mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
   10724             :   }
   10725             : 
   10726             :   // handle left column: fold Mh(x) and Ch(x) * PLeft into the leftmost 'overlap' columns
   10727           0 :   if (xd->left_available) {
   10728           0 :     const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;  // number of columns blended; presumably half the block width - TODO confirm
   10729           0 :     const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);  // do not walk past cm->mi_rows
   10730           0 :     const int mi_col_offset = -1;
   10731           0 :     const uint8_t *const mask1d = av1_get_obmc_mask(overlap);
   10732           0 :     const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
   10733           0 :     int neighbor_count = 0;
   10734             : 
   10735           0 :     assert(mih > 0);
   10736             : 
   10737           0 :     i = 0;
   10738             :     do {  // for each mi in the left column
   10739           0 :       const int mi_row_offset = i;
   10740           0 :       MB_MODE_INFO *left_mbmi =
   10741           0 :           &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
   10742             : 
   10743             : #if CONFIG_CHROMA_SUB8X8
   10744           0 :       if (left_mbmi->sb_type < BLOCK_8X8)  // sub-8x8 neighbour: use the mode info one mi row below instead
   10745           0 :         left_mbmi =
   10746           0 :             &xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi;
   10747             : #endif
   10748           0 :       const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
   10749           0 :       const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);  // never step further than the current block's height
   10750           0 :       const int neighbor_bh = mi_step * MI_SIZE;
   10751             : 
   10752           0 :       if (is_neighbor_overlappable(left_mbmi)) {
   10753             :         if (!CONFIG_CB4X4 && (l_bsize == BLOCK_4X4 || l_bsize == BLOCK_8X4))
   10754             :           neighbor_count += 2;  // without CB4X4 these short sizes count as two neighbours
   10755             :         else
   10756           0 :           neighbor_count++;
   10757           0 :         if (neighbor_count > neighbor_limit) break;  // remaining left neighbours are ignored
   10758             : 
   10759           0 :         const int tmp_stride = left_stride;
   10760           0 :         int32_t *wsrc = wsrc_buf + (i * MI_SIZE * wsrc_stride);
   10761           0 :         int32_t *mask = mask_buf + (i * MI_SIZE * mask_stride);
   10762             : 
   10763           0 :         if (!is_hbd) {
   10764           0 :           const uint8_t *tmp = left;
   10765             : 
   10766           0 :           for (row = 0; row < neighbor_bh; ++row) {
   10767           0 :             for (col = 0; col < overlap; ++col) {
   10768           0 :               const uint8_t m0 = mask1d[col];  // Mh(x)
   10769           0 :               const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;  // Ch(x)
   10770           0 :               wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +  // NOTE(review): assumes 1 << ROUND_BITS == MAX_ALPHA, undoing the rescale above - confirm
   10771           0 :                           (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
   10772           0 :               mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
   10773             :             }
   10774           0 :             wsrc += wsrc_stride;
   10775           0 :             mask += mask_stride;
   10776           0 :             tmp += tmp_stride;
   10777             :           }
   10778             : #if CONFIG_HIGHBITDEPTH
   10779             :         } else {
   10780           0 :           const uint16_t *tmp = CONVERT_TO_SHORTPTR(left);  // same loop as above, reading 16-bit samples
   10781             : 
   10782           0 :           for (row = 0; row < neighbor_bh; ++row) {
   10783           0 :             for (col = 0; col < overlap; ++col) {
   10784           0 :               const uint8_t m0 = mask1d[col];
   10785           0 :               const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
   10786           0 :               wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
   10787           0 :                           (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
   10788           0 :               mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
   10789             :             }
   10790           0 :             wsrc += wsrc_stride;
   10791           0 :             mask += mask_stride;
   10792           0 :             tmp += tmp_stride;
   10793             :           }
   10794             : #endif  // CONFIG_HIGHBITDEPTH
   10795             :         }
   10796             :       }
   10797             : 
   10798           0 :       left += neighbor_bh * left_stride;  // advance even when this neighbour was not overlappable
   10799           0 :       i += mi_step;
   10800           0 :     } while (i < mih);
   10801             :   }
   10802             : 
   10803           0 :   if (!is_hbd) {  // final pass: wsrc = src * src_scale - (accumulated neighbour terms)
   10804           0 :     const uint8_t *src = x->plane[0].src.buf;
   10805             : 
   10806           0 :     for (row = 0; row < bh; ++row) {
   10807           0 :       for (col = 0; col < bw; ++col) {
   10808           0 :         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
   10809             :       }
   10810           0 :       wsrc_buf += wsrc_stride;  // advances only the local pointer; x->wsrc_buf itself is unchanged
   10811           0 :       src += x->plane[0].src.stride;
   10812             :     }
   10813             : #if CONFIG_HIGHBITDEPTH
   10814             :   } else {
   10815           0 :     const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
   10816             : 
   10817           0 :     for (row = 0; row < bh; ++row) {
   10818           0 :       for (col = 0; col < bw; ++col) {
   10819           0 :         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
   10820             :       }
   10821           0 :       wsrc_buf += wsrc_stride;
   10822           0 :       src += x->plane[0].src.stride;
   10823             :     }
   10824             : #endif  // CONFIG_HIGHBITDEPTH
   10825             :   }
   10826           0 : }
   10827             : 
   10828             : #if CONFIG_NCOBMC
   10829             : void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x,
   10830             :                          int mi_row, int mi_col) {  // Compares the RD cost of the block's SIMPLE_TRANSLATION predictor with its ncobmc predictor and keeps the cheaper one in mbmi / x->skip.
   10831             :   const AV1_COMMON *const cm = &cpi->common;
   10832             :   MACROBLOCKD *const xd = &x->e_mbd;
   10833             :   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   10834             :   MB_MODE_INFO backup_mbmi;
   10835             :   BLOCK_SIZE bsize = mbmi->sb_type;
   10836             :   int ref, skip_blk, backup_skip = x->skip;  // backup_skip is overwritten with the causal skip decision before it is read
   10837             :   int64_t rd_causal;
   10838             :   RD_STATS rd_stats_y, rd_stats_uv;
   10839             :   int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0);  // cost of coding the skip bit as 0
   10840             :   int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1);  // cost of coding the skip bit as 1
   10841             : 
   10842             :   // Recompute the best causal predictor and rd
   10843             :   mbmi->motion_mode = SIMPLE_TRANSLATION;
   10844             :   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   10845             :   for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {  // one pass per reference frame in use
   10846             :     YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]);
   10847             :     assert(cfg != NULL);
   10848             :     av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
   10849             :                          &xd->block_refs[ref]->sf);
   10850             :   }
   10851             :   av1_setup_dst_planes(x->e_mbd.plane, bsize,
   10852             :                        get_frame_new_buffer(&cpi->common), mi_row, mi_col);
   10853             : 
   10854             :   av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
   10855             : 
   10856             :   av1_subtract_plane(x, bsize, 0);  // only plane 0 is subtracted here
   10857             :   super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
   10858             :   super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
   10859             :   assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
   10860             :   if (rd_stats_y.skip && rd_stats_uv.skip) {  // both searches already report skip: charge only the skip bit
   10861             :     rd_stats_y.rate = rate_skip1;
   10862             :     rd_stats_uv.rate = 0;
   10863             :     rd_stats_y.dist = rd_stats_y.sse;
   10864             :     rd_stats_uv.dist = rd_stats_uv.sse;
   10865             :     skip_blk = 0;  // NOTE(review): stays 0 here although the forced-skip branch below sets 1 - confirm this is intended
   10866             :   } else if (RDCOST(x->rdmult, x->rddiv,
   10867             :                     (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
   10868             :                     (rd_stats_y.dist + rd_stats_uv.dist)) >
   10869             :              RDCOST(x->rdmult, x->rddiv, rate_skip1,
   10870             :                     (rd_stats_y.sse + rd_stats_uv.sse))) {  // coding the residual costs more than skipping it: force skip
   10871             :     rd_stats_y.rate = rate_skip1;
   10872             :     rd_stats_uv.rate = 0;
   10873             :     rd_stats_y.dist = rd_stats_y.sse;
   10874             :     rd_stats_uv.dist = rd_stats_uv.sse;
   10875             :     skip_blk = 1;
   10876             :   } else {
   10877             :     rd_stats_y.rate += rate_skip0;
   10878             :     skip_blk = 0;
   10879             :   }
   10880             :   backup_skip = skip_blk;  // remember the causal candidate so it can be restored below
   10881             :   backup_mbmi = *mbmi;
   10882             :   rd_causal = RDCOST(x->rdmult, x->rddiv, (rd_stats_y.rate + rd_stats_uv.rate),
   10883             :                      (rd_stats_y.dist + rd_stats_uv.dist));
   10884             :   rd_causal += RDCOST(x->rdmult, x->rddiv,
   10885             :                       av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 0), 0);  // plus the cost of the motion-mode bit coded as 0
   10886             : 
   10887             :   // Check non-causal mode
   10888             :   mbmi->motion_mode = OBMC_CAUSAL;  // NOTE(review): the non-causal candidate is tagged OBMC_CAUSAL - presumably how ncobmc is represented in this configuration; confirm
   10889             :   av1_build_ncobmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
   10890             : 
   10891             :   av1_subtract_plane(x, bsize, 0);
   10892             :   super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
   10893             :   super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
   10894             :   assert(rd_stats_y.rate != INT_MAX && rd_stats_uv.rate != INT_MAX);
   10895             :   if (rd_stats_y.skip && rd_stats_uv.skip) {  // same skip decision as for the causal candidate above
   10896             :     rd_stats_y.rate = rate_skip1;
   10897             :     rd_stats_uv.rate = 0;
   10898             :     rd_stats_y.dist = rd_stats_y.sse;
   10899             :     rd_stats_uv.dist = rd_stats_uv.sse;
   10900             :     skip_blk = 0;
   10901             :   } else if (RDCOST(x->rdmult, x->rddiv,
   10902             :                     (rd_stats_y.rate + rd_stats_uv.rate + rate_skip0),
   10903             :                     (rd_stats_y.dist + rd_stats_uv.dist)) >
   10904             :              RDCOST(x->rdmult, x->rddiv, rate_skip1,
   10905             :                     (rd_stats_y.sse + rd_stats_uv.sse))) {
   10906             :     rd_stats_y.rate = rate_skip1;
   10907             :     rd_stats_uv.rate = 0;
   10908             :     rd_stats_y.dist = rd_stats_y.sse;
   10909             :     rd_stats_uv.dist = rd_stats_uv.sse;
   10910             :     skip_blk = 1;
   10911             :   } else {
   10912             :     rd_stats_y.rate += rate_skip0;
   10913             :     skip_blk = 0;
   10914             :   }
   10915             : 
   10916             :   if (rd_causal >
   10917             :       RDCOST(x->rdmult, x->rddiv,
   10918             :              rd_stats_y.rate + rd_stats_uv.rate +
   10919             :                  av1_cost_bit(cm->fc->motion_mode_prob[bsize][0], 1),
   10920             :              (rd_stats_y.dist + rd_stats_uv.dist))) {  // non-causal candidate (including its motion-mode bit) is strictly cheaper
   10921             :     x->skip = skip_blk;
   10922             :   } else {  // otherwise restore the causal mode info and skip decision
   10923             :     *mbmi = backup_mbmi;
   10924             :     x->skip = backup_skip;
   10925             :   }
   10926             : }
   10927             : #endif  // CONFIG_NCOBMC
   10928             : #endif  // CONFIG_MOTION_VAR

Generated by: LCOV version 1.13