Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include <assert.h>
13 :
14 : #include "./aom_scale_rtcd.h"
15 : #include "./aom_dsp_rtcd.h"
16 : #include "./aom_config.h"
17 :
18 : #include "aom/aom_integer.h"
19 : #include "aom_dsp/blend.h"
20 :
21 : #include "av1/common/blockd.h"
22 : #include "av1/common/reconinter.h"
23 : #include "av1/common/reconintra.h"
24 : #if CONFIG_MOTION_VAR
25 : #include "av1/common/onyxc_int.h"
26 : #endif // CONFIG_MOTION_VAR
27 :
28 : #if CONFIG_EXT_INTER
29 :
30 : #define NSMOOTHERS 1
31 :
32 : // [smoother][negative][direction]
33 : DECLARE_ALIGNED(16, static uint8_t,
34 : wedge_mask_obl[NSMOOTHERS][2][WEDGE_DIRECTIONS]
35 : [MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
36 :
37 : DECLARE_ALIGNED(16, static uint8_t,
38 : wedge_signflip_lookup[BLOCK_SIZES][MAX_WEDGE_TYPES]);
39 :
40 : // 3 * MAX_WEDGE_SQUARE is an easy-to-compute and fairly tight upper bound
41 : // on the sum of all mask sizes up to and including MAX_WEDGE_SQUARE.
42 : DECLARE_ALIGNED(16, static uint8_t,
43 : wedge_mask_buf[2 * MAX_WEDGE_TYPES * 3 * MAX_WEDGE_SQUARE]);
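/* Worked check (illustrative note, not from the original source; assumes
 * MAX_WEDGE_SIZE == 32 so MAX_WEDGE_SQUARE == 1024): per wedge_params_lookup
 * below, the block sizes that carry wedges are 8x8 through 32x32, so the
 * mask area summed over all sizes is
 *   64 + 128 + 128 + 256 + 512 + 512 + 1024 = 2624 <= 3 * 1024,
 * and each of the MAX_WEDGE_TYPES codes stores both a mask and its
 * complement, hence the factor of 2 in the buffer size above.
 */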
44 :
45 : static wedge_masks_type wedge_masks[BLOCK_SIZES][2];
46 :
47 : // Some unused wedge codebooks are left temporarily to facilitate
48 : // To be removed when settled.
49 : /*
50 : static wedge_code_type wedge_codebook_8_hgtw[8] = {
51 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
52 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
53 : { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
54 : { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
55 : };
56 :
57 : static wedge_code_type wedge_codebook_8_hltw[8] = {
58 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
59 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
60 : { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
61 : { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
62 : };
63 :
64 : static wedge_code_type wedge_codebook_8_heqw[8] = {
65 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
66 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
67 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
68 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
69 : };
70 :
71 : static const wedge_code_type wedge_codebook_32_hgtw[32] = {
72 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
73 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
74 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
75 : { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
76 : { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
77 : { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
78 : { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
79 : { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
80 : { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
81 : { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
82 : { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
83 : { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
84 : { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
85 : { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
86 : { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
87 : { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
88 : };
89 :
90 : static const wedge_code_type wedge_codebook_32_hltw[32] = {
91 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
92 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
93 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
94 : { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
95 : { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
96 : { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
97 : { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
98 : { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
99 : { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
100 : { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
101 : { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
102 : { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
103 : { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
104 : { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
105 : { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
106 : { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
107 : };
108 :
109 : static const wedge_code_type wedge_codebook_32_heqw[32] = {
110 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
111 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
112 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
113 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
114 : { WEDGE_OBLIQUE27, 4, 1 }, { WEDGE_OBLIQUE27, 4, 2 },
115 : { WEDGE_OBLIQUE27, 4, 3 }, { WEDGE_OBLIQUE27, 4, 5 },
116 : { WEDGE_OBLIQUE27, 4, 6 }, { WEDGE_OBLIQUE27, 4, 7 },
117 : { WEDGE_OBLIQUE153, 4, 1 }, { WEDGE_OBLIQUE153, 4, 2 },
118 : { WEDGE_OBLIQUE153, 4, 3 }, { WEDGE_OBLIQUE153, 4, 5 },
119 : { WEDGE_OBLIQUE153, 4, 6 }, { WEDGE_OBLIQUE153, 4, 7 },
120 : { WEDGE_OBLIQUE63, 1, 4 }, { WEDGE_OBLIQUE63, 2, 4 },
121 : { WEDGE_OBLIQUE63, 3, 4 }, { WEDGE_OBLIQUE63, 5, 4 },
122 : { WEDGE_OBLIQUE63, 6, 4 }, { WEDGE_OBLIQUE63, 7, 4 },
123 : { WEDGE_OBLIQUE117, 1, 4 }, { WEDGE_OBLIQUE117, 2, 4 },
124 : { WEDGE_OBLIQUE117, 3, 4 }, { WEDGE_OBLIQUE117, 5, 4 },
125 : { WEDGE_OBLIQUE117, 6, 4 }, { WEDGE_OBLIQUE117, 7, 4 },
126 : };
127 : */
128 :
129 : static const wedge_code_type wedge_codebook_16_hgtw[16] = {
130 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
131 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
132 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
133 : { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
134 : { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
135 : { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
136 : { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
137 : { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
138 : };
139 :
140 : static const wedge_code_type wedge_codebook_16_hltw[16] = {
141 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
142 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
143 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 4, 4 },
144 : { WEDGE_VERTICAL, 6, 4 }, { WEDGE_HORIZONTAL, 4, 4 },
145 : { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
146 : { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
147 : { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
148 : { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
149 : };
150 :
151 : static const wedge_code_type wedge_codebook_16_heqw[16] = {
152 : { WEDGE_OBLIQUE27, 4, 4 }, { WEDGE_OBLIQUE63, 4, 4 },
153 : { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
154 : { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
155 : { WEDGE_VERTICAL, 2, 4 }, { WEDGE_VERTICAL, 6, 4 },
156 : { WEDGE_OBLIQUE27, 4, 2 }, { WEDGE_OBLIQUE27, 4, 6 },
157 : { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
158 : { WEDGE_OBLIQUE63, 2, 4 }, { WEDGE_OBLIQUE63, 6, 4 },
159 : { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
160 : };
161 :
162 : const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
163 : #if CONFIG_CB4X4
164 : { 0, NULL, NULL, 0, NULL },
165 : { 0, NULL, NULL, 0, NULL },
166 : { 0, NULL, NULL, 0, NULL },
167 : #endif // CONFIG_CB4X4
168 : { 0, NULL, NULL, 0, NULL },
169 : { 0, NULL, NULL, 0, NULL },
170 : { 0, NULL, NULL, 0, NULL },
171 : #if CONFIG_WEDGE
172 : { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
173 : wedge_masks[BLOCK_8X8] },
174 : { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
175 : wedge_masks[BLOCK_8X16] },
176 : { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
177 : wedge_masks[BLOCK_16X8] },
178 : { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
179 : wedge_masks[BLOCK_16X16] },
180 : { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
181 : wedge_masks[BLOCK_16X32] },
182 : { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
183 : wedge_masks[BLOCK_32X16] },
184 : { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
185 : wedge_masks[BLOCK_32X32] },
186 : { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
187 : wedge_masks[BLOCK_32X64] },
188 : { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
189 : wedge_masks[BLOCK_64X32] },
190 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
191 : wedge_masks[BLOCK_64X64] },
192 : #else
193 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8], 0,
194 : wedge_masks[BLOCK_8X8] },
195 : { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16], 0,
196 : wedge_masks[BLOCK_8X16] },
197 : { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8], 0,
198 : wedge_masks[BLOCK_16X8] },
199 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16], 0,
200 : wedge_masks[BLOCK_16X16] },
201 : { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32], 0,
202 : wedge_masks[BLOCK_16X32] },
203 : { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16], 0,
204 : wedge_masks[BLOCK_32X16] },
205 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32], 0,
206 : wedge_masks[BLOCK_32X32] },
207 : { 0, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_32X64], 0,
208 : wedge_masks[BLOCK_32X64] },
209 : { 0, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_64X32], 0,
210 : wedge_masks[BLOCK_64X32] },
211 : { 0, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_64X64], 0,
212 : wedge_masks[BLOCK_64X64] },
213 : #endif // CONFIG_WEDGE
214 : #if CONFIG_EXT_PARTITION
215 : { 0, NULL, NULL, 0, NULL },
216 : { 0, NULL, NULL, 0, NULL },
217 : { 0, NULL, NULL, 0, NULL },
218 : #endif // CONFIG_EXT_PARTITION
219 : };
220 :
221 0 : static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
222 : BLOCK_SIZE sb_type) {
223 : const uint8_t *master;
224 0 : const int bh = block_size_high[sb_type];
225 0 : const int bw = block_size_wide[sb_type];
226 0 : const wedge_code_type *a =
227 0 : wedge_params_lookup[sb_type].codebook + wedge_index;
228 0 : const int smoother = wedge_params_lookup[sb_type].smoother;
229 : int woff, hoff;
230 0 : const uint8_t wsignflip = wedge_params_lookup[sb_type].signflip[wedge_index];
231 :
232 0 : assert(wedge_index >= 0 &&
233 : wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
234 0 : woff = (a->x_offset * bw) >> 3;
235 0 : hoff = (a->y_offset * bh) >> 3;
236 0 : master = wedge_mask_obl[smoother][neg ^ wsignflip][a->direction] +
237 0 : MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
238 0 : MASK_MASTER_SIZE / 2 - woff;
239 0 : return master;
240 : }
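/* Pointer-arithmetic sketch (illustrative, assuming MASK_MASTER_SIZE == 64):
 * the master mask is one MASK_MASTER_SIZE x MASK_MASTER_SIZE plane, and the
 * returned pointer is just an interior offset into it. E.g. for BLOCK_32X32
 * with a->x_offset == 2 and a->y_offset == 4 (offsets are in eighths of the
 * block):
 *   woff = (2 * 32) >> 3 = 8,   hoff = (4 * 32) >> 3 = 16,
 * so reading bw x bh bytes starting at
 *   master + MASK_MASTER_STRIDE * (32 - 16) + (32 - 8)
 * yields a 32x32 window whose wedge boundary is shifted accordingly.
 */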
241 :
242 0 : const uint8_t *av1_get_soft_mask(int wedge_index, int wedge_sign,
243 : BLOCK_SIZE sb_type, int offset_x,
244 : int offset_y) {
245 0 : const uint8_t *mask =
246 0 : get_wedge_mask_inplace(wedge_index, wedge_sign, sb_type);
247 0 : if (mask) mask -= (offset_x + offset_y * MASK_MASTER_STRIDE);
248 0 : return mask;
249 : }
250 :
251 : #if CONFIG_COMPOUND_SEGMENT
252 0 : static uint8_t *invert_mask(uint8_t *mask_inv_buffer, const uint8_t *const mask,
253 : int h, int w, int stride) {
254 : int i, j;
255 :
256 0 : for (i = 0; i < h; ++i)
257 0 : for (j = 0; j < w; ++j) {
258 0 : mask_inv_buffer[i * stride + j] =
259 0 : AOM_BLEND_A64_MAX_ALPHA - mask[i * stride + j];
260 : }
261 0 : return mask_inv_buffer;
262 : }
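/* Note (illustrative): since AOM_BLEND_A64_MAX_ALPHA == 64, the inverted
 * mask satisfies mask[i] + mask_inv[i] == 64 at every pixel, so blending
 * (src0, src1) with the inverse is exactly blending (src1, src0) with the
 * original mask:
 *   ((64 - m) * s0 + m * s1 + 32) >> 6  ==  (m * s1 + (64 - m) * s0 + 32) >> 6
 */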
263 : #endif // CONFIG_COMPOUND_SEGMENT
264 :
265 0 : const uint8_t *av1_get_compound_type_mask_inverse(
266 : const INTERINTER_COMPOUND_DATA *const comp_data,
267 : #if CONFIG_COMPOUND_SEGMENT
268 : uint8_t *mask_buffer, int h, int w, int stride,
269 : #endif
270 : BLOCK_SIZE sb_type) {
271 0 : assert(is_masked_compound_type(comp_data->interinter_compound_type));
272 : (void)sb_type;
273 0 : switch (comp_data->interinter_compound_type) {
274 : #if CONFIG_WEDGE
275 : case COMPOUND_WEDGE:
276 0 : return av1_get_contiguous_soft_mask(comp_data->wedge_index,
277 0 : !comp_data->wedge_sign, sb_type);
278 : #endif // CONFIG_WEDGE
279 : #if CONFIG_COMPOUND_SEGMENT
280 : case COMPOUND_SEG:
281 0 : return invert_mask(mask_buffer, comp_data->seg_mask, h, w, stride);
282 : #endif // CONFIG_COMPOUND_SEGMENT
283 0 : default: assert(0); return NULL;
284 : }
285 : }
286 :
287 0 : const uint8_t *av1_get_compound_type_mask(
288 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
289 0 : assert(is_masked_compound_type(comp_data->interinter_compound_type));
290 : (void)sb_type;
291 0 : switch (comp_data->interinter_compound_type) {
292 : #if CONFIG_WEDGE
293 : case COMPOUND_WEDGE:
294 0 : return av1_get_contiguous_soft_mask(comp_data->wedge_index,
295 : comp_data->wedge_sign, sb_type);
296 : #endif // CONFIG_WEDGE
297 : #if CONFIG_COMPOUND_SEGMENT
298 0 : case COMPOUND_SEG: return comp_data->seg_mask;
299 : #endif // CONFIG_COMPOUND_SEGMENT
300 0 : default: assert(0); return NULL;
301 : }
302 : }
303 :
304 : #if CONFIG_COMPOUND_SEGMENT
305 : #if COMPOUND_SEGMENT_TYPE == 0
306 : static void uniform_mask(uint8_t *mask, int which_inverse, BLOCK_SIZE sb_type,
307 : int h, int w, int mask_val) {
308 : int i, j;
309 : int block_stride = block_size_wide[sb_type];
310 : for (i = 0; i < h; ++i)
311 : for (j = 0; j < w; ++j) {
312 : mask[i * block_stride + j] =
313 : which_inverse ? AOM_BLEND_A64_MAX_ALPHA - mask_val : mask_val;
314 : }
315 : }
316 :
317 : void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
318 : const uint8_t *src0, int src0_stride,
319 : const uint8_t *src1, int src1_stride,
320 : BLOCK_SIZE sb_type, int h, int w) {
321 : (void)src0;
322 : (void)src1;
323 : (void)src0_stride;
324 : (void)src1_stride;
325 : switch (mask_type) {
326 : case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
327 : case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
328 : default: assert(0);
329 : }
330 : }
331 :
332 : #if CONFIG_HIGHBITDEPTH
333 : void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
334 : const uint8_t *src0, int src0_stride,
335 : const uint8_t *src1, int src1_stride,
336 : BLOCK_SIZE sb_type, int h, int w, int bd) {
337 : (void)src0;
338 : (void)src1;
339 : (void)src0_stride;
340 : (void)src1_stride;
341 : (void)bd;
342 : switch (mask_type) {
343 : case UNIFORM_45: uniform_mask(mask, 0, sb_type, h, w, 45); break;
344 : case UNIFORM_45_INV: uniform_mask(mask, 1, sb_type, h, w, 45); break;
345 : default: assert(0);
346 : }
347 : }
348 : #endif // CONFIG_HIGHBITDEPTH
349 :
350 : #elif COMPOUND_SEGMENT_TYPE == 1
351 : #define DIFF_FACTOR 16
352 0 : static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
353 : const uint8_t *src0, int src0_stride,
354 : const uint8_t *src1, int src1_stride,
355 : BLOCK_SIZE sb_type, int h, int w) {
356 : int i, j, m, diff;
357 0 : int block_stride = block_size_wide[sb_type];
358 0 : for (i = 0; i < h; ++i) {
359 0 : for (j = 0; j < w; ++j) {
360 0 : diff =
361 0 : abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
362 0 : m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
363 0 : mask[i * block_stride + j] =
364 0 : which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
365 : }
366 : }
367 0 : }
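/* Worked example (illustrative): with mask_base == 38 and DIFF_FACTOR == 16,
 *   |src0 - src1| == 0    ->  m = 38
 *   |src0 - src1| == 100  ->  m = 38 + 100 / 16 = 44
 *   |src0 - src1| >= 416  ->  m = 64   (clamped to AOM_BLEND_A64_MAX_ALPHA)
 * so the mask leans harder toward the first predictor wherever the two
 * predictions disagree; which_inverse == 1 mirrors this toward src1.
 */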
368 :
369 0 : void build_compound_seg_mask(uint8_t *mask, SEG_MASK_TYPE mask_type,
370 : const uint8_t *src0, int src0_stride,
371 : const uint8_t *src1, int src1_stride,
372 : BLOCK_SIZE sb_type, int h, int w) {
373 0 : switch (mask_type) {
374 : case DIFFWTD_38:
375 0 : diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, sb_type,
376 : h, w);
377 0 : break;
378 : case DIFFWTD_38_INV:
379 0 : diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, sb_type,
380 : h, w);
381 0 : break;
382 0 : default: assert(0);
383 : }
384 0 : }
385 :
386 : #if CONFIG_HIGHBITDEPTH
387 0 : static void diffwtd_mask_highbd(uint8_t *mask, int which_inverse, int mask_base,
388 : const uint16_t *src0, int src0_stride,
389 : const uint16_t *src1, int src1_stride,
390 : BLOCK_SIZE sb_type, int h, int w, int bd) {
391 : int i, j, m, diff;
392 0 : int block_stride = block_size_wide[sb_type];
393 0 : for (i = 0; i < h; ++i) {
394 0 : for (j = 0; j < w; ++j) {
395 0 : diff = abs((int)src0[i * src0_stride + j] -
396 0 : (int)src1[i * src1_stride + j]) >>
397 0 : (bd - 8);
398 0 : m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
399 0 : mask[i * block_stride + j] =
400 0 : which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
401 : }
402 : }
403 0 : }
404 :
405 0 : void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
406 : const uint8_t *src0, int src0_stride,
407 : const uint8_t *src1, int src1_stride,
408 : BLOCK_SIZE sb_type, int h, int w, int bd) {
409 0 : switch (mask_type) {
410 : case DIFFWTD_38:
411 0 : diffwtd_mask_highbd(mask, 0, 42, CONVERT_TO_SHORTPTR(src0), src0_stride,
412 0 : CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
413 : bd);
414 0 : break;
415 : case DIFFWTD_38_INV:
416 0 : diffwtd_mask_highbd(mask, 1, 42, CONVERT_TO_SHORTPTR(src0), src0_stride,
417 0 : CONVERT_TO_SHORTPTR(src1), src1_stride, sb_type, h, w,
418 : bd);
419 0 : break;
420 0 : default: assert(0);
421 : }
422 0 : }
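/* Note (illustrative): the high-bit-depth variant shifts the pixel
 * difference down by (bd - 8) so that 10- and 12-bit residuals are mapped
 * onto the same 8-bit scale before the DIFF_FACTOR division. Also note
 * that, as written, these call sites pass 42 rather than 38 as the mask
 * base for DIFFWTD_38 / DIFFWTD_38_INV.
 */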
423 : #endif // CONFIG_HIGHBITDEPTH
424 : #endif // COMPOUND_SEGMENT_TYPE
425 : #endif // CONFIG_COMPOUND_SEGMENT
426 :
427 : #if MASK_MASTER_SIZE == 64
428 : static const uint8_t wedge_master_oblique_odd[NSMOOTHERS][MASK_MASTER_SIZE] = {
429 : {
430 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
431 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 6, 18,
432 : 37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
433 : 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
434 : }
435 : };
436 : static const uint8_t wedge_master_oblique_even[NSMOOTHERS][MASK_MASTER_SIZE] = {
437 : {
438 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 11, 27,
440 : 46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
441 : 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
442 : }
443 : };
444 : static const uint8_t wedge_master_vertical[NSMOOTHERS][MASK_MASTER_SIZE] = { {
445 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 7, 21,
447 : 43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
448 : 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
449 : } };
450 :
451 0 : static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
452 0 : if (shift >= 0) {
453 0 : memcpy(dst + shift, src, width - shift);
454 0 : memset(dst, src[0], shift);
455 : } else {
456 0 : shift = -shift;
457 0 : memcpy(dst, src + shift, width - shift);
458 0 : memset(dst + width - shift, src[width - 1], shift);
459 : }
460 0 : }
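/* Worked example (illustrative): shift_copy() shifts one mask row and
 * replicates the edge sample into the vacated positions. With
 * src = {0, 0, 64, 64} and width == 4:
 *   shift == +1  ->  dst = {0, 0, 0, 64}    (src[0] fills the front)
 *   shift == -1  ->  dst = {0, 64, 64, 64}  (src[3] fills the back)
 */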
461 : #else
462 : static const double smoother_param[NSMOOTHERS] = { 3.0 };
463 : #endif // MASK_MASTER_SIZE == 64
464 :
465 0 : static void init_wedge_master_masks() {
466 : int i, j, s;
467 0 : const int w = MASK_MASTER_SIZE;
468 0 : const int h = MASK_MASTER_SIZE;
469 0 : const int stride = MASK_MASTER_STRIDE;
470 0 : for (s = 0; s < NSMOOTHERS; s++) {
471 : // Note: index [0] stores the masters, and [1] their complements.
472 : #if MASK_MASTER_SIZE == 64
473 : // Generate prototype by shifting the masters
474 0 : int shift = h / 4;
475 0 : for (i = 0; i < h; i += 2) {
476 0 : shift_copy(wedge_master_oblique_even[s],
477 0 : &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride], shift,
478 : MASK_MASTER_SIZE);
479 0 : shift--;
480 0 : shift_copy(wedge_master_oblique_odd[s],
481 0 : &wedge_mask_obl[s][0][WEDGE_OBLIQUE63][(i + 1) * stride],
482 : shift, MASK_MASTER_SIZE);
483 0 : memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride],
484 0 : wedge_master_vertical[s],
485 : MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
486 0 : memcpy(&wedge_mask_obl[s][0][WEDGE_VERTICAL][(i + 1) * stride],
487 0 : wedge_master_vertical[s],
488 : MASK_MASTER_SIZE * sizeof(wedge_master_vertical[s][0]));
489 : }
490 : #else
491 : const int a[2] = { 2, 1 };
492 : const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
493 : for (i = 0; i < h; i++) {
494 : for (j = 0; j < w; ++j) {
495 : int x = (2 * j + 1 - w);
496 : int y = (2 * i + 1 - h);
497 : double d = (a[0] * x + a[1] * y) / asqrt;
498 : const int msk = (int)rint((1.0 + tanh(d / smoother_param[s])) * 32);
499 : wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j] = msk;
500 : const int mskx = (int)rint((1.0 + tanh(x / smoother_param[s])) * 32);
501 : wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j] = mskx;
502 : }
503 : }
504 : #endif // MASK_MASTER_SIZE == 64
505 0 : for (i = 0; i < h; ++i) {
506 0 : for (j = 0; j < w; ++j) {
507 0 : const int msk = wedge_mask_obl[s][0][WEDGE_OBLIQUE63][i * stride + j];
508 0 : wedge_mask_obl[s][0][WEDGE_OBLIQUE27][j * stride + i] = msk;
509 0 : wedge_mask_obl[s][0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
510 0 : wedge_mask_obl[s][0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
511 0 : (1 << WEDGE_WEIGHT_BITS) - msk;
512 0 : wedge_mask_obl[s][1][WEDGE_OBLIQUE63][i * stride + j] =
513 0 : wedge_mask_obl[s][1][WEDGE_OBLIQUE27][j * stride + i] =
514 0 : (1 << WEDGE_WEIGHT_BITS) - msk;
515 0 : wedge_mask_obl[s][1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
516 0 : wedge_mask_obl[s][1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
517 : msk;
518 0 : const int mskx = wedge_mask_obl[s][0][WEDGE_VERTICAL][i * stride + j];
519 0 : wedge_mask_obl[s][0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
520 0 : wedge_mask_obl[s][1][WEDGE_VERTICAL][i * stride + j] =
521 0 : wedge_mask_obl[s][1][WEDGE_HORIZONTAL][j * stride + i] =
522 0 : (1 << WEDGE_WEIGHT_BITS) - mskx;
523 : }
524 : }
525 : }
526 0 : }
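/* Symmetry sketch (illustrative): only the WEDGE_OBLIQUE63 and
 * WEDGE_VERTICAL masters are stored explicitly; the final loop above derives
 * the rest, with (1 << WEDGE_WEIGHT_BITS) == 64 when WEDGE_WEIGHT_BITS == 6:
 *   OBLIQUE27[j][i]        = OBLIQUE63[i][j]         (transpose)
 *   OBLIQUE117[i][w-1-j]   = 64 - OBLIQUE63[i][j]    (mirror + complement)
 *   OBLIQUE153[w-1-j][i]   = 64 - OBLIQUE63[i][j]    (transpose of 117)
 *   HORIZONTAL[j][i]       = VERTICAL[i][j]          (transpose)
 * and index [1] of each direction is the 64 - x complement of index [0].
 */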
527 :
528 : // If the signs of the wedges for the various block sizes are
529 : // inconsistent, flip the sign flag. Do it only once for every
530 : // wedge codebook.
531 0 : static void init_wedge_signs() {
532 : BLOCK_SIZE sb_type;
533 0 : memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
534 0 : for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES; ++sb_type) {
535 0 : const int bw = block_size_wide[sb_type];
536 0 : const int bh = block_size_high[sb_type];
537 0 : const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
538 0 : const int wbits = wedge_params.bits;
539 0 : const int wtypes = 1 << wbits;
540 : int i, w;
541 0 : if (wbits == 0) continue;
542 0 : for (w = 0; w < wtypes; ++w) {
543 : // Get the mask master, i.e. index [0]
544 0 : const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
545 0 : int avg = 0;
546 0 : for (i = 0; i < bw; ++i) avg += mask[i];
547 0 : for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
548 0 : avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
549 : // The default sign of this wedge is 1 if the average < 32, 0 otherwise.
550 : // If the default sign is 1:
551 : // If the requested sign is 0, we need to flip the sign and return
552 : // the complement, i.e. index [1], instead; if the requested sign is 1,
553 : // we need to flip the sign and return index [0] instead.
554 : // If the default sign is 0:
555 : // If the requested sign is 0, we need to return index [0], the master;
556 : // if the requested sign is 1, we need to return the complement,
557 : // index [1], instead.
558 0 : wedge_params.signflip[w] = (avg < 32);
559 : // printf("%d[%d] = %d\n", sb_type, w, wedge_params.signflip[w]);
560 : }
561 : }
562 0 : }
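/* Worked example (illustrative): the sign is decided from the mask average
 * over the top row and left column only (bw + bh - 1 samples, with rounding
 * in the division). For a vertical wedge splitting an 8x8 block down the
 * middle, the top row averages near 32 while the left column sits near 0,
 * so avg < 32 and signflip is set; requesting sign 0 for that wedge then
 * returns the complement mask instead of the master.
 */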
563 :
564 0 : static void init_wedge_masks() {
565 0 : uint8_t *dst = wedge_mask_buf;
566 : BLOCK_SIZE bsize;
567 0 : memset(wedge_masks, 0, sizeof(wedge_masks));
568 0 : for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES; ++bsize) {
569 : const uint8_t *mask;
570 0 : const int bw = block_size_wide[bsize];
571 0 : const int bh = block_size_high[bsize];
572 0 : const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
573 0 : const int wbits = wedge_params->bits;
574 0 : const int wtypes = 1 << wbits;
575 : int w;
576 0 : if (wbits == 0) continue;
577 0 : for (w = 0; w < wtypes; ++w) {
578 0 : mask = get_wedge_mask_inplace(w, 0, bsize);
579 0 : aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
580 : bh);
581 0 : wedge_params->masks[0][w] = dst;
582 0 : dst += bw * bh;
583 :
584 0 : mask = get_wedge_mask_inplace(w, 1, bsize);
585 0 : aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
586 : bh);
587 0 : wedge_params->masks[1][w] = dst;
588 0 : dst += bw * bh;
589 : }
590 0 : assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
591 : }
592 0 : }
593 :
594 : // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
595 0 : void av1_init_wedge_masks() {
596 0 : init_wedge_master_masks();
597 0 : init_wedge_signs();
598 0 : init_wedge_masks();
599 0 : }
600 :
601 : #if CONFIG_SUPERTX
602 : static void build_masked_compound_wedge_extend(
603 : uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
604 : const uint8_t *src1, int src1_stride,
605 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
606 : int wedge_offset_x, int wedge_offset_y, int h, int w) {
607 : const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
608 : const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
609 : const uint8_t *mask;
610 : size_t mask_stride;
611 : switch (comp_data->interinter_compound_type) {
612 : case COMPOUND_WEDGE:
613 : mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
614 : sb_type, wedge_offset_x, wedge_offset_y);
615 : mask_stride = MASK_MASTER_STRIDE;
616 : break;
617 : #if CONFIG_COMPOUND_SEGMENT
618 : case COMPOUND_SEG:
619 : mask = comp_data->seg_mask;
620 : mask_stride = block_size_wide[sb_type];
621 : break;
622 : #endif
623 : default: assert(0); return;
624 : }
625 : aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
626 : mask, (int)mask_stride, h, w, subh, subw);
627 : }
628 :
629 : #if CONFIG_HIGHBITDEPTH
630 : static void build_masked_compound_wedge_extend_highbd(
631 : uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
632 : const uint8_t *src1_8, int src1_stride,
633 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type,
634 : int wedge_offset_x, int wedge_offset_y, int h, int w, int bd) {
635 : const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
636 : const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
637 : const uint8_t *mask;
638 : size_t mask_stride;
639 : switch (comp_data->interinter_compound_type) {
640 : case COMPOUND_WEDGE:
641 : mask = av1_get_soft_mask(comp_data->wedge_index, comp_data->wedge_sign,
642 : sb_type, wedge_offset_x, wedge_offset_y);
643 : mask_stride = MASK_MASTER_STRIDE;
644 : break;
645 : #if CONFIG_COMPOUND_SEGMENT
646 : case COMPOUND_SEG:
647 : mask = comp_data->seg_mask;
648 : mask_stride = block_size_wide[sb_type];
649 : break;
650 : #endif
651 : default: assert(0); return;
652 : }
653 : aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
654 : src1_stride, mask, (int)mask_stride, h, w, subh,
655 : subw, bd);
656 : }
657 : #endif // CONFIG_HIGHBITDEPTH
658 : #else
659 0 : static void build_masked_compound(
660 : uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
661 : const uint8_t *src1, int src1_stride,
662 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
663 : int w) {
664 : // Derive subsampling from h and w passed in. May be refactored to
665 : // pass in subsampling factors directly.
666 0 : const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
667 0 : const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
668 0 : const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
669 0 : aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
670 0 : mask, block_size_wide[sb_type], h, w, subh, subw);
671 0 : }
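/* Subsampling sketch (illustrative): (2 << b_height_log2_lookup[sb_type])
 * is half the luma block height, so subh == 1 exactly when the plane passed
 * in is vertically subsampled. E.g. for BLOCK_8X8, 2 << 1 == 4: the luma
 * call (h == 8) gives subh == 0, while a 4:2:0 chroma call (h == 4) gives
 * subh == 1, telling aom_blend_a64_mask() to downsample the luma-resolution
 * mask as it blends. The same reasoning applies to subw and w.
 */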
672 :
673 : #if CONFIG_HIGHBITDEPTH
674 0 : static void build_masked_compound_highbd(
675 : uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride,
676 : const uint8_t *src1_8, int src1_stride,
677 : const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
678 : int w, int bd) {
679 : // Derive subsampling from h and w passed in. May be refactored to
680 : // pass in subsampling factors directly.
681 0 : const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
682 0 : const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
683 0 : const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
684 : // const uint8_t *mask =
685 : // av1_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type);
686 0 : aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8,
687 0 : src1_stride, mask, block_size_wide[sb_type], h, w,
688 : subh, subw, bd);
689 0 : }
690 : #endif // CONFIG_HIGHBITDEPTH
691 : #endif // CONFIG_SUPERTX
692 :
693 0 : void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
694 : uint8_t *dst, int dst_stride,
695 : const int subpel_x, const int subpel_y,
696 : const struct scale_factors *sf, int w,
697 : int h,
698 : #if CONFIG_DUAL_FILTER
699 : const InterpFilter *interp_filter,
700 : #else
701 : const InterpFilter interp_filter,
702 : #endif
703 : int xs, int ys,
704 : #if CONFIG_SUPERTX
705 : int wedge_offset_x, int wedge_offset_y,
706 : #endif // CONFIG_SUPERTX
707 : int plane,
708 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
709 : const WarpTypesAllowed *warp_types,
710 : int p_col, int p_row, int ref,
711 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
712 : MACROBLOCKD *xd) {
713 0 : MODE_INFO *mi = xd->mi[0];
714 0 : const INTERINTER_COMPOUND_DATA comp_data = {
715 : #if CONFIG_WEDGE
716 0 : mi->mbmi.wedge_index,
717 0 : mi->mbmi.wedge_sign,
718 : #endif // CONFIG_WEDGE
719 : #if CONFIG_COMPOUND_SEGMENT
720 0 : mi->mbmi.mask_type,
721 0 : xd->seg_mask,
722 : #endif // CONFIG_COMPOUND_SEGMENT
723 0 : mi->mbmi.interinter_compound_type
724 : };
725 : // The prediction filter types used here should be those for
726 : // the second reference block.
727 : #if CONFIG_DUAL_FILTER
728 0 : InterpFilter tmp_ipf[4] = {
729 : interp_filter[2], interp_filter[3], interp_filter[2], interp_filter[3],
730 : };
731 : #else
732 : InterpFilter tmp_ipf = interp_filter;
733 : #endif // CONFIG_DUAL_FILTER
734 0 : ConvolveParams conv_params = get_conv_params(0, plane);
735 :
736 : #if CONFIG_HIGHBITDEPTH
737 : DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
738 0 : uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
739 0 : ? CONVERT_TO_BYTEPTR(tmp_dst_)
740 0 : : tmp_dst_;
741 0 : av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
742 : subpel_y, sf, w, h, &conv_params, tmp_ipf,
743 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
744 : warp_types, p_col, p_row, plane, ref,
745 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
746 : #if CONFIG_MOTION_VAR
747 : 0, 0,
748 : #endif
749 : xs, ys, xd);
750 : #if CONFIG_COMPOUND_SEGMENT
751 0 : if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
752 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
753 0 : build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
754 : dst, dst_stride, tmp_dst, MAX_SB_SIZE,
755 0 : mi->mbmi.sb_type, h, w, xd->bd);
756 : else
757 0 : build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
758 : dst_stride, tmp_dst, MAX_SB_SIZE,
759 0 : mi->mbmi.sb_type, h, w);
760 : }
761 : #endif // CONFIG_COMPOUND_SEGMENT
762 :
763 : #if CONFIG_SUPERTX
764 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
765 : build_masked_compound_wedge_extend_highbd(
766 : dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
767 : mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
768 : else
769 : build_masked_compound_wedge_extend(
770 : dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
771 : mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
772 : #else
773 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
774 0 : build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
775 0 : MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h,
776 : w, xd->bd);
777 : else
778 0 : build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
779 0 : MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
780 : #endif // CONFIG_SUPERTX
781 :
782 : #else // CONFIG_HIGHBITDEPTH
783 : DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
784 : av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
785 : subpel_y, sf, w, h, &conv_params, tmp_ipf,
786 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
787 : warp_types, p_col, p_row, plane, ref,
788 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
789 : #if CONFIG_MOTION_VAR
790 : 0, 0,
791 : #endif
792 : xs, ys, xd);
793 : #if CONFIG_COMPOUND_SEGMENT
794 : if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG)
795 : build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
796 : dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.sb_type,
797 : h, w);
798 : #endif // CONFIG_COMPOUND_SEGMENT
799 : #if CONFIG_SUPERTX
800 : build_masked_compound_wedge_extend(dst, dst_stride, dst, dst_stride, tmp_dst,
801 : MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type,
802 : wedge_offset_x, wedge_offset_y, h, w);
803 : #else
804 : build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
805 : &comp_data, mi->mbmi.sb_type, h, w);
806 : #endif // CONFIG_SUPERTX
807 : #endif // CONFIG_HIGHBITDEPTH
808 : #if CONFIG_COMPOUND_SEGMENT
809 : (void)plane;
810 : #endif // CONFIG_COMPOUND_SEGMENT
811 0 : }
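/* Flow summary (illustrative): the masked predictor is built in three steps:
 * (1) predict the second reference into tmp_dst using the second ref's
 *     filter types (dst already holds the first reference's prediction);
 * (2) for COMPOUND_SEG on plane 0, derive the segmentation mask from the
 *     two predictions themselves (dst vs tmp_dst);
 * (3) blend dst and tmp_dst through the compound mask back into dst.
 */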
812 : #endif // CONFIG_EXT_INTER
813 :
814 : // TODO(sarahparker) av1_highbd_build_inter_predictor and
815 : // av1_build_inter_predictor should be combined with
816 : // av1_make_inter_predictor
817 : #if CONFIG_HIGHBITDEPTH
818 0 : void av1_highbd_build_inter_predictor(
819 : const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride,
820 : const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
821 : #if CONFIG_DUAL_FILTER
822 : const InterpFilter *interp_filter,
823 : #else
824 : const InterpFilter interp_filter,
825 : #endif
826 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
827 : const WarpTypesAllowed *warp_types, int p_col, int p_row,
828 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
829 : int plane, enum mv_precision precision, int x, int y,
830 : const MACROBLOCKD *xd) {
831 0 : const int is_q4 = precision == MV_PRECISION_Q4;
832 0 : const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
833 0 : is_q4 ? src_mv->col : src_mv->col * 2 };
834 0 : MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
835 0 : const int subpel_x = mv.col & SUBPEL_MASK;
836 0 : const int subpel_y = mv.row & SUBPEL_MASK;
837 0 : ConvolveParams conv_params = get_conv_params(ref, plane);
838 :
839 0 : src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
840 :
841 0 : av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
842 : sf, w, h, &conv_params, interp_filter,
843 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
844 : warp_types, p_col, p_row, plane, ref,
845 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
846 : #if CONFIG_MOTION_VAR
847 : 0, 0,
848 : #endif
849 : sf->x_step_q4, sf->y_step_q4, xd);
850 0 : }
851 : #endif // CONFIG_HIGHBITDEPTH
852 :
853 0 : void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
854 : int dst_stride, const MV *src_mv,
855 : const struct scale_factors *sf, int w, int h,
856 : ConvolveParams *conv_params,
857 : #if CONFIG_DUAL_FILTER
858 : const InterpFilter *interp_filter,
859 : #else
860 : const InterpFilter interp_filter,
861 : #endif
862 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
863 : const WarpTypesAllowed *warp_types, int p_col,
864 : int p_row, int plane, int ref,
865 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
866 : enum mv_precision precision, int x, int y,
867 : const MACROBLOCKD *xd) {
868 0 : const int is_q4 = precision == MV_PRECISION_Q4;
869 0 : const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2,
870 0 : is_q4 ? src_mv->col : src_mv->col * 2 };
871 0 : MV32 mv = av1_scale_mv(&mv_q4, x, y, sf);
872 0 : const int subpel_x = mv.col & SUBPEL_MASK;
873 0 : const int subpel_y = mv.row & SUBPEL_MASK;
874 :
875 0 : src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
876 :
877 0 : av1_make_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
878 : sf, w, h, conv_params, interp_filter,
879 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
880 : warp_types, p_col, p_row, plane, ref,
881 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
882 : #if CONFIG_MOTION_VAR
883 : 0, 0,
884 : #endif
885 : sf->x_step_q4, sf->y_step_q4, xd);
886 0 : }
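/* Worked example (illustrative, assuming SUBPEL_BITS == 4 as elsewhere in
 * libaom): with precision == MV_PRECISION_Q3 and src_mv->col == 19, the q4
 * value is 19 * 2 == 38, so the integer part 38 >> SUBPEL_BITS == 2 advances
 * src by two pixels and subpel_x == (38 & SUBPEL_MASK) == 6 selects the
 * 6/16-pel interpolation filter phase.
 */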
887 :
888 : typedef struct SubpelParams {
889 : int xs;
890 : int ys;
891 : int subpel_x;
892 : int subpel_y;
893 : } SubpelParams;
894 :
895 0 : void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
896 : #if CONFIG_MOTION_VAR
897 : int mi_col_offset, int mi_row_offset,
898 : #endif // CONFIG_MOTION_VAR
899 : int block, int bw, int bh, int x, int y, int w,
900 : int h,
901 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
902 : int wedge_offset_x, int wedge_offset_y,
903 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
904 : int mi_x, int mi_y) {
905 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
906 : #if CONFIG_MOTION_VAR
907 0 : const MODE_INFO *mi = xd->mi[mi_col_offset + xd->mi_stride * mi_row_offset];
908 : #else
909 : const MODE_INFO *mi = xd->mi[0];
910 : #endif // CONFIG_MOTION_VAR
911 0 : int is_compound = has_second_ref(&mi->mbmi);
912 : int ref;
913 : #if CONFIG_INTRABC
914 : const int is_intrabc = is_intrabc_block(&mi->mbmi);
915 : assert(IMPLIES(is_intrabc, !is_compound));
916 : #endif // CONFIG_INTRABC
917 : #if CONFIG_GLOBAL_MOTION
918 0 : int is_global[2] = { 0, 0 };
919 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
920 0 : WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
921 0 : is_global[ref] = is_global_mv_block(mi, block, wm->wmtype);
922 : }
923 : #endif // CONFIG_GLOBAL_MOTION
924 :
925 : #if CONFIG_CB4X4
926 : (void)block;
927 : (void)cm;
928 : #endif
929 :
930 : #if CONFIG_MOTION_VAR && (CONFIG_CHROMA_SUB8X8 || !CONFIG_CB4X4)
931 0 : const int build_for_obmc = !(mi_col_offset == 0 && mi_row_offset == 0);
932 : #endif // CONFIG_MOTION_VAR && (CONFIG_CHROMA_SUB8X8 || !CONFIG_CB4X4)
933 :
934 : #if CONFIG_CHROMA_SUB8X8
935 0 : const BLOCK_SIZE bsize = mi->mbmi.sb_type;
936 0 : const int ss_x = pd->subsampling_x;
937 0 : const int ss_y = pd->subsampling_y;
938 0 : int sub8x8_inter = bsize < BLOCK_8X8 && (ss_x || ss_y);
939 0 : const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
940 0 : const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
941 :
942 : #if CONFIG_MOTION_VAR
943 0 : if (!build_for_obmc && sub8x8_inter) {
944 : #else
945 : if (sub8x8_inter) {
946 : #endif // CONFIG_MOTION_VAR
947 0 : for (int row = row_start; row <= 0 && sub8x8_inter; ++row)
948 0 : for (int col = col_start; col <= 0; ++col)
949 0 : if (!is_inter_block(&xd->mi[row * xd->mi_stride + col]->mbmi))
950 0 : sub8x8_inter = 0;
951 : }
952 :
953 : #if CONFIG_MOTION_VAR
954 0 : if (!build_for_obmc && sub8x8_inter) {
955 : #else
956 : if (sub8x8_inter) {
957 : #endif // CONFIG_MOTION_VAR
958 : // Sub-block (b4) and covering block (b8) dimensions in this plane
959 0 : const int b4_w = block_size_wide[bsize] >> ss_x;
960 0 : const int b4_h = block_size_high[bsize] >> ss_y;
961 0 : const BLOCK_SIZE plane_bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
962 0 : const int b8_w = block_size_wide[plane_bsize] >> ss_x;
963 0 : const int b8_h = block_size_high[plane_bsize] >> ss_y;
964 : int idx, idy;
965 :
966 0 : const int x_base = x;
967 0 : const int y_base = y;
968 :
969 0 : const struct buf_2d orig_pred_buf[2] = { pd->pre[0], pd->pre[1] };
970 :
971 0 : int row = row_start;
972 0 : for (idy = 0; idy < b8_h; idy += b4_h) {
973 0 : int col = col_start;
974 0 : for (idx = 0; idx < b8_w; idx += b4_w) {
975 0 : MB_MODE_INFO *this_mbmi = &xd->mi[row * xd->mi_stride + col]->mbmi;
976 0 : is_compound = has_second_ref(this_mbmi);
977 :
978 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
979 0 : struct buf_2d *const dst_buf = &pd->dst;
980 :
981 0 : const RefBuffer *ref_buf =
982 0 : &cm->frame_refs[this_mbmi->ref_frame[ref] - LAST_FRAME];
983 :
984 0 : const int c_offset = (mi_x + MI_SIZE * col_start) >> ss_x;
985 0 : const int r_offset = (mi_y + MI_SIZE * row_start) >> ss_y;
986 0 : pd->pre[ref].buf0 =
987 0 : (plane == 1) ? ref_buf->buf->u_buffer : ref_buf->buf->v_buffer;
988 0 : pd->pre[ref].buf =
989 0 : pd->pre[ref].buf0 + scaled_buffer_offset(c_offset, r_offset,
990 0 : ref_buf->buf->uv_stride,
991 : &ref_buf->sf);
992 0 : pd->pre[ref].width = ref_buf->buf->uv_crop_width;
993 0 : pd->pre[ref].height = ref_buf->buf->uv_crop_height;
994 0 : pd->pre[ref].stride = ref_buf->buf->uv_stride;
995 :
996 : #if CONFIG_INTRABC
997 : const struct scale_factors *const sf =
998 : is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
999 : struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
1000 : #else
1001 0 : const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
1002 0 : struct buf_2d *const pre_buf = &pd->pre[ref];
1003 : #endif // CONFIG_INTRABC
1004 0 : uint8_t *dst = dst_buf->buf;
1005 :
1006 0 : const MV mv = this_mbmi->mv[ref].as_mv;
1007 :
1008 0 : const MV mv_q4 = clamp_mv_to_umv_border_sb(
1009 : xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
1010 : uint8_t *pre;
1011 : MV32 scaled_mv;
1012 : int xs, ys, subpel_x, subpel_y;
1013 0 : const int is_scaled = av1_is_scaled(sf);
1014 0 : ConvolveParams conv_params = get_conv_params(ref, plane);
1015 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1016 : WarpTypesAllowed warp_types;
1017 : #if CONFIG_GLOBAL_MOTION
1018 0 : warp_types.global_warp_allowed = is_global[ref];
1019 : #endif // CONFIG_GLOBAL_MOTION
1020 : #if CONFIG_WARPED_MOTION
1021 0 : warp_types.local_warp_allowed =
1022 0 : this_mbmi->motion_mode == WARPED_CAUSAL;
1023 : #endif // CONFIG_WARPED_MOTION
1024 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1025 :
1026 0 : x = x_base + idx;
1027 0 : y = y_base + idy;
1028 :
1029 0 : dst += dst_buf->stride * y + x;
1030 :
1031 0 : if (is_scaled) {
1032 0 : pre =
1033 0 : pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
1034 0 : scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
1035 0 : xs = sf->x_step_q4;
1036 0 : ys = sf->y_step_q4;
1037 : } else {
1038 0 : pre = pre_buf->buf + y * pre_buf->stride + x;
1039 0 : scaled_mv.row = mv_q4.row;
1040 0 : scaled_mv.col = mv_q4.col;
1041 0 : xs = ys = 16;
1042 : }
1043 :
1044 0 : subpel_x = scaled_mv.col & SUBPEL_MASK;
1045 0 : subpel_y = scaled_mv.row & SUBPEL_MASK;
1046 0 : pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
1047 0 : (scaled_mv.col >> SUBPEL_BITS);
1048 :
1049 : #if CONFIG_EXT_INTER
1050 0 : if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_type))
1051 0 : av1_make_masked_inter_predictor(
1052 : pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
1053 0 : sf, w, h, mi->mbmi.interp_filter, xs, ys,
1054 : #if CONFIG_SUPERTX
1055 : wedge_offset_x, wedge_offset_y,
1056 : #endif // CONFIG_SUPERTX
1057 : plane,
1058 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1059 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
1060 0 : (mi_y >> pd->subsampling_y) + y, ref,
1061 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1062 : xd);
1063 : else
1064 : #endif // CONFIG_EXT_INTER
1065 0 : av1_make_inter_predictor(
1066 : pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y,
1067 0 : sf, b4_w, b4_h, &conv_params, this_mbmi->interp_filter,
1068 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1069 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
1070 0 : (mi_y >> pd->subsampling_y) + y, plane, ref,
1071 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1072 : #if CONFIG_MOTION_VAR
1073 : mi_col_offset, mi_row_offset,
1074 : #endif
1075 : xs, ys, xd);
1076 : }
1077 0 : ++col;
1078 : }
1079 0 : ++row;
1080 : }
1081 :
1082 0 : for (ref = 0; ref < 2; ++ref) pd->pre[ref] = orig_pred_buf[ref];
1083 0 : return;
1084 : }
1085 : #else
1086 : (void)cm;
1087 : #endif // CONFIG_CHROMA_SUB8X8
1088 :
1089 : {
1090 0 : struct buf_2d *const dst_buf = &pd->dst;
1091 0 : uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
1092 : uint8_t *pre[2];
1093 : MV32 scaled_mv[2];
1094 : SubpelParams subpel_params[2];
1095 : #if CONFIG_CONVOLVE_ROUND
1096 : DECLARE_ALIGNED(16, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
1097 : av1_zero(tmp_dst);
1098 : #endif // CONFIG_CONVOLVE_ROUND
1099 :
1100 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
1101 : #if CONFIG_INTRABC
1102 : const struct scale_factors *const sf =
1103 : is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
1104 : struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
1105 : #else
1106 0 : const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
1107 0 : struct buf_2d *const pre_buf = &pd->pre[ref];
1108 : #endif // CONFIG_INTRABC
1109 : #if CONFIG_CB4X4
1110 0 : const MV mv = mi->mbmi.mv[ref].as_mv;
1111 : #else
1112 : const MV mv =
1113 : #if CONFIG_MOTION_VAR
1114 : (mi->mbmi.sb_type < BLOCK_8X8 && !build_for_obmc)
1115 : ?
1116 : #else
1117 : mi->mbmi.sb_type < BLOCK_8X8 ?
1118 : #endif
1119 : average_split_mvs(pd, mi, ref, block)
1120 : : mi->mbmi.mv[ref].as_mv;
1121 : #endif
1122 :
1123 : // TODO(jkoleszar): This clamping is done in the incorrect place for the
1124 : // scaling case. It needs to be done on the scaled MV, not the pre-scaling
1125 : // MV. Note however that it performs the subsampling aware scaling so
1126 : // that the result is always q4.
1127 : // mv_precision precision is MV_PRECISION_Q4.
1128 0 : const MV mv_q4 = clamp_mv_to_umv_border_sb(
1129 : xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y);
1130 :
1131 0 : const int is_scaled = av1_is_scaled(sf);
1132 0 : if (is_scaled) {
1133 0 : pre[ref] =
1134 0 : pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
1135 0 : scaled_mv[ref] = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
1136 0 : subpel_params[ref].xs = sf->x_step_q4;
1137 0 : subpel_params[ref].ys = sf->y_step_q4;
1138 : } else {
1139 0 : pre[ref] = pre_buf->buf + (y * pre_buf->stride + x);
1140 0 : scaled_mv[ref].row = mv_q4.row;
1141 0 : scaled_mv[ref].col = mv_q4.col;
1142 0 : subpel_params[ref].xs = 16;
1143 0 : subpel_params[ref].ys = 16;
1144 : }
1145 :
1146 0 : subpel_params[ref].subpel_x = scaled_mv[ref].col & SUBPEL_MASK;
1147 0 : subpel_params[ref].subpel_y = scaled_mv[ref].row & SUBPEL_MASK;
1148 0 : pre[ref] += (scaled_mv[ref].row >> SUBPEL_BITS) * pre_buf->stride +
1149 0 : (scaled_mv[ref].col >> SUBPEL_BITS);
1150 : }
1151 :
1152 : #if CONFIG_CONVOLVE_ROUND
1153 : ConvolveParams conv_params =
1154 : get_conv_params_no_round(ref, plane, tmp_dst, MAX_SB_SIZE);
1155 : #else
1156 0 : ConvolveParams conv_params = get_conv_params(ref, plane);
1157 : #endif // CONFIG_CONVOLVE_ROUND
1158 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
1159 : #if CONFIG_INTRABC
1160 : const struct scale_factors *const sf =
1161 : is_intrabc ? &xd->sf_identity : &xd->block_refs[ref]->sf;
1162 : struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
1163 : #else
1164 0 : const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
1165 0 : struct buf_2d *const pre_buf = &pd->pre[ref];
1166 : #endif // CONFIG_INTRABC
1167 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1168 : WarpTypesAllowed warp_types;
1169 : #if CONFIG_GLOBAL_MOTION
1170 0 : warp_types.global_warp_allowed = is_global[ref];
1171 : #endif // CONFIG_GLOBAL_MOTION
1172 : #if CONFIG_WARPED_MOTION
1173 0 : warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL;
1174 : #endif // CONFIG_WARPED_MOTION
1175 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1176 0 : conv_params.ref = ref;
1177 : #if CONFIG_EXT_INTER
1178 0 : if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_type))
1179 0 : av1_make_masked_inter_predictor(
1180 0 : pre[ref], pre_buf->stride, dst, dst_buf->stride,
1181 : subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h,
1182 0 : mi->mbmi.interp_filter, subpel_params[ref].xs,
1183 : subpel_params[ref].ys,
1184 : #if CONFIG_SUPERTX
1185 : wedge_offset_x, wedge_offset_y,
1186 : #endif // CONFIG_SUPERTX
1187 : plane,
1188 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1189 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
1190 0 : (mi_y >> pd->subsampling_y) + y, ref,
1191 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1192 : xd);
1193 : else
1194 : #endif // CONFIG_EXT_INTER
1195 0 : av1_make_inter_predictor(
1196 0 : pre[ref], pre_buf->stride, dst, dst_buf->stride,
1197 : subpel_params[ref].subpel_x, subpel_params[ref].subpel_y, sf, w, h,
1198 0 : &conv_params, mi->mbmi.interp_filter,
1199 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1200 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
1201 0 : (mi_y >> pd->subsampling_y) + y, plane, ref,
1202 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1203 : #if CONFIG_MOTION_VAR
1204 : mi_col_offset, mi_row_offset,
1205 : #endif
1206 : subpel_params[ref].xs, subpel_params[ref].ys, xd);
1207 : }
1208 :
1209 : #if CONFIG_CONVOLVE_ROUND
1210 : // TODO(angiebird): This part needs optimization
1211 : if (conv_params.do_post_rounding) {
1212 : #if CONFIG_HIGHBITDEPTH
1213 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1214 : av1_highbd_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride,
1215 : w, h, FILTER_BITS * 2 + is_compound -
1216 : conv_params.round_0 -
1217 : conv_params.round_1,
1218 : xd->bd);
1219 : else
1220 : #endif // CONFIG_HIGHBITDEPTH
1221 : av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
1222 : FILTER_BITS * 2 + is_compound -
1223 : conv_params.round_0 - conv_params.round_1);
1224 : }
1225 : #endif // CONFIG_CONVOLVE_ROUND
1226 : }
1227 : }
1228 :
1229 0 : void av1_build_inter_predictor_sub8x8(const AV1_COMMON *cm, MACROBLOCKD *xd,
1230 : int plane, int i, int ir, int ic,
1231 : int mi_row, int mi_col) {
1232 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
1233 0 : MODE_INFO *const mi = xd->mi[0];
1234 0 : const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
1235 0 : const int width = block_size_wide[plane_bsize];
1236 0 : const int height = block_size_high[plane_bsize];
1237 0 : uint8_t *const dst = &pd->dst.buf[(ir * pd->dst.stride + ic) << 2];
1238 : int ref;
1239 0 : const int is_compound = has_second_ref(&mi->mbmi);
1240 : (void)cm;
1241 :
1242 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1243 : WarpTypesAllowed warp_types;
1244 0 : const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
1245 0 : const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
1246 : #if CONFIG_GLOBAL_MOTION
1247 : int is_global[2];
1248 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
1249 0 : WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
1250 0 : is_global[ref] = is_global_mv_block(mi, i, wm->wmtype);
1251 : }
1252 : #endif // CONFIG_GLOBAL_MOTION
1253 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1254 :
1255 0 : for (ref = 0; ref < 1 + is_compound; ++ref) {
1256 0 : ConvolveParams conv_params = get_conv_params(ref, plane);
1257 0 : const uint8_t *pre =
1258 0 : &pd->pre[ref].buf[(ir * pd->pre[ref].stride + ic) << 2];
1259 : #if CONFIG_GLOBAL_MOTION
1260 0 : warp_types.global_warp_allowed = is_global[ref];
1261 : #endif // CONFIG_GLOBAL_MOTION
1262 : #if CONFIG_WARPED_MOTION
1263 0 : warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL;
1264 : #endif // CONFIG_WARPED_MOTION
1265 :
1266 : #if CONFIG_HIGHBITDEPTH
1267 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1268 0 : av1_highbd_build_inter_predictor(
1269 : pre, pd->pre[ref].stride, dst, pd->dst.stride,
1270 0 : &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height,
1271 0 : ref, mi->mbmi.interp_filter,
1272 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1273 : &warp_types, p_col, p_row,
1274 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1275 0 : plane, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
1276 0 : mi_row * MI_SIZE + 4 * ir, xd);
1277 : else
1278 : #endif // CONFIG_HIGHBITDEPTH
1279 0 : av1_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride,
1280 0 : &mi->bmi[i].as_mv[ref].as_mv,
1281 0 : &xd->block_refs[ref]->sf, width, height,
1282 0 : &conv_params, mi->mbmi.interp_filter,
1283 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1284 : &warp_types, p_col, p_row, plane, ref,
1285 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
1286 0 : MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * ic,
1287 0 : mi_row * MI_SIZE + 4 * ir, xd);
1288 : }
1289 0 : }
1290 :
1291 0 : static void build_inter_predictors_for_planes(const AV1_COMMON *cm,
1292 : MACROBLOCKD *xd, BLOCK_SIZE bsize,
1293 : int mi_row, int mi_col,
1294 : int plane_from, int plane_to) {
1295 : int plane;
1296 0 : const int mi_x = mi_col * MI_SIZE;
1297 0 : const int mi_y = mi_row * MI_SIZE;
1298 : #if CONFIG_CB4X4
1299 0 : const int unify_bsize = 1;
1300 : #else
1301 : const int unify_bsize = 0;
1302 : #endif
1303 0 : for (plane = plane_from; plane <= plane_to; ++plane) {
1304 0 : const struct macroblockd_plane *pd = &xd->plane[plane];
1305 0 : const int bw = pd->width;
1306 0 : const int bh = pd->height;
1307 :
1308 : #if CONFIG_CB4X4
1309 0 : if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
1310 : pd->subsampling_y))
1311 0 : continue;
1312 : #endif
1313 :
1314 0 : if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !unify_bsize) {
1315 0 : const PARTITION_TYPE bp = bsize - xd->mi[0]->mbmi.sb_type;
1316 0 : const int have_vsplit = bp != PARTITION_HORZ;
1317 0 : const int have_hsplit = bp != PARTITION_VERT;
1318 0 : const int num_4x4_w = 2 >> ((!have_vsplit) | pd->subsampling_x);
1319 0 : const int num_4x4_h = 2 >> ((!have_hsplit) | pd->subsampling_y);
1320 0 : const int pw = 8 >> (have_vsplit | pd->subsampling_x);
1321 0 : const int ph = 8 >> (have_hsplit | pd->subsampling_y);
1322 : int x, y;
1323 0 : assert(bp != PARTITION_NONE && bp < PARTITION_TYPES);
1324 0 : assert(bsize == BLOCK_8X8);
1325 0 : assert(pw * num_4x4_w == bw && ph * num_4x4_h == bh);
1326 0 : for (y = 0; y < num_4x4_h; ++y)
1327 0 : for (x = 0; x < num_4x4_w; ++x)
1328 0 : build_inter_predictors(cm, xd, plane,
1329 : #if CONFIG_MOTION_VAR
1330 : 0, 0,
1331 : #endif // CONFIG_MOTION_VAR
1332 0 : y * 2 + x, bw, bh, 4 * x, 4 * y, pw, ph,
1333 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
1334 : 0, 0,
1335 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
1336 : mi_x, mi_y);
1337 : } else {
1338 0 : build_inter_predictors(cm, xd, plane,
1339 : #if CONFIG_MOTION_VAR
1340 : 0, 0,
1341 : #endif // CONFIG_MOTION_VAR
1342 : 0, bw, bh, 0, 0, bw, bh,
1343 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
1344 : 0, 0,
1345 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
1346 : mi_x, mi_y);
1347 : }
1348 : }
1349 0 : }
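/* Partition-arithmetic note (illustrative): in the !unify_bsize branch the
 * assert pins bsize to BLOCK_8X8, so bp = bsize - xd->mi[0]->mbmi.sb_type
 * maps the sub-8x8 block size directly onto a PARTITION_TYPE:
 *   BLOCK_8X4 -> PARTITION_HORZ, BLOCK_4X8 -> PARTITION_VERT,
 *   BLOCK_4X4 -> PARTITION_SPLIT,
 * from which the num_4x4_w/num_4x4_h unit counts and the pw/ph predictor
 * sizes follow.
 */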
1350 :
1351 0 : void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd,
1352 : int mi_row, int mi_col, BUFFER_SET *ctx,
1353 : BLOCK_SIZE bsize) {
1354 0 : build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0, 0);
1355 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
1356 0 : if (is_interintra_pred(&xd->mi[0]->mbmi)) {
1357 0 : BUFFER_SET default_ctx = { { xd->plane[0].dst.buf, NULL, NULL },
1358 0 : { xd->plane[0].dst.stride, 0, 0 } };
1359 0 : if (!ctx) ctx = &default_ctx;
1360 0 : av1_build_interintra_predictors_sby(xd, xd->plane[0].dst.buf,
1361 : xd->plane[0].dst.stride, ctx, bsize);
1362 : }
1363 : #else
1364 : (void)ctx;
1365 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
1366 0 : }
1367 :
1368 0 : void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
1369 : int mi_row, int mi_col, BUFFER_SET *ctx,
1370 : BLOCK_SIZE bsize) {
1371 0 : build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 1,
1372 : MAX_MB_PLANE - 1);
1373 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
1374 0 : if (is_interintra_pred(&xd->mi[0]->mbmi)) {
1375 0 : BUFFER_SET default_ctx = {
1376 0 : { NULL, xd->plane[1].dst.buf, xd->plane[2].dst.buf },
1377 0 : { 0, xd->plane[1].dst.stride, xd->plane[2].dst.stride }
1378 : };
1379 0 : if (!ctx) ctx = &default_ctx;
1380 0 : av1_build_interintra_predictors_sbuv(
1381 : xd, xd->plane[1].dst.buf, xd->plane[2].dst.buf, xd->plane[1].dst.stride,
1382 : xd->plane[2].dst.stride, ctx, bsize);
1383 : }
1384 : #else
1385 : (void)ctx;
1386 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
1387 0 : }
1388 :
1389 : // TODO(afergs): Check if ctx can be made constant
1390 0 : void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
1391 : int mi_row, int mi_col, BUFFER_SET *ctx,
1392 : BLOCK_SIZE bsize) {
1393 0 : build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, 0,
1394 : MAX_MB_PLANE - 1);
1395 : #if CONFIG_EXT_INTER && CONFIG_INTERINTRA
1396 0 : if (is_interintra_pred(&xd->mi[0]->mbmi)) {
1397 0 : BUFFER_SET default_ctx = {
1398 0 : { xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf },
1399 0 : { xd->plane[0].dst.stride, xd->plane[1].dst.stride,
1400 0 : xd->plane[2].dst.stride }
1401 : };
1402 0 : if (!ctx) ctx = &default_ctx;
1403 0 : av1_build_interintra_predictors(
1404 : xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
1405 : xd->plane[0].dst.stride, xd->plane[1].dst.stride,
1406 : xd->plane[2].dst.stride, ctx, bsize);
1407 : }
1408 : #else
1409 : (void)ctx;
1410 : #endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA
1411 0 : }
1412 :
1413 0 : void av1_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE],
1414 : BLOCK_SIZE bsize, const YV12_BUFFER_CONFIG *src,
1415 : int mi_row, int mi_col) {
1416 0 : uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
1417 0 : src->v_buffer };
1418 0 : const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
1419 0 : src->uv_crop_width };
1420 0 : const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
1421 0 : src->uv_crop_height };
1422 0 : const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
1423 0 : src->uv_stride };
1424 : int i;
1425 :
1426 0 : for (i = 0; i < MAX_MB_PLANE; ++i) {
1427 0 : struct macroblockd_plane *const pd = &planes[i];
1428 0 : setup_pred_plane(&pd->dst, bsize, buffers[i], widths[i], heights[i],
1429 : strides[i], mi_row, mi_col, NULL, pd->subsampling_x,
1430 : pd->subsampling_y);
1431 : }
1432 0 : }
1433 :
1434 0 : void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
1435 : const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
1436 : const struct scale_factors *sf) {
1437 0 : if (src != NULL) {
1438 : int i;
1439 0 : uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer,
1440 0 : src->v_buffer };
1441 0 : const int widths[MAX_MB_PLANE] = { src->y_crop_width, src->uv_crop_width,
1442 0 : src->uv_crop_width };
1443 0 : const int heights[MAX_MB_PLANE] = { src->y_crop_height, src->uv_crop_height,
1444 0 : src->uv_crop_height };
1445 0 : const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride,
1446 0 : src->uv_stride };
1447 0 : for (i = 0; i < MAX_MB_PLANE; ++i) {
1448 0 : struct macroblockd_plane *const pd = &xd->plane[i];
1449 0 : setup_pred_plane(&pd->pre[idx], xd->mi[0]->mbmi.sb_type, buffers[i],
1450 : widths[i], heights[i], strides[i], mi_row, mi_col, sf,
1451 : pd->subsampling_x, pd->subsampling_y);
1452 : }
1453 : }
1454 0 : }
1455 :
1456 : #if CONFIG_SUPERTX
1457 : #if CONFIG_CB4X4
1458 : static const uint8_t mask_4[4] = { 64, 52, 12, 0 };
1459 : static const uint8_t mask_4_uv[4] = { 64, 52, 12, 0 };
1460 : #endif // CONFIG_CB4X4
1461 : static const uint8_t mask_8[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };
1462 :
1463 : static const uint8_t mask_16[16] = { 63, 62, 60, 58, 55, 50, 43, 36,
1464 : 28, 21, 14, 9, 6, 4, 2, 1 };
1465 :
1466 : static const uint8_t mask_32[32] = { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63,
1467 : 61, 57, 52, 45, 36, 28, 19, 12, 7, 3, 1,
1468 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1469 :
1470 : static const uint8_t mask_8_uv[8] = { 64, 64, 62, 52, 12, 2, 0, 0 };
1471 :
1472 : static const uint8_t mask_16_uv[16] = { 64, 64, 64, 64, 61, 53, 45, 36,
1473 : 28, 19, 11, 3, 0, 0, 0, 0 };
1474 :
1475 : static const uint8_t mask_32_uv[32] = { 64, 64, 64, 64, 64, 64, 64, 64,
1476 : 64, 64, 64, 64, 60, 54, 46, 36,
1477 : 28, 18, 10, 4, 0, 0, 0, 0,
1478 : 0, 0, 0, 0, 0, 0, 0, 0 };
1479 :
1480 : static const uint8_t *get_supertx_mask(int length, int plane) {
1481 : switch (length) {
1482 : #if CONFIG_CB4X4
1483 : case 4: return plane ? mask_4_uv : mask_4;
1484 : #endif // CONFIG_CB4X4
1485 : case 8: return plane ? mask_8_uv : mask_8;
1486 : case 16: return plane ? mask_16_uv : mask_16;
1487 : case 32: return plane ? mask_32_uv : mask_32;
1488 : default: assert(0);
1489 : }
1490 : return NULL;
1491 : }
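
The 0..64 entries in these supertx mask tables are alpha weights consumed by the aom_blend_a64_{v,h}mask calls below. As a minimal sketch of the per-pixel arithmetic (my reading of the AOM_BLEND_A64 macro in aom_dsp/blend.h; the helper name here is illustrative, not part of the library):

    #include <stdint.h>

    // Weighted average on a 6-bit alpha scale: m weights v0, (64 - m) weights
    // v1, and the +32 rounds to nearest before the shift.
    static uint8_t blend_a64(int m, uint8_t v0, uint8_t v1) {
      return (uint8_t)((m * v0 + (64 - m) * v1 + 32) >> 6);
    }

For example, blend_a64(52, 100, 20) = (5200 + 240 + 32) >> 6 = 85, i.e. a mask value of 52 moves the result 52/64 of the way from the second source toward the first.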
1492 :
1493 : void av1_build_masked_inter_predictor_complex(
1494 : MACROBLOCKD *xd, uint8_t *dst, int dst_stride, const uint8_t *pre,
1495 : int pre_stride, int mi_row, int mi_col, int mi_row_ori, int mi_col_ori,
1496 : BLOCK_SIZE bsize, BLOCK_SIZE top_bsize, PARTITION_TYPE partition,
1497 : int plane) {
1498 : const struct macroblockd_plane *pd = &xd->plane[plane];
1499 : const int ssx = pd->subsampling_x;
1500 : const int ssy = pd->subsampling_y;
1501 : const int top_w = block_size_wide[top_bsize] >> ssx;
1502 : const int top_h = block_size_high[top_bsize] >> ssy;
1503 : const int w = block_size_wide[bsize] >> ssx;
1504 : const int h = block_size_high[bsize] >> ssy;
1505 : const int w_offset = ((mi_col - mi_col_ori) * MI_SIZE) >> ssx;
1506 : const int h_offset = ((mi_row - mi_row_ori) * MI_SIZE) >> ssy;
1507 :
1508 : int w_remain, h_remain;
1509 :
1510 : #if CONFIG_HIGHBITDEPTH
1511 : const int is_hdb = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
1512 : #endif // CONFIG_HIGHBITDEPTH
1513 :
1514 : assert(bsize <= BLOCK_32X32);
1515 : assert(IMPLIES(plane == 0, ssx == 0));
1516 : assert(IMPLIES(plane == 0, ssy == 0));
1517 :
1518 : switch (partition) {
1519 : case PARTITION_HORZ: {
1520 : const uint8_t *const mask = get_supertx_mask(h, ssy);
1521 :
1522 : w_remain = top_w;
1523 : h_remain = top_h - h_offset - h;
1524 : dst += h_offset * dst_stride;
1525 : pre += h_offset * pre_stride;
1526 :
1527 : #if CONFIG_HIGHBITDEPTH
1528 : if (is_hdb)
1529 : aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre,
1530 : pre_stride, mask, h, top_w, xd->bd);
1531 : else
1532 : #endif // CONFIG_HIGHBITDEPTH
1533 : aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,
1534 : mask, h, top_w);
1535 :
1536 : dst += h * dst_stride;
1537 : pre += h * pre_stride;
1538 : break;
1539 : }
1540 : case PARTITION_VERT: {
1541 : const uint8_t *const mask = get_supertx_mask(w, ssx);
1542 :
1543 : w_remain = top_w - w_offset - w;
1544 : h_remain = top_h;
1545 : dst += w_offset;
1546 : pre += w_offset;
1547 :
1548 : #if CONFIG_HIGHBITDEPTH
1549 : if (is_hdb)
1550 : aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre,
1551 : pre_stride, mask, top_h, w, xd->bd);
1552 : else
1553 : #endif // CONFIG_HIGHBITDEPTH
1554 : aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, pre, pre_stride,
1555 : mask, top_h, w);
1556 :
1557 : dst += w;
1558 : pre += w;
1559 : break;
1560 : }
1561 : default: {
1562 : assert(0);
1563 : return;
1564 : }
1565 : }
1566 :
1567 : if (w_remain == 0 || h_remain == 0) {
1568 : return;
1569 : }
1570 :
1571 : #if CONFIG_HIGHBITDEPTH
1572 : if (is_hdb) {
1573 : dst = (uint8_t *)CONVERT_TO_SHORTPTR(dst);
1574 : pre = (const uint8_t *)CONVERT_TO_SHORTPTR(pre);
1575 : dst_stride *= 2;
1576 : pre_stride *= 2;
1577 : w_remain *= 2;
1578 : }
1579 : #endif // CONFIG_HIGHBITDEPTH
1580 :
1581 : do {
1582 : memcpy(dst, pre, w_remain * sizeof(uint8_t));
1583 : dst += dst_stride;
1584 : pre += pre_stride;
1585 : } while (--h_remain);
1586 : }
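
For PARTITION_HORZ this routine blends one mask value per row (vmask) across the full top_w, then memcpy's whatever rows remain below the blended band straight from pre; PARTITION_VERT is the transpose, with one mask value per column (hmask). A self-contained reference loop for the vertical-mask case, under the blend_a64 semantics sketched earlier (reference-only; the real aom_blend_a64_vmask is SIMD-optimized, and here src0 is dst itself, matching the call pattern above):

    #include <stdint.h>

    // mask[r] weights the existing dst row; (64 - mask[r]) weights src1.
    static void blend_a64_vmask_ref(uint8_t *dst, int dst_stride,
                                    const uint8_t *src1, int src1_stride,
                                    const uint8_t *mask, int h, int w) {
      for (int r = 0; r < h; ++r) {
        for (int c = 0; c < w; ++c) {
          const uint8_t d = dst[r * dst_stride + c];
          const uint8_t s = src1[r * src1_stride + c];
          dst[r * dst_stride + c] =
              (uint8_t)((mask[r] * d + (64 - mask[r]) * s + 32) >> 6);
        }
      }
    }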
1587 :
1588 : void av1_build_inter_predictors_sb_sub8x8_extend(const AV1_COMMON *cm,
1589 : MACROBLOCKD *xd,
1590 : #if CONFIG_EXT_INTER
1591 : int mi_row_ori, int mi_col_ori,
1592 : #endif // CONFIG_EXT_INTER
1593 : int mi_row, int mi_col,
1594 : BLOCK_SIZE bsize, int block) {
1595 :   // Prediction function used in supertx:
1596 :   // Use the mv of the current block (which is smaller than 8x8) to get the
1597 :   // prediction of a block located at (mi_row, mi_col) with size bsize;
1598 :   // bsize can be larger than 8x8.
1599 :   // block (0-3): the sub8x8 location of the current block.
1600 : int plane;
1601 : const int mi_x = mi_col * MI_SIZE;
1602 : const int mi_y = mi_row * MI_SIZE;
1603 : #if CONFIG_EXT_INTER
1604 : const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
1605 : const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
1606 : #endif // CONFIG_EXT_INTER
1607 :
1608 : // For sub8x8 uv:
1609 :   // Skip uv prediction in supertx except for the first block (block == 0).
1610 : int max_plane = block ? 1 : MAX_MB_PLANE;
1611 :
1612 : for (plane = 0; plane < max_plane; plane++) {
1613 : const BLOCK_SIZE plane_bsize =
1614 : get_plane_block_size(bsize, &xd->plane[plane]);
1615 : const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
1616 : const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
1617 : const int bw = 4 * num_4x4_w;
1618 : const int bh = 4 * num_4x4_h;
1619 :
1620 : build_inter_predictors(cm, xd, plane,
1621 : #if CONFIG_MOTION_VAR
1622 : 0, 0,
1623 : #endif // CONFIG_MOTION_VAR
1624 : block, bw, bh, 0, 0, bw, bh,
1625 : #if CONFIG_EXT_INTER
1626 : wedge_offset_x, wedge_offset_y,
1627 : #endif // CONFIG_EXT_INTER
1628 : mi_x, mi_y);
1629 : }
1630 : #if CONFIG_EXT_INTER
1631 : if (is_interintra_pred(&xd->mi[0]->mbmi)) {
1632 : BUFFER_SET ctx = { { xd->plane[0].dst.buf, xd->plane[1].dst.buf,
1633 : xd->plane[2].dst.buf },
1634 : { xd->plane[0].dst.stride, xd->plane[1].dst.stride,
1635 : xd->plane[2].dst.stride } };
1636 : av1_build_interintra_predictors(
1637 : xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
1638 : xd->plane[0].dst.stride, xd->plane[1].dst.stride,
1639 : xd->plane[2].dst.stride, &ctx, bsize);
1640 : }
1641 : #endif // CONFIG_EXT_INTER
1642 : }
1643 :
1644 : void av1_build_inter_predictors_sb_extend(const AV1_COMMON *cm, MACROBLOCKD *xd,
1645 : #if CONFIG_EXT_INTER
1646 : int mi_row_ori, int mi_col_ori,
1647 : #endif // CONFIG_EXT_INTER
1648 : int mi_row, int mi_col,
1649 : BLOCK_SIZE bsize) {
1650 : int plane;
1651 : const int mi_x = mi_col * MI_SIZE;
1652 : const int mi_y = mi_row * MI_SIZE;
1653 : #if CONFIG_EXT_INTER
1654 : const int wedge_offset_x = (mi_col_ori - mi_col) * MI_SIZE;
1655 : const int wedge_offset_y = (mi_row_ori - mi_row) * MI_SIZE;
1656 : #endif // CONFIG_EXT_INTER
1657 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
1658 : const BLOCK_SIZE plane_bsize =
1659 : get_plane_block_size(bsize, &xd->plane[plane]);
1660 : const int bw = block_size_wide[plane_bsize];
1661 : const int bh = block_size_high[plane_bsize];
1662 :
1663 : build_inter_predictors(cm, xd, plane,
1664 : #if CONFIG_MOTION_VAR
1665 : 0, 0,
1666 : #endif // CONFIG_MOTION_VAR
1667 : 0, bw, bh, 0, 0, bw, bh,
1668 : #if CONFIG_EXT_INTER
1669 : wedge_offset_x, wedge_offset_y,
1670 : #endif // CONFIG_EXT_INTER
1671 : mi_x, mi_y);
1672 : }
1673 : }
1674 : #endif // CONFIG_SUPERTX
1675 :
1676 : #if CONFIG_MOTION_VAR
1677 : // obmc_mask_N[overlap_position]
1678 : static const uint8_t obmc_mask_1[1] = { 64 };
1679 :
1680 : static const uint8_t obmc_mask_2[2] = { 45, 64 };
1681 :
1682 : static const uint8_t obmc_mask_4[4] = { 39, 50, 59, 64 };
1683 :
1684 : static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };
1685 :
1686 : static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
1687 : 56, 58, 60, 61, 64, 64, 64, 64 };
1688 :
1689 : static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
1690 : 45, 47, 48, 50, 51, 52, 53, 55,
1691 : 56, 57, 58, 59, 60, 60, 61, 62,
1692 : 64, 64, 64, 64, 64, 64, 64, 64 };
1693 :
1694 : #if CONFIG_EXT_PARTITION
1695 : static const uint8_t obmc_mask_64[64] = {
1696 : 33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
1697 : 45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
1698 : 56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
1699 : 62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
1700 : };
1701 : #endif // CONFIG_EXT_PARTITION
1702 :
1703 0 : const uint8_t *av1_get_obmc_mask(int length) {
1704 0 : switch (length) {
1705 0 : case 1: return obmc_mask_1;
1706 0 : case 2: return obmc_mask_2;
1707 0 : case 4: return obmc_mask_4;
1708 0 : case 8: return obmc_mask_8;
1709 0 : case 16: return obmc_mask_16;
1710 0 : case 32: return obmc_mask_32;
1711 : #if CONFIG_EXT_PARTITION
1712 : case 64: return obmc_mask_64;
1713 : #endif // CONFIG_EXT_PARTITION
1714 0 : default: assert(0); return NULL;
1715 : }
1716 : }
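
The obmc masks ramp from mostly-neighbor toward the current prediction as the overlap position moves away from the shared edge, saturating at 64 (keep the current prediction unchanged). A toy check of that ramp for a 4-row overlap, using the same rounded 6-bit blend sketched earlier (pixel values here are made up for illustration):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      static const uint8_t mask4[4] = { 39, 50, 59, 64 };  // obmc_mask_4
      const int cur = 120, nbr = 40;  // current vs. above-neighbor pixel value
      for (int r = 0; r < 4; ++r)
        printf("row %d -> %d\n", r,
               (mask4[r] * cur + (64 - mask4[r]) * nbr + 32) >> 6);
      return 0;  // prints 89, 103, 114, 120
    }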
1717 :
1718 : #if CONFIG_NCOBMC
1719 : // obmc_mask_flipN[overlap_position]
1720 : static const uint8_t obmc_mask_flip1[1] = { 55 };
1721 :
1722 : static const uint8_t obmc_mask_flip2[2] = { 62, 45 };
1723 :
1724 : static const uint8_t obmc_mask_flip4[4] = { 64, 59, 50, 39 };
1725 :
1726 : static const uint8_t obmc_mask_flip8[8] = { 64, 63, 61, 57, 53, 48, 42, 36 };
1727 :
1728 : static const uint8_t obmc_mask_flip16[16] = { 64, 64, 64, 63, 61, 60, 58, 56,
1729 : 54, 52, 49, 46, 43, 40, 37, 34 };
1730 :
1731 : static const uint8_t obmc_mask_flip32[32] = { 64, 64, 64, 64, 64, 63, 63, 62,
1732 : 62, 61, 60, 60, 59, 58, 57, 56,
1733 : 55, 53, 52, 51, 50, 48, 47, 45,
1734 : 44, 43, 41, 40, 38, 36, 35, 33 };
1735 :
1736 : #if CONFIG_EXT_PARTITION
1737 : static const uint8_t obmc_mask_flip64[64] = {
1738 : 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 63, 63, 63, 63, 62, 62,
1739 : 62, 62, 62, 61, 60, 60, 60, 60, 60, 59, 58, 58, 57, 57, 56, 56,
1740 : 56, 55, 54, 53, 52, 52, 51, 51, 51, 50, 49, 48, 47, 47, 46, 45,
1741 : 44, 44, 44, 43, 42, 41, 40, 40, 39, 38, 37, 36, 35, 35, 34, 33,
1742 : };
1743 : #endif // CONFIG_EXT_PARTITION
1744 :
1745 : const uint8_t *av1_get_obmc_mask_flipped(int length) {
1746 : switch (length) {
1747 : case 1: return obmc_mask_flip1;
1748 : case 2: return obmc_mask_flip2;
1749 : case 4: return obmc_mask_flip4;
1750 : case 8: return obmc_mask_flip8;
1751 : case 16: return obmc_mask_flip16;
1752 : case 32: return obmc_mask_flip32;
1753 : #if CONFIG_EXT_PARTITION
1754 : case 64: return obmc_mask_flip64;
1755 : #endif // CONFIG_EXT_PARTITION
1756 : default: assert(0); return NULL;
1757 : }
1758 : }
1759 : #endif // CONFIG_NCOBMC
1760 :
1761 0 : void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
1762 : int mi_row, int mi_col) {
1763 : int i, mi_step;
1764 0 : MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1765 :
1766 0 : xd->mi[0]->mbmi.overlappable_neighbors[0] = 0;
1767 0 : xd->mi[0]->mbmi.overlappable_neighbors[1] = 0;
1768 :
1769 0 : if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return;
1770 :
1771 0 : if (xd->up_available) {
1772 0 : const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
1773 0 : for (i = 0; i < ilimit; i += mi_step) {
1774 0 : int mi_row_offset = -1;
1775 0 : int mi_col_offset = i;
1776 0 : MODE_INFO *above_mi =
1777 0 : xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
1778 0 : MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
1779 : #if CONFIG_CHROMA_SUB8X8
1780 0 : if (above_mbmi->sb_type < BLOCK_8X8) {
1781 0 : ++mi_col_offset;
1782 0 : above_mbmi =
1783 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1784 : }
1785 : #endif
1786 0 : BLOCK_SIZE above_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8);
1787 0 : mi_step = AOMMIN(xd->n8_w, mi_size_wide[above_bsize]);
1788 :
1789 0 : if (is_neighbor_overlappable(above_mbmi))
1790 0 : xd->mi[0]->mbmi.overlappable_neighbors[0]++;
1791 : }
1792 : }
1793 :
1794 0 : if (xd->left_available) {
1795 0 : const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
1796 0 : for (i = 0; i < ilimit; i += mi_step) {
1797 0 : int mi_row_offset = i;
1798 0 : int mi_col_offset = -1;
1799 0 : MODE_INFO *left_mi =
1800 0 : xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
1801 0 : MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
1802 :
1803 : #if CONFIG_CHROMA_SUB8X8
1804 0 : if (left_mbmi->sb_type < BLOCK_8X8) {
1805 0 : ++mi_row_offset;
1806 0 : left_mbmi =
1807 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1808 : }
1809 : #endif
1810 0 : BLOCK_SIZE left_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
1811 0 : mi_step = AOMMIN(xd->n8_h, mi_size_high[left_bsize]);
1812 :
1813 0 : if (is_neighbor_overlappable(left_mbmi))
1814 0 : xd->mi[0]->mbmi.overlappable_neighbors[1]++;
1815 : }
1816 : }
1817 : }
1818 :
1819 : // HW does not support < 4x4 prediction. To limit the bandwidth requirement,
1820 : // small blocks only blend with neighbors from one side. If the block size of
1821 : // the current plane is 4x4 or 8x4, the above neighbor (dir = 0) is skipped; if
1822 : // it is 4x8, the left neighbor (dir = 1) is skipped.
1823 : #define DISABLE_CHROMA_U8X8_OBMC 0 // 0: one-sided obmc; 1: disable
1824 :
1825 0 : int skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize, const struct macroblockd_plane *pd,
1826 : int dir) {
1827 0 : assert(is_motion_variation_allowed_bsize(bsize));
1828 :
1829 0 : BLOCK_SIZE bsize_plane =
1830 0 : ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
1831 : #if CONFIG_CB4X4
1832 0 : if (bsize_plane < BLOCK_4X4) return 1;
1833 : #endif
1834 0 : switch (bsize_plane) {
1835 : #if DISABLE_CHROMA_U8X8_OBMC
1836 : case BLOCK_4X4:
1837 : case BLOCK_8X4:
1838 : case BLOCK_4X8: return 1; break;
1839 : #else
1840 :     case BLOCK_4X4:
1841 :     case BLOCK_8X4: return dir == 0; break;
1842 0 :     case BLOCK_4X8: return dir == 1; break;
1843 : #endif
1844 0 : default: return 0;
1845 : }
1846 : }
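
As a concrete case: for a BLOCK_8X8 luma block with 4:2:0 subsampling, ss_size_lookup maps the chroma planes to BLOCK_4X4, so dir == 0 returns 1 and the chroma planes blend only with the left neighbor, while the luma plane (BLOCK_8X8) falls through to the default and blends on both sides.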
1847 :
1848 : // This function combines the motion-compensated predictions generated by the
1849 : // top/left neighboring blocks' inter predictors with the regular inter
1850 : // prediction. We assume the original prediction (bmc) is stored in
1851 : // xd->plane[].dst.buf.
1852 0 : void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
1853 : int mi_row, int mi_col,
1854 : uint8_t *above[MAX_MB_PLANE],
1855 : int above_stride[MAX_MB_PLANE],
1856 : uint8_t *left[MAX_MB_PLANE],
1857 : int left_stride[MAX_MB_PLANE]) {
1858 0 : const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
1859 : int plane, i;
1860 : #if CONFIG_HIGHBITDEPTH
1861 0 : const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
1862 : #endif // CONFIG_HIGHBITDEPTH
1863 :
1864 : // handle above row
1865 0 : if (xd->up_available) {
1866 0 : const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
1867 0 : const int miw = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
1868 0 : const int mi_row_offset = -1;
1869 0 : const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
1870 0 : int neighbor_count = 0;
1871 :
1872 0 : assert(miw > 0);
1873 :
1874 0 : i = 0;
1875 : do { // for each mi in the above row
1876 0 : int mi_col_offset = i;
1877 0 : MB_MODE_INFO *above_mbmi =
1878 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1879 : #if CONFIG_CHROMA_SUB8X8
1880 0 : if (above_mbmi->sb_type < BLOCK_8X8) {
1881 0 : ++mi_col_offset;
1882 0 : above_mbmi =
1883 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1884 : }
1885 : #endif
1886 :
1887 0 : const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
1888 0 : const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
1889 :
1890 0 : if (is_neighbor_overlappable(above_mbmi)) {
1891 0 : neighbor_count++;
1892 0 : if (neighbor_count > neighbor_limit) break;
1893 0 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
1894 0 : const struct macroblockd_plane *pd = &xd->plane[plane];
1895 0 : const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
1896 0 : const int bh = overlap >> pd->subsampling_y;
1897 :
1898 0 : if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
1899 :
1900 0 : const int dst_stride = pd->dst.stride;
1901 0 : uint8_t *const dst = &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
1902 0 : const int tmp_stride = above_stride[plane];
1903 0 : const uint8_t *const tmp =
1904 0 : &above[plane][(i * MI_SIZE) >> pd->subsampling_x];
1905 0 : const uint8_t *const mask = av1_get_obmc_mask(bh);
1906 :
1907 : #if CONFIG_HIGHBITDEPTH
1908 0 : if (is_hbd)
1909 0 : aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
1910 : tmp_stride, mask, bh, bw, xd->bd);
1911 : else
1912 : #endif // CONFIG_HIGHBITDEPTH
1913 0 : aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
1914 : tmp_stride, mask, bh, bw);
1915 : }
1916 : }
1917 0 : i += mi_step;
1918 0 : } while (i < miw);
1919 : }
1920 :
1921 : // handle left column
1922 0 : if (xd->left_available) {
1923 0 : const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
1924 0 : const int mih = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
1925 0 : const int mi_col_offset = -1;
1926 0 : const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
1927 0 : int neighbor_count = 0;
1928 :
1929 0 : assert(mih > 0);
1930 :
1931 0 : i = 0;
1932 : do { // for each mi in the left column
1933 0 : int mi_row_offset = i;
1934 0 : MB_MODE_INFO *left_mbmi =
1935 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1936 : #if CONFIG_CHROMA_SUB8X8
1937 0 : if (left_mbmi->sb_type < BLOCK_8X8) {
1938 0 : ++mi_row_offset;
1939 0 : left_mbmi =
1940 0 : &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
1941 : }
1942 : #endif
1943 :
1944 0 : const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
1945 0 : const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
1946 :
1947 0 : if (is_neighbor_overlappable(left_mbmi)) {
1948 0 : neighbor_count++;
1949 0 : if (neighbor_count > neighbor_limit) break;
1950 0 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
1951 0 : const struct macroblockd_plane *pd = &xd->plane[plane];
1952 0 : const int bw = overlap >> pd->subsampling_x;
1953 0 : const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
1954 :
1955 0 : if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
1956 :
1957 0 : const int dst_stride = pd->dst.stride;
1958 0 : uint8_t *const dst =
1959 0 : &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
1960 0 : const int tmp_stride = left_stride[plane];
1961 0 : const uint8_t *const tmp =
1962 0 : &left[plane][(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
1963 0 : const uint8_t *const mask = av1_get_obmc_mask(bw);
1964 :
1965 : #if CONFIG_HIGHBITDEPTH
1966 0 : if (is_hbd)
1967 0 : aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
1968 : tmp_stride, mask, bh, bw, xd->bd);
1969 : else
1970 : #endif // CONFIG_HIGHBITDEPTH
1971 0 : aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
1972 : tmp_stride, mask, bh, bw);
1973 : }
1974 : }
1975 0 : i += mi_step;
1976 0 : } while (i < mih);
1977 : }
1978 0 : }
1979 :
1980 0 : void modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
1981 : #if CONFIG_EXT_INTER
1982 0 : if (is_interintra_pred(mbmi)) {
1983 0 : mbmi->ref_frame[1] = NONE_FRAME;
1984 0 : } else if (has_second_ref(mbmi) &&
1985 0 : is_masked_compound_type(mbmi->interinter_compound_type)) {
1986 0 : mbmi->interinter_compound_type = COMPOUND_AVERAGE;
1987 0 : mbmi->ref_frame[1] = NONE_FRAME;
1988 : }
1989 : #endif // CONFIG_EXT_INTER
1990 0 : if (has_second_ref(mbmi)) mbmi->ref_frame[1] = NONE_FRAME;
1991 0 : return;
1992 : }
1993 :
1994 0 : void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
1995 : int mi_row, int mi_col,
1996 : uint8_t *tmp_buf[MAX_MB_PLANE],
1997 : int tmp_width[MAX_MB_PLANE],
1998 : int tmp_height[MAX_MB_PLANE],
1999 : int tmp_stride[MAX_MB_PLANE]) {
2000 0 : const TileInfo *const tile = &xd->tile;
2001 0 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2002 : int i, j, mi_step, ref;
2003 0 : const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
2004 0 : int mb_to_right_edge_base = xd->mb_to_right_edge;
2005 0 : const int neighbor_limit = max_neighbor_obmc[b_width_log2_lookup[bsize]];
2006 0 : int neighbor_count = 0;
2007 :
2008 0 : if (mi_row <= tile->mi_row_start) return;
2009 :
2010 0 : xd->mb_to_bottom_edge += xd->n8_h * 32;
2011 0 : for (i = 0; i < ilimit; i += mi_step) {
2012 0 : int mi_row_offset = -1;
2013 0 : int mi_col_offset = i;
2014 : int mi_x, mi_y, bw, bh;
2015 0 : MODE_INFO *above_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2016 0 : MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
2017 :
2018 : #if CONFIG_CHROMA_SUB8X8
2019 0 : if (above_mbmi->sb_type < BLOCK_8X8) {
2020 0 : ++mi_col_offset;
2021 0 : above_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
2022 : }
2023 : #endif
2024 :
2025 0 : const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
2026 : MB_MODE_INFO backup_mbmi;
2027 :
2028 0 : mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]);
2029 :
2030 0 : if (!is_neighbor_overlappable(above_mbmi)) continue;
2031 :
2032 0 : neighbor_count++;
2033 0 : if (neighbor_count > neighbor_limit) break;
2034 :
2035 0 : backup_mbmi = *above_mbmi;
2036 0 : modify_neighbor_predictor_for_obmc(above_mbmi);
2037 :
2038 0 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2039 0 : struct macroblockd_plane *const pd = &xd->plane[j];
2040 0 : setup_pred_plane(&pd->dst, a_bsize, tmp_buf[j], tmp_width[j],
2041 0 : tmp_height[j], tmp_stride[j], 0, i, NULL,
2042 : pd->subsampling_x, pd->subsampling_y);
2043 : }
2044 0 : for (ref = 0; ref < 1 + has_second_ref(above_mbmi); ++ref) {
2045 0 : const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
2046 0 : const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
2047 :
2048 0 : xd->block_refs[ref] = ref_buf;
2049 0 : if ((!av1_is_valid_scale(&ref_buf->sf)))
2050 0 : aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
2051 : "Reference frame has invalid dimensions");
2052 0 : av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col + i,
2053 : &ref_buf->sf);
2054 : }
2055 :
2056 0 : xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
2057 0 : xd->mb_to_right_edge =
2058 0 : mb_to_right_edge_base + (xd->n8_w - i - mi_step) * 64;
2059 0 : mi_x = (mi_col + i) << MI_SIZE_LOG2;
2060 0 : mi_y = mi_row << MI_SIZE_LOG2;
2061 :
2062 0 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2063 0 : const struct macroblockd_plane *pd = &xd->plane[j];
2064 0 : bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
2065 0 : bh = AOMMAX((num_4x4_blocks_high_lookup[bsize] * 2) >> pd->subsampling_y,
2066 : 4);
2067 :
2068 0 : if (skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
2069 0 : build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
2070 : 0, 0, bw, bh,
2071 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2072 : 0, 0,
2073 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2074 : mi_x, mi_y);
2075 : }
2076 0 : *above_mbmi = backup_mbmi;
2077 : }
2078 0 : xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
2079 0 : xd->mb_to_right_edge = mb_to_right_edge_base;
2080 0 : xd->mb_to_bottom_edge -= xd->n8_h * 32;
2081 : }
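
A note on units (my reading of the MACROBLOCKD fields, stated as an assumption): the xd->mb_to_{left,right,top,bottom}_edge values are kept in 1/8-pel units, so expressions like -(((mi_col + i) * MI_SIZE) * 8) convert a pixel position into that scale, and xd->n8_h * 32 loosens the bottom edge by xd->n8_h * 4 pixels so the neighbor's predictor can be built slightly past the current block's boundary before the edges are restored at the end of the function.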
2082 :
2083 0 : void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
2084 : int mi_row, int mi_col,
2085 : uint8_t *tmp_buf[MAX_MB_PLANE],
2086 : int tmp_width[MAX_MB_PLANE],
2087 : int tmp_height[MAX_MB_PLANE],
2088 : int tmp_stride[MAX_MB_PLANE]) {
2089 0 : const TileInfo *const tile = &xd->tile;
2090 0 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2091 : int i, j, mi_step, ref;
2092 0 : const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
2093 0 : int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
2094 0 : const int neighbor_limit = max_neighbor_obmc[b_height_log2_lookup[bsize]];
2095 0 : int neighbor_count = 0;
2096 :
2097 0 : if (mi_col == 0 || (mi_col - 1 < tile->mi_col_start)) return;
2098 :
2099 0 : xd->mb_to_right_edge += xd->n8_w * 32;
2100 0 : for (i = 0; i < ilimit; i += mi_step) {
2101 0 : int mi_row_offset = i;
2102 0 : int mi_col_offset = -1;
2103 : int mi_x, mi_y, bw, bh;
2104 0 : MODE_INFO *left_mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2105 0 : MB_MODE_INFO *left_mbmi = &left_mi->mbmi;
2106 :
2107 : #if CONFIG_CHROMA_SUB8X8
2108 0 : if (left_mbmi->sb_type < BLOCK_8X8) {
2109 0 : ++mi_row_offset;
2110 0 : left_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
2111 : }
2112 : #endif
2113 :
2114 0 : const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8);
2115 : MB_MODE_INFO backup_mbmi;
2116 :
2117 0 : mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]);
2118 :
2119 0 : if (!is_neighbor_overlappable(left_mbmi)) continue;
2120 :
2121 0 : neighbor_count++;
2122 0 : if (neighbor_count > neighbor_limit) break;
2123 :
2124 0 : backup_mbmi = *left_mbmi;
2125 0 : modify_neighbor_predictor_for_obmc(left_mbmi);
2126 :
2127 0 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2128 0 : struct macroblockd_plane *const pd = &xd->plane[j];
2129 0 : setup_pred_plane(&pd->dst, l_bsize, tmp_buf[j], tmp_width[j],
2130 0 : tmp_height[j], tmp_stride[j], i, 0, NULL,
2131 : pd->subsampling_x, pd->subsampling_y);
2132 : }
2133 0 : for (ref = 0; ref < 1 + has_second_ref(left_mbmi); ++ref) {
2134 0 : const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
2135 0 : const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
2136 :
2137 0 : xd->block_refs[ref] = ref_buf;
2138 0 : if ((!av1_is_valid_scale(&ref_buf->sf)))
2139 0 : aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
2140 : "Reference frame has invalid dimensions");
2141 0 : av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i, mi_col,
2142 : &ref_buf->sf);
2143 : }
2144 :
2145 0 : xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
2146 0 : xd->mb_to_bottom_edge =
2147 0 : mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * 64;
2148 0 : mi_x = mi_col << MI_SIZE_LOG2;
2149 0 : mi_y = (mi_row + i) << MI_SIZE_LOG2;
2150 :
2151 0 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2152 0 : const struct macroblockd_plane *pd = &xd->plane[j];
2153 0 : bw = AOMMAX((num_4x4_blocks_wide_lookup[bsize] * 2) >> pd->subsampling_x,
2154 : 4);
2155 0 : bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
2156 :
2157 0 : if (skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
2158 0 : build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh,
2159 : 0, 0, bw, bh,
2160 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2161 : 0, 0,
2162 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2163 : mi_x, mi_y);
2164 : }
2165 0 : *left_mbmi = backup_mbmi;
2166 : }
2167 0 : xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
2168 0 : xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
2169 0 : xd->mb_to_right_edge -= xd->n8_w * 32;
2170 : }
2171 :
2172 0 : void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
2173 : int mi_row, int mi_col) {
2174 : #if CONFIG_HIGHBITDEPTH
2175 : DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
2176 : DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
2177 : #else
2178 : DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
2179 : DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
2180 : #endif // CONFIG_HIGHBITDEPTH
2181 : uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
2182 0 : int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2183 0 : int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2184 0 : int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2185 0 : int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2186 0 : int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2187 0 : int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2188 :
2189 : #if CONFIG_HIGHBITDEPTH
2190 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2191 0 : int len = sizeof(uint16_t);
2192 0 : dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
2193 0 : dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
2194 0 : dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
2195 0 : dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
2196 0 : dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
2197 0 : dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
2198 : } else {
2199 : #endif // CONFIG_HIGHBITDEPTH
2200 0 : dst_buf1[0] = tmp_buf1;
2201 0 : dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
2202 0 : dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
2203 0 : dst_buf2[0] = tmp_buf2;
2204 0 : dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
2205 0 : dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
2206 : #if CONFIG_HIGHBITDEPTH
2207 : }
2208 : #endif // CONFIG_HIGHBITDEPTH
2209 0 : av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1,
2210 : dst_width1, dst_height1, dst_stride1);
2211 0 : av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2,
2212 : dst_width2, dst_height2, dst_stride2);
2213 0 : av1_setup_dst_planes(xd->plane, xd->mi[0]->mbmi.sb_type,
2214 0 : get_frame_new_buffer(cm), mi_row, mi_col);
2215 0 : av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1, dst_stride1,
2216 : dst_buf2, dst_stride2);
2217 0 : }
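
The tmp_buf1/tmp_buf2 scratch buffers are twice as large under CONFIG_HIGHBITDEPTH because high-bit-depth samples are stored as uint16_t (2 bytes each); CONVERT_TO_BYTEPTR then presents that uint16_t storage through the uint8_t-pointer convention used throughout the predictor code.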
2218 :
2219 : #if CONFIG_NCOBMC
2220 : void av1_build_prediction_by_bottom_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
2221 : int mi_row, int mi_col,
2222 : uint8_t *tmp_buf[MAX_MB_PLANE],
2223 : int tmp_width[MAX_MB_PLANE],
2224 : int tmp_height[MAX_MB_PLANE],
2225 : int tmp_stride[MAX_MB_PLANE]) {
2226 : const TileInfo *const tile = &xd->tile;
2227 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2228 : int i, j, mi_step, ref;
2229 : const int ilimit = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
2230 : int mb_to_right_edge_base = xd->mb_to_right_edge;
2231 :
2232 : if (mi_row + xd->n8_h >= tile->mi_row_end ||
2233 : (mi_row + xd->n8_h) % MI_SIZE == 0 || (mi_row + xd->n8_h) >= cm->mi_rows)
2234 : return;
2235 : assert(bsize >= BLOCK_8X8);
2236 :
2237 : xd->mb_to_top_edge -= xd->n8_h * 32;
2238 : for (i = 0; i < ilimit; i += mi_step) {
2239 : int mi_row_offset = xd->n8_h;
2240 : int mi_col_offset = i;
2241 : int mi_x, mi_y, bw, bh;
2242 : MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2243 : MB_MODE_INFO *mbmi = &mi->mbmi;
2244 : #if CONFIG_EXT_INTER
2245 : MB_MODE_INFO backup_mbmi;
2246 : #endif // CONFIG_EXT_INTER
2247 :
2248 : mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]);
2249 :
2250 : if (!is_neighbor_overlappable(mbmi)) continue;
2251 :
2252 : #if CONFIG_EXT_INTER
2253 : backup_mbmi = *mbmi;
2254 : modify_neighbor_predictor_for_obmc(mbmi);
2255 : #endif // CONFIG_EXT_INTER
2256 :
2257 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2258 : struct macroblockd_plane *const pd = &xd->plane[j];
2259 : setup_pred_plane(&pd->dst, AOMMAX(mbmi->sb_type, BLOCK_8X8), tmp_buf[j],
2260 : tmp_width[j], tmp_height[j], tmp_stride[j],
2261 : (xd->n8_h >> 1), i, NULL, pd->subsampling_x,
2262 : pd->subsampling_y);
2263 : }
2264 : for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
2265 : const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
2266 : const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
2267 :
2268 : xd->block_refs[ref] = ref_buf;
2269 : if ((!av1_is_valid_scale(&ref_buf->sf)))
2270 : aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
2271 : "Reference frame has invalid dimensions");
2272 : av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + (xd->n8_h >> 1),
2273 : mi_col + i, &ref_buf->sf);
2274 : }
2275 :
2276 : xd->mb_to_left_edge = -(((mi_col + i) * MI_SIZE) * 8);
2277 : xd->mb_to_right_edge =
2278 : mb_to_right_edge_base + (xd->n8_w - i - mi_step) * 64;
2279 : mi_x = (mi_col + i) << MI_SIZE_LOG2;
2280 : mi_y = (mi_row << MI_SIZE_LOG2) + xd->n8_h * 4;
2281 :
2282 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2283 : const struct macroblockd_plane *pd = &xd->plane[j];
2284 : bw = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_x;
2285 : bh = (num_4x4_blocks_high_lookup[bsize] << 1) >> pd->subsampling_y;
2286 :
2287 : if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
2288 : const PARTITION_TYPE bp = BLOCK_8X8 - mbmi->sb_type;
2289 : const int have_vsplit = bp != PARTITION_HORZ;
2290 : const int have_hsplit = bp != PARTITION_VERT;
2291 : const int num_4x4_w = 2 >> (!have_vsplit);
2292 : const int num_4x4_h = 2 >> (!have_hsplit);
2293 : const int pw = 8 >> (have_vsplit + pd->subsampling_x);
2294 : int x, y;
2295 :
2296 : for (y = 0; y < num_4x4_h; ++y)
2297 : for (x = 0; x < num_4x4_w; ++x) {
2298 : if ((bp == PARTITION_HORZ || bp == PARTITION_SPLIT) && y != 0)
2299 : continue;
2300 :
2301 : build_inter_predictors(
2302 : cm, xd, j, mi_col_offset, mi_row_offset, y * 2 + x, bw, bh,
2303 : (4 * x) >> pd->subsampling_x,
2304 : xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0, pw, bh,
2305 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2306 : 0, 0,
2307 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2308 : mi_x, mi_y);
2309 : }
2310 : } else {
2311 : build_inter_predictors(
2312 : cm, xd, j, mi_col_offset, mi_row_offset, 0, bw, bh, 0,
2313 : xd->n8_h == 1 ? (4 >> pd->subsampling_y) : 0, bw, bh,
2314 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2315 : 0, 0,
2316 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2317 : mi_x, mi_y);
2318 : }
2319 : }
2320 : #if CONFIG_EXT_INTER
2321 : *mbmi = backup_mbmi;
2322 : #endif // CONFIG_EXT_INTER
2323 : }
2324 : xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
2325 : xd->mb_to_right_edge = mb_to_right_edge_base;
2326 : xd->mb_to_top_edge += xd->n8_h * 32;
2327 : }
2328 :
2329 : void av1_build_prediction_by_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
2330 : int mi_row, int mi_col,
2331 : uint8_t *tmp_buf[MAX_MB_PLANE],
2332 : int tmp_width[MAX_MB_PLANE],
2333 : int tmp_height[MAX_MB_PLANE],
2334 : const int tmp_stride[MAX_MB_PLANE]) {
2335 : const TileInfo *const tile = &xd->tile;
2336 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2337 : int i, j, mi_step, ref;
2338 : const int ilimit = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
2339 : int mb_to_bottom_edge_base = xd->mb_to_bottom_edge;
2340 :
2341 : if (mi_col + xd->n8_w >= tile->mi_col_end ||
2342 : (mi_col + xd->n8_w) % MI_SIZE == 0 || (mi_col + xd->n8_w) >= cm->mi_cols)
2343 : return;
2344 :
2345 : xd->mb_to_left_edge -= xd->n8_w * 32;
2346 : for (i = 0; i < ilimit; i += mi_step) {
2347 : int mi_row_offset = i;
2348 : int mi_col_offset = xd->n8_w;
2349 : int mi_x, mi_y, bw, bh;
2350 : MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2351 : MB_MODE_INFO *mbmi = &mi->mbmi;
2352 : #if CONFIG_EXT_INTER
2353 : MB_MODE_INFO backup_mbmi;
2354 : #endif // CONFIG_EXT_INTER
2355 :
2356 : mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]);
2357 :
2358 : if (!is_neighbor_overlappable(mbmi)) continue;
2359 :
2360 : #if CONFIG_EXT_INTER
2361 : backup_mbmi = *mbmi;
2362 : modify_neighbor_predictor_for_obmc(mbmi);
2363 : #endif // CONFIG_EXT_INTER
2364 :
2365 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2366 : struct macroblockd_plane *const pd = &xd->plane[j];
2367 : setup_pred_plane(&pd->dst, AOMMAX(mbmi->sb_type, BLOCK_8X8), tmp_buf[j],
2368 : tmp_width[j], tmp_height[j], tmp_stride[j], i,
2369 : xd->n8_w >> 1, NULL, pd->subsampling_x,
2370 : pd->subsampling_y);
2371 : }
2372 : for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
2373 : const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
2374 : const RefBuffer *const ref_buf = &cm->frame_refs[frame - LAST_FRAME];
2375 :
2376 : xd->block_refs[ref] = ref_buf;
2377 : if ((!av1_is_valid_scale(&ref_buf->sf)))
2378 : aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
2379 : "Reference frame has invalid dimensions");
2380 : av1_setup_pre_planes(xd, ref, ref_buf->buf, mi_row + i,
2381 : mi_col + (xd->n8_w >> 1), &ref_buf->sf);
2382 : }
2383 :
2384 : xd->mb_to_top_edge = -(((mi_row + i) * MI_SIZE) * 8);
2385 : xd->mb_to_bottom_edge =
2386 : mb_to_bottom_edge_base + (xd->n8_h - i - mi_step) * 64;
2387 : mi_x = (mi_col << MI_SIZE_LOG2) + xd->n8_w * 4;
2388 : mi_y = (mi_row + i) << MI_SIZE_LOG2;
2389 :
2390 : for (j = 0; j < MAX_MB_PLANE; ++j) {
2391 : const struct macroblockd_plane *pd = &xd->plane[j];
2392 : bw = (num_4x4_blocks_wide_lookup[bsize] << 1) >> pd->subsampling_x;
2393 : bh = (mi_step << MI_SIZE_LOG2) >> pd->subsampling_y;
2394 :
2395 : if (mbmi->sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
2396 : const PARTITION_TYPE bp = BLOCK_8X8 - mbmi->sb_type;
2397 : const int have_vsplit = bp != PARTITION_HORZ;
2398 : const int have_hsplit = bp != PARTITION_VERT;
2399 : const int num_4x4_w = 2 >> (!have_vsplit);
2400 : const int num_4x4_h = 2 >> (!have_hsplit);
2401 : const int ph = 8 >> (have_hsplit + pd->subsampling_y);
2402 : int x, y;
2403 :
2404 : for (y = 0; y < num_4x4_h; ++y)
2405 : for (x = 0; x < num_4x4_w; ++x) {
2406 : if ((bp == PARTITION_VERT || bp == PARTITION_SPLIT) && x != 0)
2407 : continue;
2408 :
2409 : build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset,
2410 : y * 2 + x, bw, bh,
2411 : xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0,
2412 : (4 * y) >> pd->subsampling_y, bw, ph,
2413 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2414 : 0, 0,
2415 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2416 : mi_x, mi_y);
2417 : }
2418 : } else {
2419 : build_inter_predictors(cm, xd, j, mi_col_offset, mi_row_offset, 0, bw,
2420 : bh, xd->n8_w == 1 ? 4 >> pd->subsampling_x : 0,
2421 : 0, bw, bh,
2422 : #if CONFIG_SUPERTX && CONFIG_EXT_INTER
2423 : 0, 0,
2424 : #endif // CONFIG_SUPERTX && CONFIG_EXT_INTER
2425 : mi_x, mi_y);
2426 : }
2427 : }
2428 : #if CONFIG_EXT_INTER
2429 : *mbmi = backup_mbmi;
2430 : #endif // CONFIG_EXT_INTER
2431 : }
2432 : xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
2433 : xd->mb_to_bottom_edge = mb_to_bottom_edge_base;
2434 : xd->mb_to_left_edge += xd->n8_w * 32;
2435 : }
2436 :
2437 : // This function combines the motion-compensated predictions generated by the
2438 : // bottom/right neighboring blocks' inter predictors with the prediction in the
2439 : // dst buffer.
2440 : void av1_merge_dst_bottom_right_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
2441 : int mi_row, int mi_col,
2442 : uint8_t *bottom[MAX_MB_PLANE],
2443 : const int bottom_stride[MAX_MB_PLANE],
2444 : uint8_t *right[MAX_MB_PLANE],
2445 : const int right_stride[MAX_MB_PLANE]) {
2446 : const TileInfo *const tile = &xd->tile;
2447 : BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2448 : int plane, i, mi_step;
2449 : const int bottom_available = mi_row + xd->n8_h < tile->mi_row_end &&
2450 : (mi_row + xd->n8_h) % MI_SIZE != 0 &&
2451 : (mi_row + xd->n8_h) < cm->mi_rows;
2452 : #if CONFIG_HIGHBITDEPTH
2453 : int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
2454 : #endif // CONFIG_HIGHBITDEPTH
2455 :
2456 : // handle bottom row
2457 : for (i = 0; bottom_available && i < AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
2458 : i += mi_step) {
2459 : int mi_row_offset = xd->n8_h;
2460 : int mi_col_offset = i;
2461 : MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2462 : MB_MODE_INFO *mbmi = &mi->mbmi;
2463 : int overlap;
2464 :
2465 : mi_step = AOMMIN(xd->n8_w, mi_size_wide[mbmi->sb_type]);
2466 :
2467 : if (!is_neighbor_overlappable(mbmi)) continue;
2468 :
2469 : overlap = num_4x4_blocks_high_lookup[bsize] << 1;
2470 :
2471 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
2472 : const struct macroblockd_plane *pd = &xd->plane[plane];
2473 : const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
2474 : const int bh = overlap >> pd->subsampling_y;
2475 : const int dst_stride = pd->dst.stride;
2476 : uint8_t *dst =
2477 : &pd->dst.buf[((i * MI_SIZE) >> pd->subsampling_x) +
2478 : (((xd->n8_h * MI_SIZE - overlap) * dst_stride) >>
2479 : pd->subsampling_y)];
2480 : const int tmp_stride = bottom_stride[plane];
2481 : const uint8_t *const tmp =
2482 : &bottom[plane][((i * MI_SIZE) >> pd->subsampling_x) +
2483 : (((xd->n8_h * MI_SIZE - overlap) * tmp_stride) >>
2484 : pd->subsampling_y)];
2485 : const uint8_t *const mask = av1_get_obmc_mask_flipped(bh);
2486 :
2487 : #if CONFIG_HIGHBITDEPTH
2488 : if (is_hbd)
2489 : aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
2490 : tmp_stride, mask, bh, bw, xd->bd);
2491 : else
2492 : #endif // CONFIG_HIGHBITDEPTH
2493 : aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
2494 : mask, bh, bw);
2495 : }
2496 : } // each mi in the bottom row
2497 :
2498 : // handle right column
2499 : if (mi_col + xd->n8_w >= tile->mi_col_end ||
2500 : (mi_col + xd->n8_w) % MI_SIZE == 0 || (mi_col + xd->n8_w) >= cm->mi_cols)
2501 : return;
2502 :
2503 : for (i = 0; i < AOMMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
2504 : int mi_row_offset = i;
2505 : int mi_col_offset = xd->n8_w;
2506 : int overlap;
2507 : MODE_INFO *mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
2508 : MB_MODE_INFO *mbmi = &mi->mbmi;
2509 :
2510 : mi_step = AOMMIN(xd->n8_h, mi_size_high[mbmi->sb_type]);
2511 :
2512 : if (!is_neighbor_overlappable(mbmi)) continue;
2513 :
2514 : overlap = num_4x4_blocks_wide_lookup[bsize] << 1;
2515 :
2516 : for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
2517 : const struct macroblockd_plane *pd = &xd->plane[plane];
2518 : const int bw = overlap >> pd->subsampling_x;
2519 : const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
2520 : const int dst_stride = pd->dst.stride;
2521 : uint8_t *dst =
2522 : &pd->dst.buf[((i * MI_SIZE * dst_stride) >> pd->subsampling_y) +
2523 : ((xd->n8_w * MI_SIZE - overlap) >> pd->subsampling_x)];
2524 : const int tmp_stride = right_stride[plane];
2525 : const uint8_t *const tmp =
2526 : &right[plane][((i * MI_SIZE * tmp_stride) >> pd->subsampling_y) +
2527 : ((xd->n8_w * MI_SIZE - overlap) >> pd->subsampling_x)];
2528 : const uint8_t *const mask = av1_get_obmc_mask_flipped(bw);
2529 :
2530 : #if CONFIG_HIGHBITDEPTH
2531 : if (is_hbd)
2532 : aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
2533 : tmp_stride, mask, bh, bw, xd->bd);
2534 : else
2535 : #endif // CONFIG_HIGHBITDEPTH
2536 : aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
2537 : mask, bh, bw);
2538 : }
2539 : } // each mi in the right column
2540 : }
2541 :
2542 : // This function generates 4-sided obmc: (1) calculate the prediction blocks
2543 : // generated by the bottom and right motion vectors; (2) combine them with the
2544 : // original prediction block (which should be pre-stored in xd->plane[].dst.buf
2545 : // before calling this function); the result is updated in xd->plane[].dst.buf;
2546 : // (3) call the causal obmc prediction function, which generates the left and
2547 : // above preds and then merges them with xd->plane[].dst.buf.
2548 : void av1_build_ncobmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
2549 : int mi_row, int mi_col) {
2550 : #if CONFIG_HIGHBITDEPTH
2551 : DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
2552 : DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
2553 : #else
2554 : DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
2555 : DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
2556 : #endif // CONFIG_HIGHBITDEPTH
2557 : uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
2558 : int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2559 : int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2560 : int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2561 : int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2562 : int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2563 : int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
2564 :
2565 : #if CONFIG_HIGHBITDEPTH
2566 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2567 : int len = sizeof(uint16_t);
2568 : dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1);
2569 : dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len);
2570 : dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len);
2571 : dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2);
2572 : dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len);
2573 : dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len);
2574 : } else {
2575 : #endif // CONFIG_HIGHBITDEPTH
2576 : dst_buf1[0] = tmp_buf1;
2577 : dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE;
2578 : dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2;
2579 : dst_buf2[0] = tmp_buf2;
2580 : dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE;
2581 : dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2;
2582 : #if CONFIG_HIGHBITDEPTH
2583 : }
2584 : #endif // CONFIG_HIGHBITDEPTH
2585 :
2586 : const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
2587 : av1_build_prediction_by_bottom_preds(cm, xd, mi_row, mi_col, dst_buf1,
2588 : dst_width1, dst_height1, dst_stride1);
2589 : av1_build_prediction_by_right_preds(cm, xd, mi_row, mi_col, dst_buf2,
2590 : dst_width2, dst_height2, dst_stride2);
2591 : av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
2592 : mi_col);
2593 : av1_merge_dst_bottom_right_preds(cm, xd, mi_row, mi_col, dst_buf1,
2594 : dst_stride1, dst_buf2, dst_stride2);
2595 : av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
2596 : mi_col);
2597 : av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
2598 : av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
2599 : mi_col);
2600 : }
2601 : #endif // CONFIG_NCOBMC
2602 : #endif // CONFIG_MOTION_VAR
2603 :
2604 : #if CONFIG_EXT_INTER
2605 : /* clang-format off */
2606 : #if CONFIG_INTERINTRA
2607 : #if CONFIG_EXT_PARTITION
2608 : static const int ii_weights1d[MAX_SB_SIZE] = {
2609 : 60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
2610 : 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
2611 : 16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 9, 9, 9, 8,
2612 : 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 4, 4,
2613 : 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2,
2614 : 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
2615 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2616 : };
2617 : static const int ii_size_scales[BLOCK_SIZES] = {
2618 : #if CONFIG_CB4X4
2619 : 32, 32, 32,
2620 : #endif
2621 : 32, 16, 16, 16, 8, 8, 8, 4,
2622 : 4, 4, 2, 2, 2, 1, 1, 1,
2623 : };
2624 : #else
2625 : static const int ii_weights1d[MAX_SB_SIZE] = {
2626 : 60, 56, 52, 48, 45, 42, 39, 37, 34, 32, 30, 28, 26, 24, 22, 21,
2627 : 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 10, 9, 8, 8, 7, 7,
2628 : 6, 6, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2,
2629 : 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
2630 : };
2631 : static const int ii_size_scales[BLOCK_SIZES] = {
2632 : #if CONFIG_CB4X4
2633 : 16, 16, 16,
2634 : #endif
2635 : 16, 8, 8, 8, 4, 4, 4,
2636 : 2, 2, 2, 1, 1, 1,
2637 : };
2638 : /* clang-format on */
2639 : #endif // CONFIG_EXT_PARTITION
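
ii_weights1d is a decaying ramp (60 down to 1) indexed by distance from the block edge being predicted, and ii_size_scales scales the index so that smaller blocks step through the full ramp. A sketch of how the II_V_PRED case below consumes them (the function name is illustrative; the weight is applied to the intra prediction, so rows near the top edge stay mostly intra and decay toward pure inter):

    #include <stdint.h>

    static void interintra_v_blend_ref(uint8_t *comp, int comp_stride,
                                       const uint8_t *inter, int inter_stride,
                                       const uint8_t *intra, int intra_stride,
                                       const int *weights1d, int size_scale,
                                       int bw, int bh) {
      for (int i = 0; i < bh; ++i) {
        const int scale = weights1d[i * size_scale];  // weight of intra pred
        for (int j = 0; j < bw; ++j)
          comp[i * comp_stride + j] =
              (uint8_t)((scale * intra[i * intra_stride + j] +
                         (64 - scale) * inter[i * inter_stride + j] + 32) >> 6);
      }
    }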
2640 :
2641 0 : static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
2642 : int wedge_index, int wedge_sign,
2643 : BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
2644 : uint8_t *comppred, int compstride,
2645 : const uint8_t *interpred, int interstride,
2646 : const uint8_t *intrapred, int intrastride) {
2647 0 : const int bw = block_size_wide[plane_bsize];
2648 0 : const int bh = block_size_high[plane_bsize];
2649 0 : const int size_scale = ii_size_scales[plane_bsize];
2650 : int i, j;
2651 :
2652 0 : if (use_wedge_interintra) {
2653 0 : if (is_interintra_wedge_used(bsize)) {
2654 0 : const uint8_t *mask =
2655 0 : av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
2656 0 : const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
2657 0 : const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
2658 0 : aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
2659 0 : interpred, interstride, mask, block_size_wide[bsize],
2660 : bh, bw, subh, subw);
2661 : }
2662 0 : return;
2663 : }
2664 :
2665 0 : switch (mode) {
2666 : case II_V_PRED:
2667 0 : for (i = 0; i < bh; ++i) {
2668 0 : for (j = 0; j < bw; ++j) {
2669 0 : int scale = ii_weights1d[i * size_scale];
2670 0 : comppred[i * compstride + j] =
2671 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2672 : interpred[i * interstride + j]);
2673 : }
2674 : }
2675 0 : break;
2676 :
2677 : case II_H_PRED:
2678 0 : for (i = 0; i < bh; ++i) {
2679 0 : for (j = 0; j < bw; ++j) {
2680 0 : int scale = ii_weights1d[j * size_scale];
2681 0 : comppred[i * compstride + j] =
2682 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2683 : interpred[i * interstride + j]);
2684 : }
2685 : }
2686 0 : break;
2687 :
2688 : #if CONFIG_ALT_INTRA
2689 : case II_SMOOTH_PRED:
2690 0 : for (i = 0; i < bh; ++i) {
2691 0 : for (j = 0; j < bw; ++j) {
2692 0 : int scale = ii_weights1d[(i < j ? i : j) * size_scale];
2693 0 : comppred[i * compstride + j] =
2694 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2695 : interpred[i * interstride + j]);
2696 : }
2697 : }
2698 0 : break;
2699 : #endif
2700 :
2701 : #if !CONFIG_ALT_INTRA
2702 : case II_TM_PRED:
2703 : #endif
2704 : case II_DC_PRED:
2705 : default:
2706 0 : for (i = 0; i < bh; ++i) {
2707 0 : for (j = 0; j < bw; ++j) {
2708 0 : comppred[i * compstride + j] = AOM_BLEND_AVG(
2709 : intrapred[i * intrastride + j], interpred[i * interstride + j]);
2710 : }
2711 : }
2712 0 : break;
2713 : }
2714 : }
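
Worked numbers for the paths above: with scale = 60, intra = 100, inter = 50, AOM_BLEND_A64 gives (60 * 100 + 4 * 50 + 32) >> 6 = 6232 >> 6 = 97, i.e. almost entirely intra; the II_DC_PRED/default path uses a plain rounded average, AOM_BLEND_AVG(100, 50) = (100 + 50 + 1) >> 1 = 75 (my reading of the macro in aom_dsp/blend.h). The II_SMOOTH_PRED case keys the weight off min(i, j), so the intra contribution stays strong near both the top and left edges.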
2715 :
2716 : #if CONFIG_HIGHBITDEPTH
2717 0 : static void combine_interintra_highbd(
2718 : INTERINTRA_MODE mode, int use_wedge_interintra, int wedge_index,
2719 : int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
2720 : uint8_t *comppred8, int compstride, const uint8_t *interpred8,
2721 : int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
2722 0 : const int bw = block_size_wide[plane_bsize];
2723 0 : const int bh = block_size_high[plane_bsize];
2724 0 : const int size_scale = ii_size_scales[plane_bsize];
2725 : int i, j;
2726 :
2727 0 : uint16_t *comppred = CONVERT_TO_SHORTPTR(comppred8);
2728 0 : const uint16_t *interpred = CONVERT_TO_SHORTPTR(interpred8);
2729 0 : const uint16_t *intrapred = CONVERT_TO_SHORTPTR(intrapred8);
2730 :
2731 0 : if (use_wedge_interintra) {
2732 0 : if (is_interintra_wedge_used(bsize)) {
2733 0 : const uint8_t *mask =
2734 0 : av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
2735 0 : const int subh = 2 * num_4x4_blocks_high_lookup[bsize] == bh;
2736 0 : const int subw = 2 * num_4x4_blocks_wide_lookup[bsize] == bw;
2737 0 : aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
2738 : interpred8, interstride, mask, bw, bh, bw, subh,
2739 : subw, bd);
2740 : }
2741 0 : return;
2742 : }
2743 :
2744 0 : switch (mode) {
2745 : case II_V_PRED:
2746 0 : for (i = 0; i < bh; ++i) {
2747 0 : for (j = 0; j < bw; ++j) {
2748 0 : int scale = ii_weights1d[i * size_scale];
2749 0 : comppred[i * compstride + j] =
2750 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2751 : interpred[i * interstride + j]);
2752 : }
2753 : }
2754 0 : break;
2755 :
2756 : case II_H_PRED:
2757 0 : for (i = 0; i < bh; ++i) {
2758 0 : for (j = 0; j < bw; ++j) {
2759 0 : int scale = ii_weights1d[j * size_scale];
2760 0 : comppred[i * compstride + j] =
2761 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2762 : interpred[i * interstride + j]);
2763 : }
2764 : }
2765 0 : break;
2766 :
2767 : #if CONFIG_ALT_INTRA
2768 : case II_SMOOTH_PRED:
2769 0 : for (i = 0; i < bh; ++i) {
2770 0 : for (j = 0; j < bw; ++j) {
2771 0 : int scale = ii_weights1d[(i < j ? i : j) * size_scale];
2772 0 : comppred[i * compstride + j] =
2773 0 : AOM_BLEND_A64(scale, intrapred[i * intrastride + j],
2774 : interpred[i * interstride + j]);
2775 : }
2776 : }
2777 0 : break;
2778 : #endif
2779 :
2780 : #if !CONFIG_ALT_INTRA
2781 : case II_TM_PRED:
2782 : #endif
2783 : case II_DC_PRED:
2784 : default:
2785 0 : for (i = 0; i < bh; ++i) {
2786 0 : for (j = 0; j < bw; ++j) {
2787 0 : comppred[i * compstride + j] = AOM_BLEND_AVG(
2788 : interpred[i * interstride + j], intrapred[i * intrastride + j]);
2789 : }
2790 : }
2791 0 : break;
2792 : }
2793 : }
2794 : #endif // CONFIG_HIGHBITDEPTH
2795 :
2796 0 : void av1_build_intra_predictors_for_interintra(MACROBLOCKD *xd,
2797 : BLOCK_SIZE bsize, int plane,
2798 : BUFFER_SET *ctx, uint8_t *dst,
2799 : int dst_stride) {
2800 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
2801 0 : BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
2802 0 : PREDICTION_MODE mode =
2803 0 : interintra_to_intra_mode[xd->mi[0]->mbmi.interintra_mode];
2804 :
2805 0 : av1_predict_intra_block(xd, pd->width, pd->height, plane_bsize, mode,
2806 0 : ctx->plane[plane], ctx->stride[plane], dst,
2807 : dst_stride, 0, 0, plane);
2808 0 : }
2809 :
2810 0 : void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
2811 : const uint8_t *inter_pred, int inter_stride,
2812 : const uint8_t *intra_pred, int intra_stride) {
2813 0 : const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
2814 : #if CONFIG_HIGHBITDEPTH
2815 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2816 0 : combine_interintra_highbd(
2817 0 : xd->mi[0]->mbmi.interintra_mode, xd->mi[0]->mbmi.use_wedge_interintra,
2818 0 : xd->mi[0]->mbmi.interintra_wedge_index,
2819 0 : xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize,
2820 : xd->plane[plane].dst.buf, xd->plane[plane].dst.stride, inter_pred,
2821 : inter_stride, intra_pred, intra_stride, xd->bd);
2822 0 : return;
2823 : }
2824 : #endif // CONFIG_HIGHBITDEPTH
2825 0 : combine_interintra(xd->mi[0]->mbmi.interintra_mode,
2826 0 : xd->mi[0]->mbmi.use_wedge_interintra,
2827 0 : xd->mi[0]->mbmi.interintra_wedge_index,
2828 0 : xd->mi[0]->mbmi.interintra_wedge_sign, bsize, plane_bsize,
2829 : xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
2830 : inter_pred, inter_stride, intra_pred, intra_stride);
2831 : }
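/*
 * Hedged sketch of the CONVERT_TO_BYTEPTR / CONVERT_TO_SHORTPTR convention
 * used by the high-bitdepth branch above: uint16_t pixels travel through
 * uint8_t * interfaces by halving/doubling the address. The definitions
 * below mirror my reading of aom_dsp; treat them as illustrative.
 */
#include <stdint.h>

#define EX_CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
#define EX_CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
/* Round-trip: EX_CONVERT_TO_SHORTPTR(EX_CONVERT_TO_BYTEPTR(p)) == p for a
 * uint16_t *p, so a highbd buffer can be threaded through byte-pointer
 * call chains and recovered in the leaf routine. */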
2832 :
2833 0 : void av1_build_interintra_predictors_sby(MACROBLOCKD *xd, uint8_t *ypred,
2834 : int ystride, BUFFER_SET *ctx,
2835 : BLOCK_SIZE bsize) {
2836 : #if CONFIG_HIGHBITDEPTH
2837 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2838 : DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
2839 0 : av1_build_intra_predictors_for_interintra(
2840 0 : xd, bsize, 0, ctx, CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
2841 0 : av1_combine_interintra(xd, bsize, 0, ypred, ystride,
2842 0 : CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
2843 0 : return;
2844 : }
2845 : #endif // CONFIG_HIGHBITDEPTH
2846 : {
2847 : DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
2848 0 : av1_build_intra_predictors_for_interintra(xd, bsize, 0, ctx, intrapredictor,
2849 : MAX_SB_SIZE);
2850 0 : av1_combine_interintra(xd, bsize, 0, ypred, ystride, intrapredictor,
2851 : MAX_SB_SIZE);
2852 : }
2853 : }
2854 :
2855 0 : void av1_build_interintra_predictors_sbc(MACROBLOCKD *xd, uint8_t *upred,
2856 : int ustride, BUFFER_SET *ctx,
2857 : int plane, BLOCK_SIZE bsize) {
2858 : #if CONFIG_HIGHBITDEPTH
2859 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2860 : DECLARE_ALIGNED(16, uint16_t, uintrapredictor[MAX_SB_SQUARE]);
2861 0 : av1_build_intra_predictors_for_interintra(
2862 0 : xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(uintrapredictor),
2863 : MAX_SB_SIZE);
2864 0 : av1_combine_interintra(xd, bsize, plane, upred, ustride,
2865 0 : CONVERT_TO_BYTEPTR(uintrapredictor), MAX_SB_SIZE);
2866 0 : return;
2867 : }
2868 : #endif // CONFIG_HIGHBITDEPTH
2869 : {
2870 : DECLARE_ALIGNED(16, uint8_t, uintrapredictor[MAX_SB_SQUARE]);
2871 0 : av1_build_intra_predictors_for_interintra(xd, bsize, plane, ctx,
2872 : uintrapredictor, MAX_SB_SIZE);
2873 0 : av1_combine_interintra(xd, bsize, plane, upred, ustride, uintrapredictor,
2874 : MAX_SB_SIZE);
2875 : }
2876 : }
2877 :
2878 0 : void av1_build_interintra_predictors_sbuv(MACROBLOCKD *xd, uint8_t *upred,
2879 : uint8_t *vpred, int ustride,
2880 : int vstride, BUFFER_SET *ctx,
2881 : BLOCK_SIZE bsize) {
2882 0 : av1_build_interintra_predictors_sbc(xd, upred, ustride, ctx, 1, bsize);
2883 0 : av1_build_interintra_predictors_sbc(xd, vpred, vstride, ctx, 2, bsize);
2884 0 : }
2885 :
2886 0 : void av1_build_interintra_predictors(MACROBLOCKD *xd, uint8_t *ypred,
2887 : uint8_t *upred, uint8_t *vpred,
2888 : int ystride, int ustride, int vstride,
2889 : BUFFER_SET *ctx, BLOCK_SIZE bsize) {
2890 0 : av1_build_interintra_predictors_sby(xd, ypred, ystride, ctx, bsize);
2891 0 : av1_build_interintra_predictors_sbuv(xd, upred, vpred, ustride, vstride, ctx,
2892 : bsize);
2893 0 : }
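/*
 * Hedged usage sketch: once the ordinary inter predictors for all three
 * planes sit in the dst buffers, one call blends them in place with freshly
 * built intra predictors (ctx supplies the reconstructed border pixels).
 * All identifiers come from the functions above; the wrapper name is
 * illustrative.
 */
static void apply_interintra_sketch(MACROBLOCKD *xd, BUFFER_SET *ctx,
                                    BLOCK_SIZE bsize) {
  av1_build_interintra_predictors(
      xd, xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf,
      xd->plane[0].dst.stride, xd->plane[1].dst.stride,
      xd->plane[2].dst.stride, ctx, bsize);
}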
2894 : #endif // CONFIG_INTERINTRA
2895 :
2896 : // Builds the inter-predictor for the single-reference case, for use
2897 : // in the encoder to search the wedges efficiently.
2898 0 : static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane,
2899 : int block, int bw, int bh, int x,
2900 : int y, int w, int h, int mi_x,
2901 : int mi_y, int ref,
2902 : uint8_t *const ext_dst,
2903 : int ext_dst_stride) {
2904 0 : struct macroblockd_plane *const pd = &xd->plane[plane];
2905 0 : const MODE_INFO *mi = xd->mi[0];
2906 :
2907 0 : const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
2908 0 : struct buf_2d *const pre_buf = &pd->pre[ref];
2909 : #if CONFIG_HIGHBITDEPTH
2910 0 : uint8_t *const dst =
2911 0 : (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? CONVERT_TO_BYTEPTR(ext_dst)
2912 0 : : ext_dst) +
2913 0 : ext_dst_stride * y + x;
2914 : #else
2915 : uint8_t *const dst = ext_dst + ext_dst_stride * y + x;
2916 : #endif
2917 0 : const MV mv = mi->mbmi.sb_type < BLOCK_8X8
2918 : ? average_split_mvs(pd, mi, ref, block)
2919 : : mi->mbmi.mv[ref].as_mv;
2920 :
2921 : // TODO(jkoleszar): This clamping is done in the incorrect place for the
2922 : // scaling case. It needs to be done on the scaled MV, not the pre-scaling
2923 : // MV. Note however that it performs the subsampling-aware scaling so
2924 : // that the result is always q4.
2925 : // The MV precision here is MV_PRECISION_Q4.
2926 0 : const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, pd->subsampling_x,
2927 : pd->subsampling_y);
2928 :
2929 : uint8_t *pre;
2930 : MV32 scaled_mv;
2931 : int xs, ys, subpel_x, subpel_y;
2932 0 : const int is_scaled = av1_is_scaled(sf);
2933 0 : ConvolveParams conv_params = get_conv_params(0, plane);
2934 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
2935 : WarpTypesAllowed warp_types;
2936 : #if CONFIG_GLOBAL_MOTION
2937 0 : WarpedMotionParams *const wm = &xd->global_motion[mi->mbmi.ref_frame[ref]];
2938 0 : warp_types.global_warp_allowed = is_global_mv_block(mi, block, wm->wmtype);
2939 : #endif // CONFIG_GLOBAL_MOTION
2940 : #if CONFIG_WARPED_MOTION
2941 0 : warp_types.local_warp_allowed = mi->mbmi.motion_mode == WARPED_CAUSAL;
2942 : #endif // CONFIG_WARPED_MOTION
2943 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
2944 :
2945 0 : if (is_scaled) {
2946 0 : pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
2947 0 : scaled_mv = av1_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
2948 0 : xs = sf->x_step_q4;
2949 0 : ys = sf->y_step_q4;
2950 : } else {
2951 0 : pre = pre_buf->buf + (y * pre_buf->stride + x);
2952 0 : scaled_mv.row = mv_q4.row;
2953 0 : scaled_mv.col = mv_q4.col;
2954 0 : xs = ys = 16;
2955 : }
2956 :
2957 0 : subpel_x = scaled_mv.col & SUBPEL_MASK;
2958 0 : subpel_y = scaled_mv.row & SUBPEL_MASK;
2959 0 : pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride +
2960 0 : (scaled_mv.col >> SUBPEL_BITS);
2961 :
2962 0 : av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, subpel_x,
2963 : subpel_y, sf, w, h, &conv_params,
2964 0 : mi->mbmi.interp_filter,
2965 : #if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
2966 0 : &warp_types, (mi_x >> pd->subsampling_x) + x,
2967 0 : (mi_y >> pd->subsampling_y) + y, plane, ref,
2968 : #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
2969 : #if CONFIG_MOTION_VAR
2970 : 0, 0,
2971 : #endif
2972 : xs, ys, xd);
2973 0 : }
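/*
 * Standalone sketch of the subpel split performed above: a q4 (1/16-pel)
 * motion component divides into an integer-pel pointer offset and a 4-bit
 * fractional phase that selects the interpolation filter. SUBPEL_BITS is 4
 * and SUBPEL_MASK is 15 in the real headers; the function name is
 * illustrative.
 */
static void split_mv_q4_sketch(int mv_q4, int *fullpel, int *subpel) {
  *fullpel = mv_q4 >> 4; /* arithmetic shift keeps the sign of negative MVs */
  *subpel = mv_q4 & 15;  /* phase 0..15, picks one of 16 subpel filters */
}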
2974 :
2975 0 : void av1_build_inter_predictors_for_planes_single_buf(
2976 : MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row,
2977 : int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3]) {
2978 : int plane;
2979 0 : const int mi_x = mi_col * MI_SIZE;
2980 0 : const int mi_y = mi_row * MI_SIZE;
2981 0 : for (plane = plane_from; plane <= plane_to; ++plane) {
2982 0 : const BLOCK_SIZE plane_bsize =
2983 0 : get_plane_block_size(bsize, &xd->plane[plane]);
2984 0 : const int bw = block_size_wide[plane_bsize];
2985 0 : const int bh = block_size_high[plane_bsize];
2986 :
2987 : if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
2988 : int x, y;
2989 : const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
2990 : const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
2991 : assert(bsize == BLOCK_8X8);
2992 : for (y = 0; y < num_4x4_h; ++y)
2993 : for (x = 0; x < num_4x4_w; ++x)
2994 : build_inter_predictors_single_buf(
2995 : xd, plane, y * 2 + x, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y, ref,
2996 : ext_dst[plane], ext_dst_stride[plane]);
2997 : } else {
2998 0 : build_inter_predictors_single_buf(xd, plane, 0, bw, bh, 0, 0, bw, bh,
2999 0 : mi_x, mi_y, ref, ext_dst[plane],
3000 0 : ext_dst_stride[plane]);
3001 : }
3002 : }
3003 0 : }
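/*
 * Hedged usage sketch for the encoder's wedge search: motion compensation
 * for each single reference runs once into scratch buffers, after which
 * every candidate mask can be evaluated by re-blending those buffers
 * without redoing interpolation. The wrapper name and buffer ownership are
 * illustrative.
 */
static void render_single_refs_sketch(MACROBLOCKD *xd, BLOCK_SIZE bsize,
                                      int mi_row, int mi_col,
                                      uint8_t *pred0[3], int stride0[3],
                                      uint8_t *pred1[3], int stride1[3]) {
  av1_build_inter_predictors_for_planes_single_buf(xd, bsize, 0, 2, mi_row,
                                                   mi_col, 0, pred0, stride0);
  av1_build_inter_predictors_for_planes_single_buf(xd, bsize, 0, 2, mi_row,
                                                   mi_col, 1, pred1, stride1);
}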
3004 :
3005 0 : static void build_wedge_inter_predictor_from_buf(
3006 : MACROBLOCKD *xd, int plane, int x, int y, int w, int h,
3007 : #if CONFIG_SUPERTX
3008 : int wedge_offset_x, int wedge_offset_y,
3009 : #endif // CONFIG_SUPERTX
3010 : uint8_t *ext_dst0, int ext_dst_stride0, uint8_t *ext_dst1,
3011 : int ext_dst_stride1) {
3012 0 : MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3013 0 : const int is_compound = has_second_ref(mbmi);
3014 0 : MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
3015 0 : struct buf_2d *const dst_buf = &pd->dst;
3016 0 : uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
3017 0 : const INTERINTER_COMPOUND_DATA comp_data = {
3018 : #if CONFIG_WEDGE
3019 0 : mbmi->wedge_index,
3020 0 : mbmi->wedge_sign,
3021 : #endif // CONFIG_WEDGE
3022 : #if CONFIG_COMPOUND_SEGMENT
3023 0 : mbmi->mask_type,
3024 0 : xd->seg_mask,
3025 : #endif // CONFIG_COMPOUND_SEGMENT
3026 0 : mbmi->interinter_compound_type
3027 : };
3028 :
3029 0 : if (is_compound && is_masked_compound_type(mbmi->interinter_compound_type)) {
3030 : #if CONFIG_COMPOUND_SEGMENT
3031 0 : if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
3032 : #if CONFIG_HIGHBITDEPTH
3033 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3034 0 : build_compound_seg_mask_highbd(
3035 : comp_data.seg_mask, comp_data.mask_type,
3036 0 : CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
3037 0 : CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, mbmi->sb_type, h, w,
3038 : xd->bd);
3039 : else
3040 : #endif // CONFIG_HIGHBITDEPTH
3041 0 : build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type,
3042 : ext_dst0, ext_dst_stride0, ext_dst1,
3043 0 : ext_dst_stride1, mbmi->sb_type, h, w);
3044 : }
3045 : #endif // CONFIG_COMPOUND_SEGMENT
3046 :
3047 : #if CONFIG_SUPERTX
3048 : #if CONFIG_HIGHBITDEPTH
3049 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3050 : build_masked_compound_wedge_extend_highbd(
3051 : dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
3052 : CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, &comp_data,
3053 : mbmi->sb_type, wedge_offset_x, wedge_offset_y, h, w, xd->bd);
3054 : else
3055 : #endif // CONFIG_HIGHBITDEPTH
3056 : build_masked_compound_wedge_extend(
3057 : dst, dst_buf->stride, ext_dst0, ext_dst_stride0, ext_dst1,
3058 : ext_dst_stride1, &comp_data, mbmi->sb_type, wedge_offset_x,
3059 : wedge_offset_y, h, w);
3060 : #else
3061 : #if CONFIG_HIGHBITDEPTH
3062 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3063 0 : build_masked_compound_highbd(
3064 0 : dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
3065 0 : CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, &comp_data,
3066 0 : mbmi->sb_type, h, w, xd->bd);
3067 : else
3068 : #endif // CONFIG_HIGHBITDEPTH
3069 0 : build_masked_compound(dst, dst_buf->stride, ext_dst0, ext_dst_stride0,
3070 : ext_dst1, ext_dst_stride1, &comp_data,
3071 0 : mbmi->sb_type, h, w);
3072 : #endif // CONFIG_SUPERTX
3073 : } else {
3074 : #if CONFIG_HIGHBITDEPTH
3075 0 : if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3076 0 : aom_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0,
3077 0 : dst, dst_buf->stride, NULL, 0, NULL, 0, w, h,
3078 : xd->bd);
3079 : else
3080 : #endif // CONFIG_HIGHBITDEPTH
3081 0 : aom_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL,
3082 : 0, NULL, 0, w, h);
3083 : }
3084 0 : }
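/*
 * Conceptual sketch of what build_masked_compound() does in the
 * unsubsampled case: a per-pixel A64 blend of the two single-ref
 * predictions under a soft 0..64 mask. Illustrative code, not the library
 * routine (which also upsamples the mask for subsampled planes).
 */
static void masked_blend_sketch(uint8_t *dst, int dst_stride,
                                const uint8_t *p0, int stride0,
                                const uint8_t *p1, int stride1,
                                const uint8_t *mask, int mask_stride,
                                int h, int w) {
  int i, j;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int m = mask[i * mask_stride + j]; /* 0..64 */
      dst[i * dst_stride + j] =
          (uint8_t)((m * p0[i * stride0 + j] +
                     (64 - m) * p1[i * stride1 + j] + 32) >> 6);
    }
  }
}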
3085 :
3086 0 : void av1_build_wedge_inter_predictor_from_buf(
3087 : MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to,
3088 : #if CONFIG_SUPERTX
3089 : int wedge_offset_x, int wedge_offset_y,
3090 : #endif // CONFIG_SUPERTX
3091 : uint8_t *ext_dst0[3], int ext_dst_stride0[3], uint8_t *ext_dst1[3],
3092 : int ext_dst_stride1[3]) {
3093 : int plane;
3094 0 : for (plane = plane_from; plane <= plane_to; ++plane) {
3095 0 : const BLOCK_SIZE plane_bsize =
3096 0 : get_plane_block_size(bsize, &xd->plane[plane]);
3097 :
3098 : if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
3099 : int x, y;
3100 : const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
3101 : const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
3102 : assert(bsize == BLOCK_8X8);
3103 : for (y = 0; y < num_4x4_h; ++y)
3104 : for (x = 0; x < num_4x4_w; ++x)
3105 : build_wedge_inter_predictor_from_buf(
3106 : xd, plane, 4 * x, 4 * y, 4, 4,
3107 : #if CONFIG_SUPERTX
3108 : wedge_offset_x, wedge_offset_y,
3109 : #endif // CONFIG_SUPERTX
3110 : ext_dst0[plane], ext_dst_stride0[plane], ext_dst1[plane],
3111 : ext_dst_stride1[plane]);
3112 : } else {
3113 0 : const int bw = block_size_wide[plane_bsize];
3114 0 : const int bh = block_size_high[plane_bsize];
3115 0 : build_wedge_inter_predictor_from_buf(
3116 : xd, plane, 0, 0, bw, bh,
3117 : #if CONFIG_SUPERTX
3118 : wedge_offset_x, wedge_offset_y,
3119 : #endif // CONFIG_SUPERTX
3120 0 : ext_dst0[plane], ext_dst_stride0[plane], ext_dst1[plane],
3121 0 : ext_dst_stride1[plane]);
3122 : }
3123 : }
3124 0 : }
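/*
 * Hedged end-to-end sketch of the search this file enables: with both
 * single-ref predictions cached once (see
 * av1_build_inter_predictors_for_planes_single_buf above), each candidate
 * wedge is tried by updating mbmi and re-blending the cached buffers. The
 * wrapper name and the elided RD bookkeeping are illustrative; the caller
 * is assumed to have set interinter_compound_type to a wedge type.
 */
static void try_wedge_sketch(MACROBLOCKD *xd, BLOCK_SIZE bsize,
                             int wedge_index, int wedge_sign,
                             uint8_t *pred0[3], int stride0[3],
                             uint8_t *pred1[3], int stride1[3]) {
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
#if CONFIG_WEDGE
  mbmi->wedge_index = wedge_index;
  mbmi->wedge_sign = wedge_sign;
#endif  // CONFIG_WEDGE
  av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0,
#if CONFIG_SUPERTX
                                           0, 0,
#endif  // CONFIG_SUPERTX
                                           pred0, stride0, pred1, stride1);
  /* ...rate/distortion evaluation of xd->plane[0].dst would follow... */
}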
3125 : #endif // CONFIG_EXT_INTER