LCOV - code coverage report
Current view: top level - third_party/aom/av1/common - convolve.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 247 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 19 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <assert.h>
      13             : #include <string.h>
      14             : 
      15             : #include "./aom_dsp_rtcd.h"
      16             : #include "./av1_rtcd.h"
      17             : #include "av1/common/convolve.h"
      18             : #include "av1/common/filter.h"
      19             : #include "av1/common/onyxc_int.h"
      20             : #include "aom_dsp/aom_dsp_common.h"
      21             : #include "aom_ports/mem.h"
      22             : 
      23             : #define MAX_BLOCK_WIDTH (MAX_SB_SIZE)  // upper bound asserted on block width w in convolve_helper
      24             : #define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)  // upper bound asserted on block height h in convolve_helper
      25             : #define MAX_STEP (32)  // upper bound asserted on x_step_q4 / y_step_q4 in convolve_helper
      26             : 
      27           0 : void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,  // 1-D horizontal sub-pixel filter over a w x h block, one row at a time
      28             :                           int dst_stride, int w, int h,
      29             :                           const InterpFilterParams filter_params,  // supplies the tap count and the per-phase kernels
      30             :                           const int subpel_x_q4, int x_step_q4,  // fixed-point start position and per-output-pixel increment
      31             :                           ConvolveParams *conv_params) {  // ->ref: 0 = overwrite dst, non-zero = average with dst
      32             :   int x, y;
      33           0 :   int filter_size = filter_params.taps;
      34           0 :   assert(conv_params->round == CONVOLVE_OPT_ROUND);  // only this rounding mode is handled here
      35           0 :   src -= filter_size / 2 - 1;  // back up so the taps straddle the output position
      36           0 :   for (y = 0; y < h; ++y) {
      37           0 :     int x_q4 = subpel_x_q4;  // position restarts at the same phase for every row
      38           0 :     for (x = 0; x < w; ++x) {
      39           0 :       const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];  // integer part picks the first source sample
      40           0 :       const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
      41             :           filter_params, x_q4 & SUBPEL_MASK);  // fractional part picks the kernel
      42           0 :       int k, sum = 0;
      43           0 :       for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      44             : 
      45           0 :       sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));  // drop FILTER_BITS of precision with rounding, then clip_pixel()
      46           0 :       if (conv_params->ref)
      47           0 :         dst[x] = ROUND_POWER_OF_TWO(dst[x] + sum, 1);  // rounded average with the value already in dst
      48             :       else
      49           0 :         dst[x] = sum;
      50             : 
      51           0 :       x_q4 += x_step_q4;
      52             :     }
      53           0 :     src += src_stride;
      54           0 :     dst += dst_stride;
      55             :   }
      56           0 : }
      57             : 
      58           0 : void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,  // 1-D vertical sub-pixel filter over a w x h block, one column at a time
      59             :                          int dst_stride, int w, int h,
      60             :                          const InterpFilterParams filter_params,  // supplies the tap count and the per-phase kernels
      61             :                          const int subpel_y_q4, int y_step_q4,  // fixed-point start position and per-output-row increment
      62             :                          ConvolveParams *conv_params) {  // ->ref: 0 = overwrite dst, non-zero = average with dst
      63             :   int x, y;
      64           0 :   int filter_size = filter_params.taps;
      65           0 :   assert(conv_params->round == CONVOLVE_OPT_ROUND);  // only this rounding mode is handled here
      66           0 :   src -= src_stride * (filter_size / 2 - 1);  // move up so the taps straddle the output row
      67           0 :   for (x = 0; x < w; ++x) {
      68           0 :     int y_q4 = subpel_y_q4;  // position restarts at the same phase for every column
      69           0 :     for (y = 0; y < h; ++y) {
      70           0 :       const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];  // integer part picks the first source row
      71           0 :       const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
      72             :           filter_params, y_q4 & SUBPEL_MASK);  // fractional part picks the kernel
      73           0 :       int k, sum = 0;
      74           0 :       for (k = 0; k < filter_size; ++k)
      75           0 :         sum += src_y[k * src_stride] * y_filter[k];  // taps walk down the column
      76             : 
      77           0 :       sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));  // drop FILTER_BITS of precision with rounding, then clip_pixel()
      78           0 :       if (conv_params->ref)
      79           0 :         dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + sum, 1);  // rounded average with the value already in dst
      80             :       else
      81           0 :         dst[y * dst_stride] = sum;
      82             : 
      83           0 :       y_q4 += y_step_q4;
      84             :     }
      85           0 :     ++src;  // advance both pointers to the next column
      86           0 :     ++dst;
      87             :   }
      88           0 : }
      89             : 
      90           0 : static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,  // no-filter path: copy a w x h block, or average it into dst
      91             :                           int dst_stride, int w, int h,
      92             :                           ConvolveParams *conv_params) {  // ->ref: 0 = plain copy, non-zero = average with dst
      93           0 :   assert(conv_params->round == CONVOLVE_OPT_ROUND);
      94           0 :   if (conv_params->ref == 0) {
      95             :     int r;
      96           0 :     for (r = 0; r < h; ++r) {
      97           0 :       memcpy(dst, src, w);  // one row of w bytes at a time
      98           0 :       src += src_stride;
      99           0 :       dst += dst_stride;
     100             :     }
     101             :   } else {
     102             :     int r, c;
     103           0 :     for (r = 0; r < h; ++r) {
     104           0 :       for (c = 0; c < w; ++c) {
     105           0 :         dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));  // rounded average of existing dst and src
     106             :       }
     107           0 :       src += src_stride;
     108           0 :       dst += dst_stride;
     109             :     }
     110             :   }
     111           0 : }
     112             : 
     113           0 : void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst,  // picks the horizontal filter routine based on the kernel's tap count
     114             :                                int dst_stride, int w, int h,
     115             :                                const InterpFilterParams filter_params,
     116             :                                const int subpel_x_q4, int x_step_q4,
     117             :                                ConvolveParams *conv_params) {
     118           0 :   assert(conv_params->round == CONVOLVE_OPT_ROUND);
     119           0 :   if (filter_params.taps == SUBPEL_TAPS) {  // tap count matches the aom_convolve8_* routines
     120           0 :     const int16_t *filter_x =
     121             :         av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);  // NOTE(review): subpel_x_q4 is not masked with SUBPEL_MASK here - confirm callers pass only the phase
     122           0 :     if (conv_params->ref == 0)
     123           0 :       aom_convolve8_horiz(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
     124             :                           NULL, -1, w, h);  // NULL, -1: presumably the unused vertical kernel/step slots - TODO confirm
     125             :     else
     126           0 :       aom_convolve8_avg_horiz(src, src_stride, dst, dst_stride, filter_x,  // averaging variant when ref is non-zero
     127             :                               x_step_q4, NULL, -1, w, h);
     128             :   } else {
     129           0 :     av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,  // any other tap count goes through av1_convolve_horiz
     130             :                        subpel_x_q4, x_step_q4, conv_params);
     131             :   }
     132           0 : }
     133             : 
     134           0 : void av1_convolve_horiz_facade_c(const uint8_t *src, int src_stride,  // same dispatch as av1_convolve_horiz_facade, but calls the *_c functions directly
     135             :                                  uint8_t *dst, int dst_stride, int w, int h,
     136             :                                  const InterpFilterParams filter_params,
     137             :                                  const int subpel_x_q4, int x_step_q4,
     138             :                                  ConvolveParams *conv_params) {
     139           0 :   assert(conv_params->round == CONVOLVE_OPT_ROUND);
     140           0 :   if (filter_params.taps == SUBPEL_TAPS) {  // tap count matches the aom_convolve8_* routines
     141           0 :     const int16_t *filter_x =
     142             :         av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);  // NOTE(review): subpel_x_q4 is not masked with SUBPEL_MASK here - confirm callers pass only the phase
     143           0 :     if (conv_params->ref == 0)
     144           0 :       aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
     145             :                             x_step_q4, NULL, -1, w, h);  // NULL, -1: presumably the unused vertical kernel/step slots - TODO confirm
     146             :     else
     147           0 :       aom_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,  // averaging variant when ref is non-zero
     148             :                                 x_step_q4, NULL, -1, w, h);
     149             :   } else {
     150           0 :     av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,  // any other tap count uses the generic loop in this file
     151             :                          subpel_x_q4, x_step_q4, conv_params);
     152             :   }
     153           0 : }
     154             : 
     155           0 : void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,  // picks the vertical filter routine based on the kernel's tap count
     156             :                               int dst_stride, int w, int h,
     157             :                               const InterpFilterParams filter_params,
     158             :                               const int subpel_y_q4, int y_step_q4,
     159             :                               ConvolveParams *conv_params) {
     160           0 :   assert(conv_params->round == CONVOLVE_OPT_ROUND);
     161           0 :   if (filter_params.taps == SUBPEL_TAPS) {  // tap count matches the aom_convolve8_* routines
     162           0 :     const int16_t *filter_y =
     163             :         av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);  // NOTE(review): subpel_y_q4 is not masked with SUBPEL_MASK here - confirm callers pass only the phase
     164           0 :     if (conv_params->ref == 0) {
     165           0 :       aom_convolve8_vert(src, src_stride, dst, dst_stride, NULL, -1, filter_y,  // NULL, -1: presumably the unused horizontal kernel/step slots - TODO confirm
     166             :                          y_step_q4, w, h);
     167             :     } else {
     168           0 :       aom_convolve8_avg_vert(src, src_stride, dst, dst_stride, NULL, -1,  // averaging variant when ref is non-zero
     169             :                              filter_y, y_step_q4, w, h);
     170             :     }
     171             :   } else {
     172           0 :     av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,  // any other tap count goes through av1_convolve_vert
     173             :                       subpel_y_q4, y_step_q4, conv_params);
     174             :   }
     175           0 : }
     176             : 
     177           0 : void av1_convolve_vert_facade_c(const uint8_t *src, int src_stride,  // same dispatch as av1_convolve_vert_facade, but calls the *_c functions directly
     178             :                                 uint8_t *dst, int dst_stride, int w, int h,
     179             :                                 const InterpFilterParams filter_params,
     180             :                                 const int subpel_y_q4, int y_step_q4,
     181             :                                 ConvolveParams *conv_params) {
     182           0 :   assert(conv_params->round == CONVOLVE_OPT_ROUND);
     183           0 :   if (filter_params.taps == SUBPEL_TAPS) {  // tap count matches the aom_convolve8_* routines
     184           0 :     const int16_t *filter_y =
     185             :         av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);  // NOTE(review): subpel_y_q4 is not masked with SUBPEL_MASK here - confirm callers pass only the phase
     186           0 :     if (conv_params->ref == 0) {
     187           0 :       aom_convolve8_vert_c(src, src_stride, dst, dst_stride, NULL, -1, filter_y,  // NULL, -1: presumably the unused horizontal kernel/step slots - TODO confirm
     188             :                            y_step_q4, w, h);
     189             :     } else {
     190           0 :       aom_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, NULL, -1,  // averaging variant when ref is non-zero
     191             :                                filter_y, y_step_q4, w, h);
     192             :     }
     193             :   } else {
     194           0 :     av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,  // any other tap count uses the generic loop in this file
     195             :                         subpel_y_q4, y_step_q4, conv_params);
     196             :   }
     197           0 : }
     198             : 
     199             : #if CONFIG_CONVOLVE_ROUND
     200             : void av1_convolve_rounding(const int32_t *src, int src_stride, uint8_t *dst,  // converts a w x h block of int32 values to 8-bit pixels
     201             :                            int dst_stride, int w, int h, int bits) {  // bits: number of low-order bits to round away
     202             :   int r, c;
     203             :   for (r = 0; r < h; ++r) {
     204             :     for (c = 0; c < w; ++c) {
     205             :       dst[r * dst_stride + c] =
     206             :           clip_pixel(ROUND_POWER_OF_TWO_SIGNED(src[r * src_stride + c], bits));  // signed rounding shift first, then clip_pixel()
     207             :     }
     208             :   }
     209             : }
     210             : 
     211             : void av1_convolve_2d(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,  // two-pass 2-D filter: horizontal into im_block, then vertical, ADDED into dst
     212             :                      int dst_stride, int w, int h,
     213             :                      InterpFilterParams *filter_params_x,
     214             :                      InterpFilterParams *filter_params_y, const int subpel_x_q4,
     215             :                      const int subpel_y_q4, ConvolveParams *conv_params) {  // conv_params->round_0 / round_1: shift after the first / second pass
     216             :   int x, y, k;
     217             :   CONV_BUF_TYPE im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];  // intermediate rows produced by the horizontal pass
     218             :   int im_h = h + filter_params_y->taps - 1;  // the vertical filter needs taps - 1 extra rows
     219             :   int im_stride = w;  // intermediate rows are stored back to back
     220             :   const int fo_vert = filter_params_y->taps / 2 - 1;  // rows of lead-in above the block
     221             :   const int fo_horiz = filter_params_x->taps / 2 - 1;  // columns of lead-in left of the block
     222             :   (void)conv_params;  // note: conv_params is nevertheless read below (round_0, round_1)
     223             :   // horizontal filter
     224             :   const uint8_t *src_horiz = src - fo_vert * src_stride;  // start fo_vert rows above the block
     225             :   const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
     226             :       *filter_params_x, subpel_x_q4 & SUBPEL_MASK);  // one kernel for the whole block (no per-pixel stepping)
     227             :   for (y = 0; y < im_h; ++y) {
     228             :     for (x = 0; x < w; ++x) {
     229             :       CONV_BUF_TYPE sum = 0;
     230             :       for (k = 0; k < filter_params_x->taps; ++k) {
     231             :         sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];  // taps span x - fo_horiz .. x - fo_horiz + taps - 1
     232             :       }
     233             : #if CONFIG_COMPOUND_ROUND
     234             :       im_block[y * im_stride + x] =
     235             :           clip_pixel(ROUND_POWER_OF_TWO_SIGNED(sum, conv_params->round_0));  // this configuration also clips the intermediate value
     236             : #else
     237             :       im_block[y * im_stride + x] =
     238             :           ROUND_POWER_OF_TWO_SIGNED(sum, conv_params->round_0);  // intermediate value is kept unclipped
     239             : #endif
     240             :     }
     241             :   }
     242             : 
     243             :   // vertical filter
     244             :   CONV_BUF_TYPE *src_vert = im_block + fo_vert * im_stride;  // skip the lead-in rows so index 0 lines up with output row 0
     245             :   const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
     246             :       *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
     247             :   for (y = 0; y < h; ++y) {
     248             :     for (x = 0; x < w; ++x) {
     249             :       CONV_BUF_TYPE sum = 0;
     250             :       for (k = 0; k < filter_params_y->taps; ++k) {
     251             :         sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];  // reaches back into the lead-in rows for small y
     252             :       }
     253             :       dst[y * dst_stride + x] +=
     254             :           ROUND_POWER_OF_TWO_SIGNED(sum, conv_params->round_1);  // accumulates: dst is not cleared by this function
     255             :     }
     256             :   }
     257             : }
     258             : 
     259             : static INLINE void transpose_uint8(uint8_t *dst, int dst_stride,  // writes the transpose of a w x h uint8 block into dst
     260             :                                    const uint8_t *src, int src_stride, int w,
     261             :                                    int h) {  // w, h: width and height of the SOURCE block
     262             :   int r, c;
     263             :   for (r = 0; r < h; ++r)
     264             :     for (c = 0; c < w; ++c)
     265             :       dst[c * (dst_stride) + r] = src[r * (src_stride) + c];  // source (row r, col c) lands at destination (row c, col r)
     266             : }
     267             : 
     268             : static INLINE void transpose_int32(int32_t *dst, int dst_stride,  // writes the transpose of a w x h int32 block into dst
     269             :                                    const int32_t *src, int src_stride, int w,
     270             :                                    int h) {  // w, h: width and height of the SOURCE block
     271             :   int r, c;
     272             :   for (r = 0; r < h; ++r)
     273             :     for (c = 0; c < w; ++c)
     274             :       dst[c * (dst_stride) + r] = src[r * (src_stride) + c];  // source (row r, col c) lands at destination (row c, col r)
     275             : }
     276             : 
     277             : void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,  // looks up the x/y filters and runs av1_convolve_2d into conv_params->dst
     278             :                             int dst_stride, int w, int h,
     279             :                             const InterpFilter *interp_filter,
     280             :                             const int subpel_x_q4, int x_step_q4,
     281             :                             const int subpel_y_q4, int y_step_q4,
     282             :                             ConvolveParams *conv_params) {  // output goes to conv_params->dst with stride conv_params->dst_stride
     283             :   (void)x_step_q4;  // step sizes are ignored by this path
     284             :   (void)y_step_q4;
     285             :   (void)dst;  // the 8-bit dst buffer is not written here
     286             :   (void)dst_stride;
     287             : #if CONFIG_DUAL_FILTER
     288             :   InterpFilterParams filter_params_x =
     289             :       av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);  // x filter: odd slot of the pair selected by ref
     290             :   InterpFilterParams filter_params_y =
     291             :       av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);  // y filter: even slot of the pair selected by ref
     292             : 
     293             : #if USE_EXTRA_FILTER
     294             :   if (filter_params_x.interp_filter == MULTITAP_SHARP &&
     295             :       filter_params_y.interp_filter == MULTITAP_SHARP) {
     296             :     // Avoid two directions both using 12-tap filter.
     297             :     // This will reduce hardware implementation cost.
     298             :     filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
     299             :   }
     300             : #endif  // USE_EXTRA_FILTER
     301             : #else
     302             :   InterpFilterParams filter_params_x =
     303             :       av1_get_interp_filter_params(*interp_filter);  // without dual filter, one filter serves both directions
     304             :   InterpFilterParams filter_params_y =
     305             :       av1_get_interp_filter_params(*interp_filter);
     306             : #endif
     307             : 
     308             :   if (filter_params_y.taps < filter_params_x.taps) {  // y kernel shorter than x kernel: filter on transposed data with the roles swapped
     309             :     uint8_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
     310             :                    (MAX_SB_SIZE + MAX_FILTER_TAP - 1)];
     311             :     int tr_src_stride = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
     312             :     CONV_BUF_TYPE tr_dst[MAX_SB_SIZE * MAX_SB_SIZE];
     313             :     int tr_dst_stride = MAX_SB_SIZE;
     314             :     int fo_vert = filter_params_y.taps / 2 - 1;
     315             :     int fo_horiz = filter_params_x.taps / 2 - 1;
     316             : 
     317             :     transpose_uint8(tr_src, tr_src_stride,
     318             :                     src - fo_vert * src_stride - fo_horiz, src_stride,  // start at the top-left corner of the filter margin
     319             :                     w + filter_params_x.taps - 1, h + filter_params_y.taps - 1);  // block plus the margins both filters read
     320             :     transpose_int32(tr_dst, tr_dst_stride, conv_params->dst,  // carry over existing contents, since av1_convolve_2d adds into its dst
     321             :                     conv_params->dst_stride, w, h);
     322             : 
     323             :     // horizontal and vertical parameters are swapped because of the transpose
     324             :     av1_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert, tr_src_stride,  // pointer to the transposed location of the original src origin
     325             :                     tr_dst, tr_dst_stride, h, w, &filter_params_y,
     326             :                     &filter_params_x, subpel_y_q4, subpel_x_q4, conv_params);
     327             :     transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,  // transpose the result back into the caller's buffer
     328             :                     tr_dst_stride, h, w);
     329             :   } else {
     330             :     av1_convolve_2d(src, src_stride, conv_params->dst, conv_params->dst_stride,  // direct path, no transpose needed
     331             :                     w, h, &filter_params_x, &filter_params_y, subpel_x_q4,
     332             :                     subpel_y_q4, conv_params);
     333             :   }
     334             : }
     335             : 
     336             : #if CONFIG_HIGHBITDEPTH
     337             : static INLINE void transpose_uint16(uint16_t *dst, int dst_stride,  // writes the transpose of a w x h uint16 block into dst
     338             :                                     const uint16_t *src, int src_stride, int w,
     339             :                                     int h) {  // w, h: width and height of the SOURCE block
     340             :   int r, c;
     341             :   for (r = 0; r < h; ++r)
     342             :     for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];  // source (row r, col c) lands at destination (row c, col r)
     343             : }
     344             : 
     345             : void av1_highbd_convolve_rounding(const int32_t *src, int src_stride,  // converts a w x h block of int32 values to 16-bit pixels of depth bd
     346             :                                   uint8_t *dst8, int dst_stride, int w, int h,
     347             :                                   int bits, int bd) {  // bits: low-order bits to round away; bd: passed to clip_pixel_highbd
     348             :   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);  // dst8 is converted to the uint16_t pointer actually written
     349             :   int r, c;
     350             :   for (r = 0; r < h; ++r) {
     351             :     for (c = 0; c < w; ++c) {
     352             :       dst[r * dst_stride + c] = clip_pixel_highbd(
     353             :           ROUND_POWER_OF_TWO_SIGNED(src[r * src_stride + c], bits), bd);  // signed rounding shift first, then clip for bd
     354             :     }
     355             :   }
     356             : }
     357             : 
     358             : void av1_highbd_convolve_2d(const uint16_t *src, int src_stride,  // 16-bit-source version of the two-pass 2-D filter; result is ADDED into dst
     359             :                             CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
     360             :                             InterpFilterParams *filter_params_x,
     361             :                             InterpFilterParams *filter_params_y,
     362             :                             const int subpel_x_q4, const int subpel_y_q4,
     363             :                             ConvolveParams *conv_params, int bd) {  // bd is only used when CONFIG_COMPOUND_ROUND is set
     364             :   int x, y, k;
     365             :   CONV_BUF_TYPE im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];  // intermediate rows produced by the horizontal pass
     366             :   int im_h = h + filter_params_y->taps - 1;  // the vertical filter needs taps - 1 extra rows
     367             :   int im_stride = w;  // intermediate rows are stored back to back
     368             :   const int fo_vert = filter_params_y->taps / 2 - 1;  // rows of lead-in above the block
     369             :   const int fo_horiz = filter_params_x->taps / 2 - 1;  // columns of lead-in left of the block
     370             :   (void)conv_params;  // note: conv_params is nevertheless read below (round_0, round_1)
     371             :   // horizontal filter
     372             :   const uint16_t *src_horiz = src - fo_vert * src_stride;  // start fo_vert rows above the block
     373             :   const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
     374             :       *filter_params_x, subpel_x_q4 & SUBPEL_MASK);  // one kernel for the whole block (no per-pixel stepping)
     375             :   for (y = 0; y < im_h; ++y) {
     376             :     for (x = 0; x < w; ++x) {
     377             :       CONV_BUF_TYPE sum = 0;
     378             :       for (k = 0; k < filter_params_x->taps; ++k) {
     379             :         sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];  // taps span x - fo_horiz .. x - fo_horiz + taps - 1
     380             :       }
     381             : #if CONFIG_COMPOUND_ROUND
     382             :       im_block[y * im_stride + x] = clip_pixel_highbd(
     383             :           ROUND_POWER_OF_TWO_SIGNED(sum, conv_params->round_0), bd);  // this configuration also clips the intermediate value for bd
     384             : #else
     385             :       (void)bd;  // bd is unused in this configuration
     386             :       im_block[y * im_stride + x] =
     387             :           ROUND_POWER_OF_TWO_SIGNED(sum, conv_params->round_0);  // intermediate value is kept unclipped
     388             : #endif
     389             :     }
     390             :   }
     391             : 
     392             :   // vertical filter
     393             :   CONV_BUF_TYPE *src_vert = im_block + fo_vert * im_stride;  // skip the lead-in rows so index 0 lines up with output row 0
     394             :   const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
     395             :       *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
     396             :   for (y = 0; y < h; ++y) {
     397             :     for (x = 0; x < w; ++x) {
     398             :       CONV_BUF_TYPE sum = 0;
     399             :       for (k = 0; k < filter_params_y->taps; ++k) {
     400             :         sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];  // reaches back into the lead-in rows for small y
     401             :       }
     402             :       dst[y * dst_stride + x] +=
     403             :           ROUND_POWER_OF_TWO_SIGNED(sum, conv_params->round_1);  // accumulates: dst is not cleared by this function
     404             :     }
     405             :   }
     406             : }
     407             : 
     408             : void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,  // looks up the x/y filters and runs av1_highbd_convolve_2d into conv_params->dst
     409             :                                    uint8_t *dst, int dst_stride, int w, int h,
     410             :                                    const InterpFilter *interp_filter,
     411             :                                    const int subpel_x_q4, int x_step_q4,
     412             :                                    const int subpel_y_q4, int y_step_q4,
     413             :                                    ConvolveParams *conv_params, int bd) {  // output goes to conv_params->dst with stride conv_params->dst_stride
     414             :   (void)x_step_q4;  // step sizes are ignored by this path
     415             :   (void)y_step_q4;
     416             :   (void)dst;  // the dst buffer argument is not written here
     417             :   (void)dst_stride;
     418             : #if CONFIG_DUAL_FILTER
     419             :   InterpFilterParams filter_params_x =
     420             :       av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);  // x filter: odd slot of the pair selected by ref
     421             :   InterpFilterParams filter_params_y =
     422             :       av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);  // y filter: even slot of the pair selected by ref
     423             : 
     424             : #if USE_EXTRA_FILTER
     425             :   if (filter_params_x.interp_filter == MULTITAP_SHARP &&
     426             :       filter_params_y.interp_filter == MULTITAP_SHARP) {
     427             :     // Avoid two directions both using 12-tap filter.
     428             :     // This will reduce hardware implementation cost.
     429             :     filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
     430             :   }
     431             : #endif
     432             : #else
     433             :   InterpFilterParams filter_params_x =
     434             :       av1_get_interp_filter_params(*interp_filter);  // without dual filter, one filter serves both directions
     435             :   InterpFilterParams filter_params_y =
     436             :       av1_get_interp_filter_params(*interp_filter);
     437             : #endif
     438             :   const uint16_t *src = CONVERT_TO_SHORTPTR(src8);  // src8 is converted to the uint16_t pointer actually read
     439             :   if (filter_params_y.taps < filter_params_x.taps) {  // y kernel shorter than x kernel: filter on transposed data with the roles swapped
     440             :     uint16_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
     441             :                     (MAX_SB_SIZE + MAX_FILTER_TAP - 1)];
     442             :     int tr_src_stride = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
     443             :     CONV_BUF_TYPE tr_dst[MAX_SB_SIZE * MAX_SB_SIZE];
     444             :     int tr_dst_stride = MAX_SB_SIZE;
     445             :     int fo_vert = filter_params_y.taps / 2 - 1;
     446             :     int fo_horiz = filter_params_x.taps / 2 - 1;
     447             : 
     448             :     transpose_uint16(
     449             :         tr_src, tr_src_stride, src - fo_vert * src_stride - fo_horiz,  // start at the top-left corner of the filter margin
     450             :         src_stride, w + filter_params_x.taps - 1, h + filter_params_y.taps - 1);  // block plus the margins both filters read
     451             :     transpose_int32(tr_dst, tr_dst_stride, conv_params->dst,  // carry over existing contents, since av1_highbd_convolve_2d adds into its dst
     452             :                     conv_params->dst_stride, w, h);
     453             : 
     454             :     // horizontal and vertical parameters are swapped because of the transpose
     455             :     av1_highbd_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,  // pointer to the transposed location of the original src origin
     456             :                            tr_src_stride, tr_dst, tr_dst_stride, h, w,
     457             :                            &filter_params_y, &filter_params_x, subpel_y_q4,
     458             :                            subpel_x_q4, conv_params, bd);
     459             :     transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,  // transpose the result back into the caller's buffer
     460             :                     tr_dst_stride, h, w);
     461             :   } else {
     462             :     av1_highbd_convolve_2d(src, src_stride, conv_params->dst,  // direct path, no transpose needed
     463             :                            conv_params->dst_stride, w, h, &filter_params_x,
     464             :                            &filter_params_y, subpel_x_q4, subpel_y_q4,
     465             :                            conv_params, bd);
     466             :   }
     467             : }
     468             : #endif  // CONFIG_HIGHBITDEPTH
     469             : 
     470             : #endif  // CONFIG_CONVOLVE_ROUND
     471             : 
     472             : typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst,  // pointer type for the 1-D filter callbacks taken by convolve_helper (convolve_horiz / convolve_vert)
     473             :                              int dst_stride, int w, int h,
     474             :                              const InterpFilterParams filter_params,
     475             :                              const int subpel_q4, int step_q4,  // direction-neutral names: the same type serves horizontal and vertical
     476             :                              ConvolveParams *conv_params);
     477             : 
     478           0 : static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
     479             :                             int dst_stride, int w, int h,
     480             : #if CONFIG_DUAL_FILTER
     481             :                             const InterpFilter *interp_filter,
     482             : #else
     483             :                             const InterpFilter interp_filter,
     484             : #endif
     485             :                             const int subpel_x_q4, int x_step_q4,
     486             :                             const int subpel_y_q4, int y_step_q4,
     487             :                             ConvolveParams *conv_params,
     488             :                             ConvolveFunc convolve_horiz,
     489             :                             ConvolveFunc convolve_vert) {
                         // Shared driver for the 8-bit convolve entry points. It picks one of
                         // four paths from the step / subpel arguments:
                         //   - both directions trivial  -> convolve_copy()
                         //   - only horizontal needed   -> convolve_horiz callback
                         //   - only vertical needed     -> convolve_vert callback
                         //   - both needed              -> two passes through a local temp buffer
                         // A direction is treated as trivial when its step is 16 and its subpel
                         // offset is 0 (presumably "one full pixel per output sample, no
                         // fractional phase" -- confirm against SUBPEL_BITS).
     490           0 :   int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
     491           0 :   int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
                         // With CONFIG_DUAL_FILTER, interp_filter is an array: entry
                         // 1 + 2 * ref is used for the x direction and entry 0 + 2 * ref for
                         // the y direction. Without it, one filter serves both directions.
     492             : #if CONFIG_DUAL_FILTER
     493           0 :   InterpFilterParams filter_params_x =
     494           0 :       av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
     495           0 :   InterpFilterParams filter_params_y =
     496           0 :       av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
     497             :   InterpFilterParams filter_params;
     498             : #else
     499             :   InterpFilterParams filter_params =
     500             :       av1_get_interp_filter_params(interp_filter);
     501             : #endif
     502           0 :   assert(conv_params->round == CONVOLVE_OPT_ROUND);
     503             : 
     504           0 :   assert(w <= MAX_BLOCK_WIDTH);
     505           0 :   assert(h <= MAX_BLOCK_HEIGHT);
     506           0 :   assert(y_step_q4 <= MAX_STEP);
     507           0 :   assert(x_step_q4 <= MAX_STEP);
     508             : 
     509           0 :   if (ignore_horiz && ignore_vert) {
     510           0 :     convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
     511           0 :   } else if (ignore_vert) {
     512             : #if CONFIG_DUAL_FILTER
     513           0 :     filter_params = filter_params_x;
     514             : #endif
     515           0 :     assert(filter_params.taps <= MAX_FILTER_TAP);
     516           0 :     convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
     517             :                    subpel_x_q4, x_step_q4, conv_params);
     518           0 :   } else if (ignore_horiz) {
     519             : #if CONFIG_DUAL_FILTER
     520           0 :     filter_params = filter_params_y;
     521             : #endif
     522           0 :     assert(filter_params.taps <= MAX_FILTER_TAP);
     523           0 :     convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
     524             :                   subpel_y_q4, y_step_q4, conv_params);
     525             :   } else {
     526             :     // temp's size is set to a 256 aligned value to facilitate SIMD
     527             :     // implementation. The value is greater than (maximum possible intermediate
     528             :     // height or width) * MAX_SB_SIZE
     529             :     DECLARE_ALIGNED(16, uint8_t,
     530             :                     temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
     531           0 :     int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
     532             :     int filter_size;
     533             : #if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
     534             :     if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
     535             :         interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
     536             :       // Avoid two directions both using 12-tap filter.
     537             :       // This will reduce hardware implementation cost.
     538             :       filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
     539             :     }
     540             : 
     541             :     // we do filter with fewer taps first to reduce hardware implementation
     542             :     // complexity
     543             :     if (filter_params_y.taps < filter_params_x.taps) {
                             // Vertical-first order: the vertical pass writes
                             // intermediate_width columns (w plus the extra columns the
                             // x filter's taps read) into temp, then the horizontal pass
                             // reads temp and writes dst.
     544             :       int intermediate_width;
     545             :       int temp_stride = max_intermediate_size;
     546             :       ConvolveParams temp_conv_params;
     547             :       temp_conv_params.ref = 0;
     548             :       temp_conv_params.round = CONVOLVE_OPT_ROUND;
     549             :       filter_params = filter_params_y;
     550             :       filter_size = filter_params_x.taps;
     551             :       intermediate_width =
     552             :           (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
     553             :       assert(intermediate_width <= max_intermediate_size);
     554             : 
     555             :       assert(filter_params.taps <= MAX_FILTER_TAP);
     556             : 
     557             :       convolve_vert(src - (filter_size / 2 - 1), src_stride, temp, temp_stride,
     558             :                     intermediate_width, h, filter_params, subpel_y_q4,
     559             :                     y_step_q4, &temp_conv_params);
     560             : 
     561             :       filter_params = filter_params_x;
     562             :       assert(filter_params.taps <= MAX_FILTER_TAP);
     563             :       convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst, dst_stride,
     564             :                      w, h, filter_params, subpel_x_q4, x_step_q4, conv_params);
     565             :     } else
     566             : #endif  // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
     567             :     {
                             // Default order: the horizontal pass writes intermediate_height
                             // rows (h plus the extra rows the y filter's taps read) into
                             // temp, starting filter_size / 2 - 1 rows above src; the
                             // vertical pass then reads temp and writes dst. The first pass
                             // uses a local ConvolveParams (ref = 0); only the final pass
                             // receives the caller's conv_params.
     568             :       int intermediate_height;
     569           0 :       int temp_stride = MAX_SB_SIZE;
     570             :       ConvolveParams temp_conv_params;
     571           0 :       temp_conv_params.ref = 0;
     572           0 :       temp_conv_params.round = CONVOLVE_OPT_ROUND;
     573             : #if CONFIG_DUAL_FILTER
     574           0 :       filter_params = filter_params_x;
     575           0 :       filter_size = filter_params_y.taps;
     576             : #else
     577             :       filter_size = filter_params.taps;
     578             : #endif
     579           0 :       intermediate_height =
     580           0 :           (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
     581           0 :       assert(intermediate_height <= max_intermediate_size);
                             // Silences the unused-variable warning when assert() compiles out.
     582             :       (void)max_intermediate_size;
     583             : 
     584           0 :       assert(filter_params.taps <= MAX_FILTER_TAP);
     585             : 
     586           0 :       convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
     587             :                      temp_stride, w, intermediate_height, filter_params,
     588             :                      subpel_x_q4, x_step_q4, &temp_conv_params);
     589             : 
     590             : #if CONFIG_DUAL_FILTER
     591           0 :       filter_params = filter_params_y;
     592             : #endif
     593           0 :       assert(filter_params.taps <= MAX_FILTER_TAP);
     594             : 
     595           0 :       convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
     596             :                     dst, dst_stride, w, h, filter_params, subpel_y_q4,
     597             :                     y_step_q4, conv_params);
     598             :     }
     599             :   }
     600           0 : }
     601             : 
     602           0 : void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
     603             :                   int dst_stride, int w, int h,
     604             : #if CONFIG_DUAL_FILTER
     605             :                   const InterpFilter *interp_filter,
     606             : #else
     607             :                   const InterpFilter interp_filter,
     608             : #endif
     609             :                   const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
     610             :                   int y_step_q4, ConvolveParams *conv_params) {
                         // Forwards every argument unchanged to convolve_helper(), using
                         // av1_convolve_horiz_facade / av1_convolve_vert_facade as the
                         // horizontal and vertical pass functions.
     611           0 :   convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filter,
     612             :                   subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, conv_params,
     613             :                   av1_convolve_horiz_facade, av1_convolve_vert_facade);
     614           0 : }
     615             : 
     616           0 : void av1_convolve_c(const uint8_t *src, int src_stride, uint8_t *dst,
     617             :                     int dst_stride, int w, int h,
     618             : #if CONFIG_DUAL_FILTER
     619             :                     const InterpFilter *interp_filter,
     620             : #else
     621             :                     const InterpFilter interp_filter,
     622             : #endif
     623             :                     const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
     624             :                     int y_step_q4, ConvolveParams *conv_params) {
                         // Same as av1_convolve() except that the pass functions handed to
                         // convolve_helper() are the "_c" facades
                         // (av1_convolve_horiz_facade_c / av1_convolve_vert_facade_c).
     625           0 :   convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filter,
     626             :                   subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, conv_params,
     627             :                   av1_convolve_horiz_facade_c, av1_convolve_vert_facade_c);
     628           0 : }
     629             : 
     630           0 : void av1_lowbd_convolve_init_c(void) {
                         // C version of the low-bitdepth convolve init hook; it does no work.
     631             :   // A placeholder for SIMD initialization
     632           0 :   return;
     633             : }
     634             : 
     635           0 : void av1_highbd_convolve_init_c(void) {
                         // C version of the high-bitdepth convolve init hook; it does no work.
     636             :   // A placeholder for SIMD initialization
     637           0 :   return;
     638             : }
     639             : 
     640           0 : void av1_convolve_init(AV1_COMMON *cm) {
                         // Runs the convolve init hook that matches the stream's bit depth.
                         // With CONFIG_HIGHBITDEPTH, cm->use_highbitdepth selects between the
                         // high- and low-bitdepth hooks; without it, cm is unused and the
                         // low-bitdepth hook is always called.
                         // NOTE(review): av1_highbd_convolve_init / av1_lowbd_convolve_init are
                         // not defined in this file -- presumably RTCD-dispatched; confirm.
     641             : #if CONFIG_HIGHBITDEPTH
     642           0 :   if (cm->use_highbitdepth)
     643           0 :     av1_highbd_convolve_init();
     644             :   else
     645           0 :     av1_lowbd_convolve_init();
     646             : #else
     647             :   (void)cm;
     648             :   av1_lowbd_convolve_init();
     649             : #endif
     650           0 :   return;
     651             : }
     652             : 
     653             : #if CONFIG_HIGHBITDEPTH
     654           0 : void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
     655             :                                  uint16_t *dst, int dst_stride, int w, int h,
     656             :                                  const InterpFilterParams filter_params,
     657             :                                  const int subpel_x_q4, int x_step_q4, int avg,
     658             :                                  int bd) {
                         // Horizontal filter pass over 16-bit samples, one output row at a time.
                         // For each output pixel, x_q4 is a fixed-point position: its upper
                         // bits (x_q4 >> SUBPEL_BITS) select the first source sample and its
                         // low bits (x_q4 & SUBPEL_MASK) select the kernel. The filtered sum is
                         // rounded by FILTER_BITS and clipped with clip_pixel_highbd(.., bd).
                         // When avg is non-zero the result is averaged (with rounding) with the
                         // value already in dst instead of overwriting it.
     659             :   int x, y;
     660           0 :   int filter_size = filter_params.taps;
                         // Back up so the tap window starts filter_size / 2 - 1 samples to the
                         // left of each output position.
     661           0 :   src -= filter_size / 2 - 1;
     662           0 :   for (y = 0; y < h; ++y) {
     663           0 :     int x_q4 = subpel_x_q4;
     664           0 :     for (x = 0; x < w; ++x) {
     665           0 :       const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
     666           0 :       const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
     667             :           filter_params, x_q4 & SUBPEL_MASK);
     668           0 :       int k, sum = 0;
     669           0 :       for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
     670           0 :       if (avg)
     671           0 :         dst[x] = ROUND_POWER_OF_TWO(
     672             :             dst[x] +
     673             :                 clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
     674             :             1);
     675             :       else
     676           0 :         dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
     677           0 :       x_q4 += x_step_q4;
     678             :     }
     679           0 :     src += src_stride;
     680           0 :     dst += dst_stride;
     681             :   }
     682           0 : }
     683             : 
     684           0 : void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
     685             :                                 uint16_t *dst, int dst_stride, int w, int h,
     686             :                                 const InterpFilterParams filter_params,
     687             :                                 const int subpel_y_q4, int y_step_q4, int avg,
     688             :                                 int bd) {
                         // Vertical filter pass over 16-bit samples, processed column by column
                         // (outer loop over x, inner loop over y). y_q4 is a fixed-point row
                         // position: y_q4 >> SUBPEL_BITS selects the first source row and
                         // y_q4 & SUBPEL_MASK selects the kernel. The filtered sum is rounded by
                         // FILTER_BITS and clipped with clip_pixel_highbd(.., bd). When avg is
                         // non-zero the result is averaged (with rounding) with the value
                         // already in dst.
     689             :   int x, y;
     690           0 :   int filter_size = filter_params.taps;
                         // Back up so the tap window starts filter_size / 2 - 1 rows above each
                         // output position.
     691           0 :   src -= src_stride * (filter_size / 2 - 1);
     692             : 
     693           0 :   for (x = 0; x < w; ++x) {
     694           0 :     int y_q4 = subpel_y_q4;
     695           0 :     for (y = 0; y < h; ++y) {
     696           0 :       const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
     697           0 :       const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
     698             :           filter_params, y_q4 & SUBPEL_MASK);
     699           0 :       int k, sum = 0;
     700           0 :       for (k = 0; k < filter_size; ++k)
     701           0 :         sum += src_y[k * src_stride] * y_filter[k];
     702           0 :       if (avg) {
     703           0 :         dst[y * dst_stride] = ROUND_POWER_OF_TWO(
     704             :             dst[y * dst_stride] +
     705             :                 clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
     706             :             1);
     707             :       } else {
     708           0 :         dst[y * dst_stride] =
     709           0 :             clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
     710             :       }
     711           0 :       y_q4 += y_step_q4;
     712             :     }
                           // Advance both pointers to the next column.
     713           0 :     ++src;
     714           0 :     ++dst;
     715             :   }
     716           0 : }
     717             : 
     718           0 : static void highbd_convolve_copy(const uint16_t *src, int src_stride,
     719             :                                  uint16_t *dst, int dst_stride, int w, int h,
     720             :                                  int avg, int bd) {
                         // Unfiltered path for 16-bit samples. With avg == 0 each row of w
                         // samples is copied with memcpy(); otherwise each dst sample becomes
                         // the rounded average of dst and src, clipped with
                         // clip_pixel_highbd(.., bd).
     721           0 :   if (avg == 0) {
     722             :     int r;
     723           0 :     for (r = 0; r < h; ++r) {
     724           0 :       memcpy(dst, src, w * sizeof(*src));
     725           0 :       src += src_stride;
     726           0 :       dst += dst_stride;
     727             :     }
     728             :   } else {
     729             :     int r, c;
     730           0 :     for (r = 0; r < h; ++r) {
     731           0 :       for (c = 0; c < w; ++c) {
     732           0 :         dst[c] = clip_pixel_highbd(ROUND_POWER_OF_TWO(dst[c] + src[c], 1), bd);
     733             :       }
     734           0 :       src += src_stride;
     735           0 :       dst += dst_stride;
     736             :     }
     737             :   }
     738           0 : }
     739             : 
     740           0 : void av1_highbd_convolve_horiz_facade(const uint8_t *src8, int src_stride,
     741             :                                       uint8_t *dst8, int dst_stride, int w,
     742             :                                       int h,
     743             :                                       const InterpFilterParams filter_params,
     744             :                                       const int subpel_x_q4, int x_step_q4,
     745             :                                       int avg, int bd) {
                         // Chooses the horizontal high-bitdepth implementation by tap count.
                         // Filters with exactly SUBPEL_TAPS taps go to the
                         // aom_highbd_convolve8[_avg]_horiz routines, which take the original
                         // src8 / dst8 pointers; any other tap count goes to
                         // av1_highbd_convolve_horiz on the CONVERT_TO_SHORTPTR'd pointers.
                         // avg selects the averaging variant in both cases.
     746           0 :   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
     747           0 :   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
     748           0 :   if (filter_params.taps == SUBPEL_TAPS) {
     749           0 :     const int16_t *filter_x =
     750             :         av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
                           // NULL / -1 fill the y-filter argument slots -- presumably ignored
                           // by the horizontal-only routines; confirm against aom_dsp.
     751           0 :     if (avg == 0)
     752           0 :       aom_highbd_convolve8_horiz(src8, src_stride, dst8, dst_stride, filter_x,
     753             :                                  x_step_q4, NULL, -1, w, h, bd);
     754             :     else
     755           0 :       aom_highbd_convolve8_avg_horiz(src8, src_stride, dst8, dst_stride,
     756             :                                      filter_x, x_step_q4, NULL, -1, w, h, bd);
     757             :   } else {
     758           0 :     av1_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
     759             :                               filter_params, subpel_x_q4, x_step_q4, avg, bd);
     760             :   }
     761           0 : }
     762             : 
     763           0 : void av1_highbd_convolve_vert_facade(const uint8_t *src8, int src_stride,
     764             :                                      uint8_t *dst8, int dst_stride, int w,
     765             :                                      int h,
     766             :                                      const InterpFilterParams filter_params,
     767             :                                      const int subpel_y_q4, int y_step_q4,
     768             :                                      int avg, int bd) {
                         // Chooses the vertical high-bitdepth implementation by tap count.
                         // Filters with exactly SUBPEL_TAPS taps go to the
                         // aom_highbd_convolve8[_avg]_vert routines, which take the original
                         // src8 / dst8 pointers; any other tap count goes to
                         // av1_highbd_convolve_vert on the CONVERT_TO_SHORTPTR'd pointers.
                         // avg selects the averaging variant in both cases.
     769           0 :   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
     770           0 :   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
     771             : 
     772           0 :   if (filter_params.taps == SUBPEL_TAPS) {
     773           0 :     const int16_t *filter_y =
     774             :         av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
                           // NULL / -1 fill the x-filter argument slots -- presumably ignored
                           // by the vertical-only routines; confirm against aom_dsp.
     775           0 :     if (avg == 0) {
     776           0 :       aom_highbd_convolve8_vert(src8, src_stride, dst8, dst_stride, NULL, -1,
     777             :                                 filter_y, y_step_q4, w, h, bd);
     778             :     } else {
     779           0 :       aom_highbd_convolve8_avg_vert(src8, src_stride, dst8, dst_stride, NULL,
     780             :                                     -1, filter_y, y_step_q4, w, h, bd);
     781             :     }
     782             :   } else {
     783           0 :     av1_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
     784             :                              filter_params, subpel_y_q4, y_step_q4, avg, bd);
     785             :   }
     786           0 : }
     787             : 
     788           0 : void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
     789             :                          int dst_stride, int w, int h,
     790             : #if CONFIG_DUAL_FILTER
     791             :                          const InterpFilter *interp_filter,
     792             : #else
     793             :                          const InterpFilter interp_filter,
     794             : #endif
     795             :                          const int subpel_x_q4, int x_step_q4,
     796             :                          const int subpel_y_q4, int y_step_q4, int ref_idx,
     797             :                          int bd) {
                         // High-bitdepth counterpart of the 8-bit convolve driver. It picks one
                         // of four paths from the step / subpel arguments: plain copy,
                         // horizontal-only, vertical-only, or two passes through a local
                         // uint16_t temp buffer. A direction is skipped when its step is 16 and
                         // its subpel offset is 0.
                         // ref_idx is forwarded as the "avg" argument of the copy / facade
                         // calls that write dst, so a non-zero ref_idx averages into dst; the
                         // intermediate pass into temp always passes 0.
                         // With CONFIG_DUAL_FILTER, interp_filter[1 + 2 * ref_idx] is the x
                         // filter and interp_filter[0 + 2 * ref_idx] is the y filter.
     798           0 :   uint16_t *src = CONVERT_TO_SHORTPTR(src8);
     799           0 :   uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
     800           0 :   int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
     801           0 :   int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
     802             : 
     803           0 :   assert(w <= MAX_BLOCK_WIDTH);
     804           0 :   assert(h <= MAX_BLOCK_HEIGHT);
     805           0 :   assert(y_step_q4 <= MAX_STEP);
     806           0 :   assert(x_step_q4 <= MAX_STEP);
     807             : 
     808           0 :   if (ignore_horiz && ignore_vert) {
     809           0 :     highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
     810           0 :   } else if (ignore_vert) {
     811             : #if CONFIG_DUAL_FILTER
     812           0 :     InterpFilterParams filter_params =
     813           0 :         av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
     814             : #else
     815             :     InterpFilterParams filter_params =
     816             :         av1_get_interp_filter_params(interp_filter);
     817             : #endif
     818           0 :     av1_highbd_convolve_horiz_facade(src8, src_stride, dst8, dst_stride, w, h,
     819             :                                      filter_params, subpel_x_q4, x_step_q4,
     820             :                                      ref_idx, bd);
     821           0 :   } else if (ignore_horiz) {
     822             : #if CONFIG_DUAL_FILTER
     823           0 :     InterpFilterParams filter_params =
     824           0 :         av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
     825             : #else
     826             :     InterpFilterParams filter_params =
     827             :         av1_get_interp_filter_params(interp_filter);
     828             : #endif
     829           0 :     av1_highbd_convolve_vert_facade(src8, src_stride, dst8, dst_stride, w, h,
     830             :                                     filter_params, subpel_y_q4, y_step_q4,
     831             :                                     ref_idx, bd);
     832             :   } else {
     833             :     // temp's size is set to a 256 aligned value to facilitate SIMD
     834             :     // implementation. The value is greater than (maximum possible intermediate
     835             :     // height or width) * MAX_SB_SIZE
     836             :     DECLARE_ALIGNED(16, uint16_t,
     837             :                     temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
                           // The facades take uint8_t-typed pointers, so temp is passed in its
                           // CONVERT_TO_BYTEPTR form.
     838           0 :     uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
     839           0 :     int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
     840             :     int filter_size;
     841             :     InterpFilterParams filter_params;
     842             : #if CONFIG_DUAL_FILTER
     843           0 :     InterpFilterParams filter_params_x =
     844           0 :         av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
     845           0 :     InterpFilterParams filter_params_y =
     846           0 :         av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
     847             : #endif
     848             : 
     849             : #if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
     850             :     if (interp_filter[0 + 2 * ref_idx] == MULTITAP_SHARP &&
     851             :         interp_filter[1 + 2 * ref_idx] == MULTITAP_SHARP) {
     852             :       // Avoid two directions both using 12-tap filter.
     853             :       // This will reduce hardware implementation cost.
     854             :       filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
     855             :     }
     856             :     if (filter_params_y.taps < filter_params_x.taps) {
                             // Vertical-first order (the y filter has fewer taps): the
                             // vertical pass writes intermediate_width columns into temp,
                             // then the horizontal pass reads temp and writes dst.
     857             :       int intermediate_width;
     858             :       int temp_stride = max_intermediate_size;
     859             :       filter_params = filter_params_y;
     860             :       filter_size = filter_params_x.taps;
     861             :       intermediate_width =
     862             :           (((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
     863             :       assert(intermediate_width <= max_intermediate_size);
     864             : 
     865             :       assert(filter_params.taps <= MAX_FILTER_TAP);
     866             : 
     867             :       av1_highbd_convolve_vert_facade(
     868             :           src8 - (filter_size / 2 - 1), src_stride, temp8, temp_stride,
     869             :           intermediate_width, h, filter_params, subpel_y_q4, y_step_q4, 0, bd);
     870             : 
     871             :       filter_params = filter_params_x;
     872             :       assert(filter_params.taps <= MAX_FILTER_TAP);
     873             : 
     874             :       av1_highbd_convolve_horiz_facade(
     875             :           temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
     876             :           filter_params, subpel_x_q4, x_step_q4, ref_idx, bd);
     877             :     } else
     878             : #endif  // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
     879             :     {
                             // Default order: the horizontal pass writes intermediate_height
                             // rows (h plus the extra rows the y filter's taps read) into
                             // temp, starting filter_size / 2 - 1 rows above src8; the
                             // vertical pass then reads temp and writes dst8.
     880             :       int intermediate_height;
     881           0 :       int temp_stride = MAX_SB_SIZE;
     882             : #if CONFIG_DUAL_FILTER
     883           0 :       filter_params = filter_params_x;
     884           0 :       filter_size = filter_params_y.taps;
     885             : #else
     886             :       filter_params = av1_get_interp_filter_params(interp_filter);
     887             :       filter_size = filter_params.taps;
     888             : #endif
     889           0 :       intermediate_height =
     890           0 :           (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
     891           0 :       assert(intermediate_height <= max_intermediate_size);
                             // Silences the unused-variable warning when assert() compiles out.
     892             :       (void)max_intermediate_size;
     893             : 
     894           0 :       av1_highbd_convolve_horiz_facade(
     895           0 :           src8 - src_stride * (filter_size / 2 - 1), src_stride, temp8,
     896             :           temp_stride, w, intermediate_height, filter_params, subpel_x_q4,
     897             :           x_step_q4, 0, bd);
     898             : 
     899             : #if CONFIG_DUAL_FILTER
     900           0 :       filter_params = filter_params_y;
     901             : #endif
     902           0 :       filter_size = filter_params.taps;
     903           0 :       assert(filter_params.taps <= MAX_FILTER_TAP);
     904             : 
     905           0 :       av1_highbd_convolve_vert_facade(
     906           0 :           temp8 + temp_stride * (filter_size / 2 - 1), temp_stride, dst8,
     907             :           dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, ref_idx, bd);
     908             :     }
     909             :   }
     910           0 : }
     911             : #endif  // CONFIG_HIGHBITDEPTH

Generated by: LCOV version 1.13