Line data Source code
1 : /*
2 : * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #include <assert.h>
12 : #include <string.h>
13 :
14 : #include "./vpx_config.h"
15 : #include "./vpx_dsp_rtcd.h"
16 : #include "vpx/vpx_integer.h"
17 : #include "vpx_dsp/vpx_convolve.h"
18 : #include "vpx_dsp/vpx_dsp_common.h"
19 : #include "vpx_dsp/vpx_filter.h"
20 : #include "vpx_ports/mem.h"
21 :
22 0 : static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
23 : uint8_t *dst, ptrdiff_t dst_stride,
24 : const InterpKernel *x_filters, int x0_q4,
25 : int x_step_q4, int w, int h) {
26 : int x, y;
27 0 : src -= SUBPEL_TAPS / 2 - 1;
28 :
29 0 : for (y = 0; y < h; ++y) {
30 0 : int x_q4 = x0_q4;
31 0 : for (x = 0; x < w; ++x) {
32 0 : const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
33 0 : const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
34 0 : int k, sum = 0;
35 0 : for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
36 0 : dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
37 0 : x_q4 += x_step_q4;
38 : }
39 0 : src += src_stride;
40 0 : dst += dst_stride;
41 : }
42 0 : }
43 :
44 0 : static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
45 : uint8_t *dst, ptrdiff_t dst_stride,
46 : const InterpKernel *x_filters, int x0_q4,
47 : int x_step_q4, int w, int h) {
48 : int x, y;
49 0 : src -= SUBPEL_TAPS / 2 - 1;
50 :
51 0 : for (y = 0; y < h; ++y) {
52 0 : int x_q4 = x0_q4;
53 0 : for (x = 0; x < w; ++x) {
54 0 : const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
55 0 : const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
56 0 : int k, sum = 0;
57 0 : for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
58 0 : dst[x] = ROUND_POWER_OF_TWO(
59 : dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
60 0 : x_q4 += x_step_q4;
61 : }
62 0 : src += src_stride;
63 0 : dst += dst_stride;
64 : }
65 0 : }
66 :
67 0 : static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
68 : uint8_t *dst, ptrdiff_t dst_stride,
69 : const InterpKernel *y_filters, int y0_q4,
70 : int y_step_q4, int w, int h) {
71 : int x, y;
72 0 : src -= src_stride * (SUBPEL_TAPS / 2 - 1);
73 :
74 0 : for (x = 0; x < w; ++x) {
75 0 : int y_q4 = y0_q4;
76 0 : for (y = 0; y < h; ++y) {
77 0 : const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
78 0 : const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
79 0 : int k, sum = 0;
80 0 : for (k = 0; k < SUBPEL_TAPS; ++k)
81 0 : sum += src_y[k * src_stride] * y_filter[k];
82 0 : dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
83 0 : y_q4 += y_step_q4;
84 : }
85 0 : ++src;
86 0 : ++dst;
87 : }
88 0 : }
89 :
90 0 : static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
91 : uint8_t *dst, ptrdiff_t dst_stride,
92 : const InterpKernel *y_filters, int y0_q4,
93 : int y_step_q4, int w, int h) {
94 : int x, y;
95 0 : src -= src_stride * (SUBPEL_TAPS / 2 - 1);
96 :
97 0 : for (x = 0; x < w; ++x) {
98 0 : int y_q4 = y0_q4;
99 0 : for (y = 0; y < h; ++y) {
100 0 : const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
101 0 : const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
102 0 : int k, sum = 0;
103 0 : for (k = 0; k < SUBPEL_TAPS; ++k)
104 0 : sum += src_y[k * src_stride] * y_filter[k];
105 0 : dst[y * dst_stride] = ROUND_POWER_OF_TWO(
106 : dst[y * dst_stride] +
107 : clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
108 : 1);
109 0 : y_q4 += y_step_q4;
110 : }
111 0 : ++src;
112 0 : ++dst;
113 : }
114 0 : }
115 :
// Separable 2-D sub-pel filter: horizontal pass into a fixed-size
// intermediate buffer, then a vertical pass over that buffer.
static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const InterpKernel *const x_filters,
                     int x0_q4, int x_step_q4,
                     const InterpKernel *const y_filters, int y0_q4,
                     int y_step_q4, int w, int h) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint8_t temp[64 * 135];
  // Rows of temp the vertical pass will read, including the filter tails.
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  // Enforce the limits the fixed-size temp buffer was derived from.
  assert(w <= 64);
  assert(h <= 64);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  // Start the horizontal pass (SUBPEL_TAPS / 2 - 1) rows above the block so
  // temp also contains the rows the vertical filter taps reach above it.
  convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
                 x_filters, x0_q4, x_step_q4, w, intermediate_height);
  // The vertical pass internally backs up by the same amount, so offset
  // forward here to re-center it on the block's first row.
  convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
                y_filters, y0_q4, y_step_q4, w, h);
}
147 :
148 0 : void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
149 : uint8_t *dst, ptrdiff_t dst_stride,
150 : const int16_t *filter_x, int x_step_q4,
151 : const int16_t *filter_y, int y_step_q4, int w,
152 : int h) {
153 0 : const InterpKernel *const filters_x = get_filter_base(filter_x);
154 0 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
155 :
156 : (void)filter_y;
157 : (void)y_step_q4;
158 :
159 0 : convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
160 : w, h);
161 0 : }
162 :
163 0 : void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
164 : uint8_t *dst, ptrdiff_t dst_stride,
165 : const int16_t *filter_x, int x_step_q4,
166 : const int16_t *filter_y, int y_step_q4, int w,
167 : int h) {
168 0 : const InterpKernel *const filters_x = get_filter_base(filter_x);
169 0 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
170 :
171 : (void)filter_y;
172 : (void)y_step_q4;
173 :
174 0 : convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
175 : x_step_q4, w, h);
176 0 : }
177 :
178 0 : void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
179 : uint8_t *dst, ptrdiff_t dst_stride,
180 : const int16_t *filter_x, int x_step_q4,
181 : const int16_t *filter_y, int y_step_q4, int w,
182 : int h) {
183 0 : const InterpKernel *const filters_y = get_filter_base(filter_y);
184 0 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
185 :
186 : (void)filter_x;
187 : (void)x_step_q4;
188 :
189 0 : convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
190 : w, h);
191 0 : }
192 :
193 0 : void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
194 : uint8_t *dst, ptrdiff_t dst_stride,
195 : const int16_t *filter_x, int x_step_q4,
196 : const int16_t *filter_y, int y_step_q4, int w,
197 : int h) {
198 0 : const InterpKernel *const filters_y = get_filter_base(filter_y);
199 0 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
200 :
201 : (void)filter_x;
202 : (void)x_step_q4;
203 :
204 0 : convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
205 : y_step_q4, w, h);
206 0 : }
207 :
208 0 : void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
209 : ptrdiff_t dst_stride, const int16_t *filter_x,
210 : int x_step_q4, const int16_t *filter_y, int y_step_q4,
211 : int w, int h) {
212 0 : const InterpKernel *const filters_x = get_filter_base(filter_x);
213 0 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
214 0 : const InterpKernel *const filters_y = get_filter_base(filter_y);
215 0 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
216 :
217 0 : convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
218 : filters_y, y0_q4, y_step_q4, w, h);
219 0 : }
220 :
221 0 : void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
222 : ptrdiff_t dst_stride, const int16_t *filter_x,
223 : int x_step_q4, const int16_t *filter_y, int y_step_q4,
224 : int w, int h) {
225 : // Fixed size intermediate buffer places limits on parameters.
226 : DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
227 0 : assert(w <= 64);
228 0 : assert(h <= 64);
229 :
230 0 : vpx_convolve8_c(src, src_stride, temp, 64, filter_x, x_step_q4, filter_y,
231 : y_step_q4, w, h);
232 0 : vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
233 0 : }
234 :
// Straight block copy: no filtering is applied, so all filter arguments are
// ignored. Copies h rows of w bytes from src to dst.
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int filter_x_stride, const int16_t *filter_y,
                         int filter_y_stride, int w, int h) {
  int row;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (row = 0; row < h; ++row) {
    memcpy(dst + (ptrdiff_t)row * dst_stride, src + (ptrdiff_t)row * src_stride,
           w);
  }
}
252 :
// Round-average src into dst in place: dst = (dst + src + 1) >> 1 per pixel.
// No filtering is applied, so all filter arguments are ignored.
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int filter_x_stride, const int16_t *filter_y,
                        int filter_y_stride, int w, int h) {
  int row, col;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      // Equivalent to ROUND_POWER_OF_TWO(dst[col] + src[col], 1).
      dst[col] = (uint8_t)((dst[col] + src[col] + 1) >> 1);
    }
    src += src_stride;
    dst += dst_stride;
  }
}
270 :
// Scaled-prediction horizontal entry point. In the C reference
// implementation this is identical to the unscaled 8-tap path, so forward.
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                        ptrdiff_t dst_stride, const int16_t *filter_x,
                        int x_step_q4, const int16_t *filter_y, int y_step_q4,
                        int w, int h) {
  vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                        filter_y, y_step_q4, w, h);
}
278 :
// Scaled-prediction vertical entry point. In the C reference implementation
// this is identical to the unscaled 8-tap path, so forward.
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                       ptrdiff_t dst_stride, const int16_t *filter_x,
                       int x_step_q4, const int16_t *filter_y, int y_step_q4,
                       int w, int h) {
  vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                       filter_y, y_step_q4, w, h);
}
286 :
// Scaled-prediction 2-D entry point. In the C reference implementation this
// is identical to the unscaled 8-tap path, so forward.
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                     ptrdiff_t dst_stride, const int16_t *filter_x,
                     int x_step_q4, const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                  filter_y, y_step_q4, w, h);
}
294 :
// Scaled-prediction horizontal averaging entry point; forwards to the
// unscaled averaging path (identical in the C reference implementation).
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y, int y_step_q4, int w,
                            int h) {
  vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
                            x_step_q4, filter_y, y_step_q4, w, h);
}
303 :
// Scaled-prediction vertical averaging entry point; forwards to the
// unscaled averaging path (identical in the C reference implementation).
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4, int w,
                           int h) {
  vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
                           x_step_q4, filter_y, y_step_q4, w, h);
}
312 :
// Scaled-prediction 2-D averaging entry point; forwards to the unscaled
// averaging path (identical in the C reference implementation).
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const int16_t *filter_x,
                         int x_step_q4, const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
                      filter_y, y_step_q4, w, h);
}
320 :
321 : #if CONFIG_VP9_HIGHBITDEPTH
322 : static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
323 : uint8_t *dst8, ptrdiff_t dst_stride,
324 : const InterpKernel *x_filters, int x0_q4,
325 : int x_step_q4, int w, int h, int bd) {
326 : int x, y;
327 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
328 : uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
329 : src -= SUBPEL_TAPS / 2 - 1;
330 :
331 : for (y = 0; y < h; ++y) {
332 : int x_q4 = x0_q4;
333 : for (x = 0; x < w; ++x) {
334 : const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
335 : const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
336 : int k, sum = 0;
337 : for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
338 : dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
339 : x_q4 += x_step_q4;
340 : }
341 : src += src_stride;
342 : dst += dst_stride;
343 : }
344 : }
345 :
346 : static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
347 : uint8_t *dst8, ptrdiff_t dst_stride,
348 : const InterpKernel *x_filters, int x0_q4,
349 : int x_step_q4, int w, int h, int bd) {
350 : int x, y;
351 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
352 : uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
353 : src -= SUBPEL_TAPS / 2 - 1;
354 :
355 : for (y = 0; y < h; ++y) {
356 : int x_q4 = x0_q4;
357 : for (x = 0; x < w; ++x) {
358 : const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
359 : const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
360 : int k, sum = 0;
361 : for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
362 : dst[x] = ROUND_POWER_OF_TWO(
363 : dst[x] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
364 : 1);
365 : x_q4 += x_step_q4;
366 : }
367 : src += src_stride;
368 : dst += dst_stride;
369 : }
370 : }
371 :
372 : static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
373 : uint8_t *dst8, ptrdiff_t dst_stride,
374 : const InterpKernel *y_filters, int y0_q4,
375 : int y_step_q4, int w, int h, int bd) {
376 : int x, y;
377 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
378 : uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
379 : src -= src_stride * (SUBPEL_TAPS / 2 - 1);
380 :
381 : for (x = 0; x < w; ++x) {
382 : int y_q4 = y0_q4;
383 : for (y = 0; y < h; ++y) {
384 : const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
385 : const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
386 : int k, sum = 0;
387 : for (k = 0; k < SUBPEL_TAPS; ++k)
388 : sum += src_y[k * src_stride] * y_filter[k];
389 : dst[y * dst_stride] =
390 : clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
391 : y_q4 += y_step_q4;
392 : }
393 : ++src;
394 : ++dst;
395 : }
396 : }
397 :
398 : static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
399 : uint8_t *dst8, ptrdiff_t dst_stride,
400 : const InterpKernel *y_filters, int y0_q4,
401 : int y_step_q4, int w, int h, int bd) {
402 : int x, y;
403 : const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
404 : uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
405 : src -= src_stride * (SUBPEL_TAPS / 2 - 1);
406 :
407 : for (x = 0; x < w; ++x) {
408 : int y_q4 = y0_q4;
409 : for (y = 0; y < h; ++y) {
410 : const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
411 : const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
412 : int k, sum = 0;
413 : for (k = 0; k < SUBPEL_TAPS; ++k)
414 : sum += src_y[k * src_stride] * y_filter[k];
415 : dst[y * dst_stride] = ROUND_POWER_OF_TWO(
416 : dst[y * dst_stride] +
417 : clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
418 : 1);
419 : y_q4 += y_step_q4;
420 : }
421 : ++src;
422 : ++dst;
423 : }
424 : }
425 :
// High-bit-depth separable 2-D sub-pel filter: horizontal pass into a
// fixed-size uint16_t intermediate buffer, then a vertical pass over it.
static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const InterpKernel *const x_filters, int x0_q4,
                            int x_step_q4, const InterpKernel *const y_filters,
                            int y0_q4, int y_step_q4, int w, int h, int bd) {
  // Note: Fixed size intermediate buffer, temp, places limits on parameters.
  // 2d filtering proceeds in 2 steps:
  //   (1) Interpolate horizontally into an intermediate buffer, temp.
  //   (2) Interpolate temp vertically to derive the sub-pixel result.
  // Deriving the maximum number of rows in the temp buffer (135):
  // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
  // --Largest block size is 64x64 pixels.
  // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
  //   original frame (in 1/16th pixel units).
  // --Must round-up because block may be located at sub-pixel position.
  // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
  // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
  uint16_t temp[64 * 135];
  // Rows of temp the vertical pass will read, including the filter tails.
  const int intermediate_height =
      (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;

  // Enforce the limits the fixed-size temp buffer was derived from.
  assert(w <= 64);
  assert(h <= 64);
  assert(y_step_q4 <= 32);
  assert(x_step_q4 <= 32);

  // Start the horizontal pass (SUBPEL_TAPS / 2 - 1) rows above the block so
  // temp also contains the rows the vertical filter taps reach above it.
  highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
                        CONVERT_TO_BYTEPTR(temp), 64, x_filters, x0_q4,
                        x_step_q4, w, intermediate_height, bd);
  // The vertical pass internally backs up by the same amount, so offset
  // forward here to re-center it on the block's first row.
  highbd_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1),
                       64, dst, dst_stride, y_filters, y0_q4, y_step_q4, w, h,
                       bd);
}
459 :
460 : void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
461 : uint8_t *dst, ptrdiff_t dst_stride,
462 : const int16_t *filter_x, int x_step_q4,
463 : const int16_t *filter_y, int y_step_q4, int w,
464 : int h, int bd) {
465 : const InterpKernel *const filters_x = get_filter_base(filter_x);
466 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
467 :
468 : (void)filter_y;
469 : (void)y_step_q4;
470 :
471 : highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
472 : x_step_q4, w, h, bd);
473 : }
474 :
475 : void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
476 : uint8_t *dst, ptrdiff_t dst_stride,
477 : const int16_t *filter_x, int x_step_q4,
478 : const int16_t *filter_y, int y_step_q4,
479 : int w, int h, int bd) {
480 : const InterpKernel *const filters_x = get_filter_base(filter_x);
481 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
482 :
483 : (void)filter_y;
484 : (void)y_step_q4;
485 :
486 : highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
487 : x_step_q4, w, h, bd);
488 : }
489 :
490 : void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
491 : uint8_t *dst, ptrdiff_t dst_stride,
492 : const int16_t *filter_x, int x_step_q4,
493 : const int16_t *filter_y, int y_step_q4, int w,
494 : int h, int bd) {
495 : const InterpKernel *const filters_y = get_filter_base(filter_y);
496 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
497 :
498 : (void)filter_x;
499 : (void)x_step_q4;
500 :
501 : highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
502 : y_step_q4, w, h, bd);
503 : }
504 :
505 : void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
506 : uint8_t *dst, ptrdiff_t dst_stride,
507 : const int16_t *filter_x, int x_step_q4,
508 : const int16_t *filter_y, int y_step_q4,
509 : int w, int h, int bd) {
510 : const InterpKernel *const filters_y = get_filter_base(filter_y);
511 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
512 :
513 : (void)filter_x;
514 : (void)x_step_q4;
515 :
516 : highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
517 : y_step_q4, w, h, bd);
518 : }
519 :
520 : void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
521 : uint8_t *dst, ptrdiff_t dst_stride,
522 : const int16_t *filter_x, int x_step_q4,
523 : const int16_t *filter_y, int y_step_q4, int w,
524 : int h, int bd) {
525 : const InterpKernel *const filters_x = get_filter_base(filter_x);
526 : const int x0_q4 = get_filter_offset(filter_x, filters_x);
527 : const InterpKernel *const filters_y = get_filter_base(filter_y);
528 : const int y0_q4 = get_filter_offset(filter_y, filters_y);
529 :
530 : highbd_convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
531 : filters_y, y0_q4, y_step_q4, w, h, bd);
532 : }
533 :
534 : void vpx_highbd_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
535 : uint8_t *dst, ptrdiff_t dst_stride,
536 : const int16_t *filter_x, int x_step_q4,
537 : const int16_t *filter_y, int y_step_q4, int w,
538 : int h, int bd) {
539 : // Fixed size intermediate buffer places limits on parameters.
540 : DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
541 : assert(w <= 64);
542 : assert(h <= 64);
543 :
544 : vpx_highbd_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64,
545 : filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd);
546 : vpx_highbd_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, NULL,
547 : 0, NULL, 0, w, h, bd);
548 : }
549 :
// High-bit-depth block copy: h rows of w uint16_t samples; no filtering is
// applied, so the filter and bd arguments are ignored.
void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                                uint8_t *dst8, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int filter_x_stride,
                                const int16_t *filter_y, int filter_y_stride,
                                int w, int h, int bd) {
  int row;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  for (row = 0; row < h; ++row) {
    memcpy(dst + (ptrdiff_t)row * dst_stride, src + (ptrdiff_t)row * src_stride,
           w * sizeof(uint16_t));
  }
}
571 :
// High-bit-depth round-average of src into dst: dst = (dst + src + 1) >> 1
// per sample. No filtering; the filter and bd arguments are ignored.
void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                               uint8_t *dst8, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int filter_x_stride,
                               const int16_t *filter_y, int filter_y_stride,
                               int w, int h, int bd) {
  int row, col;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      // Equivalent to ROUND_POWER_OF_TWO(dst[col] + src[col], 1).
      dst[col] = (uint16_t)((dst[col] + src[col] + 1) >> 1);
    }
    src += src_stride;
    dst += dst_stride;
  }
}
593 : #endif
|