LCOV - code coverage report
Current view: top level - media/libvpx/libvpx/vp9/common - vp9_loopfilter.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 674 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 21 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "./vpx_config.h"
      12             : #include "./vpx_dsp_rtcd.h"
      13             : #include "vp9/common/vp9_loopfilter.h"
      14             : #include "vp9/common/vp9_onyxc_int.h"
      15             : #include "vp9/common/vp9_reconinter.h"
      16             : #include "vpx_dsp/vpx_dsp_common.h"
      17             : #include "vpx_mem/vpx_mem.h"
      18             : #include "vpx_ports/mem.h"
      19             : 
      20             : #include "vp9/common/vp9_seg_common.h"
      21             : 
      22             : // 64 bit masks for left transform size. Each 1 represents a position where
      23             : // we should apply a loop filter across the left border of an 8x8 block
      24             : // boundary.
      25             : //
      26             : // In the case of TX_16X16 (low order byte first) we end up with
      27             : // a mask that looks like this:
      28             : //
      29             : //    10101010
      30             : //    10101010
      31             : //    10101010
      32             : //    10101010
      33             : //    10101010
      34             : //    10101010
      35             : //    10101010
      36             : //    10101010
      37             : //
      38             : // A loopfilter should be applied to every other 8x8 horizontally.
      39             : static const uint64_t left_64x64_txform_mask[TX_SIZES] = {
      40             :   0xffffffffffffffffULL,  // TX_4X4
      41             :   0xffffffffffffffffULL,  // TX_8x8
      42             :   0x5555555555555555ULL,  // TX_16x16
      43             :   0x1111111111111111ULL,  // TX_32x32
      44             : };
      45             : 
      46             : // 64 bit masks for above transform size. Each 1 represents a position where
      47             : // we should apply a loop filter across the top border of an 8x8 block
      48             : // boundary.
      49             : //
      50             : // In the case of TX_32x32 (low order byte first) we end up with
      51             : // a mask that looks like this:
      52             : //
      53             : //    11111111
      54             : //    00000000
      55             : //    00000000
      56             : //    00000000
      57             : //    11111111
      58             : //    00000000
      59             : //    00000000
      60             : //    00000000
      61             : //
      62             : // A loopfilter should be applied to every 4th row vertically.
      63             : static const uint64_t above_64x64_txform_mask[TX_SIZES] = {
      64             :   0xffffffffffffffffULL,  // TX_4X4
      65             :   0xffffffffffffffffULL,  // TX_8x8
      66             :   0x00ff00ff00ff00ffULL,  // TX_16x16
      67             :   0x000000ff000000ffULL,  // TX_32x32
      68             : };
      69             : 
      70             : // 64 bit masks for prediction sizes (left). Each 1 marks an 8x8 position
      71             : // on the left border of the prediction block. These are aligned to the
      72             : // right most appropriate bit, and then shifted into place.
      73             : //
      74             : // In the case of BLOCK_16X32 (low order byte first) we end up with
      75             : // a mask that looks like this:
      76             : //
      77             : //  10000000
      78             : //  10000000
      79             : //  10000000
      80             : //  10000000
      81             : //  00000000
      82             : //  00000000
      83             : //  00000000
      84             : //  00000000
      85             : static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
      86             :   0x0000000000000001ULL,  // BLOCK_4X4,
      87             :   0x0000000000000001ULL,  // BLOCK_4X8,
      88             :   0x0000000000000001ULL,  // BLOCK_8X4,
      89             :   0x0000000000000001ULL,  // BLOCK_8X8,
      90             :   0x0000000000000101ULL,  // BLOCK_8X16,
      91             :   0x0000000000000001ULL,  // BLOCK_16X8,
      92             :   0x0000000000000101ULL,  // BLOCK_16X16,
      93             :   0x0000000001010101ULL,  // BLOCK_16X32,
      94             :   0x0000000000000101ULL,  // BLOCK_32X16,
      95             :   0x0000000001010101ULL,  // BLOCK_32X32,
      96             :   0x0101010101010101ULL,  // BLOCK_32X64,
      97             :   0x0000000001010101ULL,  // BLOCK_64X32,
      98             :   0x0101010101010101ULL,  // BLOCK_64X64
      99             : };
     100             : 
     101             : // 64 bit mask to shift and set for each prediction size.
     102             : static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
     103             :   0x0000000000000001ULL,  // BLOCK_4X4
     104             :   0x0000000000000001ULL,  // BLOCK_4X8
     105             :   0x0000000000000001ULL,  // BLOCK_8X4
     106             :   0x0000000000000001ULL,  // BLOCK_8X8
     107             :   0x0000000000000001ULL,  // BLOCK_8X16,
     108             :   0x0000000000000003ULL,  // BLOCK_16X8
     109             :   0x0000000000000003ULL,  // BLOCK_16X16
     110             :   0x0000000000000003ULL,  // BLOCK_16X32,
     111             :   0x000000000000000fULL,  // BLOCK_32X16,
     112             :   0x000000000000000fULL,  // BLOCK_32X32,
     113             :   0x000000000000000fULL,  // BLOCK_32X64,
     114             :   0x00000000000000ffULL,  // BLOCK_64X32,
     115             :   0x00000000000000ffULL,  // BLOCK_64X64
     116             : };
     117             : // 64 bit mask to shift and set for each prediction size. A bit is set for
     118             : // each 8x8 block covered by a block of the given size when it is placed
     119             : // at the top-left corner of the 64x64 block.
     120             : static const uint64_t size_mask[BLOCK_SIZES] = {
     121             :   0x0000000000000001ULL,  // BLOCK_4X4
     122             :   0x0000000000000001ULL,  // BLOCK_4X8
     123             :   0x0000000000000001ULL,  // BLOCK_8X4
     124             :   0x0000000000000001ULL,  // BLOCK_8X8
     125             :   0x0000000000000101ULL,  // BLOCK_8X16,
     126             :   0x0000000000000003ULL,  // BLOCK_16X8
     127             :   0x0000000000000303ULL,  // BLOCK_16X16
     128             :   0x0000000003030303ULL,  // BLOCK_16X32,
     129             :   0x0000000000000f0fULL,  // BLOCK_32X16,
     130             :   0x000000000f0f0f0fULL,  // BLOCK_32X32,
     131             :   0x0f0f0f0f0f0f0f0fULL,  // BLOCK_32X64,
     132             :   0x00000000ffffffffULL,  // BLOCK_64X32,
     133             :   0xffffffffffffffffULL,  // BLOCK_64X64
     134             : };
     135             : 
     136             : // These are used for masking the left and above borders.
     137             : static const uint64_t left_border = 0x1111111111111111ULL;
     138             : static const uint64_t above_border = 0x000000ff000000ffULL;
     139             : 
     140             : // 16 bit masks for uv transform sizes.
     141             : static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
     142             :   0xffff,  // TX_4X4
     143             :   0xffff,  // TX_8x8
     144             :   0x5555,  // TX_16x16
     145             :   0x1111,  // TX_32x32
     146             : };
     147             : 
     148             : static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
     149             :   0xffff,  // TX_4X4
     150             :   0xffff,  // TX_8x8
     151             :   0x0f0f,  // TX_16x16
     152             :   0x000f,  // TX_32x32
     153             : };
     154             : 
     155             : // 16 bit left mask to shift and set for each uv prediction size.
     156             : static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
     157             :   0x0001,  // BLOCK_4X4,
     158             :   0x0001,  // BLOCK_4X8,
     159             :   0x0001,  // BLOCK_8X4,
     160             :   0x0001,  // BLOCK_8X8,
     161             :   0x0001,  // BLOCK_8X16,
     162             :   0x0001,  // BLOCK_16X8,
     163             :   0x0001,  // BLOCK_16X16,
     164             :   0x0011,  // BLOCK_16X32,
     165             :   0x0001,  // BLOCK_32X16,
     166             :   0x0011,  // BLOCK_32X32,
     167             :   0x1111,  // BLOCK_32X64
     168             :   0x0011,  // BLOCK_64X32,
     169             :   0x1111,  // BLOCK_64X64
     170             : };
     171             : // 16 bit above mask to shift and set for each uv prediction size.
     172             : static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
     173             :   0x0001,  // BLOCK_4X4
     174             :   0x0001,  // BLOCK_4X8
     175             :   0x0001,  // BLOCK_8X4
     176             :   0x0001,  // BLOCK_8X8
     177             :   0x0001,  // BLOCK_8X16,
     178             :   0x0001,  // BLOCK_16X8
     179             :   0x0001,  // BLOCK_16X16
     180             :   0x0001,  // BLOCK_16X32,
     181             :   0x0003,  // BLOCK_32X16,
     182             :   0x0003,  // BLOCK_32X32,
     183             :   0x0003,  // BLOCK_32X64,
     184             :   0x000f,  // BLOCK_64X32,
     185             :   0x000f,  // BLOCK_64X64
     186             : };
     187             : 
     188             : // 16 bit mask to shift and set for each uv prediction size.
     189             : static const uint16_t size_mask_uv[BLOCK_SIZES] = {
     190             :   0x0001,  // BLOCK_4X4
     191             :   0x0001,  // BLOCK_4X8
     192             :   0x0001,  // BLOCK_8X4
     193             :   0x0001,  // BLOCK_8X8
     194             :   0x0001,  // BLOCK_8X16,
     195             :   0x0001,  // BLOCK_16X8
     196             :   0x0001,  // BLOCK_16X16
     197             :   0x0011,  // BLOCK_16X32,
     198             :   0x0003,  // BLOCK_32X16,
     199             :   0x0033,  // BLOCK_32X32,
     200             :   0x3333,  // BLOCK_32X64,
     201             :   0x00ff,  // BLOCK_64X32,
     202             :   0xffff,  // BLOCK_64X64
     203             : };
     204             : static const uint16_t left_border_uv = 0x1111;
     205             : static const uint16_t above_border_uv = 0x000f;
     206             : 
     207             : static const int mode_lf_lut[MB_MODE_COUNT] = {
     208             :   0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
     209             :   1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
     210             : };
     211             : 
     212           0 : static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
     213             :   int lvl;
     214             : 
     215             :   // For each possible value for the loop filter fill out limits
     216           0 :   for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
     217             :     // Set loop filter parameters that control sharpness.
     218           0 :     int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
     219             : 
     220           0 :     if (sharpness_lvl > 0) {
     221           0 :       if (block_inside_limit > (9 - sharpness_lvl))
     222           0 :         block_inside_limit = (9 - sharpness_lvl);
     223             :     }
     224             : 
     225           0 :     if (block_inside_limit < 1) block_inside_limit = 1;
     226             : 
     227           0 :     memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
     228           0 :     memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
     229             :            SIMD_WIDTH);
     230             :   }
     231           0 : }
     232             : 
     233           0 : static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
     234             :                                 const MODE_INFO *mi) {
     235           0 :   return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]][mode_lf_lut[mi->mode]];
     236             : }
     237             : 
     238           0 : void vp9_loop_filter_init(VP9_COMMON *cm) {
     239           0 :   loop_filter_info_n *lfi = &cm->lf_info;
     240           0 :   struct loopfilter *lf = &cm->lf;
     241             :   int lvl;
     242             : 
     243             :   // init limits for given sharpness
     244           0 :   update_sharpness(lfi, lf->sharpness_level);
     245           0 :   lf->last_sharpness_level = lf->sharpness_level;
     246             : 
     247             :   // init hev threshold const vectors
     248           0 :   for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
     249           0 :     memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
     250           0 : }
     251             : 
     252           0 : void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
     253             :   int seg_id;
     254             :   // scale is the multiplier for lf_deltas:
     255             :   // the multiplier is 1 when default_filt_lvl is between 0 and 31;
     256             :   // 2 when default_filt_lvl is between 32 and 63
     257           0 :   const int scale = 1 << (default_filt_lvl >> 5);
     258           0 :   loop_filter_info_n *const lfi = &cm->lf_info;
     259           0 :   struct loopfilter *const lf = &cm->lf;
     260           0 :   const struct segmentation *const seg = &cm->seg;
     261             : 
     262             :   // update limits if sharpness has changed
     263           0 :   if (lf->last_sharpness_level != lf->sharpness_level) {
     264           0 :     update_sharpness(lfi, lf->sharpness_level);
     265           0 :     lf->last_sharpness_level = lf->sharpness_level;
     266             :   }
     267             : 
     268           0 :   for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
     269           0 :     int lvl_seg = default_filt_lvl;
     270           0 :     if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
     271           0 :       const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
     272           0 :       lvl_seg = clamp(
     273           0 :           seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0,
     274             :           MAX_LOOP_FILTER);
     275             :     }
     276             : 
     277           0 :     if (!lf->mode_ref_delta_enabled) {
     278             :       // we could get rid of this if we assume that deltas are set to
     279             :       // zero when not in use; encoder always uses deltas
     280           0 :       memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
     281             :     } else {
     282             :       int ref, mode;
     283           0 :       const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
     284           0 :       lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
     285             : 
     286           0 :       for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
     287           0 :         for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
     288           0 :           const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale +
     289           0 :                                 lf->mode_deltas[mode] * scale;
     290           0 :           lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
     291             :         }
     292             :       }
     293             :     }
     294             :   }
     295           0 : }
     296             : 
     297           0 : static void filter_selectively_vert_row2(
     298             :     int subsampling_factor, uint8_t *s, int pitch, unsigned int mask_16x16,
     299             :     unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int,
     300             :     const loop_filter_thresh *lfthr, const uint8_t *lfl) {
     301           0 :   const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
     302           0 :   const int lfl_forward = subsampling_factor ? 4 : 8;
     303           0 :   const unsigned int dual_one = 1 | (1 << lfl_forward);
     304             :   unsigned int mask;
     305             :   uint8_t *ss[2];
     306           0 :   ss[0] = s;
     307             : 
     308           0 :   for (mask =
     309           0 :            (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
     310           0 :        mask; mask = (mask & ~dual_one) >> 1) {
     311           0 :     if (mask & dual_one) {
     312             :       const loop_filter_thresh *lfis[2];
     313           0 :       lfis[0] = lfthr + *lfl;
     314           0 :       lfis[1] = lfthr + *(lfl + lfl_forward);
     315           0 :       ss[1] = ss[0] + 8 * pitch;
     316             : 
     317           0 :       if (mask_16x16 & dual_one) {
     318           0 :         if ((mask_16x16 & dual_one) == dual_one) {
     319           0 :           vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
     320           0 :                                    lfis[0]->hev_thr);
     321             :         } else {
     322           0 :           const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
     323           0 :           vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
     324           0 :                               lfi->lim, lfi->hev_thr);
     325             :         }
     326             :       }
     327             : 
     328           0 :       if (mask_8x8 & dual_one) {
     329           0 :         if ((mask_8x8 & dual_one) == dual_one) {
     330           0 :           vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
     331           0 :                                   lfis[0]->hev_thr, lfis[1]->mblim,
     332           0 :                                   lfis[1]->lim, lfis[1]->hev_thr);
     333             :         } else {
     334           0 :           const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
     335           0 :           vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,
     336           0 :                              lfi->hev_thr);
     337             :         }
     338             :       }
     339             : 
     340           0 :       if (mask_4x4 & dual_one) {
     341           0 :         if ((mask_4x4 & dual_one) == dual_one) {
     342           0 :           vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
     343           0 :                                   lfis[0]->hev_thr, lfis[1]->mblim,
     344           0 :                                   lfis[1]->lim, lfis[1]->hev_thr);
     345             :         } else {
     346           0 :           const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
     347           0 :           vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,
     348           0 :                              lfi->hev_thr);
     349             :         }
     350             :       }
     351             : 
     352           0 :       if (mask_4x4_int & dual_one) {
     353           0 :         if ((mask_4x4_int & dual_one) == dual_one) {
     354           0 :           vpx_lpf_vertical_4_dual(
     355           0 :               ss[0] + 4, pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr,
     356           0 :               lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr);
     357             :         } else {
     358           0 :           const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
     359           0 :           vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,
     360           0 :                              lfi->lim, lfi->hev_thr);
     361             :         }
     362             :       }
     363             :     }
     364             : 
     365           0 :     ss[0] += 8;
     366           0 :     lfl += 1;
     367           0 :     mask_16x16 >>= 1;
     368           0 :     mask_8x8 >>= 1;
     369           0 :     mask_4x4 >>= 1;
     370           0 :     mask_4x4_int >>= 1;
     371             :   }
     372           0 : }
     373             : 
     374             : #if CONFIG_VP9_HIGHBITDEPTH
     375             : static void highbd_filter_selectively_vert_row2(
     376             :     int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16,
     377             :     unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int,
     378             :     const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) {
     379             :   const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
     380             :   const int lfl_forward = subsampling_factor ? 4 : 8;
     381             :   const unsigned int dual_one = 1 | (1 << lfl_forward);
     382             :   unsigned int mask;
     383             :   uint16_t *ss[2];
     384             :   ss[0] = s;
     385             : 
     386             :   for (mask =
     387             :            (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
     388             :        mask; mask = (mask & ~dual_one) >> 1) {
     389             :     if (mask & dual_one) {
     390             :       const loop_filter_thresh *lfis[2];
     391             :       lfis[0] = lfthr + *lfl;
     392             :       lfis[1] = lfthr + *(lfl + lfl_forward);
     393             :       ss[1] = ss[0] + 8 * pitch;
     394             : 
     395             :       if (mask_16x16 & dual_one) {
     396             :         if ((mask_16x16 & dual_one) == dual_one) {
     397             :           vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,
     398             :                                           lfis[0]->lim, lfis[0]->hev_thr, bd);
     399             :         } else {
     400             :           const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
     401             :           vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
     402             :                                      lfi->lim, lfi->hev_thr, bd);
     403             :         }
     404             :       }
     405             : 
     406             :       if (mask_8x8 & dual_one) {
     407             :         if ((mask_8x8 & dual_one) == dual_one) {
     408             :           vpx_highbd_lpf_vertical_8_dual(
     409             :               ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr,
     410             :               lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd);
     411             :         } else {
     412             :           const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
     413             :           vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,
     414             :                                     lfi->lim, lfi->hev_thr, bd);
     415             :         }
     416             :       }
     417             : 
     418             :       if (mask_4x4 & dual_one) {
     419             :         if ((mask_4x4 & dual_one) == dual_one) {
     420             :           vpx_highbd_lpf_vertical_4_dual(
     421             :               ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr,
     422             :               lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd);
     423             :         } else {
     424             :           const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
     425             :           vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,
     426             :                                     lfi->lim, lfi->hev_thr, bd);
     427             :         }
     428             :       }
     429             : 
     430             :       if (mask_4x4_int & dual_one) {
     431             :         if ((mask_4x4_int & dual_one) == dual_one) {
     432             :           vpx_highbd_lpf_vertical_4_dual(
     433             :               ss[0] + 4, pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr,
     434             :               lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd);
     435             :         } else {
     436             :           const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
     437             :           vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,
     438             :                                     lfi->mblim, lfi->lim, lfi->hev_thr, bd);
     439             :         }
     440             :       }
     441             :     }
     442             : 
     443             :     ss[0] += 8;
     444             :     lfl += 1;
     445             :     mask_16x16 >>= 1;
     446             :     mask_8x8 >>= 1;
     447             :     mask_4x4 >>= 1;
     448             :     mask_4x4_int >>= 1;
     449             :   }
     450             : }
     451             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     452             : 
     453           0 : static void filter_selectively_horiz(
     454             :     uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
     455             :     unsigned int mask_4x4, unsigned int mask_4x4_int,
     456             :     const loop_filter_thresh *lfthr, const uint8_t *lfl) {
     457             :   unsigned int mask;
     458             :   int count;
     459             : 
     460           0 :   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
     461           0 :        mask >>= count) {
     462           0 :     count = 1;
     463           0 :     if (mask & 1) {
     464           0 :       const loop_filter_thresh *lfi = lfthr + *lfl;
     465             : 
     466           0 :       if (mask_16x16 & 1) {
     467           0 :         if ((mask_16x16 & 3) == 3) {
     468           0 :           vpx_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim,
     469           0 :                                      lfi->hev_thr);
     470           0 :           count = 2;
     471             :         } else {
     472           0 :           vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
     473             :         }
     474           0 :       } else if (mask_8x8 & 1) {
     475           0 :         if ((mask_8x8 & 3) == 3) {
     476             :           // Next block's thresholds.
     477           0 :           const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
     478             : 
     479           0 :           vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
     480           0 :                                     lfi->hev_thr, lfin->mblim, lfin->lim,
     481           0 :                                     lfin->hev_thr);
     482             : 
     483           0 :           if ((mask_4x4_int & 3) == 3) {
     484           0 :             vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
     485           0 :                                       lfi->lim, lfi->hev_thr, lfin->mblim,
     486           0 :                                       lfin->lim, lfin->hev_thr);
     487             :           } else {
     488           0 :             if (mask_4x4_int & 1)
     489           0 :               vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
     490           0 :                                    lfi->hev_thr);
     491           0 :             else if (mask_4x4_int & 2)
     492           0 :               vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
     493           0 :                                    lfin->lim, lfin->hev_thr);
     494             :           }
     495           0 :           count = 2;
     496             :         } else {
     497           0 :           vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
     498             : 
     499           0 :           if (mask_4x4_int & 1)
     500           0 :             vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
     501           0 :                                  lfi->hev_thr);
     502             :         }
     503           0 :       } else if (mask_4x4 & 1) {
     504           0 :         if ((mask_4x4 & 3) == 3) {
     505             :           // Next block's thresholds.
     506           0 :           const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
     507             : 
     508           0 :           vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
     509           0 :                                     lfi->hev_thr, lfin->mblim, lfin->lim,
     510           0 :                                     lfin->hev_thr);
     511           0 :           if ((mask_4x4_int & 3) == 3) {
     512           0 :             vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
     513           0 :                                       lfi->lim, lfi->hev_thr, lfin->mblim,
     514           0 :                                       lfin->lim, lfin->hev_thr);
     515             :           } else {
     516           0 :             if (mask_4x4_int & 1)
     517           0 :               vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
     518           0 :                                    lfi->hev_thr);
     519           0 :             else if (mask_4x4_int & 2)
     520           0 :               vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
     521           0 :                                    lfin->lim, lfin->hev_thr);
     522             :           }
     523           0 :           count = 2;
     524             :         } else {
     525           0 :           vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
     526             : 
     527           0 :           if (mask_4x4_int & 1)
     528           0 :             vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
     529           0 :                                  lfi->hev_thr);
     530             :         }
     531             :       } else {
     532           0 :         vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
     533           0 :                              lfi->hev_thr);
     534             :       }
     535             :     }
     536           0 :     s += 8 * count;
     537           0 :     lfl += count;
     538           0 :     mask_16x16 >>= count;
     539           0 :     mask_8x8 >>= count;
     540           0 :     mask_4x4 >>= count;
     541           0 :     mask_4x4_int >>= count;
     542             :   }
     543           0 : }
     544             : 
     545             : #if CONFIG_VP9_HIGHBITDEPTH
     546             : static void highbd_filter_selectively_horiz(  // High-bitdepth variant: scans the OR of all four masks bit by bit, moving s 8 samples per bit.
     547             :     uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
     548             :     unsigned int mask_4x4, unsigned int mask_4x4_int,
     549             :     const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) {
     550             :   unsigned int mask;
     551             :   int count;  // Mask bits consumed by the current iteration (1, or 2 after a dual call).
     552             : 
     553             :   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
     554             :        mask >>= count) {
     555             :     count = 1;
     556             :     if (mask & 1) {
     557             :       const loop_filter_thresh *lfi = lfthr + *lfl;  // Thresholds selected by the level byte at *lfl.
     558             : 
     559             :       if (mask_16x16 & 1) {
     560             :         if ((mask_16x16 & 3) == 3) {  // Two adjacent positions set: one dual call, lfi used for both.
     561             :           vpx_highbd_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim,
     562             :                                             lfi->hev_thr, bd);
     563             :           count = 2;
     564             :         } else {
     565             :           vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
     566             :                                        lfi->hev_thr, bd);
     567             :         }
     568             :       } else if (mask_8x8 & 1) {
     569             :         if ((mask_8x8 & 3) == 3) {
     570             :           // Next block's thresholds.
     571             :           const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
     572             : 
     573             :           vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
     574             :                                            lfi->hev_thr, lfin->mblim, lfin->lim,
     575             :                                            lfin->hev_thr, bd);
     576             : 
     577             :           if ((mask_4x4_int & 3) == 3) {  // Both positions also have an internal 4x4 edge at s + 4 * pitch.
     578             :             vpx_highbd_lpf_horizontal_4_dual(
     579             :                 s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
     580             :                 lfin->mblim, lfin->lim, lfin->hev_thr, bd);
     581             :           } else {
     582             :             if (mask_4x4_int & 1) {  // Internal edge on the first position only.
     583             :               vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
     584             :                                           lfi->lim, lfi->hev_thr, bd);
     585             :             } else if (mask_4x4_int & 2) {  // Internal edge on the second position only: offset s by 8, use lfin.
     586             :               vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
     587             :                                           lfin->lim, lfin->hev_thr, bd);
     588             :             }
     589             :           }
     590             :           count = 2;
     591             :         } else {
     592             :           vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
     593             :                                       lfi->hev_thr, bd);
     594             : 
     595             :           if (mask_4x4_int & 1) {
     596             :             vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
     597             :                                         lfi->lim, lfi->hev_thr, bd);
     598             :           }
     599             :         }
     600             :       } else if (mask_4x4 & 1) {
     601             :         if ((mask_4x4 & 3) == 3) {
     602             :           // Next block's thresholds.
     603             :           const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
     604             : 
     605             :           vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
     606             :                                            lfi->hev_thr, lfin->mblim, lfin->lim,
     607             :                                            lfin->hev_thr, bd);
     608             :           if ((mask_4x4_int & 3) == 3) {
     609             :             vpx_highbd_lpf_horizontal_4_dual(
     610             :                 s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
     611             :                 lfin->mblim, lfin->lim, lfin->hev_thr, bd);
     612             :           } else {
     613             :             if (mask_4x4_int & 1) {
     614             :               vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
     615             :                                           lfi->lim, lfi->hev_thr, bd);
     616             :             } else if (mask_4x4_int & 2) {
     617             :               vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
     618             :                                           lfin->lim, lfin->hev_thr, bd);
     619             :             }
     620             :           }
     621             :           count = 2;
     622             :         } else {
     623             :           vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
     624             :                                       lfi->hev_thr, bd);
     625             : 
     626             :           if (mask_4x4_int & 1) {
     627             :             vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
     628             :                                         lfi->lim, lfi->hev_thr, bd);
     629             :           }
     630             :         }
     631             :       } else {  // mask bit is set but none of 16x16/8x8/4x4 is, so only mask_4x4_int is set here.
     632             :         vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
     633             :                                     lfi->hev_thr, bd);
     634             :       }
     635             :     }
     636             :     s += 8 * count;  // All cursors below advance in lock-step with the loop's mask >>= count.
     637             :     lfl += count;
     638             :     mask_16x16 >>= count;
     639             :     mask_8x8 >>= count;
     640             :     mask_4x4 >>= count;
     641             :     mask_4x4_int >>= count;
     642             :   }
     643             : }
     644             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     645             : 
     646             : // This function ORs into the current lfm structure the positions where loop
     647             : // filters should be applied for the specific mi we are looking at. It uses
     648             : // the block size (32x16, 32x32, etc.), the transform size, the skip flag
     649             : // combined with whether the block is inter, and the loop filter level of the
     650             : // block we are currently looking at. shift_y and shift_uv position the 1's
     651             : // we produce within the y and uv masks respectively.
     652           0 : static void build_masks(const loop_filter_info_n *const lfi_n,
     653             :                         const MODE_INFO *mi, const int shift_y,
     654             :                         const int shift_uv, LOOP_FILTER_MASK *lfm) {
     655           0 :   const BLOCK_SIZE block_size = mi->sb_type;
     656           0 :   const TX_SIZE tx_size_y = mi->tx_size;
     657           0 :   const TX_SIZE tx_size_uv = uv_txsize_lookup[block_size][tx_size_y][1][1];  // NOTE(review): [1][1] presumably means subsampled in x and y - confirm against the table.
     658           0 :   const int filter_level = get_filter_level(lfi_n, mi);
     659           0 :   uint64_t *const left_y = &lfm->left_y[tx_size_y];  // The masks updated below are the ones indexed by this block's transform size.
     660           0 :   uint64_t *const above_y = &lfm->above_y[tx_size_y];
     661           0 :   uint64_t *const int_4x4_y = &lfm->int_4x4_y;
     662           0 :   uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
     663           0 :   uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
     664           0 :   uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
     665             :   int i;
     666             : 
     667             :   // If filter level is 0 we don't loop filter; lfm is left untouched.
     668           0 :   if (!filter_level) {
     669           0 :     return;
     670             :   } else {
     671           0 :     const int w = num_8x8_blocks_wide_lookup[block_size];
     672           0 :     const int h = num_8x8_blocks_high_lookup[block_size];
     673           0 :     int index = shift_y;
     674           0 :     for (i = 0; i < h; i++) {
     675           0 :       memset(&lfm->lfl_y[index], filter_level, w);  // Store the level for the w entries of this row of the block.
     676           0 :       index += 8;  // Rows of lfl_y are 8 entries apart.
     677             :     }
     678             :   }
     679             : 
     680             :   // These set 1 in the current block size for the block size edges.
     681             :   // For instance if the block size is 32x16, we'll set:
     682             :   //    above =   1111
     683             :   //              0000
     684             :   //    and
     685             :   //    left  =   1000
     686             :   //          =   1000
     687             :   // NOTE : In this example the low bit is left most ( 1000 ) is stored as
     688             :   //        1,  not 8...
     689             :   //
     690             :   // U and V set things on a 16 bit scale.
     691             :   //
     692           0 :   *above_y |= above_prediction_mask[block_size] << shift_y;
     693           0 :   *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
     694           0 :   *left_y |= left_prediction_mask[block_size] << shift_y;
     695           0 :   *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
     696             : 
     697             :   // If the block has no coefficients and is not intra we skip applying
     698             :   // the loop filter on block edges.
     699           0 :   if (mi->skip && is_inter_block(mi)) return;  // The prediction-edge bits set above are kept.
     700             : 
     701             :   // Here we are adding a mask for the transform size. The transform
     702             :   // size mask is set to be correct for a 64x64 prediction block size. We
     703             :   // mask to match the size of the block we are working on and then shift it
     704             :   // into place..
     705           0 :   *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
     706           0 :               << shift_y;
     707           0 :   *above_uv |=
     708           0 :       (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv])
     709           0 :       << shift_uv;
     710             : 
     711           0 :   *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
     712           0 :              << shift_y;
     713           0 :   *left_uv |= (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv])
     714           0 :               << shift_uv;
     715             : 
     716             :   // Here we are trying to determine what to do with the internal 4x4 block
     717             :   // boundaries.  These differ from the 4x4 boundaries on the outside edge of
     718             :   // an 8x8 in that the internal ones can be skipped and don't depend on
     719             :   // the prediction block size.
     720           0 :   if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y;
     721             : 
     722           0 :   if (tx_size_uv == TX_4X4)
     723           0 :     *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
     724             : }
     725             : 
     726             : // This function does the same thing as build_masks() with the exception that
     727             : // it only affects the y masks. It exists because for blocks < 16x16 in size,
     728             : // we only update u and v masks on the first block.
     729           0 : static void build_y_mask(const loop_filter_info_n *const lfi_n,
     730             :                          const MODE_INFO *mi, const int shift_y,
     731             :                          LOOP_FILTER_MASK *lfm) {
     732           0 :   const BLOCK_SIZE block_size = mi->sb_type;
     733           0 :   const TX_SIZE tx_size_y = mi->tx_size;
     734           0 :   const int filter_level = get_filter_level(lfi_n, mi);
     735           0 :   uint64_t *const left_y = &lfm->left_y[tx_size_y];  // Masks indexed by this block's transform size.
     736           0 :   uint64_t *const above_y = &lfm->above_y[tx_size_y];
     737           0 :   uint64_t *const int_4x4_y = &lfm->int_4x4_y;
     738             :   int i;
     739             : 
     740           0 :   if (!filter_level) {  // Level 0: nothing is recorded in lfm.
     741           0 :     return;
     742             :   } else {
     743           0 :     const int w = num_8x8_blocks_wide_lookup[block_size];
     744           0 :     const int h = num_8x8_blocks_high_lookup[block_size];
     745           0 :     int index = shift_y;
     746           0 :     for (i = 0; i < h; i++) {
     747           0 :       memset(&lfm->lfl_y[index], filter_level, w);  // Store the level for the w entries of this row of the block.
     748           0 :       index += 8;  // Rows of lfl_y are 8 entries apart.
     749             :     }
     750             :   }
     751             : 
     752           0 :   *above_y |= above_prediction_mask[block_size] << shift_y;  // Prediction block edges.
     753           0 :   *left_y |= left_prediction_mask[block_size] << shift_y;
     754             : 
     755           0 :   if (mi->skip && is_inter_block(mi)) return;  // Skipped inter block: no transform or internal 4x4 edges are added.
     756             : 
     757           0 :   *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
     758           0 :               << shift_y;  // Transform edges, clipped to the block size and shifted into place.
     759             : 
     760           0 :   *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
     761           0 :              << shift_y;
     762             : 
     763           0 :   if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y;  // Internal 4x4 edges.
     764             : }
     765             : 
     766           0 : void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,  // Post-processes lfm in place: merges 32x32 into 16x16, moves 4x4 border bits to 8x8, clears bits outside the frame.
     767             :                      LOOP_FILTER_MASK *lfm) {
     768             :   int i;
     769             : 
     770             :   // The largest loopfilter we have is 16x16 so we use the 16x16 mask
     771             :   // for 32x32 transforms also.
     772           0 :   lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
     773           0 :   lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
     774           0 :   lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
     775           0 :   lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];
     776             : 
     777             :   // We do at least 8 tap filter on every 32x32 even if the transform size
     778             :   // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
     779             :   // remove it from the 4x4.
     780           0 :   lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
     781           0 :   lfm->left_y[TX_4X4] &= ~left_border;
     782           0 :   lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
     783           0 :   lfm->above_y[TX_4X4] &= ~above_border;
     784           0 :   lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
     785           0 :   lfm->left_uv[TX_4X4] &= ~left_border_uv;
     786           0 :   lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
     787           0 :   lfm->above_uv[TX_4X4] &= ~above_border_uv;
     788             : 
     789             :   // We do some special edge handling.
     790           0 :   if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) {
     791           0 :     const uint64_t rows = cm->mi_rows - mi_row;  // mi rows of this block that are inside the frame.
     792             : 
     793             :     // Each pixel inside the border gets a 1.
     794           0 :     const uint64_t mask_y = (((uint64_t)1 << (rows << 3)) - 1);  // 8 bits per y row.
     795           0 :     const uint16_t mask_uv = (((uint16_t)1 << (((rows + 1) >> 1) << 2)) - 1);  // 4 bits per uv row, row count rounded up.
     796             : 
     797             :     // Remove values completely outside our border.
     798           0 :     for (i = 0; i < TX_32X32; i++) {  // Stops before TX_32X32, whose bits were merged into TX_16X16 above.
     799           0 :       lfm->left_y[i] &= mask_y;
     800           0 :       lfm->above_y[i] &= mask_y;
     801           0 :       lfm->left_uv[i] &= mask_uv;
     802           0 :       lfm->above_uv[i] &= mask_uv;
     803             :     }
     804           0 :     lfm->int_4x4_y &= mask_y;
     805           0 :     lfm->int_4x4_uv &= mask_uv;
     806             : 
     807             :     // We don't apply a wide loop filter on the last uv block row. If set
     808             :     // apply the shorter one instead.
     809           0 :     if (rows == 1) {
     810           0 :       lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
     811           0 :       lfm->above_uv[TX_16X16] = 0;
     812             :     }
     813           0 :     if (rows == 5) {
     814           0 :       lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;  // 0xff00 selects uv rows 2-3 (4 bits per row).
     815           0 :       lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
     816             :     }
     817             :   }
     818             : 
     819           0 :   if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) {
     820           0 :     const uint64_t columns = cm->mi_cols - mi_col;  // mi columns of this block that are inside the frame.
     821             : 
     822             :     // Each pixel inside the border gets a 1, the multiply copies the border
     823             :     // to where we need it.
     824           0 :     const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;  // Replicates the column pattern into each of the 8 rows.
     825           0 :     const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;  // Replicates into each of the 4 uv rows.
     826             : 
     827             :     // Internal edges are not applied on the last column of the image so
     828             :     // we mask 1 more for the internal edges
     829           0 :     const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;  // Column count rounded down here, not up.
     830             : 
     831             :     // Remove the bits outside the image edge.
     832           0 :     for (i = 0; i < TX_32X32; i++) {
     833           0 :       lfm->left_y[i] &= mask_y;
     834           0 :       lfm->above_y[i] &= mask_y;
     835           0 :       lfm->left_uv[i] &= mask_uv;
     836           0 :       lfm->above_uv[i] &= mask_uv;
     837             :     }
     838           0 :     lfm->int_4x4_y &= mask_y;
     839           0 :     lfm->int_4x4_uv &= mask_uv_int;
     840             : 
     841             :     // We don't apply a wide loop filter on the last uv column. If set
     842             :     // apply the shorter one instead.
     843           0 :     if (columns == 1) {
     844           0 :       lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
     845           0 :       lfm->left_uv[TX_16X16] = 0;
     846             :     }
     847           0 :     if (columns == 5) {
     848           0 :       lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);  // 0xcccc selects uv columns 2-3 in every uv row.
     849           0 :       lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
     850             :     }
     851             :   }
     852             :   // We don't apply a loop filter on the first column in the image, mask that
     853             :   // out.
     854           0 :   if (mi_col == 0) {
     855           0 :     for (i = 0; i < TX_32X32; i++) {
     856           0 :       lfm->left_y[i] &= 0xfefefefefefefefeULL;  // Clears bit 0 of every 8-bit y row.
     857           0 :       lfm->left_uv[i] &= 0xeeee;  // Clears bit 0 of every 4-bit uv row.
     858             :     }
     859             :   }
     860             : 
     861             :   // Assert if we try to apply 2 different loop filters at the same position.
     862           0 :   assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
     863           0 :   assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
     864           0 :   assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
     865           0 :   assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
     866           0 :   assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
     867           0 :   assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
     868           0 :   assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
     869           0 :   assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
     870           0 :   assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
     871           0 :   assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
     872           0 :   assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
     873           0 :   assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
     874           0 :   assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
     875           0 :   assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
     876           0 :   assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
     877           0 :   assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
     878           0 : }
     879             : 
     880             : // This function sets up the bit masks for the entire 64x64 region represented
     881             : // by mi_row, mi_col. *lfm is zeroed first and then filled block by block.
     882           0 : void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
     883             :                     MODE_INFO **mi, const int mode_info_stride,
     884             :                     LOOP_FILTER_MASK *lfm) {
     885             :   int idx_32, idx_16, idx_8;
     886           0 :   const loop_filter_info_n *const lfi_n = &cm->lf_info;
     887           0 :   MODE_INFO **mip = mi;  // Cursor over the mi pointers of the current block.
     888           0 :   MODE_INFO **mip2 = mi;  // Cursor for the second half of a two-part partition.
     889             : 
     890             :   // These are offsets to the next mi in the 64x64 block. It is what gets
     891             :   // added to the mi ptr as we go through each loop. It helps us to avoid
     892             :   // setting up special row and column counters for each index. The last step
     893             :   // brings us back to the starting position.
     894           0 :   const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4,
     895           0 :                             -(mode_info_stride << 2) - 4 };
     896           0 :   const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2,
     897           0 :                             -(mode_info_stride << 1) - 2 };
     898           0 :   const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 };
     899             : 
     900             :   // Following variables represent shifts to position the current block
     901             :   // mask over the appropriate block. A shift of 36 to the left will move
     902             :   // the bits for the final 32 by 32 block in the 64x64 by 4 rows (4 * 8 bits)
     903             :   // and 4 columns to the appropriate spot.
     904           0 :   const int shift_32_y[] = { 0, 4, 32, 36 };
     905           0 :   const int shift_16_y[] = { 0, 2, 16, 18 };
     906           0 :   const int shift_8_y[] = { 0, 1, 8, 9 };
     907           0 :   const int shift_32_uv[] = { 0, 2, 8, 10 };
     908           0 :   const int shift_16_uv[] = { 0, 1, 4, 5 };
     909           0 :   const int max_rows =  // mi rows of this 64x64 block that are inside the frame.
     910           0 :       (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? cm->mi_rows - mi_row
     911           0 :                                             : MI_BLOCK_SIZE);
     912           0 :   const int max_cols =  // mi columns of this 64x64 block that are inside the frame.
     913           0 :       (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? cm->mi_cols - mi_col
     914           0 :                                             : MI_BLOCK_SIZE);
     915             : 
     916           0 :   vp9_zero(*lfm);
     917           0 :   assert(mip[0] != NULL);
     918             : 
     919           0 :   switch (mip[0]->sb_type) {
     920           0 :     case BLOCK_64X64: build_masks(lfi_n, mip[0], 0, 0, lfm); break;
     921             :     case BLOCK_64X32:
     922           0 :       build_masks(lfi_n, mip[0], 0, 0, lfm);
     923           0 :       mip2 = mip + mode_info_stride * 4;
     924           0 :       if (4 >= max_rows) break;  // Lower half is outside the frame.
     925           0 :       build_masks(lfi_n, mip2[0], 32, 8, lfm);
     926           0 :       break;
     927             :     case BLOCK_32X64:
     928           0 :       build_masks(lfi_n, mip[0], 0, 0, lfm);
     929           0 :       mip2 = mip + 4;
     930           0 :       if (4 >= max_cols) break;  // Right half is outside the frame.
     931           0 :       build_masks(lfi_n, mip2[0], 4, 2, lfm);
     932           0 :       break;
     933             :     default:
     934           0 :       for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
     935           0 :         const int shift_y = shift_32_y[idx_32];
     936           0 :         const int shift_uv = shift_32_uv[idx_32];
     937           0 :         const int mi_32_col_offset = ((idx_32 & 1) << 2);
     938           0 :         const int mi_32_row_offset = ((idx_32 >> 1) << 2);
     939           0 :         if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
     940           0 :           continue;
     941           0 :         switch (mip[0]->sb_type) {
     942             :           case BLOCK_32X32:
     943           0 :             build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
     944           0 :             break;
     945             :           case BLOCK_32X16:
     946           0 :             build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
     947           0 :             if (mi_32_row_offset + 2 >= max_rows) continue;  // continue applies to the enclosing for, so mip is still advanced.
     948           0 :             mip2 = mip + mode_info_stride * 2;
     949           0 :             build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
     950           0 :             break;
     951             :           case BLOCK_16X32:
     952           0 :             build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
     953           0 :             if (mi_32_col_offset + 2 >= max_cols) continue;
     954           0 :             mip2 = mip + 2;
     955           0 :             build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
     956           0 :             break;
     957             :           default:
     958           0 :             for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
     959           0 :               const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];  // Shadows the outer shift_y.
     960           0 :               const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
     961           0 :               const int mi_16_col_offset =
     962           0 :                   mi_32_col_offset + ((idx_16 & 1) << 1);
     963           0 :               const int mi_16_row_offset =
     964           0 :                   mi_32_row_offset + ((idx_16 >> 1) << 1);
     965             : 
     966           0 :               if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
     967           0 :                 continue;
     968             : 
     969           0 :               switch (mip[0]->sb_type) {
     970             :                 case BLOCK_16X16:
     971           0 :                   build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
     972           0 :                   break;
     973             :                 case BLOCK_16X8:
     974           0 :                   build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
     975           0 :                   if (mi_16_row_offset + 1 >= max_rows) continue;
     976           0 :                   mip2 = mip + mode_info_stride;
     977           0 :                   build_y_mask(lfi_n, mip2[0], shift_y + 8, lfm);  // Second half: y masks only.
     978           0 :                   break;
     979             :                 case BLOCK_8X16:
     980           0 :                   build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
     981           0 :                   if (mi_16_col_offset + 1 >= max_cols) continue;
     982           0 :                   mip2 = mip + 1;
     983           0 :                   build_y_mask(lfi_n, mip2[0], shift_y + 1, lfm);
     984           0 :                   break;
     985             :                 default: {
     986           0 :                   const int shift_y =
     987           0 :                       shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0];
     988           0 :                   build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);  // First 8x8 sets both y and uv masks.
     989           0 :                   mip += offset[0];
     990           0 :                   for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {  // Remaining three 8x8s: y masks only.
     991           0 :                     const int shift_y = shift_32_y[idx_32] +
     992           0 :                                         shift_16_y[idx_16] + shift_8_y[idx_8];
     993           0 :                     const int mi_8_col_offset =
     994           0 :                         mi_16_col_offset + ((idx_8 & 1));
     995           0 :                     const int mi_8_row_offset =
     996           0 :                         mi_16_row_offset + ((idx_8 >> 1));
     997             : 
     998           0 :                     if (mi_8_col_offset >= max_cols ||
     999             :                         mi_8_row_offset >= max_rows)
    1000           0 :                       continue;
    1001           0 :                     build_y_mask(lfi_n, mip[0], shift_y, lfm);
    1002             :                   }
    1003           0 :                   break;
    1004             :                 }
    1005             :               }
    1006             :             }
    1007           0 :             break;
    1008             :         }
    1009             :       }
    1010           0 :       break;
    1011             :   }
    1012           0 : }
    1013             : 
    1014           0 : static void filter_selectively_vert(  // Scans the OR of all four masks one bit at a time; s advances 8 and lfl 1 per bit.
    1015             :     uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    1016             :     unsigned int mask_4x4, unsigned int mask_4x4_int,
    1017             :     const loop_filter_thresh *lfthr, const uint8_t *lfl) {
    1018             :   unsigned int mask;
    1019             : 
    1020           0 :   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
    1021           0 :        mask >>= 1) {
    1022           0 :     const loop_filter_thresh *lfi = lfthr + *lfl;  // Thresholds selected by the level byte at *lfl; read on every iteration.
    1023             : 
    1024           0 :     if (mask & 1) {
    1025           0 :       if (mask_16x16 & 1) {  // Priority order: 16x16, then 8x8, then 4x4.
    1026           0 :         vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
    1027           0 :       } else if (mask_8x8 & 1) {
    1028           0 :         vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
    1029           0 :       } else if (mask_4x4 & 1) {
    1030           0 :         vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
    1031             :       }
    1032             :     }
    1033           0 :     if (mask_4x4_int & 1)  // Internal 4x4 edge at s + 4, tested independently of the three masks above.
    1034           0 :       vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
    1035           0 :     s += 8;
    1036           0 :     lfl += 1;
    1037           0 :     mask_16x16 >>= 1;
    1038           0 :     mask_8x8 >>= 1;
    1039           0 :     mask_4x4 >>= 1;
    1040           0 :     mask_4x4_int >>= 1;
    1041             :   }
    1042           0 : }
    1043             : 
    1044             : #if CONFIG_VP9_HIGHBITDEPTH
    1045             : static void highbd_filter_selectively_vert(  // High-bitdepth variant: same bit-by-bit scan, calling the vpx_highbd_lpf_vertical_* filters with bd.
    1046             :     uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8,
    1047             :     unsigned int mask_4x4, unsigned int mask_4x4_int,
    1048             :     const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) {
    1049             :   unsigned int mask;
    1050             : 
    1051             :   for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask;
    1052             :        mask >>= 1) {
    1053             :     const loop_filter_thresh *lfi = lfthr + *lfl;  // Thresholds selected by the level byte at *lfl; read on every iteration.
    1054             : 
    1055             :     if (mask & 1) {
    1056             :       if (mask_16x16 & 1) {  // Priority order: 16x16, then 8x8, then 4x4.
    1057             :         vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
    1058             :                                    bd);
    1059             :       } else if (mask_8x8 & 1) {
    1060             :         vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
    1061             :                                   bd);
    1062             :       } else if (mask_4x4 & 1) {
    1063             :         vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr,
    1064             :                                   bd);
    1065             :       }
    1066             :     }
    1067             :     if (mask_4x4_int & 1)  // Internal 4x4 edge at s + 4, tested independently of the three masks above.
    1068             :       vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
    1069             :                                 lfi->hev_thr, bd);
    1070             :     s += 8;
    1071             :     lfl += 1;
    1072             :     mask_16x16 >>= 1;
    1073             :     mask_8x8 >>= 1;
    1074             :     mask_4x4 >>= 1;
    1075             :     mask_4x4_int >>= 1;
    1076             :   }
    1077             : }
    1078             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1079             : 
    1080           0 : void vp9_filter_block_plane_non420(VP9_COMMON *cm,
    1081             :                                    struct macroblockd_plane *plane,
    1082             :                                    MODE_INFO **mi_8x8, int mi_row, int mi_col) {
                         // Filters one plane of a block of up to MI_BLOCK_SIZE x MI_BLOCK_SIZE
                         // mode-info units, deriving the edge masks directly from the MODE_INFO
                         // grid instead of from a precomputed LOOP_FILTER_MASK.
                         //   cm             - supplies mi_rows/mi_cols, mi_stride and lf_info.
                         //   plane          - plane to filter; plane->dst.buf is advanced while
                         //                    filtering and is NOT restored on return.
                         //   mi_8x8         - MODE_INFO pointers for the row at mi_row, already
                         //                    offset to mi_col (indexed here as mi_8x8[c]).
                         //   mi_row, mi_col - position of the block in mode-info units.
                         // Pass 1 builds the masks row by row and filters the column edges of
                         // each row immediately; pass 2 replays the saved per-row masks for the
                         // row edges.
    1083           0 :   const int ss_x = plane->subsampling_x;
    1084           0 :   const int ss_y = plane->subsampling_y;
                         // With subsampling, only every (1 << ss)-th mode-info row/column is
                         // visited; each visited row advances dst->buf by 8 lines.
    1085           0 :   const int row_step = 1 << ss_y;
    1086           0 :   const int col_step = 1 << ss_x;
    1087           0 :   const int row_step_stride = cm->mi_stride * row_step;
    1088           0 :   struct buf_2d *const dst = &plane->dst;
    1089           0 :   uint8_t *const dst0 = dst->buf;
                         // Per-row masks for the row-edge pass, indexed by r (not r >> ss_y).
    1090           0 :   unsigned int mask_16x16[MI_BLOCK_SIZE] = { 0 };
    1091           0 :   unsigned int mask_8x8[MI_BLOCK_SIZE] = { 0 };
    1092           0 :   unsigned int mask_4x4[MI_BLOCK_SIZE] = { 0 };
    1093           0 :   unsigned int mask_4x4_int[MI_BLOCK_SIZE] = { 0 };
                         // Filter levels: row r starts at index r << 3, column index is c >> ss_x.
                         // Not initialised up front; only visited entries are written.
    1094             :   uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
    1095             :   int r, c;
    1096             : 
    1097           0 :   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
                           // Column-edge masks for this row only; consumed at the end of the
                           // iteration.
    1098           0 :     unsigned int mask_16x16_c = 0;
    1099           0 :     unsigned int mask_8x8_c = 0;
    1100           0 :     unsigned int mask_4x4_c = 0;
    1101             :     unsigned int border_mask;
    1102             : 
    1103             :     // Determine the vertical edges that need filtering
    1104           0 :     for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
    1105           0 :       const MODE_INFO *mi = mi_8x8[c];
    1106           0 :       const BLOCK_SIZE sb_type = mi[0].sb_type;
    1107           0 :       const int skip_this = mi[0].skip && is_inter_block(mi);
    1108             :       // left edge of current unit is block/partition edge -> no skip
    1109           0 :       const int block_edge_left =
    1110           0 :           (num_4x4_blocks_wide_lookup[sb_type] > 1)
    1111           0 :               ? !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1))
    1112           0 :               : 1;
    1113           0 :       const int skip_this_c = skip_this && !block_edge_left;
    1114             :       // top edge of current unit is block/partition edge -> no skip
    1115           0 :       const int block_edge_above =
    1116           0 :           (num_4x4_blocks_high_lookup[sb_type] > 1)
    1117           0 :               ? !(r & (num_8x8_blocks_high_lookup[sb_type] - 1))
    1118           0 :               : 1;
    1119           0 :       const int skip_this_r = skip_this && !block_edge_above;
    1120           0 :       const TX_SIZE tx_size = get_uv_tx_size(mi, plane);
                             // True only for a subsampled direction when this is the last
                             // mode-info column/row of the frame.
    1121           0 :       const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
    1122           0 :       const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
    1123             : 
    1124             :       // Filter level can vary per MI
                             // The level is stored even when it is 0; a 0 level just leaves all
                             // mask bits for this unit clear.
    1125           0 :       if (!(lfl[(r << 3) + (c >> ss_x)] = get_filter_level(&cm->lf_info, mi)))
    1126           0 :         continue;
    1127             : 
    1128             :       // Build masks based on the transform size of each block
    1129           0 :       if (tx_size == TX_32X32) {
                               // Only every 4th unit can carry an edge; the 16x16 filter is
                               // downgraded to the 8x8 mask when skip_border_4x4_* is set.
    1130           0 :         if (!skip_this_c && ((c >> ss_x) & 3) == 0) {
    1131           0 :           if (!skip_border_4x4_c)
    1132           0 :             mask_16x16_c |= 1 << (c >> ss_x);
    1133             :           else
    1134           0 :             mask_8x8_c |= 1 << (c >> ss_x);
    1135             :         }
    1136           0 :         if (!skip_this_r && ((r >> ss_y) & 3) == 0) {
    1137           0 :           if (!skip_border_4x4_r)
    1138           0 :             mask_16x16[r] |= 1 << (c >> ss_x);
    1139             :           else
    1140           0 :             mask_8x8[r] |= 1 << (c >> ss_x);
    1141             :         }
    1142           0 :       } else if (tx_size == TX_16X16) {
                               // Same as above, but every 2nd unit can carry an edge.
    1143           0 :         if (!skip_this_c && ((c >> ss_x) & 1) == 0) {
    1144           0 :           if (!skip_border_4x4_c)
    1145           0 :             mask_16x16_c |= 1 << (c >> ss_x);
    1146             :           else
    1147           0 :             mask_8x8_c |= 1 << (c >> ss_x);
    1148             :         }
    1149           0 :         if (!skip_this_r && ((r >> ss_y) & 1) == 0) {
    1150           0 :           if (!skip_border_4x4_r)
    1151           0 :             mask_16x16[r] |= 1 << (c >> ss_x);
    1152             :           else
    1153           0 :             mask_8x8[r] |= 1 << (c >> ss_x);
    1154             :         }
    1155             :       } else {
    1156             :         // force 8x8 filtering on 32x32 boundaries
    1157           0 :         if (!skip_this_c) {
    1158           0 :           if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0)
    1159           0 :             mask_8x8_c |= 1 << (c >> ss_x);
    1160             :           else
    1161           0 :             mask_4x4_c |= 1 << (c >> ss_x);
    1162             :         }
    1163             : 
    1164           0 :         if (!skip_this_r) {
    1165           0 :           if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0)
    1166           0 :             mask_8x8[r] |= 1 << (c >> ss_x);
    1167             :           else
    1168           0 :             mask_4x4[r] |= 1 << (c >> ss_x);
    1169             :         }
    1170             : 
                               // Internal 4x4 edges use the unconditional skip flag (not the
                               // edge-aware skip_this_c/_r) and are shared by both passes.
    1171           0 :         if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
    1172           0 :           mask_4x4_int[r] |= 1 << (c >> ss_x);
    1173             :       }
    1174             :     }
    1175             : 
    1176             :     // Disable filtering on the leftmost column
                           // (mi_col == 0) evaluates to 1 or 0, so the complement clears bit 0
                           // only when mi_col is 0 and is all-ones otherwise. It is not applied
                           // to mask_4x4_int[r].
    1177           0 :     border_mask = ~(mi_col == 0);
    1178             : #if CONFIG_VP9_HIGHBITDEPTH
    1179             :     if (cm->use_highbitdepth) {
    1180             :       highbd_filter_selectively_vert(
    1181             :           CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
    1182             :           mask_16x16_c & border_mask, mask_8x8_c & border_mask,
    1183             :           mask_4x4_c & border_mask, mask_4x4_int[r], cm->lf_info.lfthr,
    1184             :           &lfl[r << 3], (int)cm->bit_depth);
    1185             :     } else {
    1186             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1187           0 :       filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask,
    1188             :                               mask_8x8_c & border_mask,
    1189             :                               mask_4x4_c & border_mask, mask_4x4_int[r],
    1190           0 :                               cm->lf_info.lfthr, &lfl[r << 3]);
    1191             : #if CONFIG_VP9_HIGHBITDEPTH
    1192             :     }
    1193             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1194           0 :     dst->buf += 8 * dst->stride;
    1195           0 :     mi_8x8 += row_step_stride;
    1196             :   }
    1197             : 
    1198             :   // Now do horizontal pass
                         // Restart from the original buffer position and replay the per-row
                         // masks collected above.
    1199           0 :   dst->buf = dst0;
    1200           0 :   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
    1201           0 :     const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
    1202           0 :     const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
    1203             : 
    1204             :     unsigned int mask_16x16_r;
    1205             :     unsigned int mask_8x8_r;
    1206             :     unsigned int mask_4x4_r;
    1207             : 
                           // For the very first mode-info row the three edge masks are dropped;
                           // the internal 4x4 mask is still passed on.
    1208           0 :     if (mi_row + r == 0) {
    1209           0 :       mask_16x16_r = 0;
    1210           0 :       mask_8x8_r = 0;
    1211           0 :       mask_4x4_r = 0;
    1212             :     } else {
    1213           0 :       mask_16x16_r = mask_16x16[r];
    1214           0 :       mask_8x8_r = mask_8x8[r];
    1215           0 :       mask_4x4_r = mask_4x4[r];
    1216             :     }
    1217             : #if CONFIG_VP9_HIGHBITDEPTH
    1218             :     if (cm->use_highbitdepth) {
    1219             :       highbd_filter_selectively_horiz(
    1220             :           CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
    1221             :           mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl[r << 3],
    1222             :           (int)cm->bit_depth);
    1223             :     } else {
    1224             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1225           0 :       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
    1226           0 :                                mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
    1227           0 :                                &lfl[r << 3]);
    1228             : #if CONFIG_VP9_HIGHBITDEPTH
    1229             :     }
    1230             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1231           0 :     dst->buf += 8 * dst->stride;
    1232             :   }
    1233           0 : }
    1234             : 
    1235           0 : void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
    1236             :                                  struct macroblockd_plane *const plane,
    1237             :                                  int mi_row, LOOP_FILTER_MASK *lfm) {
                         // Filters one non-subsampled plane of a block using the luma masks and
                         // levels stored in `lfm` (left_y / above_y / int_4x4_y / lfl_y).
                         //   cm     - supplies mi_rows, lf_info.lfthr and the bit-depth settings.
                         //   plane  - must have subsampling_x == subsampling_y == 0 (asserted);
                         //            plane->dst.buf is advanced and NOT restored on return.
                         //   mi_row - first mode-info row of the block; bounds the row loops.
                         //   lfm    - precomputed masks; read only.
                         // The 64-bit masks hold 8 bits per mode-info row, lowest row in the
                         // lowest byte, as the shift-by-8 / shift-by-16 walks below show.
    1238           0 :   struct buf_2d *const dst = &plane->dst;
    1239           0 :   uint8_t *const dst0 = dst->buf;
    1240             :   int r;
    1241           0 :   uint64_t mask_16x16 = lfm->left_y[TX_16X16];
    1242           0 :   uint64_t mask_8x8 = lfm->left_y[TX_8X8];
    1243           0 :   uint64_t mask_4x4 = lfm->left_y[TX_4X4];
    1244           0 :   uint64_t mask_4x4_int = lfm->int_4x4_y;
    1245             : 
    1246           0 :   assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
    1247             : 
    1248             :   // Vertical pass: do 2 rows at one time
                         // Each iteration hands over the low 32 bits of every mask and then
                         // drops 16 bits (two rows), advancing the buffer by 16 lines.
    1249           0 :   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
    1250             : #if CONFIG_VP9_HIGHBITDEPTH
    1251             :     if (cm->use_highbitdepth) {
    1252             :       // NOTE(review): no leftmost-column masking is done in this function;
                             // presumably the masks in `lfm` already exclude it - confirm.
    1253             :       highbd_filter_selectively_vert_row2(
    1254             :           plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
    1255             :           (unsigned int)mask_16x16, (unsigned int)mask_8x8,
    1256             :           (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr,
    1257             :           &lfm->lfl_y[r << 3], (int)cm->bit_depth);
    1258             :     } else {
    1259             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1260             :       // NOTE(review): no leftmost-column masking is done in this function;
                             // presumably the masks in `lfm` already exclude it - confirm.
    1261           0 :       filter_selectively_vert_row2(
    1262             :           plane->subsampling_x, dst->buf, dst->stride, (unsigned int)mask_16x16,
    1263             :           (unsigned int)mask_8x8, (unsigned int)mask_4x4,
    1264           0 :           (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
    1265             : #if CONFIG_VP9_HIGHBITDEPTH
    1266             :     }
    1267             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1268           0 :     dst->buf += 16 * dst->stride;
    1269           0 :     mask_16x16 >>= 16;
    1270           0 :     mask_8x8 >>= 16;
    1271           0 :     mask_4x4 >>= 16;
    1272           0 :     mask_4x4_int >>= 16;
    1273             :   }
    1274             : 
    1275             :   // Horizontal pass
                         // Rewind the buffer and reload the masks from above_y; the internal
                         // 4x4 mask is the same one used by the vertical pass.
    1276           0 :   dst->buf = dst0;
    1277           0 :   mask_16x16 = lfm->above_y[TX_16X16];
    1278           0 :   mask_8x8 = lfm->above_y[TX_8X8];
    1279           0 :   mask_4x4 = lfm->above_y[TX_4X4];
    1280           0 :   mask_4x4_int = lfm->int_4x4_y;
    1281             : 
    1282           0 :   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) {
    1283             :     unsigned int mask_16x16_r;
    1284             :     unsigned int mask_8x8_r;
    1285             :     unsigned int mask_4x4_r;
    1286             : 
                           // For the very first mode-info row the three edge masks are dropped;
                           // the internal 4x4 mask is still applied.
    1287           0 :     if (mi_row + r == 0) {
    1288           0 :       mask_16x16_r = 0;
    1289           0 :       mask_8x8_r = 0;
    1290           0 :       mask_4x4_r = 0;
    1291             :     } else {
    1292           0 :       mask_16x16_r = mask_16x16 & 0xff;
    1293           0 :       mask_8x8_r = mask_8x8 & 0xff;
    1294           0 :       mask_4x4_r = mask_4x4 & 0xff;
    1295             :     }
    1296             : 
    1297             : #if CONFIG_VP9_HIGHBITDEPTH
    1298             :     if (cm->use_highbitdepth) {
    1299             :       highbd_filter_selectively_horiz(
    1300             :           CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
    1301             :           mask_4x4_r, mask_4x4_int & 0xff, cm->lf_info.lfthr,
    1302             :           &lfm->lfl_y[r << 3], (int)cm->bit_depth);
    1303             :     } else {
    1304             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1305           0 :       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
    1306             :                                mask_4x4_r, mask_4x4_int & 0xff,
    1307           0 :                                cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
    1308             : #if CONFIG_VP9_HIGHBITDEPTH
    1309             :     }
    1310             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1311             : 
                           // One row consumed: 8 buffer lines and 8 mask bits.
    1312           0 :     dst->buf += 8 * dst->stride;
    1313           0 :     mask_16x16 >>= 8;
    1314           0 :     mask_8x8 >>= 8;
    1315           0 :     mask_4x4 >>= 8;
    1316           0 :     mask_4x4_int >>= 8;
    1317             :   }
    1318           0 : }
    1319             : 
    1320           0 : void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
    1321             :                                  struct macroblockd_plane *const plane,
    1322             :                                  int mi_row, LOOP_FILTER_MASK *lfm) {
                         // Filters one plane that is subsampled by 2 in both directions, using
                         // the chroma masks in `lfm` (left_uv / above_uv / int_4x4_uv).
                         //   cm     - supplies mi_rows, lf_info.lfthr and the bit-depth settings.
                         //   plane  - must have subsampling_x == subsampling_y == 1 (asserted);
                         //            plane->dst.buf is advanced and NOT restored on return.
                         //   mi_row - first mode-info row of the block; bounds the row loops.
                         //   lfm    - precomputed masks and luma filter levels; read only.
                         // The 16-bit masks hold 4 bits per chroma row, as the shift-by-4 /
                         // shift-by-8 walks below show.
    1323           0 :   struct buf_2d *const dst = &plane->dst;
    1324           0 :   uint8_t *const dst0 = dst->buf;
    1325             :   int r, c;
                         // Chroma filter levels, 4 entries per chroma row; filled during the
                         // vertical pass and reused by the horizontal pass.
    1326             :   uint8_t lfl_uv[16];
    1327             : 
    1328           0 :   uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
    1329           0 :   uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
    1330           0 :   uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
    1331           0 :   uint16_t mask_4x4_int = lfm->int_4x4_uv;
    1332             : 
    1333           0 :   assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
    1334             : 
    1335             :   // Vertical pass: do 2 rows at one time
                         // r advances by 4 mode-info rows, i.e. two chroma rows per iteration.
    1336           0 :   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) {
                           // Take the level of every second luma column from luma rows r and
                           // r + 2. Row r + 2 is read without checking it against cm->mi_rows.
    1337           0 :     for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
    1338           0 :       lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
    1339           0 :       lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
    1340             :     }
    1341             : 
    1342             : #if CONFIG_VP9_HIGHBITDEPTH
    1343             :     if (cm->use_highbitdepth) {
    1344             :       // NOTE(review): no leftmost-column masking is done in this function;
                             // presumably the masks in `lfm` already exclude it - confirm.
    1345             :       highbd_filter_selectively_vert_row2(
    1346             :           plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
    1347             :           (unsigned int)mask_16x16, (unsigned int)mask_8x8,
    1348             :           (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr,
    1349             :           &lfl_uv[r << 1], (int)cm->bit_depth);
    1350             :     } else {
    1351             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1352             :       // NOTE(review): no leftmost-column masking is done in this function;
                             // presumably the masks in `lfm` already exclude it - confirm.
    1353           0 :       filter_selectively_vert_row2(
    1354             :           plane->subsampling_x, dst->buf, dst->stride, (unsigned int)mask_16x16,
    1355             :           (unsigned int)mask_8x8, (unsigned int)mask_4x4,
    1356           0 :           (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfl_uv[r << 1]);
    1357             : #if CONFIG_VP9_HIGHBITDEPTH
    1358             :     }
    1359             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1360             : 
                           // Two chroma rows consumed: 16 buffer lines and 8 mask bits.
    1361           0 :     dst->buf += 16 * dst->stride;
    1362           0 :     mask_16x16 >>= 8;
    1363           0 :     mask_8x8 >>= 8;
    1364           0 :     mask_4x4 >>= 8;
    1365           0 :     mask_4x4_int >>= 8;
    1366             :   }
    1367             : 
    1368             :   // Horizontal pass
                         // Rewind the buffer and reload the masks from above_uv.
    1369           0 :   dst->buf = dst0;
    1370           0 :   mask_16x16 = lfm->above_uv[TX_16X16];
    1371           0 :   mask_8x8 = lfm->above_uv[TX_8X8];
    1372           0 :   mask_4x4 = lfm->above_uv[TX_4X4];
    1373           0 :   mask_4x4_int = lfm->int_4x4_uv;
    1374             : 
    1375           0 :   for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
                           // Internal 4x4 edges are suppressed when this is the last mode-info
                           // row of the frame.
    1376           0 :     const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
    1377           0 :     const unsigned int mask_4x4_int_r =
    1378           0 :         skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf);
    1379             :     unsigned int mask_16x16_r;
    1380             :     unsigned int mask_8x8_r;
    1381             :     unsigned int mask_4x4_r;
    1382             : 
                           // For the very first mode-info row the three edge masks are dropped.
    1383           0 :     if (mi_row + r == 0) {
    1384           0 :       mask_16x16_r = 0;
    1385           0 :       mask_8x8_r = 0;
    1386           0 :       mask_4x4_r = 0;
    1387             :     } else {
    1388           0 :       mask_16x16_r = mask_16x16 & 0xf;
    1389           0 :       mask_8x8_r = mask_8x8 & 0xf;
    1390           0 :       mask_4x4_r = mask_4x4 & 0xf;
    1391             :     }
    1392             : 
    1393             : #if CONFIG_VP9_HIGHBITDEPTH
    1394             :     if (cm->use_highbitdepth) {
    1395             :       highbd_filter_selectively_horiz(
    1396             :           CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
    1397             :           mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl_uv[r << 1],
    1398             :           (int)cm->bit_depth);
    1399             :     } else {
    1400             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1401           0 :       filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
    1402           0 :                                mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
    1403           0 :                                &lfl_uv[r << 1]);
    1404             : #if CONFIG_VP9_HIGHBITDEPTH
    1405             :     }
    1406             : #endif  // CONFIG_VP9_HIGHBITDEPTH
    1407             : 
                           // One chroma row consumed: 8 buffer lines and 4 mask bits.
    1408           0 :     dst->buf += 8 * dst->stride;
    1409           0 :     mask_16x16 >>= 4;
    1410           0 :     mask_8x8 >>= 4;
    1411           0 :     mask_4x4 >>= 4;
    1412           0 :     mask_4x4_int >>= 4;
    1413             :   }
    1414           0 : }
    1415             : 
    1416           0 : static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm,
    1417             :                              struct macroblockd_plane planes[MAX_MB_PLANE],
    1418             :                              int start, int stop, int y_only) {
                         // Filters mode-info rows [start, stop) of `frame_buffer`, one
                         // MI_BLOCK_SIZE x MI_BLOCK_SIZE block at a time, across the full width.
                         //   planes - per-plane state; re-pointed at each block by
                         //            vp9_setup_dst_planes before filtering.
                         //   y_only - non-zero: only planes[0] is filtered.
    1419           0 :   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
    1420             :   enum lf_path path;
    1421             :   int mi_row, mi_col;
    1422             : 
                         // `path` only affects planes 1 and up; it is chosen from the
                         // subsampling of planes[1]. With y_only the plane loop below does not
                         // run, so the value picked in that case is never consulted.
    1423           0 :   if (y_only)
    1424           0 :     path = LF_PATH_444;
    1425           0 :   else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
    1426           0 :     path = LF_PATH_420;
    1427           0 :   else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
    1428           0 :     path = LF_PATH_444;
    1429             :   else
    1430           0 :     path = LF_PATH_SLOW;
    1431             : 
    1432           0 :   for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
    1433           0 :     MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
                           // Mask for column 0 of this row; ++lfm in the inner loop steps to
                           // the next block's mask.
    1434           0 :     LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0);
    1435             : 
    1436           0 :     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) {
    1437             :       int plane;
    1438             : 
    1439           0 :       vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
    1440             : 
    1441             :       // TODO(jimbankoski): For 444 only need to do y mask.
    1442           0 :       vp9_adjust_mask(cm, mi_row, mi_col, lfm);
    1443             : 
                             // Plane 0 always takes the non-subsampled mask-based path.
    1444           0 :       vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm);
    1445           0 :       for (plane = 1; plane < num_planes; ++plane) {
    1446           0 :         switch (path) {
    1447             :           case LF_PATH_420:
    1448           0 :             vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm);
    1449           0 :             break;
    1450             :           case LF_PATH_444:
    1451           0 :             vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm);
    1452           0 :             break;
    1453             :           case LF_PATH_SLOW:
                                 // Any other subsampling: masks are rebuilt from the
                                 // MODE_INFO grid rather than taken from lfm.
    1454           0 :             vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
    1455             :                                           mi_row, mi_col);
    1456           0 :             break;
    1457             :         }
    1458             :       }
    1459             :     }
    1460             :   }
    1461           0 : }
    1462             : 
    1463           0 : void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm,
    1464             :                            MACROBLOCKD *xd, int frame_filter_level, int y_only,
    1465             :                            int partial_frame) {
                         // Runs the loop filter over `frame`, or over a band of it.
                         //   frame_filter_level - 0 makes this a no-op.
                         //   y_only             - forwarded to loop_filter_rows.
                         //   partial_frame      - non-zero (and more than 8 mode-info rows):
                         //                        filter only a band starting near the middle.
    1466             :   int start_mi_row, end_mi_row, mi_rows_to_filter;
    1467           0 :   if (!frame_filter_level) return;
    1468           0 :   start_mi_row = 0;
    1469           0 :   mi_rows_to_filter = cm->mi_rows;
    1470           0 :   if (partial_frame && cm->mi_rows > 8) {
                           // Start at half the row count with the low three bits cleared
                           // (a multiple of 8) and cover max(mi_rows / 8, 8) rows.
    1471           0 :     start_mi_row = cm->mi_rows >> 1;
    1472           0 :     start_mi_row &= 0xfffffff8;
    1473           0 :     mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
    1474             :   }
    1475           0 :   end_mi_row = start_mi_row + mi_rows_to_filter;
    1476           0 :   loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
    1477             : }
    1478             : 
    1479             : // Used by the encoder to build the loopfilter masks.
    1480             : // TODO(slavarnway): Do the encoder the same way the decoder does it and
    1481             : //                   build the masks in line as part of the encode process.
                       // Does nothing when frame_filter_level is 0. Otherwise initialises the
                       // per-frame filter state via vp9_loop_filter_frame_init and calls
                       // vp9_setup_mask for every MI_BLOCK_SIZE-aligned block in the selected row
                       // range. The range uses the same partial_frame rule as
                       // vp9_loop_filter_frame.
    1482           0 : void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level,
    1483             :                           int partial_frame) {
    1484             :   int start_mi_row, end_mi_row, mi_rows_to_filter;
    1485             :   int mi_col, mi_row;
    1486           0 :   if (!frame_filter_level) return;
    1487           0 :   start_mi_row = 0;
    1488           0 :   mi_rows_to_filter = cm->mi_rows;
    1489           0 :   if (partial_frame && cm->mi_rows > 8) {
                           // Start at half the row count with the low three bits cleared
                           // (a multiple of 8) and cover max(mi_rows / 8, 8) rows.
    1490           0 :     start_mi_row = cm->mi_rows >> 1;
    1491           0 :     start_mi_row &= 0xfffffff8;
    1492           0 :     mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
    1493             :   }
    1494           0 :   end_mi_row = start_mi_row + mi_rows_to_filter;
    1495             : 
    1496           0 :   vp9_loop_filter_frame_init(cm, frame_filter_level);
    1497             : 
    1498           0 :   for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) {
    1499           0 :     MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
    1500           0 :     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
    1501             :       // vp9_setup_mask() zeros lfm
    1502           0 :       vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
    1503           0 :                      get_lfm(&cm->lf, mi_row, mi_col));
    1504             :     }
    1505             :   }
    1506             : }
    1507             : 
    1508             : // 8x8 blocks in a superblock.  A "1" represents the first block in a 16x16
    1509             : // or greater area.
                       // Indexed as [row][col] with the low three bits of the mode-info position
                       // (see vp9_build_mask); an entry is 1 only where both indices are even.
    1510             : static const uint8_t first_block_in_16x16[8][8] = {
    1511             :   { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 },
    1512             :   { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 },
    1513             :   { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 },
    1514             :   { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }
    1515             : };
    1516             : 
    1517             : // This function sets up the bit masks for a block represented
    1518             : // by mi_row, mi_col in a 64x64 region.
    1519             : // TODO(SJL): This function only works for yv12.
                       //   cm             - supplies lf_info (filter levels) and the lfm storage.
                       //   mi             - mode info of the block (sb_type, tx_size, skip).
                       //   mi_row, mi_col - block position; only the low three bits of each are
                       //                    used to place bits inside the mask.
                       //   bw, bh         - width and height, in lfl_y entries, of the area whose
                       //                    filter level is recorded.
                       // Bits are OR-ed into the LOOP_FILTER_MASK returned by get_lfm; nothing is
                       // cleared here.
    1520           0 : void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, int mi_col,
    1521             :                     int bw, int bh) {
    1522           0 :   const BLOCK_SIZE block_size = mi->sb_type;
    1523           0 :   const TX_SIZE tx_size_y = mi->tx_size;
    1524           0 :   const loop_filter_info_n *const lfi_n = &cm->lf_info;
    1525           0 :   const int filter_level = get_filter_level(lfi_n, mi);
                         // The chroma transform size is looked up with both subsampling indices
                         // fixed at 1.
    1526           0 :   const TX_SIZE tx_size_uv = uv_txsize_lookup[block_size][tx_size_y][1][1];
    1527           0 :   LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col);
    1528           0 :   uint64_t *const left_y = &lfm->left_y[tx_size_y];
    1529           0 :   uint64_t *const above_y = &lfm->above_y[tx_size_y];
    1530           0 :   uint64_t *const int_4x4_y = &lfm->int_4x4_y;
    1531           0 :   uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
    1532           0 :   uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
    1533           0 :   uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
    1534           0 :   const int row_in_sb = (mi_row & 7);
    1535           0 :   const int col_in_sb = (mi_col & 7);
                         // Bit position of the block: 8 bits per row in the luma masks, 4 bits
                         // per (halved) row in the chroma masks.
    1536           0 :   const int shift_y = col_in_sb + (row_in_sb << 3);
    1537           0 :   const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2);
                         // Chroma bits are only produced for blocks at even row and even column.
    1538           0 :   const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb];
    1539             : 
                         // A zero filter level leaves the masks and lfl_y untouched.
    1540           0 :   if (!filter_level) {
    1541           0 :     return;
    1542             :   } else {
                           // Record the level for bh rows of bw entries; rows are 8 entries
                           // apart in lfl_y.
    1543           0 :     int index = shift_y;
    1544             :     int i;
    1545           0 :     for (i = 0; i < bh; i++) {
    1546           0 :       memset(&lfm->lfl_y[index], filter_level, bw);
    1547           0 :       index += 8;
    1548             :     }
    1549             :   }
    1550             : 
    1551             :   // These set 1 in the current block size for the block size edges.
    1552             :   // For instance if the block size is 32x16, we'll set:
    1553             :   //    above =   1111
    1554             :   //              0000
    1555             :   //    and
    1556             :   //    left  =   1000
    1557             :   //          =   1000
    1558             :   // NOTE : In this example the low bit is left most ( 1000 ) is stored as
    1559             :   //        1,  not 8...
    1560             :   //
    1561             :   // U and V set things on a 16 bit scale.
    1562             :   //
    1563           0 :   *above_y |= above_prediction_mask[block_size] << shift_y;
    1564           0 :   *left_y |= left_prediction_mask[block_size] << shift_y;
    1565             : 
    1566           0 :   if (build_uv) {
    1567           0 :     *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
    1568           0 :     *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
    1569             :   }
    1570             : 
    1571             :   // If the block has no coefficients and is not intra we skip applying
    1572             :   // the loop filter on block edges.
                         // The prediction-edge bits set above are kept; only the transform-edge
                         // and internal 4x4 bits below are omitted.
    1573           0 :   if (mi->skip && is_inter_block(mi)) return;
    1574             : 
    1575             :   // Add a mask for the transform size. The transform size mask is set to
    1576             :   // be correct for a 64x64 prediction block size. Mask to match the size of
    1577             :   // the block we are working on and then shift it into place.
    1578           0 :   *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y])
    1579           0 :               << shift_y;
    1580           0 :   *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y])
    1581           0 :              << shift_y;
    1582             : 
    1583           0 :   if (build_uv) {
    1584           0 :     *above_uv |=
    1585           0 :         (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv])
    1586           0 :         << shift_uv;
    1587             : 
    1588           0 :     *left_uv |=
    1589           0 :         (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv])
    1590           0 :         << shift_uv;
    1591             :   }
    1592             : 
    1593             :   // Try to determine what to do with the internal 4x4 block boundaries.  These
    1594             :   // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the
    1595             :   // internal ones can be skipped and don't depend on the prediction block size.
    1596           0 :   if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y;
    1597             : 
    1598           0 :   if (build_uv && tx_size_uv == TX_4X4)
    1599           0 :     *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
    1600             : }
    1601             : 
    // Re-initializes a loop filter worker data record: stores the frame buffer
    // and common-state pointers, clears the start/stop/y_only fields to 0 and
    // copies the caller's plane descriptors into lf_data->planes.
    //
    // lf_data      - record to reset; every field touched here is overwritten.
    // frame_buffer - pointer stored as-is in lf_data->frame_buffer (not copied).
    // cm           - pointer stored as-is in lf_data->cm (not copied).
    // planes       - source plane descriptors, copied by value.
    1602           0 : void vp9_loop_filter_data_reset(
    1603             :     LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
    1604             :     struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) {
    1605           0 :   lf_data->frame_buffer = frame_buffer;
    1606           0 :   lf_data->cm = cm;
                         // start, stop and y_only are only zeroed here.
                         // NOTE(review): presumably the caller fills them in before
                         // the worker runs -- confirm against the call sites.
    1607           0 :   lf_data->start = 0;
    1608           0 :   lf_data->stop = 0;
    1609           0 :   lf_data->y_only = 0;
                         // The copy length is taken from the destination member, because
                         // the `planes` parameter decays to a pointer and carries no size.
                         // NOTE(review): assumes lf_data->planes is an array of
                         // MAX_MB_PLANE elements -- confirm in the LFWorkerData definition.
    1610           0 :   memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
    1611           0 : }
    1612             : 
    // Zero-fills the cm->lf.lfm array when cm->lf.filter_level is non-zero;
    // does nothing when the filter level is 0.
    1613           0 : void vp9_reset_lfm(VP9_COMMON *const cm) {
    1614           0 :   if (cm->lf.filter_level) {
                           // Elements cleared: (mi_rows + MI_BLOCK_SIZE - 1) >> 3 rows,
                           // times lfm_stride entries per row.
                           // NOTE(review): the ">> 3" presumably stands for a rounded-up
                           // division by MI_BLOCK_SIZE (i.e. MI_BLOCK_SIZE == 8) --
                           // confirm against the MI_BLOCK_SIZE definition.
    1615           0 :     memset(cm->lf.lfm, 0, ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) *
    1616           0 :                               cm->lf.lfm_stride * sizeof(*cm->lf.lfm));
    1617             :   }
    1618           0 : }
    1619             : 
    // Worker entry point: forwards the fields of lf_data (frame buffer, common
    // state, planes, start, stop, y_only) to loop_filter_rows().
    //
    // lf_data - supplies every argument passed to loop_filter_rows().
    // unused  - ignored.
    // Returns 1 unconditionally.
    1620           0 : int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
                         // Cast to void so the ignored parameter does not trigger an
                         // unused-parameter warning.
    1621             :   (void)unused;
    1622           0 :   loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
    1623             :                    lf_data->start, lf_data->stop, lf_data->y_only);
    1624           0 :   return 1;
    1625             : }

Generated by: LCOV version 1.13