LCOV - code coverage report
Current view: top level - media/libvpx/libvpx/vp9/common - vp9_mfqe.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 225 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 16 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "./vpx_config.h"
      12             : #include "./vp9_rtcd.h"
      13             : #include "./vpx_dsp_rtcd.h"
      14             : #include "./vpx_scale_rtcd.h"
      15             : 
      16             : #include "vp9/common/vp9_onyxc_int.h"
      17             : #include "vp9/common/vp9_postproc.h"
      18             : 
      19             : // TODO(jackychen): Replace this function with SSE2 code. There is
      20             : // one SSE2 implementation in vp8, so will consider how to share it
      21             : // between vp8 and vp9.
      22           0 : static void filter_by_weight(const uint8_t *src, int src_stride, uint8_t *dst,
      23             :                              int dst_stride, int block_size, int src_weight) {
      24           0 :   const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
      25           0 :   const int rounding_bit = 1 << (MFQE_PRECISION - 1);
      26             :   int r, c;
      27             : 
      28           0 :   for (r = 0; r < block_size; r++) {
      29           0 :     for (c = 0; c < block_size; c++) {
      30           0 :       dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) >>
      31             :                MFQE_PRECISION;
      32             :     }
      33           0 :     src += src_stride;
      34           0 :     dst += dst_stride;
      35             :   }
      36           0 : }
      37             : 
      38           0 : void vp9_filter_by_weight8x8_c(const uint8_t *src, int src_stride, uint8_t *dst,
      39             :                                int dst_stride, int src_weight) {
      40           0 :   filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
      41           0 : }
      42             : 
      43           0 : void vp9_filter_by_weight16x16_c(const uint8_t *src, int src_stride,
      44             :                                  uint8_t *dst, int dst_stride, int src_weight) {
      45           0 :   filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
      46           0 : }
      47             : 
      48           0 : static void filter_by_weight32x32(const uint8_t *src, int src_stride,
      49             :                                   uint8_t *dst, int dst_stride, int weight) {
      50           0 :   vp9_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight);
      51           0 :   vp9_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride, weight);
      52           0 :   vp9_filter_by_weight16x16(src + src_stride * 16, src_stride,
      53           0 :                             dst + dst_stride * 16, dst_stride, weight);
      54           0 :   vp9_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride,
      55           0 :                             dst + dst_stride * 16 + 16, dst_stride, weight);
      56           0 : }
      57             : 
      58           0 : static void filter_by_weight64x64(const uint8_t *src, int src_stride,
      59             :                                   uint8_t *dst, int dst_stride, int weight) {
      60           0 :   filter_by_weight32x32(src, src_stride, dst, dst_stride, weight);
      61           0 :   filter_by_weight32x32(src + 32, src_stride, dst + 32, dst_stride, weight);
      62           0 :   filter_by_weight32x32(src + src_stride * 32, src_stride,
      63           0 :                         dst + dst_stride * 32, dst_stride, weight);
      64           0 :   filter_by_weight32x32(src + src_stride * 32 + 32, src_stride,
      65           0 :                         dst + dst_stride * 32 + 32, dst_stride, weight);
      66           0 : }
      67             : 
      68           0 : static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd,
      69             :                           int yd_stride, const uint8_t *u, const uint8_t *v,
      70             :                           int uv_stride, uint8_t *ud, uint8_t *vd,
      71             :                           int uvd_stride, BLOCK_SIZE block_size, int weight) {
      72           0 :   if (block_size == BLOCK_16X16) {
      73           0 :     vp9_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight);
      74           0 :     vp9_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight);
      75           0 :     vp9_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight);
      76           0 :   } else if (block_size == BLOCK_32X32) {
      77           0 :     filter_by_weight32x32(y, y_stride, yd, yd_stride, weight);
      78           0 :     vp9_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight);
      79           0 :     vp9_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight);
      80           0 :   } else if (block_size == BLOCK_64X64) {
      81           0 :     filter_by_weight64x64(y, y_stride, yd, yd_stride, weight);
      82           0 :     filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight);
      83           0 :     filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight);
      84             :   }
      85           0 : }
      86             : 
      87             : // TODO(jackychen): Determine whether to replace it with assembly code.
      88           0 : static void copy_mem8x8(const uint8_t *src, int src_stride, uint8_t *dst,
      89             :                         int dst_stride) {
      90             :   int r;
      91           0 :   for (r = 0; r < 8; r++) {
      92           0 :     memcpy(dst, src, 8);
      93           0 :     src += src_stride;
      94           0 :     dst += dst_stride;
      95             :   }
      96           0 : }
      97             : 
      98           0 : static void copy_mem16x16(const uint8_t *src, int src_stride, uint8_t *dst,
      99             :                           int dst_stride) {
     100             :   int r;
     101           0 :   for (r = 0; r < 16; r++) {
     102           0 :     memcpy(dst, src, 16);
     103           0 :     src += src_stride;
     104           0 :     dst += dst_stride;
     105             :   }
     106           0 : }
     107             : 
     108           0 : static void copy_mem32x32(const uint8_t *src, int src_stride, uint8_t *dst,
     109             :                           int dst_stride) {
     110           0 :   copy_mem16x16(src, src_stride, dst, dst_stride);
     111           0 :   copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride);
     112           0 :   copy_mem16x16(src + src_stride * 16, src_stride, dst + dst_stride * 16,
     113             :                 dst_stride);
     114           0 :   copy_mem16x16(src + src_stride * 16 + 16, src_stride,
     115           0 :                 dst + dst_stride * 16 + 16, dst_stride);
     116           0 : }
     117             : 
     118           0 : static void copy_mem64x64(const uint8_t *src, int src_stride, uint8_t *dst,
     119             :                           int dst_stride) {
     120           0 :   copy_mem32x32(src, src_stride, dst, dst_stride);
     121           0 :   copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride);
     122           0 :   copy_mem32x32(src + src_stride * 32, src_stride, dst + src_stride * 32,
     123             :                 dst_stride);
     124           0 :   copy_mem32x32(src + src_stride * 32 + 32, src_stride,
     125           0 :                 dst + src_stride * 32 + 32, dst_stride);
     126           0 : }
     127             : 
     128           0 : static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
     129             :                        int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud,
     130             :                        uint8_t *vd, int yd_stride, int uvd_stride,
     131             :                        BLOCK_SIZE bs) {
     132           0 :   if (bs == BLOCK_16X16) {
     133           0 :     copy_mem16x16(y, y_stride, yd, yd_stride);
     134           0 :     copy_mem8x8(u, uv_stride, ud, uvd_stride);
     135           0 :     copy_mem8x8(v, uv_stride, vd, uvd_stride);
     136           0 :   } else if (bs == BLOCK_32X32) {
     137           0 :     copy_mem32x32(y, y_stride, yd, yd_stride);
     138           0 :     copy_mem16x16(u, uv_stride, ud, uvd_stride);
     139           0 :     copy_mem16x16(v, uv_stride, vd, uvd_stride);
     140             :   } else {
     141           0 :     copy_mem64x64(y, y_stride, yd, yd_stride);
     142           0 :     copy_mem32x32(u, uv_stride, ud, uvd_stride);
     143           0 :     copy_mem32x32(v, uv_stride, vd, uvd_stride);
     144             :   }
     145           0 : }
     146             : 
     147           0 : static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) {
     148           0 :   const int adj = qdiff >> MFQE_PRECISION;
     149           0 :   if (bs == BLOCK_16X16) {
     150           0 :     *sad_thr = 7 + adj;
     151           0 :   } else if (bs == BLOCK_32X32) {
     152           0 :     *sad_thr = 6 + adj;
     153             :   } else {  // BLOCK_64X64
     154           0 :     *sad_thr = 5 + adj;
     155             :   }
     156           0 :   *vdiff_thr = 125 + qdiff;
     157           0 : }
     158             : 
     159           0 : static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
     160             :                        const uint8_t *v, int y_stride, int uv_stride,
     161             :                        uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride,
     162             :                        int uvd_stride, int qdiff) {
     163             :   int sad, sad_thr, vdiff, vdiff_thr;
     164             :   uint32_t sse;
     165             : 
     166           0 :   get_thr(bs, qdiff, &sad_thr, &vdiff_thr);
     167             : 
     168           0 :   if (bs == BLOCK_16X16) {
     169           0 :     vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
     170           0 :     sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
     171           0 :   } else if (bs == BLOCK_32X32) {
     172           0 :     vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
     173           0 :     sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
     174             :   } else /* if (bs == BLOCK_64X64) */ {
     175           0 :     vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
     176           0 :     sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
     177             :   }
     178             : 
     179             :   // vdiff > sad * 3 means vdiff should not be too small, otherwise,
     180             :   // it might be a lighting change in smooth area. When there is a
     181             :   // lighting change in smooth area, it is dangerous to do MFQE.
     182           0 :   if (sad > 1 && vdiff > sad * 3) {
     183           0 :     const int weight = 1 << MFQE_PRECISION;
     184           0 :     int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
     185             :     // When ifactor equals weight, no MFQE is done.
     186           0 :     if (ifactor > weight) {
     187           0 :       ifactor = weight;
     188             :     }
     189           0 :     apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
     190             :                   uvd_stride, bs, ifactor);
     191             :   } else {
     192             :     // Copy the block from current frame (i.e., no mfqe is done).
     193           0 :     copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride,
     194             :                bs);
     195             :   }
     196           0 : }
     197             : 
     198           0 : static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
     199             :   // Check the motion in current block(for inter frame),
     200             :   // or check the motion in the correlated block in last frame (for keyframe).
     201           0 :   const int mv_len_square = mi->mv[0].as_mv.row * mi->mv[0].as_mv.row +
     202           0 :                             mi->mv[0].as_mv.col * mi->mv[0].as_mv.col;
     203           0 :   const int mv_threshold = 100;
     204           0 :   return mi->mode >= NEARESTMV &&  // Not an intra block
     205           0 :          cur_bs >= BLOCK_16X16 && mv_len_square <= mv_threshold;
     206             : }
     207             : 
     208             : // Process each partiton in a super block, recursively.
     209           0 : static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
     210             :                            const uint8_t *y, const uint8_t *u, const uint8_t *v,
     211             :                            int y_stride, int uv_stride, uint8_t *yd,
     212             :                            uint8_t *ud, uint8_t *vd, int yd_stride,
     213             :                            int uvd_stride) {
     214             :   int mi_offset, y_offset, uv_offset;
     215           0 :   const BLOCK_SIZE cur_bs = mi->sb_type;
     216           0 :   const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
     217           0 :   const int bsl = b_width_log2_lookup[bs];
     218           0 :   PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
     219           0 :   const BLOCK_SIZE subsize = get_subsize(bs, partition);
     220             : 
     221           0 :   if (cur_bs < BLOCK_8X8) {
     222             :     // If there are blocks smaller than 8x8, it must be on the boundary.
     223           0 :     return;
     224             :   }
     225             :   // No MFQE on blocks smaller than 16x16
     226           0 :   if (bs == BLOCK_16X16) {
     227           0 :     partition = PARTITION_NONE;
     228             :   }
     229           0 :   if (bs == BLOCK_64X64) {
     230           0 :     mi_offset = 4;
     231           0 :     y_offset = 32;
     232           0 :     uv_offset = 16;
     233             :   } else {
     234           0 :     mi_offset = 2;
     235           0 :     y_offset = 16;
     236           0 :     uv_offset = 8;
     237             :   }
     238           0 :   switch (partition) {
     239             :     BLOCK_SIZE mfqe_bs, bs_tmp;
     240             :     case PARTITION_HORZ:
     241           0 :       if (bs == BLOCK_64X64) {
     242           0 :         mfqe_bs = BLOCK_64X32;
     243           0 :         bs_tmp = BLOCK_32X32;
     244             :       } else {
     245           0 :         mfqe_bs = BLOCK_32X16;
     246           0 :         bs_tmp = BLOCK_16X16;
     247             :       }
     248           0 :       if (mfqe_decision(mi, mfqe_bs)) {
     249             :         // Do mfqe on the first square partition.
     250           0 :         mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
     251             :                    uvd_stride, qdiff);
     252             :         // Do mfqe on the second square partition.
     253           0 :         mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride,
     254             :                    uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset,
     255             :                    yd_stride, uvd_stride, qdiff);
     256             :       }
     257           0 :       if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) {
     258             :         // Do mfqe on the first square partition.
     259           0 :         mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
     260           0 :                    v + uv_offset * uv_stride, y_stride, uv_stride,
     261           0 :                    yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
     262           0 :                    vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
     263             :         // Do mfqe on the second square partition.
     264           0 :         mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
     265           0 :                    u + uv_offset * uv_stride + uv_offset,
     266           0 :                    v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride,
     267           0 :                    yd + y_offset * yd_stride + y_offset,
     268           0 :                    ud + uv_offset * uvd_stride + uv_offset,
     269           0 :                    vd + uv_offset * uvd_stride + uv_offset, yd_stride,
     270             :                    uvd_stride, qdiff);
     271             :       }
     272           0 :       break;
     273             :     case PARTITION_VERT:
     274           0 :       if (bs == BLOCK_64X64) {
     275           0 :         mfqe_bs = BLOCK_32X64;
     276           0 :         bs_tmp = BLOCK_32X32;
     277             :       } else {
     278           0 :         mfqe_bs = BLOCK_16X32;
     279           0 :         bs_tmp = BLOCK_16X16;
     280             :       }
     281           0 :       if (mfqe_decision(mi, mfqe_bs)) {
     282             :         // Do mfqe on the first square partition.
     283           0 :         mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
     284             :                    uvd_stride, qdiff);
     285             :         // Do mfqe on the second square partition.
     286           0 :         mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
     287           0 :                    v + uv_offset * uv_stride, y_stride, uv_stride,
     288           0 :                    yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
     289           0 :                    vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
     290             :       }
     291           0 :       if (mfqe_decision(mi + mi_offset, mfqe_bs)) {
     292             :         // Do mfqe on the first square partition.
     293           0 :         mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride,
     294             :                    uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset,
     295             :                    yd_stride, uvd_stride, qdiff);
     296             :         // Do mfqe on the second square partition.
     297           0 :         mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
     298           0 :                    u + uv_offset * uv_stride + uv_offset,
     299           0 :                    v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride,
     300           0 :                    yd + y_offset * yd_stride + y_offset,
     301           0 :                    ud + uv_offset * uvd_stride + uv_offset,
     302           0 :                    vd + uv_offset * uvd_stride + uv_offset, yd_stride,
     303             :                    uvd_stride, qdiff);
     304             :       }
     305           0 :       break;
     306             :     case PARTITION_NONE:
     307           0 :       if (mfqe_decision(mi, cur_bs)) {
     308             :         // Do mfqe on this partition.
     309           0 :         mfqe_block(cur_bs, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
     310             :                    uvd_stride, qdiff);
     311             :       } else {
     312             :         // Copy the block from current frame(i.e., no mfqe is done).
     313           0 :         copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride,
     314             :                    uvd_stride, bs);
     315             :       }
     316           0 :       break;
     317             :     case PARTITION_SPLIT:
     318             :       // Recursion on four square partitions, e.g. if bs is 64X64,
     319             :       // then look into four 32X32 blocks in it.
     320           0 :       mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd,
     321             :                      yd_stride, uvd_stride);
     322           0 :       mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset,
     323             :                      v + uv_offset, y_stride, uv_stride, yd + y_offset,
     324             :                      ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride);
     325           0 :       mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize,
     326           0 :                      y + y_offset * y_stride, u + uv_offset * uv_stride,
     327           0 :                      v + uv_offset * uv_stride, y_stride, uv_stride,
     328           0 :                      yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
     329           0 :                      vd + uv_offset * uvd_stride, yd_stride, uvd_stride);
     330           0 :       mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset, subsize,
     331           0 :                      y + y_offset * y_stride + y_offset,
     332           0 :                      u + uv_offset * uv_stride + uv_offset,
     333           0 :                      v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride,
     334           0 :                      yd + y_offset * yd_stride + y_offset,
     335           0 :                      ud + uv_offset * uvd_stride + uv_offset,
     336           0 :                      vd + uv_offset * uvd_stride + uv_offset, yd_stride,
     337             :                      uvd_stride);
     338           0 :       break;
     339           0 :     default: assert(0);
     340             :   }
     341             : }
     342             : 
     343           0 : void vp9_mfqe(VP9_COMMON *cm) {
     344             :   int mi_row, mi_col;
     345             :   // Current decoded frame.
     346           0 :   const YV12_BUFFER_CONFIG *show = cm->frame_to_show;
     347             :   // Last decoded frame and will store the MFQE result.
     348           0 :   YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
     349             :   // Loop through each super block.
     350           0 :   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
     351           0 :     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
     352             :       MODE_INFO *mi;
     353           0 :       MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col);
     354             :       // Motion Info in last frame.
     355           0 :       MODE_INFO *mi_prev =
     356           0 :           cm->postproc_state.prev_mi + (mi_row * cm->mi_stride + mi_col);
     357           0 :       const uint32_t y_stride = show->y_stride;
     358           0 :       const uint32_t uv_stride = show->uv_stride;
     359           0 :       const uint32_t yd_stride = dest->y_stride;
     360           0 :       const uint32_t uvd_stride = dest->uv_stride;
     361           0 :       const uint32_t row_offset_y = mi_row << 3;
     362           0 :       const uint32_t row_offset_uv = mi_row << 2;
     363           0 :       const uint32_t col_offset_y = mi_col << 3;
     364           0 :       const uint32_t col_offset_uv = mi_col << 2;
     365           0 :       const uint8_t *y =
     366           0 :           show->y_buffer + row_offset_y * y_stride + col_offset_y;
     367           0 :       const uint8_t *u =
     368           0 :           show->u_buffer + row_offset_uv * uv_stride + col_offset_uv;
     369           0 :       const uint8_t *v =
     370           0 :           show->v_buffer + row_offset_uv * uv_stride + col_offset_uv;
     371           0 :       uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y;
     372           0 :       uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride + col_offset_uv;
     373           0 :       uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride + col_offset_uv;
     374           0 :       if (frame_is_intra_only(cm)) {
     375           0 :         mi = mi_prev;
     376             :       } else {
     377           0 :         mi = mi_local;
     378             :       }
     379           0 :       mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud,
     380             :                      vd, yd_stride, uvd_stride);
     381             :     }
     382             :   }
     383           0 : }

Generated by: LCOV version 1.13