LCOV - code coverage report
Current view: top level - third_party/aom/av1/common - od_dering.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 222 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 12 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <math.h>
      13             : #include <stdlib.h>
      14             : 
      15             : #ifdef HAVE_CONFIG_H
      16             : #include "./config.h"
      17             : #endif
      18             : 
      19             : #include "./aom_dsp_rtcd.h"
      20             : #include "./av1_rtcd.h"
      21             : #include "./cdef.h"
      22             : 
      23             : /* Generated from gen_filter_tables.c. */
      24             : const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
      25             :   { -1 * OD_FILT_BSTRIDE + 1, -2 * OD_FILT_BSTRIDE + 2,
      26             :     -3 * OD_FILT_BSTRIDE + 3 },
      27             :   { 0 * OD_FILT_BSTRIDE + 1, -1 * OD_FILT_BSTRIDE + 2,
      28             :     -1 * OD_FILT_BSTRIDE + 3 },
      29             :   { 0 * OD_FILT_BSTRIDE + 1, 0 * OD_FILT_BSTRIDE + 2, 0 * OD_FILT_BSTRIDE + 3 },
      30             :   { 0 * OD_FILT_BSTRIDE + 1, 1 * OD_FILT_BSTRIDE + 2, 1 * OD_FILT_BSTRIDE + 3 },
      31             :   { 1 * OD_FILT_BSTRIDE + 1, 2 * OD_FILT_BSTRIDE + 2, 3 * OD_FILT_BSTRIDE + 3 },
      32             :   { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 1, 3 * OD_FILT_BSTRIDE + 1 },
      33             :   { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE + 0, 3 * OD_FILT_BSTRIDE + 0 },
      34             :   { 1 * OD_FILT_BSTRIDE + 0, 2 * OD_FILT_BSTRIDE - 1, 3 * OD_FILT_BSTRIDE - 1 },
      35             : };
      36             : 
      37             : /* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
      38             :    The search minimizes the weighted variance along all the lines in a
      39             :    particular direction, i.e. the squared error between the input and a
      40             :    "predicted" block where each pixel is replaced by the average along a line
      41             :    in a particular direction. Since each direction have the same sum(x^2) term,
      42             :    that term is never computed. See Section 2, step 2, of:
      43             :    http://jmvalin.ca/notes/intra_paint.pdf */
      44           0 : int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
      45             :                    int coeff_shift) {
      46             :   int i;
      47           0 :   int32_t cost[8] = { 0 };
      48           0 :   int partial[8][15] = { { 0 } };
      49           0 :   int32_t best_cost = 0;
      50           0 :   int best_dir = 0;
      51             :   /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
      52             :      The output is then 840 times larger, but we don't care for finding
      53             :      the max. */
      54             :   static const int div_table[] = { 0, 840, 420, 280, 210, 168, 140, 120, 105 };
      55           0 :   for (i = 0; i < 8; i++) {
      56             :     int j;
      57           0 :     for (j = 0; j < 8; j++) {
      58             :       int x;
      59             :       /* We subtract 128 here to reduce the maximum range of the squared
      60             :          partial sums. */
      61           0 :       x = (img[i * stride + j] >> coeff_shift) - 128;
      62           0 :       partial[0][i + j] += x;
      63           0 :       partial[1][i + j / 2] += x;
      64           0 :       partial[2][i] += x;
      65           0 :       partial[3][3 + i - j / 2] += x;
      66           0 :       partial[4][7 + i - j] += x;
      67           0 :       partial[5][3 - i / 2 + j] += x;
      68           0 :       partial[6][j] += x;
      69           0 :       partial[7][i / 2 + j] += x;
      70             :     }
      71             :   }
      72           0 :   for (i = 0; i < 8; i++) {
      73           0 :     cost[2] += partial[2][i] * partial[2][i];
      74           0 :     cost[6] += partial[6][i] * partial[6][i];
      75             :   }
      76           0 :   cost[2] *= div_table[8];
      77           0 :   cost[6] *= div_table[8];
      78           0 :   for (i = 0; i < 7; i++) {
      79           0 :     cost[0] += (partial[0][i] * partial[0][i] +
      80           0 :                 partial[0][14 - i] * partial[0][14 - i]) *
      81           0 :                div_table[i + 1];
      82           0 :     cost[4] += (partial[4][i] * partial[4][i] +
      83           0 :                 partial[4][14 - i] * partial[4][14 - i]) *
      84           0 :                div_table[i + 1];
      85             :   }
      86           0 :   cost[0] += partial[0][7] * partial[0][7] * div_table[8];
      87           0 :   cost[4] += partial[4][7] * partial[4][7] * div_table[8];
      88           0 :   for (i = 1; i < 8; i += 2) {
      89             :     int j;
      90           0 :     for (j = 0; j < 4 + 1; j++) {
      91           0 :       cost[i] += partial[i][3 + j] * partial[i][3 + j];
      92             :     }
      93           0 :     cost[i] *= div_table[8];
      94           0 :     for (j = 0; j < 4 - 1; j++) {
      95           0 :       cost[i] += (partial[i][j] * partial[i][j] +
      96           0 :                   partial[i][10 - j] * partial[i][10 - j]) *
      97           0 :                  div_table[2 * j + 2];
      98             :     }
      99             :   }
     100           0 :   for (i = 0; i < 8; i++) {
     101           0 :     if (cost[i] > best_cost) {
     102           0 :       best_cost = cost[i];
     103           0 :       best_dir = i;
     104             :     }
     105             :   }
     106             :   /* Difference between the optimal variance and the variance along the
     107             :      orthogonal direction. Again, the sum(x^2) terms cancel out. */
     108           0 :   *var = best_cost - cost[(best_dir + 4) & 7];
     109             :   /* We'd normally divide by 840, but dividing by 1024 is close enough
     110             :      for what we're going to do with this. */
     111           0 :   *var >>= 10;
     112           0 :   return best_dir;
     113             : }
     114             : 
     115             : /* Smooth in the direction detected. */
     116           0 : void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
     117             :                                       const uint16_t *in, int threshold,
     118             :                                       int dir, int damping) {
     119             :   int i;
     120             :   int j;
     121             :   int k;
     122             :   static const int taps[3] = { 3, 2, 1 };
     123           0 :   for (i = 0; i < 8; i++) {
     124           0 :     for (j = 0; j < 8; j++) {
     125             :       int16_t sum;
     126             :       int16_t xx;
     127             :       int16_t yy;
     128           0 :       xx = in[i * OD_FILT_BSTRIDE + j];
     129           0 :       sum = 0;
     130           0 :       for (k = 0; k < 3; k++) {
     131             :         int16_t p0;
     132             :         int16_t p1;
     133           0 :         p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
     134             :              xx;
     135           0 :         p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
     136             :              xx;
     137           0 :         sum += taps[k] * constrain(p0, threshold, damping);
     138           0 :         sum += taps[k] * constrain(p1, threshold, damping);
     139             :       }
     140           0 :       sum = (sum + 8) >> 4;
     141           0 :       yy = xx + sum;
     142           0 :       y[i * ystride + j] = yy;
     143             :     }
     144             :   }
     145           0 : }
     146             : 
     147             : /* Smooth in the direction detected. */
     148           0 : void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
     149             :                                       const uint16_t *in, int threshold,
     150             :                                       int dir, int damping) {
     151             :   int i;
     152             :   int j;
     153             :   int k;
     154             :   static const int taps[2] = { 4, 1 };
     155           0 :   for (i = 0; i < 4; i++) {
     156           0 :     for (j = 0; j < 4; j++) {
     157             :       int16_t sum;
     158             :       int16_t xx;
     159             :       int16_t yy;
     160           0 :       xx = in[i * OD_FILT_BSTRIDE + j];
     161           0 :       sum = 0;
     162           0 :       for (k = 0; k < 2; k++) {
     163             :         int16_t p0;
     164             :         int16_t p1;
     165           0 :         p0 = in[i * OD_FILT_BSTRIDE + j + OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
     166             :              xx;
     167           0 :         p1 = in[i * OD_FILT_BSTRIDE + j - OD_DIRECTION_OFFSETS_TABLE[dir][k]] -
     168             :              xx;
     169           0 :         sum += taps[k] * constrain(p0, threshold, damping);
     170           0 :         sum += taps[k] * constrain(p1, threshold, damping);
     171             :       }
     172           0 :       sum = (sum + 8) >> 4;
     173           0 :       yy = xx + sum;
     174           0 :       y[i * ystride + j] = yy;
     175             :     }
     176             :   }
     177           0 : }
     178             : 
     179             : /* Compute deringing filter threshold for an 8x8 block based on the
     180             :    directional variance difference. A high variance difference means that we
     181             :    have a highly directional pattern (e.g. a high contrast edge), so we can
     182             :    apply more deringing. A low variance means that we either have a low
     183             :    contrast edge, or a non-directional texture, so we want to be careful not
     184             :    to blur. */
     185           0 : static INLINE int od_adjust_thresh(int threshold, int32_t var) {
     186           0 :   const int i = var >> 6 ? AOMMIN(get_msb(var >> 6), 12) : 0;
     187             :   /* We use the variance of 8x8 blocks to adjust the threshold. */
     188           0 :   return var ? (threshold * (4 + i) + 8) >> 4 : 0;
     189             : }
     190             : 
     191           0 : void copy_8x8_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
     192             :                                int sstride) {
     193             :   int i, j;
     194           0 :   for (i = 0; i < 8; i++)
     195           0 :     for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
     196           0 : }
     197             : 
     198           0 : void copy_4x4_16bit_to_16bit_c(uint16_t *dst, int dstride, const uint16_t *src,
     199             :                                int sstride) {
     200             :   int i, j;
     201           0 :   for (i = 0; i < 4; i++)
     202           0 :     for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
     203           0 : }
     204             : 
     205           0 : void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
     206             :                                 dering_list *dlist, int dering_count,
     207             :                                 int bsize) {
     208             :   int bi, bx, by;
     209             : 
     210           0 :   if (bsize == BLOCK_8X8) {
     211           0 :     for (bi = 0; bi < dering_count; bi++) {
     212           0 :       by = dlist[bi].by;
     213           0 :       bx = dlist[bi].bx;
     214           0 :       copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
     215           0 :                               &src[bi << (3 + 3)], 8);
     216             :     }
     217           0 :   } else if (bsize == BLOCK_4X8) {
     218           0 :     for (bi = 0; bi < dering_count; bi++) {
     219           0 :       by = dlist[bi].by;
     220           0 :       bx = dlist[bi].bx;
     221           0 :       copy_4x4_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
     222           0 :                               &src[bi << (3 + 2)], 4);
     223           0 :       copy_4x4_16bit_to_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
     224           0 :                               dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
     225             :     }
     226           0 :   } else if (bsize == BLOCK_8X4) {
     227           0 :     for (bi = 0; bi < dering_count; bi++) {
     228           0 :       by = dlist[bi].by;
     229           0 :       bx = dlist[bi].bx;
     230           0 :       copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
     231           0 :                               &src[bi << (2 + 3)], 8);
     232           0 :       copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4],
     233           0 :                               dstride, &src[(bi << (2 + 3)) + 4], 8);
     234             :     }
     235             :   } else {
     236           0 :     assert(bsize == BLOCK_4X4);
     237           0 :     for (bi = 0; bi < dering_count; bi++) {
     238           0 :       by = dlist[bi].by;
     239           0 :       bx = dlist[bi].bx;
     240           0 :       copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
     241           0 :                               &src[bi << (2 + 2)], 4);
     242             :     }
     243             :   }
     244           0 : }
     245             : 
     246           0 : void copy_8x8_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
     247             :                               int sstride) {
     248             :   int i, j;
     249           0 :   for (i = 0; i < 8; i++)
     250           0 :     for (j = 0; j < 8; j++)
     251           0 :       dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
     252           0 : }
     253             : 
     254           0 : void copy_4x4_16bit_to_8bit_c(uint8_t *dst, int dstride, const uint16_t *src,
     255             :                               int sstride) {
     256             :   int i, j;
     257           0 :   for (i = 0; i < 4; i++)
     258           0 :     for (j = 0; j < 4; j++)
     259           0 :       dst[i * dstride + j] = (uint8_t)src[i * sstride + j];
     260           0 : }
     261             : 
     262           0 : static void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride,
     263             :                                       const uint16_t *src, dering_list *dlist,
     264             :                                       int dering_count, int bsize) {
     265             :   int bi, bx, by;
     266           0 :   if (bsize == BLOCK_8X8) {
     267           0 :     for (bi = 0; bi < dering_count; bi++) {
     268           0 :       by = dlist[bi].by;
     269           0 :       bx = dlist[bi].bx;
     270           0 :       copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
     271           0 :                              &src[bi << (3 + 3)], 8);
     272             :     }
     273           0 :   } else if (bsize == BLOCK_4X8) {
     274           0 :     for (bi = 0; bi < dering_count; bi++) {
     275           0 :       by = dlist[bi].by;
     276           0 :       bx = dlist[bi].bx;
     277           0 :       copy_4x4_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
     278           0 :                              &src[bi << (3 + 2)], 4);
     279           0 :       copy_4x4_16bit_to_8bit(&dst[((by << 3) + 4) * dstride + (bx << 2)],
     280           0 :                              dstride, &src[(bi << (3 + 2)) + 4 * 4], 4);
     281             :     }
     282           0 :   } else if (bsize == BLOCK_8X4) {
     283           0 :     for (bi = 0; bi < dering_count; bi++) {
     284           0 :       by = dlist[bi].by;
     285           0 :       bx = dlist[bi].bx;
     286           0 :       copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
     287           0 :                              &src[bi << (2 + 3)], 8);
     288           0 :       copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
     289           0 :                              &src[(bi << (2 + 3)) + 4], 8);
     290             :     }
     291             :   } else {
     292           0 :     assert(bsize == BLOCK_4X4);
     293           0 :     for (bi = 0; bi < dering_count; bi++) {
     294           0 :       by = dlist[bi].by;
     295           0 :       bx = dlist[bi].bx;
     296           0 :       copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
     297           0 :                              &src[bi << (2 * 2)], 4);
     298             :     }
     299             :   }
     300           0 : }
     301             : 
     302           0 : int get_filter_skip(int level) {
     303           0 :   int filter_skip = level & 1;
     304           0 :   if (level == 1) filter_skip = 0;
     305           0 :   return filter_skip;
     306             : }
     307             : 
     308           0 : void od_dering(uint8_t *dst, int dstride, uint16_t *y, uint16_t *in, int xdec,
     309             :                int ydec, int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
     310             :                int *dirinit, int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
     311             :                int pli, dering_list *dlist, int dering_count, int level,
     312             :                int clpf_strength, int clpf_damping, int dering_damping,
     313             :                int coeff_shift, int skip_dering, int hbd) {
     314             :   int bi;
     315             :   int bx;
     316             :   int by;
     317             :   int bsize, bsizex, bsizey;
     318             : 
     319           0 :   int threshold = (level >> 1) << coeff_shift;
     320           0 :   int filter_skip = get_filter_skip(level);
     321           0 :   if (level == 1) threshold = 31 << coeff_shift;
     322             : 
     323           0 :   od_filter_dering_direction_func filter_dering_direction[] = {
     324             :     od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
     325             :   };
     326           0 :   clpf_damping += coeff_shift - (pli != AOM_PLANE_Y);
     327           0 :   dering_damping += coeff_shift - (pli != AOM_PLANE_Y);
     328           0 :   bsize =
     329           0 :       ydec ? (xdec ? BLOCK_4X4 : BLOCK_8X4) : (xdec ? BLOCK_4X8 : BLOCK_8X8);
     330           0 :   bsizex = 3 - xdec;
     331           0 :   bsizey = 3 - ydec;
     332             : 
     333           0 :   if (!skip_dering) {
     334           0 :     if (pli == 0) {
     335           0 :       if (!dirinit || !*dirinit) {
     336           0 :         for (bi = 0; bi < dering_count; bi++) {
     337           0 :           by = dlist[bi].by;
     338           0 :           bx = dlist[bi].bx;
     339           0 :           dir[by][bx] =
     340           0 :               od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
     341           0 :                            OD_FILT_BSTRIDE, &var[by][bx], coeff_shift);
     342             :         }
     343           0 :         if (dirinit) *dirinit = 1;
     344             :       }
     345             :     }
     346             :     // Only run dering for non-zero threshold (which is always the case for
     347             :     // 4:2:2 or 4:4:0). If we don't dering, we still need to eventually write
     348             :     // something out in y[] later.
     349           0 :     if (threshold != 0) {
     350           0 :       assert(bsize == BLOCK_8X8 || bsize == BLOCK_4X4);
     351           0 :       for (bi = 0; bi < dering_count; bi++) {
     352           0 :         int t = !filter_skip && dlist[bi].skip ? 0 : threshold;
     353           0 :         by = dlist[bi].by;
     354           0 :         bx = dlist[bi].bx;
     355           0 :         (filter_dering_direction[bsize == BLOCK_8X8])(
     356           0 :             &y[bi << (bsizex + bsizey)], 1 << bsizex,
     357           0 :             &in[(by * OD_FILT_BSTRIDE << bsizey) + (bx << bsizex)],
     358           0 :             pli ? t : od_adjust_thresh(t, var[by][bx]), dir[by][bx],
     359             :             dering_damping);
     360             :       }
     361             :     }
     362             :   }
     363             : 
     364           0 :   if (clpf_strength) {
     365           0 :     if (threshold && !skip_dering)
     366           0 :       copy_dering_16bit_to_16bit(in, OD_FILT_BSTRIDE, y, dlist, dering_count,
     367             :                                  bsize);
     368           0 :     for (bi = 0; bi < dering_count; bi++) {
     369           0 :       by = dlist[bi].by;
     370           0 :       bx = dlist[bi].bx;
     371           0 :       int py = by << bsizey;
     372           0 :       int px = bx << bsizex;
     373             : 
     374           0 :       if (!filter_skip && dlist[bi].skip) continue;
     375           0 :       if (!dst || hbd) {
     376             :         // 16 bit destination if high bitdepth or 8 bit destination not given
     377           0 :         (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block_hbd
     378           0 :                                                         : aom_clpf_hblock_hbd)(
     379           0 :             dst ? (uint16_t *)dst + py * dstride + px
     380           0 :                 : &y[bi << (bsizex + bsizey)],
     381           0 :             in + py * OD_FILT_BSTRIDE + px, dst && hbd ? dstride : 1 << bsizex,
     382             :             OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
     383           0 :             clpf_strength << coeff_shift, clpf_damping);
     384             :       } else {
     385             :         // Do clpf and write the result to an 8 bit destination
     386           0 :         (!threshold || (dir[by][bx] < 4 && dir[by][bx]) ? aom_clpf_block
     387           0 :                                                         : aom_clpf_hblock)(
     388           0 :             dst + py * dstride + px, in + py * OD_FILT_BSTRIDE + px, dstride,
     389             :             OD_FILT_BSTRIDE, 1 << bsizex, 1 << bsizey,
     390           0 :             clpf_strength << coeff_shift, clpf_damping);
     391             :       }
     392             :     }
     393           0 :   } else if (threshold != 0) {
     394             :     // No clpf, so copy instead
     395           0 :     if (hbd) {
     396           0 :       copy_dering_16bit_to_16bit((uint16_t *)dst, dstride, y, dlist,
     397             :                                  dering_count, bsize);
     398             :     } else {
     399           0 :       copy_dering_16bit_to_8bit(dst, dstride, y, dlist, dering_count, bsize);
     400             :     }
     401           0 :   } else if (dirinit) {
     402             :     // If we're here, both dering and clpf are off, and we still haven't written
     403             :     // anything to y[] yet, so we just copy the input to y[]. This is necessary
     404             :     // only for av1_cdef_search() and only av1_cdef_search() sets dirinit.
     405           0 :     for (bi = 0; bi < dering_count; bi++) {
     406           0 :       by = dlist[bi].by;
     407           0 :       bx = dlist[bi].bx;
     408             :       int iy, ix;
     409             :       // TODO(stemidts/jmvalin): SIMD optimisations
     410           0 :       for (iy = 0; iy < 1 << bsizey; iy++)
     411           0 :         for (ix = 0; ix < 1 << bsizex; ix++)
     412           0 :           y[(bi << (bsizex + bsizey)) + (iy << bsizex) + ix] =
     413           0 :               in[((by << bsizey) + iy) * OD_FILT_BSTRIDE + (bx << bsizex) + ix];
     414             :     }
     415             :   }
     416           0 : }

Generated by: LCOV version 1.13