LCOV - code coverage report
Current view: top level - third_party/aom/av1/encoder - pickcdef.c (source / functions)
Test: output.info
Date: 2017-07-14 16:53:18

                 Hit    Total    Coverage
  Lines:           0      280      0.0 %
  Functions:       0       10      0.0 %

Legend: Lines: hit | not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
       3             :  *
       4             :  * This source code is subject to the terms of the BSD 2 Clause License and
       5             :  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
       6             :  * was not distributed with this source code in the LICENSE file, you can
       7             :  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
       8             :  * Media Patent License 1.0 was not distributed with this source code in the
       9             :  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
      10             :  */
      11             : 
      12             : #include <math.h>
      13             : #include <string.h>
      14             : 
      15             : #include "./aom_scale_rtcd.h"
      16             : #include "aom/aom_integer.h"
      17             : #include "av1/common/cdef.h"
      18             : #include "av1/common/onyxc_int.h"
      19             : #include "av1/common/reconinter.h"
      20             : #include "av1/encoder/encoder.h"
      21             : 
      22             : #define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS)
      23             : 
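
A joint strength index gi in [0, TOTAL_STRENGTHS) packs one dering strength and
one CLPF strength; the search below splits it back apart as gi / CLPF_STRENGTHS
and gi % CLPF_STRENGTHS. A minimal standalone sketch of that decomposition,
assuming CLPF_STRENGTHS is 4 (the real value comes from av1/common/cdef.h):

    #include <stdio.h>

    int main(void) {
      int gi = 9;                                          /* hypothetical joint index */
      int dering_strength = gi / 4;                        /* -> 2 */
      int clpf_index = gi % 4;                             /* -> 1 */
      int clpf_strength = clpf_index + (clpf_index == 3);  /* 0,1,2,3 -> 0,1,2,4 */
      printf("gi=%d -> dering=%d clpf=%d\n", gi, dering_strength, clpf_strength);
      return 0;
    }
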
      24             : /* Search for the best strength to add as an option, knowing we
      25             :    already selected nb_strengths options. */
      26           0 : static uint64_t search_one(int *lev, int nb_strengths,
      27             :                            uint64_t mse[][TOTAL_STRENGTHS], int sb_count) {
      28             :   uint64_t tot_mse[TOTAL_STRENGTHS];
      29             :   int i, j;
      30           0 :   uint64_t best_tot_mse = (uint64_t)1 << 63;
      31           0 :   int best_id = 0;
      32           0 :   memset(tot_mse, 0, sizeof(tot_mse));
      33           0 :   for (i = 0; i < sb_count; i++) {
      34             :     int gi;
      35           0 :     uint64_t best_mse = (uint64_t)1 << 63;
      36             :     /* Find best mse among already selected options. */
      37           0 :     for (gi = 0; gi < nb_strengths; gi++) {
      38           0 :       if (mse[i][lev[gi]] < best_mse) {
      39           0 :         best_mse = mse[i][lev[gi]];
      40             :       }
      41             :     }
      42             :     /* Find best mse when adding each possible new option. */
      43           0 :     for (j = 0; j < TOTAL_STRENGTHS; j++) {
      44           0 :       uint64_t best = best_mse;
      45           0 :       if (mse[i][j] < best) best = mse[i][j];
      46           0 :       tot_mse[j] += best;
      47             :     }
      48             :   }
      49           0 :   for (j = 0; j < TOTAL_STRENGTHS; j++) {
      50           0 :     if (tot_mse[j] < best_tot_mse) {
      51           0 :       best_tot_mse = tot_mse[j];
      52           0 :       best_id = j;
      53             :     }
      54             :   }
      55           0 :   lev[nb_strengths] = best_id;
      56           0 :   return best_tot_mse;
      57             : }
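
As a concrete illustration of the min-and-accumulate step above, here is a
minimal standalone sketch (hypothetical values, not part of pickcdef.c) with
two superblocks, three candidate strengths, and one strength already selected:

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      uint64_t mse[2][3] = { { 10, 4, 7 },   /* per-strength MSE, superblock 0 */
                             { 3, 9, 5 } };  /* per-strength MSE, superblock 1 */
      int lev[1] = { 0 };                    /* strength 0 already selected */
      uint64_t tot[3] = { 0, 0, 0 };
      for (int i = 0; i < 2; i++) {
        uint64_t base = mse[i][lev[0]];      /* best MSE among selected options */
        for (int j = 0; j < 3; j++) {
          uint64_t best = base;
          if (mse[i][j] < best) best = mse[i][j];  /* this SB may switch to j */
          tot[j] += best;
        }
      }
      /* tot = {13, 7, 10}: adding strength 1 helps most, which is what
         search_one() would select and return for this table. */
      for (int j = 0; j < 3; j++)
        printf("add strength %d -> total mse %llu\n", j, (unsigned long long)tot[j]);
      return 0;
    }
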
      58             : 
      59             : /* Search for the best luma+chroma strength to add as an option, knowing we
      60             :    already selected nb_strengths options. */
      61           0 : static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
      62             :                                 uint64_t (**mse)[TOTAL_STRENGTHS],
      63             :                                 int sb_count) {
      64             :   uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
      65             :   int i, j;
      66           0 :   uint64_t best_tot_mse = (uint64_t)1 << 63;
      67           0 :   int best_id0 = 0;
      68           0 :   int best_id1 = 0;
      69           0 :   memset(tot_mse, 0, sizeof(tot_mse));
      70           0 :   for (i = 0; i < sb_count; i++) {
      71             :     int gi;
      72           0 :     uint64_t best_mse = (uint64_t)1 << 63;
      73             :     /* Find best mse among already selected options. */
      74           0 :     for (gi = 0; gi < nb_strengths; gi++) {
      75           0 :       uint64_t curr = mse[0][i][lev0[gi]];
      76           0 :       curr += mse[1][i][lev1[gi]];
      77           0 :       if (curr < best_mse) {
      78           0 :         best_mse = curr;
      79             :       }
      80             :     }
      81             :     /* Find best mse when adding each possible new option. */
      82           0 :     for (j = 0; j < TOTAL_STRENGTHS; j++) {
      83             :       int k;
      84           0 :       for (k = 0; k < TOTAL_STRENGTHS; k++) {
      85           0 :         uint64_t best = best_mse;
      86           0 :         uint64_t curr = mse[0][i][j];
      87           0 :         curr += mse[1][i][k];
      88           0 :         if (curr < best) best = curr;
      89           0 :         tot_mse[j][k] += best;
      90             :       }
      91             :     }
      92             :   }
      93           0 :   for (j = 0; j < TOTAL_STRENGTHS; j++) {
      94             :     int k;
      95           0 :     for (k = 0; k < TOTAL_STRENGTHS; k++) {
      96           0 :       if (tot_mse[j][k] < best_tot_mse) {
      97           0 :         best_tot_mse = tot_mse[j][k];
      98           0 :         best_id0 = j;
      99           0 :         best_id1 = k;
     100             :       }
     101             :     }
     102             :   }
     103           0 :   lev0[nb_strengths] = best_id0;
     104           0 :   lev1[nb_strengths] = best_id1;
     105           0 :   return best_tot_mse;
     106             : }
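
Written as a formula, the dual step above picks the pair of new luma/chroma
strengths

$$
(j^{*},k^{*}) = \arg\min_{j,k} \sum_{i=0}^{N_{\mathrm{sb}}-1}
\min\!\Big( \min_{g<n}\big(M_0[i][\ell_0[g]] + M_1[i][\ell_1[g]]\big),\;
M_0[i][j] + M_1[i][k] \Big),
$$

where $M_0$ and $M_1$ are the per-superblock luma and chroma MSE tables,
$\ell_0$, $\ell_1$ the $n$ already-selected options, and the inner minimum is
treated as infinite when $n = 0$ (the (uint64_t)1 << 63 initializer plays that
role).
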
     107             : 
     108             : /* Search for the set of strengths that minimizes mse. */
     109           0 : static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
     110             :                                       uint64_t mse[][TOTAL_STRENGTHS],
     111             :                                       int sb_count) {
     112             :   uint64_t best_tot_mse;
     113             :   int i;
     114           0 :   best_tot_mse = (uint64_t)1 << 63;
      115             :   /* Greedy search: add one strength option at a time. */
     116           0 :   for (i = 0; i < nb_strengths; i++) {
     117           0 :     best_tot_mse = search_one(best_lev, i, mse, sb_count);
     118             :   }
      119             :   /* Try to refine the greedy search: repeatedly drop the oldest selected
      120             :      option and re-optimize that slot given the remaining ones. */
     121           0 :   for (i = 0; i < 4 * nb_strengths; i++) {
     122             :     int j;
     123           0 :     for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
     124           0 :     best_tot_mse = search_one(best_lev, nb_strengths - 1, mse, sb_count);
     125             :   }
     126           0 :   return best_tot_mse;
     127             : }
     128             : 
     129             : /* Search for the set of luma+chroma strengths that minimizes mse. */
     130           0 : static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
     131             :                                            int nb_strengths,
     132             :                                            uint64_t (**mse)[TOTAL_STRENGTHS],
     133             :                                            int sb_count) {
     134             :   uint64_t best_tot_mse;
     135             :   int i;
     136           0 :   best_tot_mse = (uint64_t)1 << 63;
      137             :   /* Greedy search: add one strength option at a time. */
     138           0 :   for (i = 0; i < nb_strengths; i++) {
     139           0 :     best_tot_mse = search_one_dual(best_lev0, best_lev1, i, mse, sb_count);
     140             :   }
      141             :   /* Try to refine the greedy search: repeatedly drop the oldest selected
      142             :      option and re-optimize that slot given the remaining ones. */
     143           0 :   for (i = 0; i < 4 * nb_strengths; i++) {
     144             :     int j;
     145           0 :     for (j = 0; j < nb_strengths - 1; j++) {
     146           0 :       best_lev0[j] = best_lev0[j + 1];
     147           0 :       best_lev1[j] = best_lev1[j + 1];
     148             :     }
     149           0 :     best_tot_mse =
     150           0 :         search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, sb_count);
     151             :   }
     152           0 :   return best_tot_mse;
     153             : }
     154             : 
     155             : /* FIXME: SSE-optimize this. */
     156           0 : static void copy_sb16_16(uint16_t *dst, int dstride, const uint16_t *src,
     157             :                          int src_voffset, int src_hoffset, int sstride,
     158             :                          int vsize, int hsize) {
     159             :   int r, c;
     160           0 :   const uint16_t *base = &src[src_voffset * sstride + src_hoffset];
     161           0 :   for (r = 0; r < vsize; r++) {
     162           0 :     for (c = 0; c < hsize; c++) {
     163           0 :       dst[r * dstride + c] = base[r * sstride + c];
     164             :     }
     165             :   }
     166           0 : }
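
The FIXME above asks for an SSE version. One possible shape for the inner row
copy, sketched with SSE2 intrinsics under a hypothetical helper name, with a
scalar tail for widths that are not a multiple of 8:

    #include <emmintrin.h>
    #include <stdint.h>

    static void copy_row_sse2(uint16_t *dst, const uint16_t *src, int hsize) {
      int c = 0;
      for (; c + 8 <= hsize; c += 8) {  /* 8 x uint16_t = one 128-bit register */
        __m128i v = _mm_loadu_si128((const __m128i *)(src + c));
        _mm_storeu_si128((__m128i *)(dst + c), v);
      }
      for (; c < hsize; c++) dst[c] = src[c];  /* scalar tail */
    }

copy_sb16_16() would then call this once per row; the unaligned load/store
keeps it valid for arbitrary src/dst strides.
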
     167             : 
     168           0 : static INLINE uint64_t dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
     169             :                                       int sstride, int coeff_shift) {
     170           0 :   uint64_t svar = 0;
     171           0 :   uint64_t dvar = 0;
     172           0 :   uint64_t sum_s = 0;
     173           0 :   uint64_t sum_d = 0;
     174           0 :   uint64_t sum_s2 = 0;
     175           0 :   uint64_t sum_d2 = 0;
     176           0 :   uint64_t sum_sd = 0;
     177             :   int i, j;
     178           0 :   for (i = 0; i < 8; i++) {
     179           0 :     for (j = 0; j < 8; j++) {
     180           0 :       sum_s += src[i * sstride + j];
     181           0 :       sum_d += dst[i * dstride + j];
     182           0 :       sum_s2 += src[i * sstride + j] * src[i * sstride + j];
     183           0 :       sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
     184           0 :       sum_sd += src[i * sstride + j] * dst[i * dstride + j];
     185             :     }
     186             :   }
     187             :   /* Compute the variance -- the calculation cannot go negative. */
     188           0 :   svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
     189           0 :   dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
     190           0 :   return (uint64_t)floor(
     191             :       .5 +
     192           0 :       (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
     193           0 :           (svar + dvar + (400 << 2 * coeff_shift)) /
     194           0 :           (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
     195             : }
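
In equation form, with s = coeff_shift and the sums running over the 64 pixels
of the 8x8 block, the value returned above is approximately

$$
\sigma_s^2 = \sum s^2 - \tfrac{1}{64}\Big(\sum s\Big)^{2},\qquad
\sigma_d^2 = \sum d^2 - \tfrac{1}{64}\Big(\sum d\Big)^{2},
$$
$$
D = \operatorname{round}\!\left(\frac{\mathrm{SSE}}{2}\cdot
\frac{\sigma_s^2+\sigma_d^2+400\cdot 2^{2s}}
     {\sqrt{20000\cdot 2^{4s}+\sigma_s^2\,\sigma_d^2}}\right),
\qquad
\mathrm{SSE}=\sum d^2+\sum s^2-2\sum s\,d .
$$

Roughly speaking, the weighting factor is small when the source and filtered
blocks have similar variance and grows as their variances diverge, so the
metric penalizes filtering that adds or removes texture more heavily than
plain MSE would.
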
     196             : 
     197           0 : static INLINE uint64_t mse_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
     198             :                                      int sstride) {
     199           0 :   uint64_t sum = 0;
     200             :   int i, j;
     201           0 :   for (i = 0; i < 8; i++) {
     202           0 :     for (j = 0; j < 8; j++) {
     203           0 :       int e = dst[i * dstride + j] - src[i * sstride + j];
     204           0 :       sum += e * e;
     205             :     }
     206             :   }
     207           0 :   return sum;
     208             : }
     209             : 
     210           0 : static INLINE uint64_t mse_4x4_16bit(uint16_t *dst, int dstride, uint16_t *src,
     211             :                                      int sstride) {
     212           0 :   uint64_t sum = 0;
     213             :   int i, j;
     214           0 :   for (i = 0; i < 4; i++) {
     215           0 :     for (j = 0; j < 4; j++) {
     216           0 :       int e = dst[i * dstride + j] - src[i * sstride + j];
     217           0 :       sum += e * e;
     218             :     }
     219             :   }
     220           0 :   return sum;
     221             : }
     222             : 
     223             : /* Compute MSE only on the blocks we filtered. */
     224           0 : uint64_t compute_dering_dist(uint16_t *dst, int dstride, uint16_t *src,
     225             :                              dering_list *dlist, int dering_count,
     226             :                              BLOCK_SIZE bsize, int coeff_shift, int pli) {
     227           0 :   uint64_t sum = 0;
     228             :   int bi, bx, by;
     229           0 :   if (bsize == BLOCK_8X8) {
     230           0 :     for (bi = 0; bi < dering_count; bi++) {
     231           0 :       by = dlist[bi].by;
     232           0 :       bx = dlist[bi].bx;
     233           0 :       if (pli == 0) {
     234           0 :         sum += dist_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
     235           0 :                               &src[bi << (3 + 3)], 8, coeff_shift);
     236             :       } else {
     237           0 :         sum += mse_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
     238           0 :                              &src[bi << (3 + 3)], 8);
     239             :       }
     240             :     }
     241           0 :   } else if (bsize == BLOCK_4X8) {
     242           0 :     for (bi = 0; bi < dering_count; bi++) {
     243           0 :       by = dlist[bi].by;
     244           0 :       bx = dlist[bi].bx;
     245           0 :       sum += mse_4x4_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride,
     246           0 :                            &src[bi << (3 + 2)], 4);
     247           0 :       sum += mse_4x4_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)], dstride,
     248           0 :                            &src[(bi << (3 + 2)) + 4 * 4], 4);
     249             :     }
     250           0 :   } else if (bsize == BLOCK_8X4) {
     251           0 :     for (bi = 0; bi < dering_count; bi++) {
     252           0 :       by = dlist[bi].by;
     253           0 :       bx = dlist[bi].bx;
     254           0 :       sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride,
     255           0 :                            &src[bi << (2 + 3)], 8);
     256           0 :       sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride,
     257           0 :                            &src[(bi << (2 + 3)) + 4], 8);
     258             :     }
     259             :   } else {
     260           0 :     assert(bsize == BLOCK_4X4);
     261           0 :     for (bi = 0; bi < dering_count; bi++) {
     262           0 :       by = dlist[bi].by;
     263           0 :       bx = dlist[bi].bx;
     264           0 :       sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
     265           0 :                            &src[bi << (2 + 2)], 4);
     266             :     }
     267             :   }
     268           0 :   return sum >> 2 * coeff_shift;
     269             : }
     270             : 
     271           0 : void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
     272             :                      AV1_COMMON *cm, MACROBLOCKD *xd) {
     273             :   int r, c;
     274             :   int sbr, sbc;
     275             :   uint16_t *src[3];
     276             :   uint16_t *ref_coeff[3];
     277             :   dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
     278           0 :   int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
     279           0 :   int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
     280             :   int stride[3];
     281             :   int bsize[3];
     282             :   int mi_wide_l2[3];
     283             :   int mi_high_l2[3];
     284             :   int xdec[3];
     285             :   int ydec[3];
     286             :   int pli;
     287             :   int dering_count;
     288           0 :   int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
     289           0 :   uint64_t best_tot_mse = (uint64_t)1 << 63;
     290             :   uint64_t tot_mse;
     291             :   int sb_count;
     292           0 :   int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
     293           0 :   int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
     294           0 :   int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
     295           0 :   int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
     296             :   uint64_t(*mse[2])[TOTAL_STRENGTHS];
     297           0 :   int clpf_damping = 3 + (cm->base_qindex >> 6);
     298           0 :   int dering_damping = 6;
     299             :   int i;
     300             :   int nb_strengths;
     301             :   int nb_strength_bits;
     302             :   int quantizer;
     303             :   double lambda;
     304           0 :   int nplanes = 3;
     305             :   DECLARE_ALIGNED(32, uint16_t, inbuf[OD_DERING_INBUF_SIZE]);
     306             :   uint16_t *in;
     307             :   DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_SB_SQUARE]);
     308           0 :   int chroma_dering =
     309           0 :       xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
     310           0 :       xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
     311           0 :   quantizer =
     312           0 :       av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8);
     313           0 :   lambda = .12 * quantizer * quantizer / 256.;
     314             : 
     315           0 :   av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
     316           0 :   mse[0] = aom_malloc(sizeof(**mse) * nvsb * nhsb);
     317           0 :   mse[1] = aom_malloc(sizeof(**mse) * nvsb * nhsb);
     318           0 :   for (pli = 0; pli < nplanes; pli++) {
     319             :     uint8_t *ref_buffer;
     320             :     int ref_stride;
     321           0 :     switch (pli) {
     322             :       case 0:
     323           0 :         ref_buffer = ref->y_buffer;
     324           0 :         ref_stride = ref->y_stride;
     325           0 :         break;
     326             :       case 1:
     327           0 :         ref_buffer = ref->u_buffer;
     328           0 :         ref_stride = ref->uv_stride;
     329           0 :         break;
     330             :       case 2:
     331           0 :         ref_buffer = ref->v_buffer;
     332           0 :         ref_stride = ref->uv_stride;
     333           0 :         break;
     334             :     }
     335           0 :     src[pli] = aom_memalign(
     336           0 :         32, sizeof(*src) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE);
     337           0 :     ref_coeff[pli] = aom_memalign(
     338           0 :         32, sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE);
     339           0 :     xdec[pli] = xd->plane[pli].subsampling_x;
     340           0 :     ydec[pli] = xd->plane[pli].subsampling_y;
     341           0 :     bsize[pli] = ydec[pli] ? (xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
     342           0 :                            : (xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
     343           0 :     stride[pli] = cm->mi_cols << MI_SIZE_LOG2;
     344           0 :     mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
     345           0 :     mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
     346             : 
     347           0 :     const int frame_height =
     348           0 :         (cm->mi_rows * MI_SIZE) >> xd->plane[pli].subsampling_y;
     349           0 :     const int frame_width =
     350           0 :         (cm->mi_cols * MI_SIZE) >> xd->plane[pli].subsampling_x;
     351             : 
     352           0 :     for (r = 0; r < frame_height; ++r) {
     353           0 :       for (c = 0; c < frame_width; ++c) {
     354             : #if CONFIG_HIGHBITDEPTH
     355           0 :         if (cm->use_highbitdepth) {
     356           0 :           src[pli][r * stride[pli] + c] = CONVERT_TO_SHORTPTR(
     357           0 :               xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
     358           0 :           ref_coeff[pli][r * stride[pli] + c] =
     359           0 :               CONVERT_TO_SHORTPTR(ref_buffer)[r * ref_stride + c];
     360             :         } else {
     361             : #endif
     362           0 :           src[pli][r * stride[pli] + c] =
     363           0 :               xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
     364           0 :           ref_coeff[pli][r * stride[pli] + c] = ref_buffer[r * ref_stride + c];
     365             : #if CONFIG_HIGHBITDEPTH
     366             :         }
     367             : #endif
     368             :       }
     369             :     }
     370             :   }
     371           0 :   in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
     372           0 :   sb_count = 0;
     373           0 :   for (sbr = 0; sbr < nvsb; ++sbr) {
     374           0 :     for (sbc = 0; sbc < nhsb; ++sbc) {
     375             :       int nvb, nhb;
     376             :       int gi;
     377           0 :       int dirinit = 0;
     378           0 :       nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
     379           0 :       nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
     380           0 :       cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
     381             :                           MAX_MIB_SIZE * sbc]
     382           0 :           ->mbmi.cdef_strength = -1;
     383           0 :       if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
     384           0 :       dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
     385             :                                             sbc * MAX_MIB_SIZE, dlist, 1);
     386           0 :       for (pli = 0; pli < nplanes; pli++) {
     387           0 :         for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
     388           0 :           inbuf[i] = OD_DERING_VERY_LARGE;
     389           0 :         for (gi = 0; gi < TOTAL_STRENGTHS; gi++) {
     390             :           int threshold;
     391             :           uint64_t curr_mse;
     392             :           int clpf_strength;
     393           0 :           threshold = gi / CLPF_STRENGTHS;
     394           0 :           if (pli > 0 && !chroma_dering) threshold = 0;
      395             :           /* We avoid filtering the pixels for which some of the pixels
      396             :              to average are outside the frame. We could change the filter
      397             :              instead, but it would add special cases for any future
      398             :              vectorization. */
     399           0 :           int yoff = OD_FILT_VBORDER * (sbr != 0);
     400           0 :           int xoff = OD_FILT_HBORDER * (sbc != 0);
     401           0 :           int ysize = (nvb << mi_high_l2[pli]) +
     402           0 :                       OD_FILT_VBORDER * (sbr != nvsb - 1) + yoff;
     403           0 :           int xsize = (nhb << mi_wide_l2[pli]) +
     404           0 :                       OD_FILT_HBORDER * (sbc != nhsb - 1) + xoff;
     405           0 :           clpf_strength = gi % CLPF_STRENGTHS;
     406           0 :           if (clpf_strength == 0)
     407           0 :             copy_sb16_16(&in[(-yoff * OD_FILT_BSTRIDE - xoff)], OD_FILT_BSTRIDE,
     408           0 :                          src[pli],
     409           0 :                          (sbr * MAX_MIB_SIZE << mi_high_l2[pli]) - yoff,
     410           0 :                          (sbc * MAX_MIB_SIZE << mi_wide_l2[pli]) - xoff,
     411             :                          stride[pli], ysize, xsize);
     412           0 :           od_dering(clpf_strength ? NULL : (uint8_t *)in, OD_FILT_BSTRIDE,
     413             :                     tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var, pli,
     414             :                     dlist, dering_count, threshold,
     415           0 :                     clpf_strength + (clpf_strength == 3), clpf_damping,
     416             :                     dering_damping, coeff_shift, clpf_strength != 0, 1);
     417           0 :           curr_mse = compute_dering_dist(
     418           0 :               ref_coeff[pli] +
     419           0 :                   (sbr * MAX_MIB_SIZE << mi_high_l2[pli]) * stride[pli] +
     420           0 :                   (sbc * MAX_MIB_SIZE << mi_wide_l2[pli]),
     421           0 :               stride[pli], tmp_dst, dlist, dering_count, bsize[pli],
     422             :               coeff_shift, pli);
     423           0 :           if (pli < 2)
     424           0 :             mse[pli][sb_count][gi] = curr_mse;
     425             :           else
     426           0 :             mse[1][sb_count][gi] += curr_mse;
     427           0 :           sb_index[sb_count] =
     428           0 :               MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
     429             :         }
     430             :       }
     431           0 :       sb_count++;
     432             :     }
     433             :   }
     434           0 :   nb_strength_bits = 0;
      435             :   /* Search over the number of signalling bits (0 to 3). */
     436           0 :   for (i = 0; i <= 3; i++) {
     437             :     int j;
     438             :     int best_lev0[CDEF_MAX_STRENGTHS];
     439           0 :     int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
     440           0 :     nb_strengths = 1 << i;
     441           0 :     if (nplanes >= 3)
     442           0 :       tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
     443             :                                            mse, sb_count);
     444             :     else
     445           0 :       tot_mse =
     446           0 :           joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count);
     447             :     /* Count superblock signalling cost. */
     448           0 :     tot_mse += (uint64_t)(sb_count * lambda * i);
     449             :     /* Count header signalling cost. */
     450           0 :     tot_mse += (uint64_t)(nb_strengths * lambda * CDEF_STRENGTH_BITS);
     451           0 :     if (tot_mse < best_tot_mse) {
     452           0 :       best_tot_mse = tot_mse;
     453           0 :       nb_strength_bits = i;
     454           0 :       for (j = 0; j < 1 << nb_strength_bits; j++) {
     455           0 :         cm->cdef_strengths[j] = best_lev0[j];
     456           0 :         cm->cdef_uv_strengths[j] = best_lev1[j];
     457             :       }
     458             :     }
     459             :   }
     460           0 :   nb_strengths = 1 << nb_strength_bits;
     461             : 
     462           0 :   cm->cdef_bits = nb_strength_bits;
     463           0 :   cm->nb_cdef_strengths = nb_strengths;
     464           0 :   for (i = 0; i < sb_count; i++) {
     465             :     int gi;
     466             :     int best_gi;
     467           0 :     uint64_t best_mse = (uint64_t)1 << 63;
     468           0 :     best_gi = 0;
     469           0 :     for (gi = 0; gi < cm->nb_cdef_strengths; gi++) {
     470           0 :       uint64_t curr = mse[0][i][cm->cdef_strengths[gi]];
     471           0 :       if (nplanes >= 3) curr += mse[1][i][cm->cdef_uv_strengths[gi]];
     472           0 :       if (curr < best_mse) {
     473           0 :         best_gi = gi;
     474           0 :         best_mse = curr;
     475             :       }
     476             :     }
     477           0 :     selected_strength[i] = best_gi;
     478           0 :     cm->mi_grid_visible[sb_index[i]]->mbmi.cdef_strength = best_gi;
     479             :   }
     480           0 :   cm->cdef_dering_damping = dering_damping;
     481           0 :   cm->cdef_clpf_damping = clpf_damping;
     482           0 :   aom_free(mse[0]);
     483           0 :   aom_free(mse[1]);
     484           0 :   for (pli = 0; pli < nplanes; pli++) {
     485           0 :     aom_free(src[pli]);
     486           0 :     aom_free(ref_coeff[pli]);
     487             :   }
     488           0 :   aom_free(sb_index);
     489           0 :   aom_free(selected_strength);
     490           0 : }
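
For reference, the cost minimized in the nb_strength_bits loop of
av1_cdef_search() above can be written, in the same units as the accumulated
MSE, as

$$
\mathrm{cost}(b) = \sum_{\text{SBs}} \mathrm{MSE}_{\text{best}}
+ \lambda\big(N_{\mathrm{sb}}\, b + 2^{b}\,\mathrm{CDEF\_STRENGTH\_BITS}\big),
\qquad
\lambda = \frac{0.12\, q^{2}}{256},
$$

where b in {0, 1, 2, 3} is the number of bits used to signal a strength index
per superblock, 2^b is the number of strength pairs coded in the frame header,
and q is the AC quantizer for base_qindex rescaled to 8-bit precision. The b
giving the smallest cost becomes cm->cdef_bits, and each superblock then picks
the strength pair in that set with the lowest combined luma + chroma MSE.
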

Generated by: LCOV version 1.13