LCOV - code coverage report
Current view: top level - media/libvpx/libvpx/vp9/encoder - vp9_encodemb.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 423 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 13 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "./vp9_rtcd.h"
      12             : #include "./vpx_config.h"
      13             : #include "./vpx_dsp_rtcd.h"
      14             : 
      15             : #include "vpx_dsp/quantize.h"
      16             : #include "vpx_mem/vpx_mem.h"
      17             : #include "vpx_ports/mem.h"
      18             : 
      19             : #include "vp9/common/vp9_idct.h"
      20             : #include "vp9/common/vp9_reconinter.h"
      21             : #include "vp9/common/vp9_reconintra.h"
      22             : #include "vp9/common/vp9_scan.h"
      23             : 
      24             : #include "vp9/encoder/vp9_encodemb.h"
      25             : #include "vp9/encoder/vp9_rd.h"
      26             : #include "vp9/encoder/vp9_tokenize.h"
      27             : 
      28             : struct optimize_ctx {
                         // Two ENTROPY_CONTEXT scratch arrays with 16 entries for each of the
                         // MAX_MB_PLANE planes.
                         // NOTE(review): ta/tl are presumably the "above" and "left" entropy
                         // contexts; nothing in the visible part of this file uses them --
                         // confirm against the users of this struct.
      29             :   ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
      30             :   ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
      31             : };
      32             : 
      33           0 : void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
                         // Fills x->plane[plane].src_diff for a block of size `bsize` by passing
                         // the plane's source buffer (p->src) and the destination buffer of
                         // x->e_mbd.plane[plane] (pd->dst) to the subtract kernel.
                         // NOTE(review): presumably src_diff = source - prediction; the kernel
                         // itself is not visible here -- confirm.
      34           0 :   struct macroblock_plane *const p = &x->plane[plane];
      35           0 :   const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
      36           0 :   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
                         // Block width/height in samples: four samples per 4x4 unit. bw is also
                         // passed right after src_diff, matching the diff_stride the
                         // vp9_xform_quant* functions use to index src_diff.
      37           0 :   const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
      38           0 :   const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
      39             : 
      40             : #if CONFIG_VP9_HIGHBITDEPTH
                         // High-bit-depth frames use the highbd kernel, which additionally
                         // receives the bit depth x->e_mbd.bd, and then return early.
      41             :   if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      42             :     vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf,
      43             :                               p->src.stride, pd->dst.buf, pd->dst.stride,
      44             :                               x->e_mbd.bd);
      45             :     return;
      46             :   }
      47             : #endif  // CONFIG_VP9_HIGHBITDEPTH
      48           0 :   vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
      49           0 :                      pd->dst.buf, pd->dst.stride);
      50           0 : }
      51             : 
      52             : typedef struct vp9_token_state {
                         // One node of the trellis built by vp9_optimize_b(); that function keeps
                         // two of these (state 0 and state 1) per scan position.
                         // Squared error accumulated from this node to the end of the block.
      53             :   int64_t error;
                         // Rate accumulated from this node to the end of the block.
      54             :   int rate;
                         // Scan index of the following node; the sentinel stores default_eob.
      55             :   int16_t next;
                         // Token chosen for this node (EOB_TOKEN in the sentinel; rewritten to
                         // ZERO_TOKEN when a zero coefficient precedes the node).
      56             :   int16_t token;
                         // Quantised coefficient value represented by this node.
      57             :   tran_low_t qc;
                         // Dequantised coefficient value that goes with qc.
      58             :   tran_low_t dqc;
                         // Which of the successor's two states (0 or 1) this node continues into.
      59             :   uint8_t best_index;
      60             : } vp9_token_state;
      61             : 
      62             : static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
                         // Indexed as [ref][type] by vp9_optimize_b(), where ref is the result of
                         // is_inter_block() and type is the plane type. The selected entry
                         // scales mb->rdmult, and the product is then shifted right by one.
      63             :   { 10, 6 }, { 8, 5 },
      64             : };
      65             : 
      66             : #define UPDATE_RD_COST()                             \
      67             :   {                                                  \
      68             :     rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
      69             :     rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
      70             :   }
                       // UPDATE_RD_COST() (defined just above) takes no arguments: it expects
                       // rdmult, rddiv, rate0/rate1 and error0/error1 to be in scope at the
                       // point of use and overwrites rd_cost0 and rd_cost1 there.
      71             : 
      72             : // This function is a placeholder for now but may ultimately need
      73             : // to scan previous tokens to work out the correct context.
                       //
                       // Returns the coefficient context for scan position idx + 1 as it would be
                       // if the coefficient at scan position idx were coded with `token`.
                       // token_cache is modified only temporarily and is restored before return.
      74           0 : static int trellis_get_coeff_context(const int16_t *scan, const int16_t *nb,
      75             :                                      int idx, int token, uint8_t *token_cache) {
                         // Remember the current cache entry so it can be put back afterwards.
      76           0 :   int bak = token_cache[scan[idx]], pt;
                         // Substitute the energy class of the candidate token, then query.
      77           0 :   token_cache[scan[idx]] = vp9_pt_energy_class[token];
      78           0 :   pt = get_coef_context(nb, token_cache, idx + 1);
      79           0 :   token_cache[scan[idx]] = bak;
      80           0 :   return pt;
      81             : }
      82             : 
      83           0 : int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
      84             :                    int ctx) {
                         // Trellis (Viterbi) re-quantisation of one transform block.
                         // Non-zero quantised coefficients are visited in reverse scan order and
                         // two candidates are costed for each: the value as quantised (state 0)
                         // and, when the quantised magnitude overshoots the source coefficient
                         // by less than one step, the value moved one step toward zero
                         // (state 1). The path with the lower RDCOST is written back to
                         // qcoeff/dqcoeff. The resulting end-of-block position is stored in
                         // mb->plane[plane].eobs[block] and returned.
                         // `ctx` is the context index used to cost the first token of the block.
      85           0 :   MACROBLOCKD *const xd = &mb->e_mbd;
      86           0 :   struct macroblock_plane *const p = &mb->plane[plane];
      87           0 :   struct macroblockd_plane *const pd = &xd->plane[plane];
      88           0 :   const int ref = is_inter_block(xd->mi[0]);
                         // Indexed by scan position; index eob holds the sentinel node.
      89             :   vp9_token_state tokens[1025][2];
      90             :   uint8_t token_cache[1024];
      91           0 :   const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
      92           0 :   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
      93           0 :   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
      94           0 :   const int eob = p->eobs[block];
      95           0 :   const PLANE_TYPE type = get_plane_type(plane);
      96           0 :   const int default_eob = 16 << (tx_size << 1);
                         // 1 for TX_32X32, 0 for every other transform size.
      97           0 :   const int shift = (tx_size == TX_32X32);
      98           0 :   const int16_t *const dequant_ptr = pd->dequant;
      99           0 :   const uint8_t *const band_translate = get_band_translate(tx_size);
     100           0 :   const scan_order *const so = get_scan(xd, tx_size, type, block);
     101           0 :   const int16_t *const scan = so->scan;
     102           0 :   const int16_t *const nb = so->neighbors;
                         // Dequantisation steps for entry 0 and entry 1 of pd->dequant, halved
                         // when shift is 1. Entry 0 is used for rc == 0, entry 1 otherwise.
     103           0 :   const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
     104           0 :   int next = eob, sz = 0;
     105           0 :   const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
     106           0 :   const int64_t rddiv = mb->rddiv;
     107             :   int64_t rd_cost0, rd_cost1;
     108             :   int rate0, rate1;
     109             :   int64_t error0, error1;
     110             :   int16_t t0, t1;
     111             :   EXTRABIT e0;
     112           0 :   unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
     113           0 :       mb->token_costs[tx_size][type][ref];
     114             :   int best, band, pt, i, final_eob;
     115             : #if CONFIG_VP9_HIGHBITDEPTH
     116             :   const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
     117             : #else
     118           0 :   const int *cat6_high_cost = vp9_get_high_cost_table(8);
     119             : #endif
     120             : 
     121           0 :   assert((!type && !plane) || (type && plane));
     122           0 :   assert(eob <= default_eob);
     123             : 
     124             :   /* Now set up a Viterbi trellis to evaluate alternative roundings. */
     125             :   /* Initialize the sentinel node of the trellis. */
     126           0 :   tokens[eob][0].rate = 0;
     127           0 :   tokens[eob][0].error = 0;
     128           0 :   tokens[eob][0].next = default_eob;
     129           0 :   tokens[eob][0].token = EOB_TOKEN;
     130           0 :   tokens[eob][0].qc = 0;
     131           0 :   tokens[eob][1] = tokens[eob][0];
     132             : 
                         /* Seed the cache with the energy class of every coefficient as
                          * currently quantised. */
     133           0 :   for (i = 0; i < eob; i++)
     134           0 :     token_cache[scan[i]] = vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])];
     135             : 
                         /* Visits i = eob - 1 down to 0; i is -1 once the loop has finished. */
     136           0 :   for (i = eob; i-- > 0;) {
     137             :     int base_bits, d2, dx;
     138           0 :     const int rc = scan[i];
     139           0 :     int x = qcoeff[rc];
     140             :     /* Only add a trellis state for non-zero coefficients. */
     141           0 :     if (x) {
     142           0 :       error0 = tokens[next][0].error;
     143           0 :       error1 = tokens[next][1].error;
     144             :       /* Evaluate the first possibility for this state. */
     145           0 :       rate0 = tokens[next][0].rate;
     146           0 :       rate1 = tokens[next][1].rate;
     147           0 :       vp9_get_token_extra(x, &t0, &e0);
     148             :       /* Consider both possible successor states. */
                             /* next equals default_eob only while it still refers to the
                              * sentinel of a block whose eob is default_eob; in that case no
                              * successor token cost is added. */
     149           0 :       if (next < default_eob) {
     150           0 :         band = band_translate[i + 1];
     151           0 :         pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
     152           0 :         rate0 += token_costs[band][0][pt][tokens[next][0].token];
     153           0 :         rate1 += token_costs[band][0][pt][tokens[next][1].token];
     154             :       }
     155           0 :       UPDATE_RD_COST();
     156             :       /* And pick the best. */
     157           0 :       best = rd_cost1 < rd_cost0;
     158           0 :       base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
                             /* Dequantisation error of the unmodified value, scaled by
                              * 1 << shift. */
     159           0 :       dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
     160             : #if CONFIG_VP9_HIGHBITDEPTH
     161             :       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     162             :         dx >>= xd->bd - 8;
     163             :       }
     164             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     165           0 :       d2 = dx * dx;
     166           0 :       tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
     167           0 :       tokens[i][0].error = d2 + (best ? error1 : error0);
     168           0 :       tokens[i][0].next = next;
     169           0 :       tokens[i][0].token = t0;
     170           0 :       tokens[i][0].qc = x;
     171           0 :       tokens[i][0].dqc = dqcoeff[rc];
     172           0 :       tokens[i][0].best_index = best;
     173             : 
     174             :       /* Evaluate the second possibility for this state. */
     175           0 :       rate0 = tokens[next][0].rate;
     176           0 :       rate1 = tokens[next][1].rate;
     177             : 
                             /* The alternative is only tried when |x| * dequant lies strictly
                              * between |coeff| << shift and that value plus one dequant step.
                              * sz becomes 0 for positive x and -1 for negative x, so the
                              * update moves x one step toward zero. */
     178           0 :       if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
     179           0 :           (abs(x) * dequant_ptr[rc != 0] <
     180           0 :            (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])) {
     181           0 :         sz = -(x < 0);
     182           0 :         x -= 2 * sz + 1;
     183             :       } else {
                               /* No alternative here: state 1 duplicates state 0. */
     184           0 :         tokens[i][1] = tokens[i][0];
     185           0 :         next = i;
     186           0 :         continue;
     187             :       }
     188             : 
     189             :       /* Consider both possible successor states. */
     190           0 :       if (!x) {
     191             :         /* If we reduced this coefficient to zero, check to see if
     192             :          *  we need to move the EOB back here.
     193             :          */
     194           0 :         t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
     195           0 :         t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
     196           0 :         e0 = 0;
     197             :       } else {
     198           0 :         vp9_get_token_extra(x, &t0, &e0);
     199           0 :         t1 = t0;
     200             :       }
     201           0 :       if (next < default_eob) {
     202           0 :         band = band_translate[i + 1];
     203           0 :         if (t0 != EOB_TOKEN) {
     204           0 :           pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
     205           0 :           rate0 += token_costs[band][!x][pt][tokens[next][0].token];
     206             :         }
     207           0 :         if (t1 != EOB_TOKEN) {
     208           0 :           pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
     209           0 :           rate1 += token_costs[band][!x][pt][tokens[next][1].token];
     210             :         }
     211             :       }
     212             : 
     213           0 :       UPDATE_RD_COST();
     214             :       /* And pick the best. */
     215           0 :       best = rd_cost1 < rd_cost0;
     216           0 :       base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
     217             : 
                             /* (v + sz) ^ sz equals v when sz is 0 and -v when sz is -1, so dx
                              * moves by one dequant step, signed like the original x. */
     218             : #if CONFIG_VP9_HIGHBITDEPTH
     219             :       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     220             :         dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
     221             :       } else {
     222             :         dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
     223             :       }
     224             : #else
     225           0 :       dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
     226             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     227           0 :       d2 = dx * dx;
     228             : 
     229           0 :       tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
     230           0 :       tokens[i][1].error = d2 + (best ? error1 : error0);
     231           0 :       tokens[i][1].next = next;
     232           0 :       tokens[i][1].token = best ? t1 : t0;
     233           0 :       tokens[i][1].qc = x;
     234             : 
     235           0 :       if (x) {
     236           0 :         tran_low_t offset = dq_step[rc != 0];
     237             :         // The 32x32 transform coefficient uses half quantization step size.
     238             :         // Account for the rounding difference in the dequantized coefficient
     239             :         // value when the quantization index is dropped from an even number
     240             :         // to an odd number.
     241           0 :         if (shift & x) offset += (dequant_ptr[rc != 0] & 0x01);
     242             : 
     243           0 :         if (sz == 0)
     244           0 :           tokens[i][1].dqc = dqcoeff[rc] - offset;
     245             :         else
     246           0 :           tokens[i][1].dqc = dqcoeff[rc] + offset;
     247             :       } else {
     248           0 :         tokens[i][1].dqc = 0;
     249             :       }
     250             : 
     251           0 :       tokens[i][1].best_index = best;
     252             :       /* Finally, make this the new head of the trellis. */
     253           0 :       next = i;
     254             :     } else {
     255             :       /* There's no choice to make for a zero coefficient, so we don't
     256             :        *  add a new trellis node, but we do need to update the costs.
     257             :        */
     258           0 :       band = band_translate[i + 1];
     259           0 :       pt = get_coef_context(nb, token_cache, i + 1);
     260           0 :       t0 = tokens[next][0].token;
     261           0 :       t1 = tokens[next][1].token;
     262             :       /* Update the cost of each path if we're past the EOB token. */
     263           0 :       if (t0 != EOB_TOKEN) {
     264           0 :         tokens[next][0].rate += token_costs[band][1][pt][t0];
     265           0 :         tokens[next][0].token = ZERO_TOKEN;
     266             :       }
     267           0 :       if (t1 != EOB_TOKEN) {
     268           0 :         tokens[next][1].rate += token_costs[band][1][pt][t1];
     269           0 :         tokens[next][1].token = ZERO_TOKEN;
     270             :       }
     271           0 :       tokens[i][0].best_index = tokens[i][1].best_index = 0;
     272             :       /* Don't update next, because we didn't add a new node. */
     273             :     }
     274             :   }
     275             : 
     276             :   /* Now pick the best path through the whole trellis. */
                         /* i is -1 here, so this reads band_translate[0]. */
     277           0 :   band = band_translate[i + 1];
     278           0 :   rate0 = tokens[next][0].rate;
     279           0 :   rate1 = tokens[next][1].rate;
     280           0 :   error0 = tokens[next][0].error;
     281           0 :   error1 = tokens[next][1].error;
     282           0 :   t0 = tokens[next][0].token;
     283           0 :   t1 = tokens[next][1].token;
     284           0 :   rate0 += token_costs[band][0][ctx][t0];
     285           0 :   rate1 += token_costs[band][0][ctx][t1];
     286           0 :   UPDATE_RD_COST();
     287           0 :   best = rd_cost1 < rd_cost0;
     288           0 :   final_eob = -1;
     289             : 
                         /* Follow the winning path from the head node: .next links the nodes
                          * and .best_index names the state to continue in. Values are written
                          * back and the last position holding a non-zero value is tracked. */
     290           0 :   for (i = next; i < eob; i = next) {
     291           0 :     const int x = tokens[i][best].qc;
     292           0 :     const int rc = scan[i];
     293           0 :     if (x) final_eob = i;
     294           0 :     qcoeff[rc] = x;
     295           0 :     dqcoeff[rc] = tokens[i][best].dqc;
     296           0 :     next = tokens[i][best].next;
     297           0 :     best = tokens[i][best].best_index;
     298             :   }
                         /* Turn the last non-zero index (or -1) into a count. */
     299           0 :   final_eob++;
     300             : 
     301           0 :   mb->plane[plane].eobs[block] = final_eob;
     302           0 :   return final_eob;
     303             : }
     304             : 
     305           0 : static INLINE void fdct32x32(int rd_transform, const int16_t *src,
     306             :                              tran_low_t *dst, int src_stride) {
                         // Runs the 32x32 forward DCT from src (row stride src_stride) into dst,
                         // using vpx_fdct32x32_rd when rd_transform is non-zero and
                         // vpx_fdct32x32 otherwise. Callers in this file pass
                         // x->use_lp32x32fdct as rd_transform.
     307           0 :   if (rd_transform)
     308           0 :     vpx_fdct32x32_rd(src, dst, src_stride);
     309             :   else
     310           0 :     vpx_fdct32x32(src, dst, src_stride);
     311           0 : }
     312             : 
     313             : #if CONFIG_VP9_HIGHBITDEPTH
     314             : static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
     315             :                                     tran_low_t *dst, int src_stride) {
                         // High-bit-depth counterpart of fdct32x32(): picks
                         // vpx_highbd_fdct32x32_rd when rd_transform is non-zero and
                         // vpx_highbd_fdct32x32 otherwise. Only compiled when
                         // CONFIG_VP9_HIGHBITDEPTH is set.
     316             :   if (rd_transform)
     317             :     vpx_highbd_fdct32x32_rd(src, dst, src_stride);
     318             :   else
     319             :     vpx_highbd_fdct32x32(src, dst, src_stride);
     320             : }
     321             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     322             : 
     323           0 : void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
     324             :                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
                         // Forward-transforms one transform block of the plane's src_diff
                         // samples and quantises it with the *_fp quantiser variants.
                         // Outputs go to coeff, qcoeff and dqcoeff at BLOCK_OFFSET(block), and
                         // the quantiser writes the eob value to p->eobs[block].
                         // row/col locate the block inside src_diff in units of 4 samples;
                         // tx_size selects the transform and the coefficient count
                         // (1024, 256, 64 or 16).
     325           0 :   MACROBLOCKD *const xd = &x->e_mbd;
     326           0 :   const struct macroblock_plane *const p = &x->plane[plane];
     327           0 :   const struct macroblockd_plane *const pd = &xd->plane[plane];
                         // Always the default scan order for this transform size.
     328           0 :   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
     329           0 :   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
     330           0 :   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
     331           0 :   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
     332           0 :   uint16_t *const eob = &p->eobs[block];
                         // Row stride of src_diff in samples: four per 4x4 unit of plane_bsize.
     333           0 :   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
     334             :   const int16_t *src_diff;
     335           0 :   src_diff = &p->src_diff[4 * (row * diff_stride + col)];
     336             : 
     337             : #if CONFIG_VP9_HIGHBITDEPTH
                         // High-bit-depth frames take the highbd transforms and quantisers and
                         // return before reaching the switch below.
     338             :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     339             :     switch (tx_size) {
     340             :       case TX_32X32:
     341             :         highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
     342             :         vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
     343             :                                      p->round_fp, p->quant_fp, p->quant_shift,
     344             :                                      qcoeff, dqcoeff, pd->dequant, eob,
     345             :                                      scan_order->scan, scan_order->iscan);
     346             :         break;
     347             :       case TX_16X16:
     348             :         vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
     349             :         vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
     350             :                                p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
     351             :                                pd->dequant, eob, scan_order->scan,
     352             :                                scan_order->iscan);
     353             :         break;
     354             :       case TX_8X8:
     355             :         vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
     356             :         vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
     357             :                                p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
     358             :                                pd->dequant, eob, scan_order->scan,
     359             :                                scan_order->iscan);
     360             :         break;
     361             :       case TX_4X4:
     362             :         x->fwd_txm4x4(src_diff, coeff, diff_stride);
     363             :         vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
     364             :                                p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
     365             :                                pd->dequant, eob, scan_order->scan,
     366             :                                scan_order->iscan);
     367             :         break;
     368             :       default: assert(0);
     369             :     }
     370             :     return;
     371             :   }
     372             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     373             : 
     374           0 :   switch (tx_size) {
     375             :     case TX_32X32:
     376           0 :       fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
     377           0 :       vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
     378           0 :                             p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
     379             :                             pd->dequant, eob, scan_order->scan,
     380             :                             scan_order->iscan);
     381           0 :       break;
     382             :     case TX_16X16:
     383           0 :       vpx_fdct16x16(src_diff, coeff, diff_stride);
     384           0 :       vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
     385           0 :                       p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant,
     386             :                       eob, scan_order->scan, scan_order->iscan);
     387           0 :       break;
     388             :     case TX_8X8:
                             // Unlike the other sizes, 8x8 uses one combined call that takes
                             // src_diff directly and performs both transform and quantisation.
     389           0 :       vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block,
     390           0 :                         p->zbin, p->round_fp, p->quant_fp, p->quant_shift,
     391             :                         qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
     392             :                         scan_order->iscan);
     393           0 :       break;
     394             :     case TX_4X4:
                             // The 4x4 transform is taken from the function pointer on x.
     395           0 :       x->fwd_txm4x4(src_diff, coeff, diff_stride);
     396           0 :       vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
     397           0 :                       p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant,
     398             :                       eob, scan_order->scan, scan_order->iscan);
     399           0 :       break;
     400           0 :     default: assert(0); break;
     401             :   }
     402           0 : }
     403             : 
     404           0 : void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
     405             :                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
                         // DC-only counterpart of the vp9_xform_quant* functions: transforms one
                         // block of src_diff with the *_1 transform variants (the regular
                         // x->fwd_txm4x4 for TX_4X4) and quantises with the *_quantize_dc
                         // helpers, which receive only p->round, p->quant_fp[0] and
                         // pd->dequant[0]. Outputs go to coeff, qcoeff, dqcoeff and
                         // p->eobs[block]; row/col are in units of 4 samples.
                         // NOTE(review): the *_1 transforms presumably produce just the DC
                         // coefficient; they are not visible here -- confirm.
     406           0 :   MACROBLOCKD *const xd = &x->e_mbd;
     407           0 :   const struct macroblock_plane *const p = &x->plane[plane];
     408           0 :   const struct macroblockd_plane *const pd = &xd->plane[plane];
     409           0 :   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
     410           0 :   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
     411           0 :   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
     412           0 :   uint16_t *const eob = &p->eobs[block];
                         // Row stride of src_diff in samples: four per 4x4 unit of plane_bsize.
     413           0 :   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
     414             :   const int16_t *src_diff;
     415           0 :   src_diff = &p->src_diff[4 * (row * diff_stride + col)];
     416             : #if CONFIG_VP9_HIGHBITDEPTH
                         // High-bit-depth frames take the highbd variants and return before
                         // reaching the switch below.
     417             :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     418             :     switch (tx_size) {
     419             :       case TX_32X32:
     420             :         vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
     421             :         vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
     422             :                                      p->quant_fp[0], qcoeff, dqcoeff,
     423             :                                      pd->dequant[0], eob);
     424             :         break;
     425             :       case TX_16X16:
     426             :         vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
     427             :         vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
     428             :                                p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
     429             :                                eob);
     430             :         break;
     431             :       case TX_8X8:
     432             :         vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
     433             :         vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
     434             :                                p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
     435             :                                eob);
     436             :         break;
     437             :       case TX_4X4:
     438             :         x->fwd_txm4x4(src_diff, coeff, diff_stride);
     439             :         vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
     440             :                                p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
     441             :                                eob);
     442             :         break;
     443             :       default: assert(0);
     444             :     }
     445             :     return;
     446             :   }
     447             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     448             : 
     449           0 :   switch (tx_size) {
     450             :     case TX_32X32:
                             // The 32x32 DC quantiser takes no coefficient-count argument.
     451           0 :       vpx_fdct32x32_1(src_diff, coeff, diff_stride);
     452           0 :       vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
     453           0 :                             qcoeff, dqcoeff, pd->dequant[0], eob);
     454           0 :       break;
     455             :     case TX_16X16:
     456           0 :       vpx_fdct16x16_1(src_diff, coeff, diff_stride);
     457           0 :       vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0],
     458           0 :                       qcoeff, dqcoeff, pd->dequant[0], eob);
     459           0 :       break;
     460             :     case TX_8X8:
     461           0 :       vpx_fdct8x8_1(src_diff, coeff, diff_stride);
     462           0 :       vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
     463           0 :                       qcoeff, dqcoeff, pd->dequant[0], eob);
     464           0 :       break;
     465             :     case TX_4X4:
                             // No *_1 variant is used for 4x4: the transform comes from the
                             // function pointer on x, as in the other vp9_xform_quant*
                             // functions.
     466           0 :       x->fwd_txm4x4(src_diff, coeff, diff_stride);
     467           0 :       vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
     468           0 :                       qcoeff, dqcoeff, pd->dequant[0], eob);
     469           0 :       break;
     470           0 :     default: assert(0); break;
     471             :   }
     472           0 : }
     473             : 
     474           0 : void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
     475             :                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
     476           0 :   MACROBLOCKD *const xd = &x->e_mbd;
     477           0 :   const struct macroblock_plane *const p = &x->plane[plane];
     478           0 :   const struct macroblockd_plane *const pd = &xd->plane[plane];
     479           0 :   const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
     480           0 :   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
     481           0 :   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
     482           0 :   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
     483           0 :   uint16_t *const eob = &p->eobs[block];
     484           0 :   const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
     485             :   const int16_t *src_diff;
     486           0 :   src_diff = &p->src_diff[4 * (row * diff_stride + col)];
     487             : 
     488             : #if CONFIG_VP9_HIGHBITDEPTH
     489             :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     490             :     switch (tx_size) {
     491             :       case TX_32X32:
     492             :         highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
     493             :         vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
     494             :                                     p->round, p->quant, p->quant_shift, qcoeff,
     495             :                                     dqcoeff, pd->dequant, eob, scan_order->scan,
     496             :                                     scan_order->iscan);
     497             :         break;
     498             :       case TX_16X16:
     499             :         vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
     500             :         vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
     501             :                               p->quant, p->quant_shift, qcoeff, dqcoeff,
     502             :                               pd->dequant, eob, scan_order->scan,
     503             :                               scan_order->iscan);
     504             :         break;
     505             :       case TX_8X8:
     506             :         vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
     507             :         vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
     508             :                               p->quant, p->quant_shift, qcoeff, dqcoeff,
     509             :                               pd->dequant, eob, scan_order->scan,
     510             :                               scan_order->iscan);
     511             :         break;
     512             :       case TX_4X4:
     513             :         x->fwd_txm4x4(src_diff, coeff, diff_stride);
     514             :         vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
     515             :                               p->quant, p->quant_shift, qcoeff, dqcoeff,
     516             :                               pd->dequant, eob, scan_order->scan,
     517             :                               scan_order->iscan);
     518             :         break;
     519             :       default: assert(0);
     520             :     }
     521             :     return;
     522             :   }
     523             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     524             : 
     525           0 :   switch (tx_size) {
     526             :     case TX_32X32:
     527           0 :       fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
     528           0 :       vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
     529           0 :                            p->quant, p->quant_shift, qcoeff, dqcoeff,
     530             :                            pd->dequant, eob, scan_order->scan,
     531             :                            scan_order->iscan);
     532           0 :       break;
     533             :     case TX_16X16:
     534           0 :       vpx_fdct16x16(src_diff, coeff, diff_stride);
     535           0 :       vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
     536           0 :                      p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
     537             :                      scan_order->scan, scan_order->iscan);
     538           0 :       break;
     539             :     case TX_8X8:
     540           0 :       vpx_fdct8x8(src_diff, coeff, diff_stride);
     541           0 :       vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
     542           0 :                      p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
     543             :                      scan_order->scan, scan_order->iscan);
     544           0 :       break;
     545             :     case TX_4X4:
     546           0 :       x->fwd_txm4x4(src_diff, coeff, diff_stride);
     547           0 :       vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
     548           0 :                      p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
     549             :                      scan_order->scan, scan_order->iscan);
     550           0 :       break;
     551           0 :     default: assert(0); break;
     552             :   }
     553           0 : }
     554             : 
     555           0 : static void encode_block(int plane, int block, int row, int col,
     556             :                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
                         // Per-transform-block callback; vp9_encode_sb() hands it to
                         // vp9_foreach_transformed_block_in_plane(). `arg` is a
                         // struct encode_b_args. Depending on the skip flags on `x` it
                         // forward-transforms and quantizes the block, optionally calls
                         // vp9_optimize_b(), stores a 0/1 "has coefficients" flag in the
                         // row/column context slots, clears *args->skip when the eob is
                         // non-zero and, unless x->skip_encode is set or the eob is 0, adds
                         // the inverse transform of dqcoeff into the destination buffer.
     557           0 :   struct encode_b_args *const args = arg;
     558           0 :   MACROBLOCK *const x = args->x;
     559           0 :   MACROBLOCKD *const xd = &x->e_mbd;
     560           0 :   struct macroblock_plane *const p = &x->plane[plane];
     561           0 :   struct macroblockd_plane *const pd = &xd->plane[plane];
     562           0 :   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
     563             :   uint8_t *dst;
     564             :   ENTROPY_CONTEXT *a, *l;
                         // row/col are scaled by 4 to address dst.buf (presumably they are
                         // in units of 4x4 blocks -- confirm against the caller).
     565           0 :   dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
                         // Context slots for this block: ta is indexed by column, tl by row.
     566           0 :   a = &args->ta[col];
     567           0 :   l = &args->tl[row];
     568             : 
     569             :   // TODO(jingning): per transformed block zero forcing only enabled for
     570             :   // luma component. will integrate chroma components as well.
     571           0 :   if (x->zcoeff_blk[tx_size][block] && plane == 0) {
                           // Treat the block as all-zero: eob 0, both context flags cleared.
     572           0 :     p->eobs[block] = 0;
     573           0 :     *a = *l = 0;
     574           0 :     return;
     575             :   }
     576             : 
                         // With x->skip_recode set, no transform/quantization is run here and
                         // the code below works from whatever p->eobs[block] already holds.
     577           0 :   if (!x->skip_recode) {
     578           0 :     if (x->quant_fp) {
     579             :       // Encoding process for rtc mode
     580           0 :       if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
     581             :         // skip forward transform
     582           0 :         p->eobs[block] = 0;
     583           0 :         *a = *l = 0;
     584           0 :         return;
     585             :       } else {
     586           0 :         vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
     587             :       }
     588             :     } else {
                             // The per-block skip_txfm decision is only consulted when the
                             // transform size equals the largest one for this plane block size.
     589           0 :       if (max_txsize_lookup[plane_bsize] == tx_size) {
                               // Index into x->skip_txfm: a stride of 4 entries per plane, plus
                               // the block index shifted right by 2 * tx_size.
     590           0 :         int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
     591           0 :         if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
     592             :           // full forward transform and quantization
     593           0 :           vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
     594           0 :         } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
     595             :           // fast path forward transform and quantization
     596           0 :           vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
     597             :         } else {
     598             :           // skip forward transform
     599           0 :           p->eobs[block] = 0;
     600           0 :           *a = *l = 0;
     601           0 :           return;
     602             :         }
     603             :       } else {
     604           0 :         vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
     605             :       }
     606             :     }
     607             :   }
     608             : 
                         // Same condition vp9_encode_sb() uses to decide whether to fill the
                         // context arrays, so *a / *l are only read when that was done.
     609           0 :   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
     610           0 :     const int ctx = combine_entropy_contexts(*a, *l);
                           // Both flags become 1 iff vp9_optimize_b() returns a value > 0.
     611           0 :     *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
     612             :   } else {
                           // No optimization: both flags become 1 iff the stored eob is > 0.
     613           0 :     *a = *l = p->eobs[block] > 0;
     614             :   }
     615             : 
                         // Any block with a non-zero eob clears the caller-supplied skip flag.
     616           0 :   if (p->eobs[block]) *(args->skip) = 0;
     617             : 
                         // Nothing to reconstruct when encoding is skipped or the block is empty.
     618           0 :   if (x->skip_encode || p->eobs[block] == 0) return;
     619             : #if CONFIG_VP9_HIGHBITDEPTH
                         // High-bitdepth buffers use the highbd inverse transforms, which take
                         // xd->bd as an extra argument; this path returns before the switch below.
     620             :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     621             :     switch (tx_size) {
     622             :       case TX_32X32:
     623             :         vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
     624             :                                  xd->bd);
     625             :         break;
     626             :       case TX_16X16:
     627             :         vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
     628             :                                  xd->bd);
     629             :         break;
     630             :       case TX_8X8:
     631             :         vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
     632             :                                xd->bd);
     633             :         break;
     634             :       case TX_4X4:
     635             :         // this is like vp9_short_idct4x4 but has a special case around eob<=1
     636             :         // which is significant (not just an optimization) for the lossless
     637             :         // case.
     638             :         x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block],
     639             :                            xd->bd);
     640             :         break;
     641             :       default: assert(0 && "Invalid transform size");
     642             :     }
     643             :     return;
     644             :   }
     645             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     646             : 
                         // Inverse transform selected by transform size; each call receives the
                         // dequantized coefficients, the destination pointer/stride and the eob.
     647           0 :   switch (tx_size) {
     648             :     case TX_32X32:
     649           0 :       vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
     650           0 :       break;
     651             :     case TX_16X16:
     652           0 :       vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
     653           0 :       break;
     654             :     case TX_8X8:
     655           0 :       vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
     656           0 :       break;
     657             :     case TX_4X4:
     658             :       // this is like vp9_short_idct4x4 but has a special case around eob<=1
     659             :       // which is significant (not just an optimization) for the lossless
     660             :       // case.
     661           0 :       x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
     662           0 :       break;
     663           0 :     default: assert(0 && "Invalid transform size"); break;
     664             :   }
     665             : }
     666             : 
     667           0 : static void encode_block_pass1(int plane, int block, int row, int col,
     668             :                                BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
     669             :                                void *arg) {
                         // Per-transform-block callback used by vp9_encode_sby_pass1(). Unlike
                         // encode_block(), `arg` is the MACROBLOCK itself rather than a
                         // struct encode_b_args. It always runs vp9_xform_quant() and, when the
                         // resulting eob is non-zero, adds the inverse transform into dst.
     670           0 :   MACROBLOCK *const x = (MACROBLOCK *)arg;
     671           0 :   MACROBLOCKD *const xd = &x->e_mbd;
     672           0 :   struct macroblock_plane *const p = &x->plane[plane];
     673           0 :   struct macroblockd_plane *const pd = &xd->plane[plane];
     674           0 :   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
     675             :   uint8_t *dst;
                         // row/col are scaled by 4 to address the destination buffer.
     676           0 :   dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
     677             : 
     678           0 :   vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
     679             : 
     680           0 :   if (p->eobs[block] > 0) {
                           // NOTE(review): the itxm_add hooks are called without switching on
                           // tx_size; encode_block() only uses them for TX_4X4, so presumably
                           // this pass only sees 4x4 transforms -- confirm.
     681             : #if CONFIG_VP9_HIGHBITDEPTH
     682             :     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     683             :       x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
     684             :       return;
     685             :     }
     686             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     687           0 :     x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
     688             :   }
     689           0 : }
     690             : 
     691           0 : void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
                         // Handles plane 0 only: calls vp9_subtract_plane() for it, then runs
                         // encode_block_pass1() on every transform block of that plane, passing
                         // `x` itself as the callback argument.
     692           0 :   vp9_subtract_plane(x, bsize, 0);
     693           0 :   vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
     694             :                                          encode_block_pass1, x);
     695           0 : }
     696             : 
     697           0 : void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
                         // Runs encode_block() over every transform block of every plane
                         // (0 .. MAX_MB_PLANE-1) of the block described by xd->mi[0].
                         // mi->skip is set to 1 up front and is cleared by encode_block()
                         // (through arg.skip) as soon as a block with a non-zero eob is seen.
     698           0 :   MACROBLOCKD *const xd = &x->e_mbd;
     699             :   struct optimize_ctx ctx;
     700           0 :   MODE_INFO *mi = xd->mi[0];
                         // Initializer order: x, enable_coeff_opt, ta, tl, skip. ta/tl are
                         // pointed at ctx for each plane inside the loop.
     701           0 :   struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
     702             :   int plane;
     703             : 
     704           0 :   mi->skip = 1;
     705             : 
                         // With x->skip set nothing is encoded and mi->skip stays 1.
     706           0 :   if (x->skip) return;
     707             : 
     708           0 :   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
     709           0 :     if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);
     710             : 
                           // The context arrays are only filled in when coefficient optimization
                           // will run; encode_block() tests the same condition before it reads
                           // them and otherwise only writes to them.
     711           0 :     if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
     712           0 :       const struct macroblockd_plane *const pd = &xd->plane[plane];
                             // Plane 0 uses mi->tx_size; other planes use get_uv_tx_size().
     713           0 :       const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
     714           0 :       vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
     715           0 :                                ctx.tl[plane]);
     716           0 :       arg.enable_coeff_opt = 1;
     717             :     } else {
     718           0 :       arg.enable_coeff_opt = 0;
     719             :     }
                           // NOTE(review): encode_block() as shown in this file re-evaluates the
                           // x->optimize / skip flags itself and does not read enable_coeff_opt.
     720           0 :     arg.ta = ctx.ta[plane];
     721           0 :     arg.tl = ctx.tl[plane];
     722             : 
     723           0 :     vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
     724             :                                            &arg);
     725             :   }
     726             : }
     727             : 
     728           0 : void vp9_encode_block_intra(int plane, int block, int row, int col,
     729             :                             BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
     730             :                             void *arg) {
                         // Per-transform-block callback for intra blocks (see
                         // vp9_encode_intra_block_plane()). `arg` is a struct encode_b_args.
                         // For one transform block it: picks the prediction mode, transform type
                         // and scan order; calls vp9_predict_intra_block(); unless
                         // x->skip_recode is set, subtracts the prediction from the source and
                         // forward-transforms + quantizes the residual; optionally calls
                         // vp9_optimize_b(); unless x->skip_encode is set or the eob is 0, adds
                         // the inverse transform into dst; and finally clears *args->skip when
                         // the eob is non-zero.
     731           0 :   struct encode_b_args *const args = arg;
     732           0 :   MACROBLOCK *const x = args->x;
     733           0 :   MACROBLOCKD *const xd = &x->e_mbd;
     734           0 :   MODE_INFO *mi = xd->mi[0];
     735           0 :   struct macroblock_plane *const p = &x->plane[plane];
     736           0 :   struct macroblockd_plane *const pd = &xd->plane[plane];
     737           0 :   tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
     738           0 :   tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
     739           0 :   tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
     740             :   const scan_order *scan_order;
     741           0 :   TX_TYPE tx_type = DCT_DCT;
     742             :   PREDICTION_MODE mode;
     743           0 :   const int bwl = b_width_log2_lookup[plane_bsize];
                         // Row stride of the src_diff buffer: 4 * 2^bwl.
     744           0 :   const int diff_stride = 4 * (1 << bwl);
     745             :   uint8_t *src, *dst;
     746             :   int16_t *src_diff;
     747           0 :   uint16_t *eob = &p->eobs[block];
     748           0 :   const int src_stride = p->src.stride;
     749           0 :   const int dst_stride = pd->dst.stride;
                         // a / l stay NULL unless coefficient optimization is enabled; they are
                         // only dereferenced below under args->enable_coeff_opt.
     750           0 :   ENTROPY_CONTEXT *a = NULL;
     751           0 :   ENTROPY_CONTEXT *l = NULL;
     752           0 :   int entropy_ctx = 0;
                         // row/col are scaled by 4 to address the three buffers.
     753           0 :   dst = &pd->dst.buf[4 * (row * dst_stride + col)];
     754           0 :   src = &p->src.buf[4 * (row * src_stride + col)];
     755           0 :   src_diff = &p->src_diff[4 * (row * diff_stride + col)];
     756           0 :   if (args->enable_coeff_opt) {
     757           0 :     a = &args->ta[col];
     758           0 :     l = &args->tl[row];
     759           0 :     entropy_ctx = combine_entropy_contexts(*a, *l);
     760             :   }
     761             : 
                         // Mode / transform type / scan order selection:
                         //  - TX_4X4: type from get_tx_type_4x4(), luma mode from get_y_mode();
                         //  - TX_32X32: default scan order, tx_type stays DCT_DCT;
                         //  - otherwise: type from get_tx_type().
     762           0 :   if (tx_size == TX_4X4) {
     763           0 :     tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
     764           0 :     scan_order = &vp9_scan_orders[TX_4X4][tx_type];
     765           0 :     mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
     766             :   } else {
     767           0 :     mode = plane == 0 ? mi->mode : mi->uv_mode;
     768           0 :     if (tx_size == TX_32X32) {
     769           0 :       scan_order = &vp9_default_scan_orders[TX_32X32];
     770             :     } else {
     771           0 :       tx_type = get_tx_type(get_plane_type(plane), xd);
     772           0 :       scan_order = &vp9_scan_orders[tx_size][tx_type];
     773             :     }
     774             :   }
     775             : 
                         // With x->skip_encode set, src / src_stride are passed in place of
                         // dst / dst_stride as the fifth and sixth arguments (presumably the
                         // reference pixels for prediction -- confirm against
                         // vp9_predict_intra_block()).
     776           0 :   vp9_predict_intra_block(xd, bwl, tx_size, mode, x->skip_encode ? src : dst,
     777           0 :                           x->skip_encode ? src_stride : dst_stride, dst,
     778             :                           dst_stride, col, row, plane);
     779             : 
     780             : #if CONFIG_VP9_HIGHBITDEPTH
                         // NOTE(review): unlike the non-high-bitdepth switch further down, none
                         // of the cases in this path call vp9_optimize_b() or write *a / *l.
     781             :   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     782             :     switch (tx_size) {
     783             :       case TX_32X32:
     784             :         if (!x->skip_recode) {
     785             :           vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
     786             :                                     src_stride, dst, dst_stride, xd->bd);
     787             :           highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
     788             :           vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
     789             :                                       p->round, p->quant, p->quant_shift,
     790             :                                       qcoeff, dqcoeff, pd->dequant, eob,
     791             :                                       scan_order->scan, scan_order->iscan);
     792             :         }
     793             :         if (!x->skip_encode && *eob) {
     794             :           vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
     795             :         }
     796             :         break;
     797             :       case TX_16X16:
     798             :         if (!x->skip_recode) {
     799             :           vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
     800             :                                     src_stride, dst, dst_stride, xd->bd);
                                 // DCT_DCT uses the plain forward DCT; any other type goes through
                                 // the hybrid-transform entry point with tx_type.
     801             :           if (tx_type == DCT_DCT)
     802             :             vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
     803             :           else
     804             :             vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
     805             :           vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
     806             :                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
     807             :                                 pd->dequant, eob, scan_order->scan,
     808             :                                 scan_order->iscan);
     809             :         }
     810             :         if (!x->skip_encode && *eob) {
     811             :           vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob,
     812             :                                   xd->bd);
     813             :         }
     814             :         break;
     815             :       case TX_8X8:
     816             :         if (!x->skip_recode) {
     817             :           vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
     818             :                                     src_stride, dst, dst_stride, xd->bd);
     819             :           if (tx_type == DCT_DCT)
     820             :             vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
     821             :           else
     822             :             vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
     823             :           vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
     824             :                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
     825             :                                 pd->dequant, eob, scan_order->scan,
     826             :                                 scan_order->iscan);
     827             :         }
     828             :         if (!x->skip_encode && *eob) {
     829             :           vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
     830             :                                 xd->bd);
     831             :         }
     832             :         break;
     833             :       case TX_4X4:
     834             :         if (!x->skip_recode) {
     835             :           vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
     836             :                                     src_stride, dst, dst_stride, xd->bd);
                                 // For DCT_DCT the forward 4x4 transform goes through the
                                 // x->fwd_txm4x4 function pointer.
     837             :           if (tx_type != DCT_DCT)
     838             :             vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
     839             :           else
     840             :             x->fwd_txm4x4(src_diff, coeff, diff_stride);
     841             :           vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
     842             :                                 p->quant, p->quant_shift, qcoeff, dqcoeff,
     843             :                                 pd->dequant, eob, scan_order->scan,
     844             :                                 scan_order->iscan);
     845             :         }
     846             : 
     847             :         if (!x->skip_encode && *eob) {
     848             :           if (tx_type == DCT_DCT) {
     849             :             // this is like vp9_short_idct4x4 but has a special case around
     850             :             // eob<=1 which is significant (not just an optimization) for the
     851             :             // lossless case.
     852             :             x->highbd_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
     853             :           } else {
     854             :             vp9_highbd_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd);
     855             :           }
     856             :         }
     857             :         break;
     858             :       default: assert(0); return;
     859             :     }
                           // A non-zero eob clears the caller-supplied skip flag.
     860             :     if (*eob) *(args->skip) = 0;
     861             :     return;
     862             :   }
     863             : #endif  // CONFIG_VP9_HIGHBITDEPTH
     864             : 
                         // Non-high-bitdepth path. Each case follows the same three steps:
                         //  1. unless skip_recode: subtract, forward transform, quantize;
                         //  2. if coefficient optimization is enabled and not skip_recode:
                         //     call vp9_optimize_b() and set *a / *l to (result > 0);
                         //  3. unless skip_encode or eob == 0: inverse transform into dst.
     865           0 :   switch (tx_size) {
     866             :     case TX_32X32:
     867           0 :       if (!x->skip_recode) {
     868           0 :         vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
     869             :                            dst_stride);
     870           0 :         fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
     871           0 :         vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
     872           0 :                              p->quant, p->quant_shift, qcoeff, dqcoeff,
     873             :                              pd->dequant, eob, scan_order->scan,
     874             :                              scan_order->iscan);
     875             :       }
     876           0 :       if (args->enable_coeff_opt && !x->skip_recode) {
     877           0 :         *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
     878             :       }
     879           0 :       if (!x->skip_encode && *eob)
     880           0 :         vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
     881           0 :       break;
     882             :     case TX_16X16:
     883           0 :       if (!x->skip_recode) {
     884           0 :         vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
     885             :                            dst_stride);
                               // Here vp9_fht16x16() is called for every tx_type, including
                               // DCT_DCT (the high-bitdepth path above branches on the type).
     886           0 :         vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
     887           0 :         vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
     888           0 :                        p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
     889             :                        scan_order->scan, scan_order->iscan);
     890             :       }
     891           0 :       if (args->enable_coeff_opt && !x->skip_recode) {
     892           0 :         *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
     893             :       }
     894           0 :       if (!x->skip_encode && *eob)
     895           0 :         vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
     896           0 :       break;
     897             :     case TX_8X8:
     898           0 :       if (!x->skip_recode) {
     899           0 :         vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
     900             :                            dst_stride);
     901           0 :         vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
     902           0 :         vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
     903           0 :                        p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
     904             :                        scan_order->scan, scan_order->iscan);
     905             :       }
     906           0 :       if (args->enable_coeff_opt && !x->skip_recode) {
     907           0 :         *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
     908             :       }
     909           0 :       if (!x->skip_encode && *eob)
     910           0 :         vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
     911           0 :       break;
     912             :     case TX_4X4:
     913           0 :       if (!x->skip_recode) {
     914           0 :         vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
     915             :                            dst_stride);
     916           0 :         if (tx_type != DCT_DCT)
     917           0 :           vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
     918             :         else
     919           0 :           x->fwd_txm4x4(src_diff, coeff, diff_stride);
     920           0 :         vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
     921           0 :                        p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
     922             :                        scan_order->scan, scan_order->iscan);
     923             :       }
     924           0 :       if (args->enable_coeff_opt && !x->skip_recode) {
     925           0 :         *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
     926             :       }
     927           0 :       if (!x->skip_encode && *eob) {
     928           0 :         if (tx_type == DCT_DCT)
     929             :           // this is like vp9_short_idct4x4 but has a special case around eob<=1
     930             :           // which is significant (not just an optimization) for the lossless
     931             :           // case.
     932           0 :           x->itxm_add(dqcoeff, dst, dst_stride, *eob);
     933             :         else
                                 // The non-DCT 4x4 inverse is called without the eob argument.
     934           0 :           vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
     935             :       }
     936           0 :       break;
     937           0 :     default: assert(0); break;
     938             :   }
                         // A non-zero eob clears the caller-supplied skip flag.
     939           0 :   if (*eob) *(args->skip) = 0;
     940           0 : }
     941             : 
     942           0 : void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
     943             :                                   int enable_optimize_b) {
                         // Runs vp9_encode_block_intra() on every transform block of one plane.
                         // enable_optimize_b is the caller's request for coefficient
                         // optimization; it is honoured only when x->optimize is set and
                         // (!x->skip_recode || !x->skip_optimize) holds.
     944           0 :   const MACROBLOCKD *const xd = &x->e_mbd;
     945             :   struct optimize_ctx ctx;
                         // Initializer order: x, enable_coeff_opt, ta, tl, skip.
     946           0 :   struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane],
     947           0 :                                ctx.tl[plane], &xd->mi[0]->skip };
     948             : 
     949           0 :   if (enable_optimize_b && x->optimize &&
     950           0 :       (!x->skip_recode || !x->skip_optimize)) {
     951           0 :     const struct macroblockd_plane *const pd = &xd->plane[plane];
                           // Plane 0 uses the block's tx_size; other planes use get_uv_tx_size().
     952           0 :     const TX_SIZE tx_size =
     953           0 :         plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
     954           0 :     vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
     955             :   } else {
                           // ctx is not filled in on this path; forcing the flag to 0 keeps
                           // vp9_encode_block_intra() from touching args->ta / args->tl.
     956           0 :     arg.enable_coeff_opt = 0;
     957             :   }
     958             : 
     959           0 :   vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
     960             :                                          vp9_encode_block_intra, &arg);
     961           0 : }

Generated by: LCOV version 1.13