LCOV - code coverage report
Current view: top level - media/libvpx/libvpx/vp8/decoder - threading.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 435 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 9 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS.  All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "vpx_config.h"
      12             : #include "vp8_rtcd.h"
      13             : #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
      14             : #include <unistd.h>
      15             : #endif
      16             : #include "onyxd_int.h"
      17             : #include "vpx_mem/vpx_mem.h"
      18             : #include "vp8/common/threading.h"
      19             : 
      20             : #include "vp8/common/loopfilter.h"
      21             : #include "vp8/common/extend.h"
      22             : #include "vpx_ports/vpx_timer.h"
      23             : #include "detokenize.h"
      24             : #include "vp8/common/reconintra4x4.h"
      25             : #include "vp8/common/reconinter.h"
      26             : #include "vp8/common/reconintra.h"
      27             : #include "vp8/common/setupintrarecon.h"
      28             : #if CONFIG_ERROR_CONCEALMENT
      29             : #include "error_concealment.h"
      30             : #endif
      31             : 
      32             : #define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))
                       /* CALLOC_ARRAY(p, n), defined on the line above: stores into p the result
                        * of vpx_calloc(sizeof(*p), n), routed through CHECK_MEM_ERROR.
                        * NOTE(review): the element size is passed first and the count second;
                        * confirm this matches vpx_calloc's parameter order (not visible here).
                        *
                        * CALLOC_ARRAY_ALIGNED(p, n, algn), defined below: requests
                        * sizeof(*p) * n bytes from vpx_memalign with alignment algn, routed
                        * through CHECK_MEM_ERROR, then zero-fills that region with memset.
                        * p and n are expanded more than once, so the arguments must not have
                        * side effects. The do/while (0) wrapper makes the expansion behave as
                        * a single statement.
                        */
      33             : #define CALLOC_ARRAY_ALIGNED(p, n, algn)                            \
      34             :   do {                                                              \
      35             :     CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \
      36             :     memset((p), 0, (n) * sizeof(*(p)));                             \
      37             :   } while (0)
      38             : 
                       /* Prototype only; the definition is not part of this view. It is called
                        * from mt_decode_macroblock() when xd->segmentation_enabled is non-zero.
                        */
      39             : void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
      40             : 
      41           0 : static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
      42             :                                        MB_ROW_DEC *mbrd, int count) {
                         /* Copies frame-level decode state from xd into the MACROBLOCKD of each of
                          * the first `count` entries of mbrd[], then resets the per-row progress
                          * array pbi->mt_current_mb_col.
                          *
                          * pbi:   decoder instance; supplies pbi->common, pbi->mbc and
                          *        pbi->mt_current_mb_col.
                          * xd:    source context whose fields are copied.
                          * mbrd:  array of row-decoder contexts to fill in.
                          * count: number of mbrd[] entries to initialise.
                          */
      43           0 :   VP8_COMMON *const pc = &pbi->common;
      44             :   int i;
      45             : 
      46           0 :   for (i = 0; i < count; ++i) {
      47           0 :     MACROBLOCKD *mbd = &mbrd[i].mbd;
      48           0 :     mbd->subpixel_predict = xd->subpixel_predict;
      49           0 :     mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
      50           0 :     mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
      51           0 :     mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
      52             : 
                           /* frame_type comes from the common state; pre/dst come from xd */
      53           0 :     mbd->frame_type = pc->frame_type;
      54           0 :     mbd->pre = xd->pre;
      55           0 :     mbd->dst = xd->dst;
      56             : 
      57           0 :     mbd->segmentation_enabled = xd->segmentation_enabled;
      58           0 :     mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
      59           0 :     memcpy(mbd->segment_feature_data, xd->segment_feature_data,
      60             :            sizeof(xd->segment_feature_data));
      61             : 
      62             :     /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
      63           0 :     memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
      64             :     /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
      65           0 :     memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
      66             :     /*unsigned char mode_ref_lf_delta_enabled;
      67             :     unsigned char mode_ref_lf_delta_update;*/
      68           0 :     mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
      69           0 :     mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;
      70             : 
                           /* Every context starts on the first bool decoder, pbi->mbc[0];
                            * mt_decode_mb_rows() selects one per row before decoding. */
      71           0 :     mbd->current_bc = &pbi->mbc[0];
      72             : 
                           /* dequantizer tables are copied by value, sized by the source arrays */
      73           0 :     memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
      74           0 :     memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
      75           0 :     memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
      76           0 :     memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
      77             : 
                           /* default mask; replaced just below when pc->full_pixel is set */
      78           0 :     mbd->fullpixel_mask = 0xffffffff;
      79             : 
      80           0 :     if (pc->full_pixel) mbd->fullpixel_mask = 0xfffffff8;
      81             :   }
      82             : 
                         /* Reset the progress entry of every macroblock row to -1.
                          * NOTE(review): presumably -1 means "no column published yet";
                          * mt_decode_mb_rows() writes its column progress into this array. */
      83           0 :   for (i = 0; i < pc->mb_rows; ++i) pbi->mt_current_mb_col[i] = -1;
      84           0 : }
      85             : 
      86           0 : static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
      87             :                                  unsigned int mb_idx) {
                         /* Reconstructs the macroblock selected by xd->mode_info_context into
                          * xd->dst: decodes its residual tokens (unless the MB is flagged as
                          * skipped or the bool decoder is already in error), builds the intra or
                          * inter prediction, then adds the dequantized inverse-transform residual.
                          *
                          * pbi:    decoder instance.
                          * xd:     macroblock context; its mode info, bool decoder, recon_*
                          *         pointers and dst buffers must already be set by the caller.
                          * mb_idx: only read when CONFIG_ERROR_CONCEALMENT is enabled, where it is
                          *         compared against pbi->mvs_corrupt_from_mb.
                          */
      88             :   MB_PREDICTION_MODE mode;
      89             :   int i;
      90             : #if CONFIG_ERROR_CONCEALMENT
      91             :   int corruption_detected = 0;
      92             : #else
      93             :   (void)mb_idx;
      94             : #endif
      95             : 
      96           0 :   if (xd->mode_info_context->mbmi.mb_skip_coeff) {
      97           0 :     vp8_reset_mb_tokens_context(xd);
      98           0 :   } else if (!vp8dx_bool_error(xd->current_bc)) {
      99             :     int eobtotal;
     100           0 :     eobtotal = vp8_decode_mb_tokens(pbi, xd);
     101             : 
     102             :     /* Special case:  Force the loopfilter to skip when eobtotal is zero */
     103           0 :     xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal == 0);
     104             :   }
     105             : 
     106           0 :   mode = xd->mode_info_context->mbmi.mode;
     107             : 
                         /* NOTE(review): presumably the dequantizer depends on the segment of
                          * this MB, hence the per-MB re-initialisation - confirm in
                          * vp8_mb_init_dequantizer. */
     108           0 :   if (xd->segmentation_enabled) vp8_mb_init_dequantizer(pbi, xd);
     109             : 
     110             : #if CONFIG_ERROR_CONCEALMENT
     111             : 
     112             :   if (pbi->ec_active) {
     113             :     int throw_residual;
     114             :     /* When we have independent partitions we can apply residual even
     115             :      * though other partitions within the frame are corrupt.
     116             :      */
     117             :     throw_residual =
     118             :         (!pbi->independent_partitions && pbi->frame_corrupt_residual);
     119             :     throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc));
     120             : 
     121             :     if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) {
     122             :       /* MB with corrupt residuals or corrupt mode/motion vectors.
     123             :        * Better to use the predictor as reconstruction.
     124             :        */
     125             :       pbi->frame_corrupt_residual = 1;
     126             :       memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
     127             : 
     128             :       corruption_detected = 1;
     129             : 
     130             :       /* force idct to be skipped for B_PRED and use the
     131             :        * prediction only for reconstruction
     132             :        * */
     133             :       memset(xd->eobs, 0, 25);
     134             :     }
     135             :   }
     136             : #endif
     137             : 
     138             :   /* do prediction */
     139           0 :   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
                           /* chroma is predicted for every intra MB; luma depends on the mode */
     140           0 :     vp8_build_intra_predictors_mbuv_s(
     141             :         xd, xd->recon_above[1], xd->recon_above[2], xd->recon_left[1],
     142           0 :         xd->recon_left[2], xd->recon_left_stride[1], xd->dst.u_buffer,
     143           0 :         xd->dst.v_buffer, xd->dst.uv_stride);
     144             : 
     145           0 :     if (mode != B_PRED) {
     146           0 :       vp8_build_intra_predictors_mby_s(
     147             :           xd, xd->recon_above[0], xd->recon_left[0], xd->recon_left_stride[0],
     148           0 :           xd->dst.y_buffer, xd->dst.y_stride);
     149             :     } else {
                             /* B_PRED: each of the 16 luma sub-blocks is predicted and has its
                              * residual added in turn, inside the loop below. */
     150           0 :       short *DQC = xd->dequant_y1;
     151           0 :       int dst_stride = xd->dst.y_stride;
     152             : 
     153             :       /* clear out residual eob info */
     154           0 :       if (xd->mode_info_context->mbmi.mb_skip_coeff) memset(xd->eobs, 0, 25);
     155             : 
     156           0 :       intra_prediction_down_copy(xd, xd->recon_above[0] + 16);
     157             : 
     158           0 :       for (i = 0; i < 16; ++i) {
     159           0 :         BLOCKD *b = &xd->block[i];
     160           0 :         unsigned char *dst = xd->dst.y_buffer + b->offset;
     161           0 :         B_PREDICTION_MODE b_mode = xd->mode_info_context->bmi[i].as_mode;
     162             :         unsigned char *Above;
     163             :         unsigned char *yleft;
     164             :         int left_stride;
     165             :         unsigned char top_left;
     166             : 
                               /* When the frame filter level is non-zero, neighbours on the top
                                * row (i < 4) and left column (i % 4 == 0) of the MB are read from
                                * xd->recon_above / xd->recon_left instead of the frame buffer.
                                * NOTE(review): presumably because the frame-buffer neighbours may
                                * already be loop filtered; mt_decode_mb_rows() saves those border
                                * pixels before it filters. */
     167             :         /*Caution: For some b_mode, it needs 8 pixels (4 above + 4
     168             :          * above-right).*/
     169           0 :         if (i < 4 && pbi->common.filter_level) {
     170           0 :           Above = xd->recon_above[0] + b->offset;
     171             :         } else {
     172           0 :           Above = dst - dst_stride;
     173             :         }
     174             : 
     175           0 :         if (i % 4 == 0 && pbi->common.filter_level) {
     176           0 :           yleft = xd->recon_left[0] + i;
     177           0 :           left_stride = 1;
     178             :         } else {
     179           0 :           yleft = dst - 1;
     180           0 :           left_stride = dst_stride;
     181             :         }
     182             : 
                               /* for i == 4, 8, 12 the top-left pixel is taken from the saved
                                * left column (when filtering), otherwise from just before Above */
     183           0 :         if ((i == 4 || i == 8 || i == 12) && pbi->common.filter_level) {
     184           0 :           top_left = *(xd->recon_left[0] + i - 1);
     185             :         } else {
     186           0 :           top_left = Above[-1];
     187             :         }
     188             : 
     189           0 :         vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, dst, dst_stride,
     190             :                              top_left);
     191             : 
                               /* eob > 1: dequantize + inverse transform and add;
                                * eob == 1: DC-only add, then clear the first two coefficients;
                                * eob == 0: the prediction is left untouched. */
     192           0 :         if (xd->eobs[i]) {
     193           0 :           if (xd->eobs[i] > 1) {
     194           0 :             vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride);
     195             :           } else {
     196           0 :             vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], dst, dst_stride, dst,
     197             :                                  dst_stride);
     198           0 :             memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
     199             :           }
     200             :         }
     201             :       }
     202             :     }
     203             :   } else {
     204           0 :     vp8_build_inter_predictors_mb(xd);
     205             :   }
     206             : 
     207             : #if CONFIG_ERROR_CONCEALMENT
                         /* a concealed MB keeps the prediction only; no residual is added */
     208             :   if (corruption_detected) {
     209             :     return;
     210             :   }
     211             : #endif
     212             : 
     213           0 :   if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
     214             :     /* dequantization and idct */
     215           0 :     if (mode != B_PRED) {
     216           0 :       short *DQC = xd->dequant_y1;
     217             : 
                             /* block[24] (the second-order block) is processed for every mode
                              * reaching this point except SPLITMV */
     218           0 :       if (mode != SPLITMV) {
     219           0 :         BLOCKD *b = &xd->block[24];
     220             : 
     221             :         /* do 2nd order transform on the dc block */
     222           0 :         if (xd->eobs[24] > 1) {
     223           0 :           vp8_dequantize_b(b, xd->dequant_y2);
     224             : 
     225           0 :           vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff);
     226           0 :           memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
     227             :         } else {
     228           0 :           b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
     229           0 :           vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff);
     230           0 :           memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
     231             :         }
     232             : 
     233             :         /* override the dc dequant constant in order to preserve the
     234             :          * dc components
     235             :          */
     236           0 :         DQC = xd->dequant_y1_dc;
     237             :       }
     238             : 
     239           0 :       vp8_dequant_idct_add_y_block(xd->qcoeff, DQC, xd->dst.y_buffer,
     240           0 :                                    xd->dst.y_stride, xd->eobs);
     241             :     }
     242             : 
                           /* chroma residual: coefficients start at xd->qcoeff + 16 * 16 and the
                            * matching eobs at xd->eobs + 16 */
     243           0 :     vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv,
     244           0 :                                   xd->dst.u_buffer, xd->dst.v_buffer,
     245           0 :                                   xd->dst.uv_stride, xd->eobs + 16);
     246             :   }
     247           0 : }
     248             : 
     249           0 : static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
     250             :                               int start_mb_row) {
                         /* Decodes (and, when pbi->common.filter_level is non-zero, loop filters)
                          * the macroblock rows start_mb_row, start_mb_row + T, start_mb_row + 2T,
                          * ... below pc->mb_rows, where T = pbi->decoding_thread_count + 1.
                          *
                          * Progress within a row is published to pbi->mt_current_mb_col[mb_row]
                          * with protected_write(); before decoding, sync_read() is called on the
                          * entry of the row above. Both use the per-row mutex pbi->pmutex[].
                          * If this call handled the last row of the frame it posts
                          * pbi->h_event_end_decoding.
                          *
                          * pbi:          decoder instance shared by all callers.
                          * xd:           macroblock context used (and modified) by this call.
                          * start_mb_row: first macroblock row to decode.
                          */
     251             :   const int *last_row_current_mb_col;
     252             :   int *current_mb_col;
     253             :   int mb_row;
     254           0 :   VP8_COMMON *pc = &pbi->common;
     255           0 :   const int nsync = pbi->sync_range;
                         /* Stand-in progress value for row 0, which has no row above it. It
                          * equals the value published for a finished row at the end of the row
                          * loop (mb_col == pc->mb_cols there, plus nsync). */
     256           0 :   const int first_row_no_sync_above = pc->mb_cols + nsync;
     257           0 :   int num_part = 1 << pbi->common.multi_token_partition;
     258           0 :   int last_mb_row = start_mb_row;
     259             : 
     260           0 :   YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
     261           0 :   YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME];
     262             : 
     263           0 :   int recon_y_stride = yv12_fb_new->y_stride;
     264           0 :   int recon_uv_stride = yv12_fb_new->uv_stride;
     265             : 
     266             :   unsigned char *ref_buffer[MAX_REF_FRAMES][3];
     267             :   unsigned char *dst_buffer[3];
     268             :   int i;
     269             :   int ref_fb_corrupted[MAX_REF_FRAMES];
     270             : 
     271           0 :   ref_fb_corrupted[INTRA_FRAME] = 0;
     272             : 
                         /* Cache plane pointers and the corrupted flag of each reference frame.
                          * The loop starts at 1, so ref_buffer[0][*] is never initialised here. */
     273           0 :   for (i = 1; i < MAX_REF_FRAMES; ++i) {
     274           0 :     YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];
     275             : 
     276           0 :     ref_buffer[i][0] = this_fb->y_buffer;
     277           0 :     ref_buffer[i][1] = this_fb->u_buffer;
     278           0 :     ref_buffer[i][2] = this_fb->v_buffer;
     279             : 
     280           0 :     ref_fb_corrupted[i] = this_fb->corrupted;
     281             :   }
     282             : 
     283           0 :   dst_buffer[0] = yv12_fb_new->y_buffer;
     284           0 :   dst_buffer[1] = yv12_fb_new->u_buffer;
     285           0 :   dst_buffer[2] = yv12_fb_new->v_buffer;
     286             : 
     287           0 :   xd->up_available = (start_mb_row != 0);
     288             : 
     289           0 :   xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row;
     290           0 :   xd->mode_info_stride = pc->mode_info_stride;
     291             : 
     292           0 :   for (mb_row = start_mb_row; mb_row < pc->mb_rows;
     293           0 :        mb_row += (pbi->decoding_thread_count + 1)) {
     294             :     int recon_yoffset, recon_uvoffset;
     295             :     int mb_col;
     296             :     int filter_level;
     297           0 :     loop_filter_info_n *lfi_n = &pc->lf_info;
     298             : 
     299             :     /* save last row processed by this thread */
     300           0 :     last_mb_row = mb_row;
     301             :     /* select bool coder for current partition */
     302           0 :     xd->current_bc = &pbi->mbc[mb_row % num_part];
     303             : 
     304           0 :     if (mb_row > 0) {
     305           0 :       last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];
     306             :     } else {
     307           0 :       last_row_current_mb_col = &first_row_no_sync_above;
     308             :     }
     309             : 
     310           0 :     current_mb_col = &pbi->mt_current_mb_col[mb_row];
     311             : 
                           /* offsets of this row's first MB: 16 luma lines / 8 chroma lines per row */
     312           0 :     recon_yoffset = mb_row * recon_y_stride * 16;
     313           0 :     recon_uvoffset = mb_row * recon_uv_stride * 8;
     314             : 
     315             :     /* reset contexts */
     316           0 :     xd->above_context = pc->above_context;
     317           0 :     memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
     318             : 
     319           0 :     xd->left_available = 0;
     320             : 
     321           0 :     xd->mb_to_top_edge = -((mb_row * 16) << 3);
     322           0 :     xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
     323             : 
                           /* Filtering on: intra neighbours come from the side buffers
                            * mt_*above_row / mt_*left_col, which are filled further down before
                            * the loop filter runs. Filtering off: they point into the frame.
                            * NOTE(review): the +32 / +16 offsets presumably skip a left border in
                            * the side buffers - confirm against their allocation. */
     324           0 :     if (pbi->common.filter_level) {
     325           0 :       xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0 * 16 + 32;
     326           0 :       xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0 * 8 + 16;
     327           0 :       xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0 * 8 + 16;
     328             : 
     329           0 :       xd->recon_left[0] = pbi->mt_yleft_col[mb_row];
     330           0 :       xd->recon_left[1] = pbi->mt_uleft_col[mb_row];
     331           0 :       xd->recon_left[2] = pbi->mt_vleft_col[mb_row];
     332             : 
     333             :       /* TODO: move to outside row loop */
     334           0 :       xd->recon_left_stride[0] = 1;
     335           0 :       xd->recon_left_stride[1] = 1;
     336             :     } else {
     337           0 :       xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
     338           0 :       xd->recon_above[1] = dst_buffer[1] + recon_uvoffset;
     339           0 :       xd->recon_above[2] = dst_buffer[2] + recon_uvoffset;
     340             : 
     341           0 :       xd->recon_left[0] = xd->recon_above[0] - 1;
     342           0 :       xd->recon_left[1] = xd->recon_above[1] - 1;
     343           0 :       xd->recon_left[2] = xd->recon_above[2] - 1;
     344             : 
     345           0 :       xd->recon_above[0] -= xd->dst.y_stride;
     346           0 :       xd->recon_above[1] -= xd->dst.uv_stride;
     347           0 :       xd->recon_above[2] -= xd->dst.uv_stride;
     348             : 
     349             :       /* TODO: move to outside row loop */
     350           0 :       xd->recon_left_stride[0] = xd->dst.y_stride;
     351           0 :       xd->recon_left_stride[1] = xd->dst.uv_stride;
     352             : 
     353           0 :       setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
     354             :                              xd->recon_left[2], xd->dst.y_stride,
     355             :                              xd->dst.uv_stride);
     356             :     }
     357             : 
     358           0 :     for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) {
                             /* publish progress (mb_col - 1) for this row when
                              * (mb_col - 1) % nsync == 0 */
     359           0 :       if (((mb_col - 1) % nsync) == 0) {
     360           0 :         pthread_mutex_t *mutex = &pbi->pmutex[mb_row];
     361           0 :         protected_write(mutex, current_mb_col, mb_col - 1);
     362             :       }
     363             : 
                             /* Consult the progress of the row above on selected columns.
                              * NOTE(review): the mask test equals mb_col % nsync == 0 only if
                              * nsync is a power of two - confirm where sync_range is assigned. */
     364           0 :       if (mb_row && !(mb_col & (nsync - 1))) {
     365           0 :         pthread_mutex_t *mutex = &pbi->pmutex[mb_row - 1];
     366           0 :         sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
     367             :       }
     368             : 
     369             :       /* Distance of MB to the various image edges.
     370             :        * These are specified to 8th pel as they are always
     371             :        * compared to values that are in 1/8th pel units.
     372             :        */
     373           0 :       xd->mb_to_left_edge = -((mb_col * 16) << 3);
     374           0 :       xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
     375             : 
     376             : #if CONFIG_ERROR_CONCEALMENT
     377             :       {
     378             :         int corrupt_residual =
     379             :             (!pbi->independent_partitions && pbi->frame_corrupt_residual) ||
     380             :             vp8dx_bool_error(xd->current_bc);
     381             :         if (pbi->ec_active &&
     382             :             (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
     383             :             corrupt_residual) {
     384             :           /* We have an intra block with corrupt
     385             :            * coefficients, better to conceal with an inter
     386             :            * block.
     387             :            * Interpolate MVs from neighboring MBs
     388             :            *
     389             :            * Note that for the first mb with corrupt
     390             :            * residual in a frame, we might not discover
     391             :            * that before decoding the residual. That
     392             :            * happens after this check, and therefore no
     393             :            * inter concealment will be done.
     394             :            */
     395             :           vp8_interpolate_motion(xd, mb_row, mb_col, pc->mb_rows, pc->mb_cols);
     396             :         }
     397             :       }
     398             : #endif
     399             : 
     400           0 :       xd->dst.y_buffer = dst_buffer[0] + recon_yoffset;
     401           0 :       xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset;
     402           0 :       xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset;
     403             : 
                             /* co-located position in the reference frame chosen by this MB */
     404           0 :       xd->pre.y_buffer =
     405           0 :           ref_buffer[xd->mode_info_context->mbmi.ref_frame][0] + recon_yoffset;
     406           0 :       xd->pre.u_buffer =
     407           0 :           ref_buffer[xd->mode_info_context->mbmi.ref_frame][1] + recon_uvoffset;
     408           0 :       xd->pre.v_buffer =
     409           0 :           ref_buffer[xd->mode_info_context->mbmi.ref_frame][2] + recon_uvoffset;
     410             : 
     411             :       /* propagate errors from reference frames */
     412           0 :       xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame];
     413             : 
                             /* NOTE(review): mb_idx is always passed as 0; mt_decode_macroblock()
                              * only compares it with pbi->mvs_corrupt_from_mb under
                              * CONFIG_ERROR_CONCEALMENT - confirm that 0 is intended there. */
     414           0 :       mt_decode_macroblock(pbi, xd, 0);
     415             : 
     416           0 :       xd->left_available = 1;
     417             : 
     418             :       /* check if the boolean decoder has suffered an error */
     419           0 :       xd->corrupted |= vp8dx_bool_error(xd->current_bc);
     420             : 
     421           0 :       xd->recon_above[0] += 16;
     422           0 :       xd->recon_above[1] += 8;
     423           0 :       xd->recon_above[2] += 8;
     424             : 
                             /* recon_left only advances when it points into the frame; with
                              * filtering on it stays on the per-row left-column buffers */
     425           0 :       if (!pbi->common.filter_level) {
     426           0 :         xd->recon_left[0] += 16;
     427           0 :         xd->recon_left[1] += 8;
     428           0 :         xd->recon_left[2] += 8;
     429             :       }
     430             : 
     431           0 :       if (pbi->common.filter_level) {
                               /* inner block edges are skipped for MBs that are neither B_PRED nor
                                * SPLITMV and have mb_skip_coeff set */
     432           0 :         int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
     433           0 :                        xd->mode_info_context->mbmi.mode != SPLITMV &&
     434           0 :                        xd->mode_info_context->mbmi.mb_skip_coeff);
     435             : 
     436           0 :         const int mode_index =
     437           0 :             lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
     438           0 :         const int seg = xd->mode_info_context->mbmi.segment_id;
     439           0 :         const int ref_frame = xd->mode_info_context->mbmi.ref_frame;
     440             : 
     441           0 :         filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
     442             : 
                               /* The two saves below run before the filter calls further down, so
                                * they capture the pixels as reconstructed, prior to filtering. */
     443           0 :         if (mb_row != pc->mb_rows - 1) {
     444             :           /* Save decoded MB last row data for next-row decoding */
     445           0 :           memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16),
     446           0 :                  (xd->dst.y_buffer + 15 * recon_y_stride), 16);
     447           0 :           memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8),
     448           0 :                  (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
     449           0 :           memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8),
     450           0 :                  (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
     451             :         }
     452             : 
     453             :         /* save left_col for next MB decoding */
     454           0 :         if (mb_col != pc->mb_cols - 1) {
     455           0 :           MODE_INFO *next = xd->mode_info_context + 1;
     456             : 
                                 /* only needed when the MB to the right is intra coded */
     457           0 :           if (next->mbmi.ref_frame == INTRA_FRAME) {
     458           0 :             for (i = 0; i < 16; ++i) {
     459           0 :               pbi->mt_yleft_col[mb_row][i] =
     460           0 :                   xd->dst.y_buffer[i * recon_y_stride + 15];
     461             :             }
     462           0 :             for (i = 0; i < 8; ++i) {
     463           0 :               pbi->mt_uleft_col[mb_row][i] =
     464           0 :                   xd->dst.u_buffer[i * recon_uv_stride + 7];
     465           0 :               pbi->mt_vleft_col[mb_row][i] =
     466           0 :                   xd->dst.v_buffer[i * recon_uv_stride + 7];
     467             :             }
     468             :           }
     469             :         }
     470             : 
     471             :         /* loopfilter on this macroblock. */
     472           0 :         if (filter_level) {
     473           0 :           if (pc->filter_type == NORMAL_LOOPFILTER) {
     474             :             loop_filter_info lfi;
     475           0 :             FRAME_TYPE frame_type = pc->frame_type;
     476           0 :             const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
     477           0 :             lfi.mblim = lfi_n->mblim[filter_level];
     478           0 :             lfi.blim = lfi_n->blim[filter_level];
     479           0 :             lfi.lim = lfi_n->lim[filter_level];
     480           0 :             lfi.hev_thr = lfi_n->hev_thr[hev_index];
     481             : 
                                   /* the left MB edge is skipped for the first column */
     482           0 :             if (mb_col > 0)
     483           0 :               vp8_loop_filter_mbv(xd->dst.y_buffer, xd->dst.u_buffer,
     484           0 :                                   xd->dst.v_buffer, recon_y_stride,
     485             :                                   recon_uv_stride, &lfi);
     486             : 
     487           0 :             if (!skip_lf)
     488           0 :               vp8_loop_filter_bv(xd->dst.y_buffer, xd->dst.u_buffer,
     489           0 :                                  xd->dst.v_buffer, recon_y_stride,
     490             :                                  recon_uv_stride, &lfi);
     491             : 
     492             :             /* don't apply across umv border */
     493           0 :             if (mb_row > 0)
     494           0 :               vp8_loop_filter_mbh(xd->dst.y_buffer, xd->dst.u_buffer,
     495           0 :                                   xd->dst.v_buffer, recon_y_stride,
     496             :                                   recon_uv_stride, &lfi);
     497             : 
     498           0 :             if (!skip_lf)
     499           0 :               vp8_loop_filter_bh(xd->dst.y_buffer, xd->dst.u_buffer,
     500           0 :                                  xd->dst.v_buffer, recon_y_stride,
     501             :                                  recon_uv_stride, &lfi);
     502             :           } else {
                                   /* other filter type: the simple variants are called on the
                                    * luma plane only */
     503           0 :             if (mb_col > 0)
     504           0 :               vp8_loop_filter_simple_mbv(xd->dst.y_buffer, recon_y_stride,
     505           0 :                                          lfi_n->mblim[filter_level]);
     506             : 
     507           0 :             if (!skip_lf)
     508           0 :               vp8_loop_filter_simple_bv(xd->dst.y_buffer, recon_y_stride,
     509           0 :                                         lfi_n->blim[filter_level]);
     510             : 
     511             :             /* don't apply across umv border */
     512           0 :             if (mb_row > 0)
     513           0 :               vp8_loop_filter_simple_mbh(xd->dst.y_buffer, recon_y_stride,
     514           0 :                                          lfi_n->mblim[filter_level]);
     515             : 
     516           0 :             if (!skip_lf)
     517           0 :               vp8_loop_filter_simple_bh(xd->dst.y_buffer, recon_y_stride,
     518           0 :                                         lfi_n->blim[filter_level]);
     519             :           }
     520             :         }
     521             :       }
     522             : 
     523           0 :       recon_yoffset += 16;
     524           0 :       recon_uvoffset += 8;
     525             : 
     526           0 :       ++xd->mode_info_context; /* next mb */
     527             : 
     528           0 :       xd->above_context++;
     529             :     }
     530             : 
     531             :     /* adjust to the next row of mbs */
     532           0 :     if (pbi->common.filter_level) {
     533           0 :       if (mb_row != pc->mb_rows - 1) {
     534           0 :         int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS;
     535           0 :         int lastuv = (yv12_fb_lst->y_width >> 1) + (VP8BORDERINPIXELS >> 1);
     536             : 
                               /* replicate the entry just before index lasty / lastuv into the
                                * following four entries of the next row's above-buffers */
     537           0 :         for (i = 0; i < 4; ++i) {
     538           0 :           pbi->mt_yabove_row[mb_row + 1][lasty + i] =
     539           0 :               pbi->mt_yabove_row[mb_row + 1][lasty - 1];
     540           0 :           pbi->mt_uabove_row[mb_row + 1][lastuv + i] =
     541           0 :               pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
     542           0 :           pbi->mt_vabove_row[mb_row + 1][lastuv + i] =
     543           0 :               pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
     544             :         }
     545             :       }
     546             :     } else {
                             /* xd->dst.* still point at the last MB decoded in this row, so
                              * +16 / +8 is the position just past it */
     547           0 :       vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
     548           0 :                         xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
     549             :     }
     550             : 
                           /* mb_col == pc->mb_cols here, so the published value is
                            * pc->mb_cols + nsync, the same value as first_row_no_sync_above */
     551             :     /* last MB of row is ready just after extension is done */
     552           0 :     protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync);
     553             : 
     554           0 :     ++xd->mode_info_context; /* skip prediction column */
     555           0 :     xd->up_available = 1;
     556             : 
     557             :     /* skip the mode-info rows of the decoding_thread_count rows in between,
                            * matching the mb_row step of the row loop */
     558           0 :     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
     559             :   }
     560             : 
     561             :   /* signal end of frame decoding if this thread processed the last mb_row */
     562           0 :   if (last_mb_row == (pc->mb_rows - 1)) sem_post(&pbi->h_event_end_decoding);
     563           0 : }
     564             : /* Worker-thread entry point. p_data is a DECODETHREAD_DATA; loops until b_multithreaded_rd reads 0, calling mt_decode_mb_rows() each time its start semaphore is posted. Returns 0. */
     565           0 : static THREAD_FUNCTION thread_decoding_proc(void *p_data) {
     566           0 :   int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
     567           0 :   VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
     568           0 :   MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
     569             :   ENTROPY_CONTEXT_PLANES mb_row_left_context; /* local to this thread; xd->left_context is pointed at it below */
     570             : 
     571             :   while (1) {
     572           0 :     if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) break;
     573             : 
     574           0 :     if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) { /* on sem_wait() failure, just loop again */
     575           0 :       if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) {
     576           0 :         break; /* woken by vp8_decoder_remove_threads() to exit, not to decode */
     577             :       } else {
     578           0 :         MACROBLOCKD *xd = &mbrd->mbd;
     579           0 :         xd->left_context = &mb_row_left_context;
     580             : 
     581           0 :         mt_decode_mb_rows(pbi, xd, ithread + 1); /* vp8mt_decode_mb_rows() passes 0 on the calling thread */
     582             :       }
     583             :     }
     584             :   }
     585             : 
     586           0 :   return 0;
     587             : }
     588             : /* Initializes mt_mutex and, when min(max_threads, 8, processor_core_count) exceeds 1, allocates the per-thread bookkeeping and starts that many minus one worker threads. */
     589           0 : void vp8_decoder_create_threads(VP8D_COMP *pbi) {
     590           0 :   int core_count = 0;
     591             :   unsigned int ithread;
     592             : 
     593           0 :   pbi->b_multithreaded_rd = 0;
     594           0 :   pbi->allocated_decoding_thread_count = 0;
     595           0 :   pthread_mutex_init(&pbi->mt_mutex, NULL); /* always initialized; vp8_decoder_remove_threads() always destroys it */
     596             : 
     597             :   /* limit decoding threads to the max number of token partitions */
     598           0 :   core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
     599             : 
     600             :   /* limit decoding threads to the available cores */
     601           0 :   if (core_count > pbi->common.processor_core_count) {
     602           0 :     core_count = pbi->common.processor_core_count;
     603             :   }
     604             : 
     605           0 :   if (core_count > 1) {
     606           0 :     pbi->b_multithreaded_rd = 1;
     607           0 :     pbi->decoding_thread_count = core_count - 1; /* one fewer: the thread calling vp8mt_decode_mb_rows() decodes too */
     608             : 
     609           0 :     CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
     610           0 :     CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);
     611           0 :     CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);
     612           0 :     CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);
     613             : 
     614           0 :     if (sem_init(&pbi->h_event_end_decoding, 0, 0)) {
     615           0 :       vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
     616             :                          "Failed to initialize semaphore");
     617             :     }
     618             : 
     619           0 :     for (ithread = 0; ithread < pbi->decoding_thread_count; ++ithread) {
     620           0 :       if (sem_init(&pbi->h_event_start_decoding[ithread], 0, 0)) break; /* stop at the first failure; ithread is then the number of threads started */
     621             : 
     622           0 :       vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd);
     623             : 
     624           0 :       pbi->de_thread_data[ithread].ithread = ithread;
     625           0 :       pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
     626           0 :       pbi->de_thread_data[ithread].ptr2 = (void *)&pbi->mb_row_di[ithread];
     627             : 
     628           0 :       if (pthread_create(&pbi->h_decoding_thread[ithread], 0,
     629           0 :                          thread_decoding_proc, &pbi->de_thread_data[ithread])) {
     630           0 :         sem_destroy(&pbi->h_event_start_decoding[ithread]); /* this index is not counted as allocated, so release its semaphore here */
     631           0 :         break;
     632             :       }
     633             :     }
     634             : 
     635           0 :     pbi->allocated_decoding_thread_count = ithread; /* workers actually running; may be fewer than decoding_thread_count */
     636           0 :     if (pbi->allocated_decoding_thread_count !=
     637           0 :         (int)pbi->decoding_thread_count) {
     638             :       /* the remainder of cleanup cases will be handled in
     639             :        * vp8_decoder_remove_threads(). */
     640           0 :       if (pbi->allocated_decoding_thread_count == 0) {
     641           0 :         sem_destroy(&pbi->h_event_end_decoding); /* vp8_decoder_remove_threads() skips this semaphore when the count is 0 */
     642             :       }
     643           0 :       vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
     644             :                          "Failed to create threads");
     645             :     }
     646             :   }
     647           0 : }
     648             : /* Destroys the per-row mutexes and frees mt_current_mb_col and the above_row/left_col buffers, resetting each pointer to NULL. mb_rows is the number of per-row entries walked in each array. */
     649           0 : void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) {
     650             :   int i;
     651             : 
     652             :   /* Destroy each per-row mutex, then free the mutex array itself. */
     653           0 :   if (pbi->pmutex != NULL) {
     654           0 :     for (i = 0; i < mb_rows; ++i) {
     655           0 :       pthread_mutex_destroy(&pbi->pmutex[i]);
     656             :     }
     657             : 
     658           0 :     vpx_free(pbi->pmutex);
     659           0 :     pbi->pmutex = NULL;
     660             :   }
     661             : 
     662           0 :   vpx_free(pbi->mt_current_mb_col);
     663           0 :   pbi->mt_current_mb_col = NULL;
     664             : 
     665             :   /* Free above_row buffers: every per-row buffer first, then the pointer array (same pattern for Y, U and V). */
     666           0 :   if (pbi->mt_yabove_row) {
     667           0 :     for (i = 0; i < mb_rows; ++i) {
     668           0 :       vpx_free(pbi->mt_yabove_row[i]);
     669           0 :       pbi->mt_yabove_row[i] = NULL;
     670             :     }
     671           0 :     vpx_free(pbi->mt_yabove_row);
     672           0 :     pbi->mt_yabove_row = NULL;
     673             :   }
     674             : 
     675           0 :   if (pbi->mt_uabove_row) {
     676           0 :     for (i = 0; i < mb_rows; ++i) {
     677           0 :       vpx_free(pbi->mt_uabove_row[i]);
     678           0 :       pbi->mt_uabove_row[i] = NULL;
     679             :     }
     680           0 :     vpx_free(pbi->mt_uabove_row);
     681           0 :     pbi->mt_uabove_row = NULL;
     682             :   }
     683             : 
     684           0 :   if (pbi->mt_vabove_row) {
     685           0 :     for (i = 0; i < mb_rows; ++i) {
     686           0 :       vpx_free(pbi->mt_vabove_row[i]);
     687           0 :       pbi->mt_vabove_row[i] = NULL;
     688             :     }
     689           0 :     vpx_free(pbi->mt_vabove_row);
     690           0 :     pbi->mt_vabove_row = NULL;
     691             :   }
     692             : 
     693             :   /* Free left_col buffers, using the same per-row-then-array pattern. */
     694           0 :   if (pbi->mt_yleft_col) {
     695           0 :     for (i = 0; i < mb_rows; ++i) {
     696           0 :       vpx_free(pbi->mt_yleft_col[i]);
     697           0 :       pbi->mt_yleft_col[i] = NULL;
     698             :     }
     699           0 :     vpx_free(pbi->mt_yleft_col);
     700           0 :     pbi->mt_yleft_col = NULL;
     701             :   }
     702             : 
     703           0 :   if (pbi->mt_uleft_col) {
     704           0 :     for (i = 0; i < mb_rows; ++i) {
     705           0 :       vpx_free(pbi->mt_uleft_col[i]);
     706           0 :       pbi->mt_uleft_col[i] = NULL;
     707             :     }
     708           0 :     vpx_free(pbi->mt_uleft_col);
     709           0 :     pbi->mt_uleft_col = NULL;
     710             :   }
     711             : 
     712           0 :   if (pbi->mt_vleft_col) {
     713           0 :     for (i = 0; i < mb_rows; ++i) {
     714           0 :       vpx_free(pbi->mt_vleft_col[i]);
     715           0 :       pbi->mt_vleft_col[i] = NULL;
     716             :     }
     717           0 :     vpx_free(pbi->mt_vleft_col);
     718           0 :     pbi->mt_vleft_col = NULL;
     719             :   }
     720           0 : }
     721             : /* When b_multithreaded_rd reads non-zero: frees the old per-row buffers (walking prev_mb_rows entries), picks sync_range from width, and allocates mutexes and above_row/left_col buffers for pc->mb_rows rows. Otherwise does nothing. */
     722           0 : void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
     723           0 :   VP8_COMMON *const pc = &pbi->common;
     724             :   int i;
     725             :   int uv_width;
     726             : 
     727           0 :   if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {
     728           0 :     vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
     729             : 
     730             :     /* our internal buffers are always multiples of 16, so round width up */
     731           0 :     if ((width & 0xf) != 0) width += 16 - (width & 0xf);
     732             : 
     733           0 :     if (width < 640) { /* sync_range steps up with the rounded width: 1, 8, 16, 32 */
     734           0 :       pbi->sync_range = 1;
     735           0 :     } else if (width <= 1280) {
     736           0 :       pbi->sync_range = 8;
     737           0 :     } else if (width <= 2560) {
     738           0 :       pbi->sync_range = 16;
     739             :     } else {
     740           0 :       pbi->sync_range = 32;
     741             :     }
     742             : 
     743           0 :     uv_width = width >> 1;
     744             : 
     745             :     /* Allocate and initialize one mutex per mb row. */
     746           0 :     CHECK_MEM_ERROR(pbi->pmutex,
     747             :                     vpx_malloc(sizeof(*pbi->pmutex) * pc->mb_rows));
     748           0 :     if (pbi->pmutex) {
     749           0 :       for (i = 0; i < pc->mb_rows; ++i) {
     750           0 :         pthread_mutex_init(&pbi->pmutex[i], NULL);
     751             :       }
     752             :     }
     753             : 
     754             :     /* Allocate an int for each mb row. */
     755           0 :     CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);
     756             : 
     757             :     /* Allocate above_row buffers, one per mb row: width + 2 * VP8BORDERINPIXELS bytes for Y, uv_width + VP8BORDERINPIXELS for U and V. NOTE(review): vpx_memalign() presumably does not zero them -- confirm they are written before being read. */
     758           0 :     CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
     759           0 :     for (i = 0; i < pc->mb_rows; ++i)
     760           0 :       CHECK_MEM_ERROR(pbi->mt_yabove_row[i],
     761             :                       vpx_memalign(16, sizeof(unsigned char) *
     762             :                                            (width + (VP8BORDERINPIXELS << 1))));
     763             : 
     764           0 :     CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
     765           0 :     for (i = 0; i < pc->mb_rows; ++i)
     766           0 :       CHECK_MEM_ERROR(pbi->mt_uabove_row[i],
     767             :                       vpx_memalign(16, sizeof(unsigned char) *
     768             :                                            (uv_width + VP8BORDERINPIXELS)));
     769             : 
     770           0 :     CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
     771           0 :     for (i = 0; i < pc->mb_rows; ++i)
     772           0 :       CHECK_MEM_ERROR(pbi->mt_vabove_row[i],
     773             :                       vpx_memalign(16, sizeof(unsigned char) *
     774             :                                            (uv_width + VP8BORDERINPIXELS)));
     775             : 
     776             :     /* Allocate left_col buffers, one per mb row: 16 bytes for Y, 8 bytes each for U and V. */
     777           0 :     CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
     778           0 :     for (i = 0; i < pc->mb_rows; ++i)
     779           0 :       CHECK_MEM_ERROR(pbi->mt_yleft_col[i],
     780             :                       vpx_calloc(sizeof(unsigned char) * 16, 1));
     781             : 
     782           0 :     CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
     783           0 :     for (i = 0; i < pc->mb_rows; ++i)
     784           0 :       CHECK_MEM_ERROR(pbi->mt_uleft_col[i],
     785             :                       vpx_calloc(sizeof(unsigned char) * 8, 1));
     786             : 
     787           0 :     CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
     788           0 :     for (i = 0; i < pc->mb_rows; ++i)
     789           0 :       CHECK_MEM_ERROR(pbi->mt_vleft_col[i],
     790             :                       vpx_calloc(sizeof(unsigned char) * 8, 1));
     791             :   }
     792           0 : }
     793             : /* Tears down multithreaded decoding: clears b_multithreaded_rd, wakes and joins every started worker, destroys the semaphores, frees the thread bookkeeping and temp buffers, then destroys mt_mutex. */
     794           0 : void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
     795             :   /* shut down the MB decoding threads, if multithreading was enabled */
     796           0 :   if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {
     797             :     int i;
     798           0 :     protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0); /* thread_decoding_proc() breaks out of its loop when it reads 0 */
     799             : 
     800             :     /* wake each started worker so it sees the cleared flag, then wait for it to exit */
     801           0 :     for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {
     802           0 :       sem_post(&pbi->h_event_start_decoding[i]);
     803           0 :       pthread_join(pbi->h_decoding_thread[i], NULL);
     804             :     }
     805             : 
     806           0 :     for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {
     807           0 :       sem_destroy(&pbi->h_event_start_decoding[i]);
     808             :     }
     809             : 
     810           0 :     if (pbi->allocated_decoding_thread_count) { /* with a count of 0, vp8_decoder_create_threads() already destroyed this semaphore */
     811           0 :       sem_destroy(&pbi->h_event_end_decoding);
     812             :     }
     813             : 
     814           0 :     vpx_free(pbi->h_decoding_thread);
     815           0 :     pbi->h_decoding_thread = NULL;
     816             : 
     817           0 :     vpx_free(pbi->h_event_start_decoding);
     818           0 :     pbi->h_event_start_decoding = NULL;
     819             : 
     820           0 :     vpx_free(pbi->mb_row_di);
     821           0 :     pbi->mb_row_di = NULL;
     822             : 
     823           0 :     vpx_free(pbi->de_thread_data);
     824           0 :     pbi->de_thread_data = NULL;
     825             : 
     826           0 :     vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
     827             :   }
     828           0 :   pthread_mutex_destroy(&pbi->mt_mutex); /* outside the if: destroyed even when multithreading was never enabled */
     829           0 : }
     830             : /* Decodes a frame's MB rows with the worker threads: prepares the above_row/left_col borders (or the frame-buffer top line when filter_level is 0), posts every worker's start semaphore, decodes on the calling thread too, then waits for h_event_end_decoding. */
     831           0 : void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) {
     832           0 :   VP8_COMMON *pc = &pbi->common;
     833             :   unsigned int i;
     834             :   int j;
     835             : 
     836           0 :   int filter_level = pc->filter_level;
     837           0 :   YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
     838             : 
     839           0 :   if (filter_level) {
     840             :     /* Set above_row buffer to 127 for decoding first MB row */
     841           0 :     memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS - 1, 127,
     842           0 :            yv12_fb_new->y_width + 5);
     843           0 :     memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127,
     844           0 :            (yv12_fb_new->y_width >> 1) + 5);
     845           0 :     memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127,
     846           0 :            (yv12_fb_new->y_width >> 1) + 5);
     847             : 
     848           0 :     for (j = 1; j < pc->mb_rows; ++j) { /* for the remaining rows only one byte per plane is set, to 129 */
     849           0 :       memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS - 1, (unsigned char)129,
     850             :              1);
     851           0 :       memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1,
     852             :              (unsigned char)129, 1);
     853           0 :       memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1,
     854             :              (unsigned char)129, 1);
     855             :     }
     856             : 
     857             :     /* Set left_col to 129 initially */
     858           0 :     for (j = 0; j < pc->mb_rows; ++j) {
     859           0 :       memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
     860           0 :       memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
     861           0 :       memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
     862             :     }
     863             : 
     864             :     /* Initialize the loop filter for this frame. */
     865           0 :     vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
     866             :   } else {
     867           0 :     vp8_setup_intra_recon_top_line(yv12_fb_new);
     868             :   }
     869             : 
     870           0 :   setup_decoding_thread_data(pbi, xd, pbi->mb_row_di,
     871           0 :                              pbi->decoding_thread_count);
     872             : 
     873           0 :   for (i = 0; i < pbi->decoding_thread_count; ++i) { /* release every worker blocked in thread_decoding_proc() */
     874           0 :     sem_post(&pbi->h_event_start_decoding[i]);
     875             :   }
     876             : 
     877           0 :   mt_decode_mb_rows(pbi, xd, 0); /* the calling thread decodes too, passing 0 where workers pass ithread + 1 */
     878             : 
     879           0 :   sem_wait(&pbi->h_event_end_decoding); /* block until the thread that processed the last mb row posts end-of-frame */
     880           0 : }

Generated by: LCOV version 1.13