LCOV - code coverage report
Current view: top level - media/libtheora/lib/x86 - mmxstate.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 85 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 5 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /********************************************************************
       2             :  *                                                                  *
       3             :  * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
       4             :  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
       5             :  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
       6             :  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
       7             :  *                                                                  *
       8             :  * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
       9             :  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
      10             :  *                                                                  *
      11             :  ********************************************************************
      12             : 
      13             :   function:
      14             :     last mod: $Id: mmxstate.c 17563 2010-10-25 17:40:54Z tterribe $
      15             : 
      16             :  ********************************************************************/
      17             : 
      18             : /*MMX acceleration of complete fragment reconstruction algorithm.
      19             :   Originally written by Rudolf Marek.*/
      20             : #include <string.h>
      21             : #include "x86int.h"
      22             : #include "mmxloop.h"
      23             : 
      24             : #if defined(OC_X86_ASM)
      25             : 
      26           0 : void oc_state_frag_recon_mmx(const oc_theora_state *_state,ptrdiff_t _fragi,
      27             :  int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
      28             :   unsigned char *dst;
      29             :   ptrdiff_t      frag_buf_off;
      30             :   int            ystride;
      31             :   int            refi;
      32             :   /*Apply the inverse transform; its result is read from _dct_coeffs+64 by the reconstruction calls below.*/
      33             :   /*Special case only having a DC component.*/
      34           0 :   if(_last_zzi<2){
      35             :     /*Note that this value must be unsigned, to keep the __asm__ block from
      36             :        sign-extending it when it puts it in a register.*/
      37             :     ogg_uint16_t p;
      38             :     int          i;
      39             :     /*We round this dequant product (and not any of the others) because there's
      40             :        no iDCT rounding.  Note that + binds tighter than >>: this is (product+15)>>5.*/
      41           0 :     p=(ogg_int16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
      42             :     /*Broadcast p into all four 16-bit lanes of mm0; the loop below stores mm0 to _dct_coeffs+64 onwards.*/
      43           0 :     __asm__ __volatile__(
      44             :       /*mm0=0000 0000 0000 AAAA*/
      45             :       "movd %[p],%%mm0\n\t"
      46             :       /*mm0=0000 0000 AAAA AAAA*/
      47             :       "punpcklwd %%mm0,%%mm0\n\t"
      48             :       /*mm0=AAAA AAAA AAAA AAAA*/
      49             :       "punpckldq %%mm0,%%mm0\n\t"
      50             :       :
      51           0 :       :[p]"r"((unsigned)p)
      52             :     );
      53           0 :     for(i=0;i<4;i++){ /*Each pass targets the 16 values starting at _dct_coeffs+64+16*i (presumably OC_MEM_OFFS adds a byte offset to operand y).*/
      54           0 :       __asm__ __volatile__(
      55             :         "movq %%mm0,"OC_MEM_OFFS(0x00,y)"\n\t"
      56             :         "movq %%mm0,"OC_MEM_OFFS(0x08,y)"\n\t"
      57             :         "movq %%mm0,"OC_MEM_OFFS(0x10,y)"\n\t"
      58             :         "movq %%mm0,"OC_MEM_OFFS(0x18,y)"\n\t"
      59           0 :         :[y]"=m"OC_ARRAY_OPERAND(ogg_int16_t,_dct_coeffs+64+16*i,16)
      60             :       );
      61             :     }
      62             :   }
      63             :   else{
      64             :     /*Dequantize the DC coefficient in place, then run the general iDCT.*/
      65           0 :     _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
      66           0 :     oc_idct8x8(_state,_dct_coeffs+64,_dct_coeffs,_last_zzi);
      67             :   }
      68             :   /*Fill in the target buffer: this fragment's offset within the OC_FRAME_SELF frame.*/
      69           0 :   frag_buf_off=_state->frag_buf_offs[_fragi];
      70           0 :   refi=_state->frags[_fragi].refi;
      71           0 :   ystride=_state->ref_ystride[_pli];
      72           0 :   dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
      73           0 :   if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64); /*A fragment whose refi is OC_FRAME_SELF takes the intra path, which is given no reference pixels.*/
      74             :   else{
      75             :     const unsigned char *ref;
      76             :     int                  mvoffsets[2];
      77           0 :     ref=_state->ref_frame_data[refi]+frag_buf_off;
      78           0 :     if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
      79           0 :      _state->frag_mvs[_fragi])>1){ /*A return value above 1 selects the two-reference routine, which receives both offsets.*/
      80           0 :       oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
      81           0 :        _dct_coeffs+64);
      82             :     }
      83           0 :     else oc_frag_recon_inter_mmx(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
      84             :   }
      85           0 : }
      86             : 
      87             : /*We copy the entire loop filter function for each variant below to inline the
      88             :    actual MMX routines so that we use only a single indirect call.*/
      89             : 
      90           0 : void oc_loop_filter_init_mmx(signed char _bv[256],int _flimit){
      91           0 :   memset(_bv,_flimit,8); /*Only the first 8 bytes of _bv are written, each set to _flimit (converted to unsigned char by memset).*/
      92           0 : }
      93             : 
      94             : /*Apply the loop filter to a given set of fragment rows in the given plane.
      95             :   The filter may be run on the bottom edge, affecting pixels in the next row of
      96             :    fragments, so this row also needs to be available.
      97             :   _bv:        The bounding values array (not referenced in this function body; the limit is rebuilt locally in ll).
      98             :   _refi:      The index of the frame buffer to filter.
      99             :   _pli:       The color plane to filter.
     100             :   _fragy0:    The Y coordinate of the first fragment row to filter.
     101             :   _fragy_end: The Y coordinate of the fragment row to stop filtering at (exclusive: this row itself is not visited).*/
     102           0 : void oc_state_loop_filter_frag_rows_mmx(const oc_theora_state *_state,
     103             :  signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
     104             :   OC_ALIGN8(unsigned char   ll[8]);
     105             :   const oc_fragment_plane *fplane;
     106             :   const oc_fragment       *frags;
     107             :   const ptrdiff_t         *frag_buf_offs;
     108             :   unsigned char           *ref_frame_data;
     109             :   ptrdiff_t                fragi_top;
     110             :   ptrdiff_t                fragi_bot;
     111             :   ptrdiff_t                fragi0;
     112             :   ptrdiff_t                fragi0_end;
     113             :   int                      ystride;
     114             :   int                      nhfrags;
     115           0 :   memset(ll,_state->loop_filter_limits[_state->qis[0]],sizeof(ll)); /*Replicate the limit selected by qis[0] into all 8 bytes of ll.*/
     116           0 :   fplane=_state->fplanes+_pli;
     117           0 :   nhfrags=fplane->nhfrags;
     118           0 :   fragi_top=fplane->froffset;
     119           0 :   fragi_bot=fragi_top+fplane->nfrags;
     120           0 :   fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
     121           0 :   fragi0_end=fragi0+(_fragy_end-_fragy0)*(ptrdiff_t)nhfrags;
     122           0 :   ystride=_state->ref_ystride[_pli];
     123           0 :   frags=_state->frags;
     124           0 :   frag_buf_offs=_state->frag_buf_offs;
     125           0 :   ref_frame_data=_state->ref_frame_data[_refi];
     126             :   /*The following loops are constructed somewhat non-intuitively on purpose.
     127             :     The main idea is: if a block boundary has at least one coded fragment on
     128             :      it, the filter is applied to it.
     129             :     However, the order that the filters are applied in matters, and VP3 chose
     130             :      the somewhat strange ordering used below.*/
     131           0 :   while(fragi0<fragi0_end){
     132             :     ptrdiff_t fragi;
     133             :     ptrdiff_t fragi_end;
     134           0 :     fragi=fragi0;
     135           0 :     fragi_end=fragi+nhfrags;
     136           0 :     while(fragi<fragi_end){
     137           0 :       if(frags[fragi].coded){
     138             :         unsigned char *ref;
     139           0 :         ref=ref_frame_data+frag_buf_offs[fragi];
     140           0 :         if(fragi>fragi0){ /*Left edge; skipped for the first fragment of the row.*/
     141           0 :           OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMX,ref,ystride,ll);
     142             :         }
     143           0 :         if(fragi0>fragi_top){ /*Top edge; skipped for the first fragment row of the plane.*/
     144           0 :           OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMX,ref,ystride,ll);
     145             :         }
     146           0 :         if(fragi+1<fragi_end&&!frags[fragi+1].coded){ /*Right edge, only if the right neighbor is in this row and uncoded (a coded one filters it as its own left edge).*/
     147           0 :           OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMX,ref+8,ystride,ll);
     148             :         }
     149           0 :         if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ /*Bottom edge (8 rows down), only if the fragment below is in this plane and uncoded.*/
     150           0 :           OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMX,ref+(ystride<<3),ystride,ll);
     151             :         }
     152             :       }
     153           0 :       fragi++;
     154             :     }
     155           0 :     fragi0+=nhfrags;
     156             :   }
     157           0 : }
     158             : 
     159           0 : void oc_loop_filter_init_mmxext(signed char _bv[256],int _flimit){
     160           0 :   memset(_bv,~(_flimit<<1),8); /*Only the first 8 bytes of _bv are written, each set to the low byte of ~(2*_flimit).*/
     161           0 : }
     162             : 
     163             : /*Apply the loop filter to a given set of fragment rows in the given plane.
     164             :   The filter may be run on the bottom edge, affecting pixels in the next row of
     165             :    fragments, so this row also needs to be available.
     166             :   _bv:        The bounding values array; handed unchanged to the filter macros below.
     167             :   _refi:      The index of the frame buffer to filter.
     168             :   _pli:       The color plane to filter.
     169             :   _fragy0:    The Y coordinate of the first fragment row to filter.
     170             :   _fragy_end: The Y coordinate of the fragment row to stop filtering at (exclusive: this row itself is not visited).*/
     171           0 : void oc_state_loop_filter_frag_rows_mmxext(const oc_theora_state *_state,
     172             :  signed char _bv[256],int _refi,int _pli,int _fragy0,int _fragy_end){
     173             :   const oc_fragment_plane *fplane;
     174             :   const oc_fragment       *frags;
     175             :   const ptrdiff_t         *frag_buf_offs;
     176             :   unsigned char           *ref_frame_data;
     177             :   ptrdiff_t                fragi_top;
     178             :   ptrdiff_t                fragi_bot;
     179             :   ptrdiff_t                fragi0;
     180             :   ptrdiff_t                fragi0_end;
     181             :   int                      ystride;
     182             :   int                      nhfrags;
     183           0 :   fplane=_state->fplanes+_pli;
     184           0 :   nhfrags=fplane->nhfrags;
     185           0 :   fragi_top=fplane->froffset;
     186           0 :   fragi_bot=fragi_top+fplane->nfrags;
     187           0 :   fragi0=fragi_top+_fragy0*(ptrdiff_t)nhfrags;
     188           0 :   fragi0_end=fragi_top+_fragy_end*(ptrdiff_t)nhfrags;
     189           0 :   ystride=_state->ref_ystride[_pli];
     190           0 :   frags=_state->frags;
     191           0 :   frag_buf_offs=_state->frag_buf_offs;
     192           0 :   ref_frame_data=_state->ref_frame_data[_refi];
     193             :   /*The following loops are constructed somewhat non-intuitively on purpose.
     194             :     The main idea is: if a block boundary has at least one coded fragment on
     195             :      it, the filter is applied to it.
     196             :     However, the order that the filters are applied in matters, and VP3 chose
     197             :      the somewhat strange ordering used below.*/
     198           0 :   while(fragi0<fragi0_end){
     199             :     ptrdiff_t fragi;
     200             :     ptrdiff_t fragi_end;
     201           0 :     fragi=fragi0;
     202           0 :     fragi_end=fragi+nhfrags;
     203           0 :     while(fragi<fragi_end){
     204           0 :       if(frags[fragi].coded){
     205             :         unsigned char *ref;
     206           0 :         ref=ref_frame_data+frag_buf_offs[fragi];
     207           0 :         if(fragi>fragi0){ /*Left edge; skipped for the first fragment of the row.*/
     208           0 :           OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMXEXT,ref,ystride,_bv);
     209             :         }
     210           0 :         if(fragi0>fragi_top){ /*Top edge; skipped for the first fragment row of the plane.*/
     211           0 :           OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMXEXT,ref,ystride,_bv);
     212             :         }
     213           0 :         if(fragi+1<fragi_end&&!frags[fragi+1].coded){ /*Right edge, only if the right neighbor is in this row and uncoded (a coded one filters it as its own left edge).*/
     214           0 :           OC_LOOP_FILTER_H(OC_LOOP_FILTER8_MMXEXT,ref+8,ystride,_bv);
     215             :         }
     216           0 :         if(fragi+nhfrags<fragi_bot&&!frags[fragi+nhfrags].coded){ /*Bottom edge (8 rows down), only if the fragment below is in this plane and uncoded.*/
     217           0 :           OC_LOOP_FILTER_V(OC_LOOP_FILTER8_MMXEXT,ref+(ystride<<3),ystride,_bv);
     218             :         }
     219             :       }
     220           0 :       fragi++;
     221             :     }
     222           0 :     fragi0+=nhfrags;
     223             :   }
     224           0 : }
     225             : 
     226             : #endif

Generated by: LCOV version 1.13