LCOV - code coverage report
Current view: top level - gfx/skia/skia/src/opts - Sk4px_SSE2.h (source / functions) Hit Total Coverage
Test: output.info Lines: 27 49 55.1 %
Date: 2017-07-14 16:53:18 Functions: 13 17 76.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright 2015 Google Inc.
       3             :  *
       4             :  * Use of this source code is governed by a BSD-style license that can be
       5             :  * found in the LICENSE file.
       6             :  */
       7             : 
       8             : namespace { // See Sk4px.h
       9             : 
      10         102 : inline Sk4px Sk4px::DupPMColor(SkPMColor px) { return Sk16b(_mm_set1_epi32(px)); }
      11             : 
      12        3340 : inline Sk4px Sk4px::Load4(const SkPMColor px[4]) {
      13        6680 :     return Sk16b(_mm_loadu_si128((const __m128i*)px));
      14             : }
      15         238 : inline Sk4px Sk4px::Load2(const SkPMColor px[2]) {
      16         476 :     return Sk16b(_mm_loadl_epi64((const __m128i*)px));
      17             : }
      18         258 : inline Sk4px Sk4px::Load1(const SkPMColor px[1]) { return Sk16b(_mm_cvtsi32_si128(*px)); }
      19             : 
      20        6680 : inline void Sk4px::store4(SkPMColor px[4]) const { _mm_storeu_si128((__m128i*)px, this->fVec); }
      21         476 : inline void Sk4px::store2(SkPMColor px[2]) const { _mm_storel_epi64((__m128i*)px, this->fVec); }
      22         172 : inline void Sk4px::store1(SkPMColor px[1]) const { *px = _mm_cvtsi128_si32(this->fVec); }
      23             : 
      24       21984 : inline Sk4px::Wide Sk4px::widenLo() const {
      25       65952 :     return Sk16h(_mm_unpacklo_epi8(this->fVec, _mm_setzero_si128()),
      26       87936 :                  _mm_unpackhi_epi8(this->fVec, _mm_setzero_si128()));
      27             : }
      28             : 
      29             : inline Sk4px::Wide Sk4px::widenHi() const {
      30             :     return Sk16h(_mm_unpacklo_epi8(_mm_setzero_si128(), this->fVec),
      31             :                  _mm_unpackhi_epi8(_mm_setzero_si128(), this->fVec));
      32             : }
      33             : 
      34           0 : inline Sk4px::Wide Sk4px::widenLoHi() const {
      35           0 :     return Sk16h(_mm_unpacklo_epi8(this->fVec, this->fVec),
      36           0 :                  _mm_unpackhi_epi8(this->fVec, this->fVec));
      37             : }
      38             : 
      39        7328 : inline Sk4px::Wide Sk4px::mulWiden(const Sk16b& other) const {
      40        7328 :     return this->widenLo() * Sk4px(other).widenLo();
      41             : }
      42             : 
      43        7328 : inline Sk4px Sk4px::Wide::addNarrowHi(const Sk16h& other) const {
      44        7328 :     Sk4px::Wide r = (*this + other) >> 8;
      45       21984 :     return Sk4px(_mm_packus_epi16(r.fLo.fVec, r.fHi.fVec));
      46             : }
      47             : 
      48           0 : inline Sk4px Sk4px::Wide::div255() const {
      49             :     // (x + 127) / 255 == ((x+128) * 257)>>16,
      50             :     // and _mm_mulhi_epu16 makes the (_ * 257)>>16 part very convenient.
      51           0 :     const __m128i _128 = _mm_set1_epi16(128),
      52           0 :                   _257 = _mm_set1_epi16(257);
      53           0 :     return Sk4px(_mm_packus_epi16(_mm_mulhi_epu16(_mm_add_epi16(fLo.fVec, _128), _257),
      54           0 :                                   _mm_mulhi_epu16(_mm_add_epi16(fHi.fVec, _128), _257)));
      55             : }
      56             : 
      57             : // Load4Alphas and Load2Alphas use possibly-unaligned loads (SkAlpha[] -> uint16_t or uint32_t).
      58             : // These are safe on x86, often with no speed penalty.
      59             : 
      60             : #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
      61         828 :     inline Sk4px Sk4px::alphas() const {
      62             :         static_assert(SK_A32_SHIFT == 24, "Intel's always little-endian.");
      63         828 :         __m128i splat = _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3);
      64        2484 :         return Sk16b(_mm_shuffle_epi8(this->fVec, splat));
      65             :     }
      66             : 
      67        3578 :     inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) {
      68        3578 :         uint32_t as = *(const uint32_t*)a;
      69        3578 :         __m128i splat = _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0);
      70       14312 :         return Sk16b(_mm_shuffle_epi8(_mm_cvtsi32_si128(as), splat));
      71             :     }
      72             : #else
      73           0 :     inline Sk4px Sk4px::alphas() const {
      74             :         static_assert(SK_A32_SHIFT == 24, "Intel's always little-endian.");
      75             :         // We exploit that A >= rgb for any premul pixel.
      76           0 :         __m128i as = fVec;                             // 3xxx 2xxx 1xxx 0xxx
      77           0 :         as = _mm_max_epu8(as, _mm_srli_epi32(as,  8)); // 33xx 22xx 11xx 00xx
      78           0 :         as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000
      79           0 :         return Sk16b(as);
      80             :     }
      81             : 
      82           0 :     inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) {
      83           0 :         __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a);  // ____ ____ ____ 3210
      84           0 :         as = _mm_unpacklo_epi8 (as, as);                      // ____ ____ 3322 1100
      85           0 :         as = _mm_unpacklo_epi16(as, as);                      // 3333 2222 1111 0000
      86           0 :         return Sk16b(as);
      87             :     }
      88             : #endif
      89             : 
      90         238 : inline Sk4px Sk4px::Load2Alphas(const SkAlpha a[2]) {
      91         238 :     uint32_t as = *(const uint16_t*)a;   // Aa -> Aa00
      92         238 :     return Load4Alphas((const SkAlpha*)&as);
      93             : }
      94             : 
      95           0 : inline Sk4px Sk4px::zeroColors() const {
      96           0 :     return Sk16b(_mm_and_si128(_mm_set1_epi32(0xFF << SK_A32_SHIFT), this->fVec));
      97             : }
      98             : 
      99           0 : inline Sk4px Sk4px::zeroAlphas() const {
     100             :     // andnot(a,b) == ~a & b
     101           0 :     return Sk16b(_mm_andnot_si128(_mm_set1_epi32(0xFF << SK_A32_SHIFT), this->fVec));
     102             : }
     103             : 
     104             : }  // namespace

Generated by: LCOV version 1.13