LCOV - code coverage report
Current view: top level - gfx/skia/skia/src/opts - SkBlend_opts.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 98 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 13 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright 2016 Google Inc.
       3             :  *
       4             :  * Use of this source code is governed by a BSD-style license that can be
       5             :  * found in the LICENSE file.
       6             :  */
       7             : 
       8             : /*
       9             : ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench  --samples 300 --nompd --match LinearSrcOver -q
      10             :  */
      11             : 
      12             : #ifndef SkBlend_opts_DEFINED
      13             : #define SkBlend_opts_DEFINED
      14             : 
      15             : #include "SkNx.h"
      16             : #include "SkPM4fPriv.h"
      17             : 
      18             : #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
      19             :     #include <immintrin.h>
      20             : #endif
      21             : 
      22             : namespace SK_OPTS_NS {
      23             : 
      24           0 : static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {  // Composites one packed 32-bit src pixel over *dst, writing the result back to *dst.
      25           0 :     if (src >= 0xFF000000) {  // Top byte is 0xFF: src simply replaces dst, no arithmetic needed.
      26           0 :         *dst = src;
      27           0 :         return;
      28             :     }
      29           0 :     auto d = Sk4f_fromS32(*dst),  // Unpack both pixels into 4-lane float vectors; helper is declared outside this file (presumably sRGB -> linear float, confirm in the Sk headers).
      30           0 :          s = Sk4f_fromS32( src);
      31           0 :     *dst = Sk4f_toS32(s + d * (1.0f - s[3]));  // result = s + d * (1 - s[3]); lane 3 is presumably the alpha lane (matches the top-byte test above) - confirm.
      32             : }
      33             : 
      34           0 : static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {  // Composites src[0..3] over dst[0..3], one pixel at a time via srcover_srgb_srgb_1.
      35           0 :     srcover_srgb_srgb_1(dst++, *src++);
      36           0 :     srcover_srgb_srgb_1(dst++, *src++);
      37           0 :     srcover_srgb_srgb_1(dst++, *src++);
      38           0 :     srcover_srgb_srgb_1(dst  , *src  );  // Last pixel: no post-increment needed, the local pointers are not used again.
      39           0 : }
      40             : 
      41             : #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
      42             : 
      43           0 :     static inline __m128i load(const uint32_t* p) {  // Reads four consecutive 32-bit pixels starting at p into one 128-bit register.
      44           0 :         return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));  // loadu: p does not have to be 16-byte aligned.
      45             :     }
      46             : 
      47           0 :     static inline void store(uint32_t* p, __m128i v) {  // Writes the four 32-bit pixels held in v to p[0..3].
      48             :         _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);  // storeu: p does not have to be 16-byte aligned.
      49           0 :     }
      50             : 
      51             :     #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
      52             : 
      53           0 :         static void srcover_srgb_srgb(  // SSE4.1 variant: composites src over ndst dst pixels; the nsrc-pixel source is restarted from srcStart whenever it runs out.
      54             :             uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {  // NOTE(review): nsrc <= 0 makes count 0, so ndst never shrinks - callers presumably pass nsrc > 0.
      55           0 :             const __m128i alphaMask = _mm_set1_epi32(0xFF000000);  // Selects the top byte of each of the four 32-bit lanes.
      56           0 :             while (ndst > 0) {
      57           0 :                 int count = SkTMin(ndst, nsrc);  // Pixels handled in this pass over the source.
      58           0 :                 ndst -= count;
      59           0 :                 const uint32_t* src = srcStart;
      60           0 :                 const uint32_t* end = dst + (count & ~3);  // End of the part that can be processed four pixels at a time.
      61           0 :                 ptrdiff_t delta = src - dst;  // Element offset such that dst + delta is the source pixel matching dst.
      62             : 
      63           0 :                 while (dst < end) {
      64           0 :                     __m128i pixels = load(src);
      65           0 :                     if (_mm_testc_si128(pixels, alphaMask)) {  // Every mask bit is set in pixels: all four top bytes are 0xFF, so copy src straight to dst.
      66           0 :                          uint32_t* start = dst;
      67           0 :                         do {
      68           0 :                             store(dst, pixels);
      69           0 :                             dst += 4;
      70             :                         } while (dst < end  // Stay in the copy loop for as long as the next group of four also passes the test.
      71           0 :                                  && _mm_testc_si128(pixels = load(dst + delta), alphaMask));
      72           0 :                         src += dst - start;  // src was not advanced inside the loop; catch it up with dst.
      73           0 :                     } else if (_mm_testz_si128(pixels, alphaMask)) {  // No mask bit is set in pixels: all four top bytes are 0x00, so dst is left untouched.
      74           0 :                         do {
      75           0 :                             dst += 4;
      76           0 :                             src += 4;
      77             :                         } while (dst < end  // Keep skipping while the next group of four also has all-zero top bytes.
      78           0 :                                  && _mm_testz_si128(pixels = load(src), alphaMask));
      79             :                     } else {  // Mixed group: blend the four pixels individually.
      80           0 :                         uint32_t* start = dst;
      81           0 :                         do {
      82           0 :                             srcover_srgb_srgb_4(dst, dst + delta);
      83           0 :                             dst += 4;
      84             :                         } while (dst < end  // testnzc: the masked bits are neither all set nor all clear, i.e. the group is still mixed.
      85           0 :                                  && _mm_testnzc_si128(pixels = load(dst + delta), alphaMask));
      86           0 :                         src += dst - start;  // Catch src up with dst, as in the copy branch.
      87             :                     }
      88             :                 }
      89             : 
      90           0 :                 count = count & 3;  // 0..3 leftover pixels that did not fill a group of four.
      91           0 :                 while (count-- > 0) {
      92           0 :                     srcover_srgb_srgb_1(dst++, *src++);
      93             :                 }
      94             :             }
      95           0 :         }
      96             :     #else
      97             :     // SSE2 versions
      98             : 
      99             :         // Note: In the next three comparisons a group of 4 pixels is converted to a group of
     100             :         // "signed" pixels because SSE2 does not have an unsigned comparison.
     101             :         // Make it so that we can use the signed comparison operators by biasing
     102             :         // 0x00xxxxxx to 0x80xxxxxx, which are the smallest values, and biasing 0xffxxxxxx to
     103             :         // 0x7fxxxxxx, which are the largest values.
     104           0 :         static inline bool check_opaque_alphas(__m128i pixels) {  // True when all four 32-bit pixels have a top byte of 0xFF.
     105           0 :             __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x80000000));  // Flip the top bit so unsigned ordering maps onto signed ordering.
     106             :             int mask =
     107           0 :                 _mm_movemask_epi8(
     108           0 :                     _mm_cmplt_epi32(signedPixels, _mm_set1_epi32(0x7F000000)));  // Lane is all-ones when the pixel is below 0xFF000000 (unbiased), i.e. not opaque.
     109           0 :             return mask == 0;  // No lane flagged: every pixel is opaque.
     110             :         }
     111             : 
     112           0 :         static inline bool check_transparent_alphas(__m128i pixels) {  // True when all four 32-bit pixels have a top byte of 0x00.
     113           0 :             __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x80000000));  // Flip the top bit so unsigned ordering maps onto signed ordering.
     114             :             int mask =
     115           0 :                 _mm_movemask_epi8(
     116           0 :                     _mm_cmpgt_epi32(signedPixels, _mm_set1_epi32(0x80FFFFFF)));  // Lane is all-ones when the pixel is above 0x00FFFFFF (unbiased), i.e. its top byte is non-zero.
     117           0 :             return mask == 0;  // No lane flagged: every pixel has a zero top byte.
     118             :         }
     119             : 
     120           0 :         static inline bool check_partial_alphas(__m128i pixels) {  // True only when every one of the four pixels has a top byte strictly between 0x00 and 0xFF.
     121           0 :             __m128i signedPixels = _mm_xor_si128(pixels, _mm_set1_epi32(0x80000000));  // Flip the top bit so unsigned ordering maps onto signed ordering.
     122           0 :             __m128i opaque       = _mm_cmplt_epi32(signedPixels, _mm_set1_epi32(0x7F000000));  // Despite the name, a lane is all-ones when the pixel is NOT opaque (top byte < 0xFF).
     123           0 :             __m128i transparent  = _mm_cmpgt_epi32(signedPixels, _mm_set1_epi32(0x80FFFFFF));  // Despite the name, a lane is all-ones when the pixel is NOT transparent (top byte > 0x00).
     124           0 :             int mask             = _mm_movemask_epi8(_mm_xor_si128(opaque, transparent));  // Lanes where both flags agree XOR to zero; both cannot be clear at once, so zero means "neither opaque nor transparent".
     125           0 :             return mask == 0;  // Any fully opaque or fully transparent pixel leaves bits set and yields false.
     126             :         }
     127             : 
     128           0 :         static void srcover_srgb_srgb(  // SSE2 variant: composites src over ndst dst pixels; the nsrc-pixel source is restarted from srcStart whenever it runs out.
     129             :             uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {  // NOTE(review): nsrc <= 0 makes count 0, so ndst never shrinks - callers presumably pass nsrc > 0.
     130           0 :             while (ndst > 0) {
     131           0 :                 int count = SkTMin(ndst, nsrc);  // Pixels handled in this pass over the source.
     132           0 :                 ndst -= count;
     133           0 :                 const uint32_t* src = srcStart;
     134           0 :                 const uint32_t* end = dst + (count & ~3);  // End of the part that can be processed four pixels at a time.
     135           0 :                 const ptrdiff_t delta = src - dst;  // Element offset such that dst + delta is the source pixel matching dst.
     136             : 
     137           0 :                 while (dst < end) {  // Pre-tested loop: with count < 4 (end == dst) no 4-wide load, store or blend may touch memory.
     138           0 :                     __m128i pixels = load(src);  // src == dst + delta here; every branch below re-synchronises src before looping.
     139           0 :                     if (check_opaque_alphas(pixels)) {  // All four opaque: copy src straight to dst.
     140           0 :                         uint32_t* start = dst;
     141           0 :                         do {
     142           0 :                             store(dst, pixels);
     143           0 :                             dst += 4;
     144           0 :                         } while (dst < end && check_opaque_alphas((pixels = load(dst + delta))));
     145           0 :                         src += dst - start;  // src was not advanced inside the loop; catch it up with dst.
     146           0 :                     } else if (check_transparent_alphas(pixels)) {  // All four transparent: dst is left untouched.
     147           0 :                         const uint32_t* start = dst;
     148           0 :                         do {
     149           0 :                             dst += 4;
     150           0 :                         } while (dst < end && check_transparent_alphas(pixels = load(dst + delta)));
     151           0 :                         src += dst - start;
     152             :                     } else {  // Anything else: blend the four pixels individually.
     153           0 :                         const uint32_t* start = dst;
     154           0 :                         do {
     155           0 :                             srcover_srgb_srgb_4(dst, dst + delta);
     156           0 :                             dst += 4;
     157           0 :                         } while (dst < end && check_partial_alphas(pixels = load(dst + delta)));  // Continue only while all four next pixels are partially transparent.
     158           0 :                         src += dst - start;
     159             :                     }
     160           0 :                 }
     161             : 
     162           0 :                 count = count & 3;  // 0..3 leftover pixels that did not fill a group of four.
     163           0 :                 while (count-- > 0) {
     164           0 :                     srcover_srgb_srgb_1(dst++, *src++);
     165             :                 }
     166             :             }
     167           0 :         }
     168             :     #endif
     169             : #else
     170             : 
     171             :     static void srcover_srgb_srgb(  // Portable variant: composites src over ndst dst pixels, reusing the nsrc source pixels from the start whenever they run out.
     172             :         uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {  // NOTE(review): nsrc <= 0 makes n 0, so ndst never shrinks - callers presumably pass nsrc > 0.
     173             :         while (ndst > 0) {
     174             :             int n = SkTMin(ndst, nsrc);  // Pixels handled in this pass over the source.
     175             : 
     176             :             for (int i = 0; i < n; i++) {
     177             :                 srcover_srgb_srgb_1(dst++, src[i]);  // dst keeps advancing across passes; src is always indexed from its start.
     178             :             }
     179             :             ndst -= n;
     180             :         }
     181             :     }
     182             : 
     183             : #endif
     184             : 
     185             : }  // namespace SK_OPTS_NS
     186             : 
     187             : #endif//SkBlend_opts_DEFINED

Generated by: LCOV version 1.13