LCOV - code coverage report
Current view: top level - media/libyuv/libyuv/source - rotate_gcc.cc (source / functions) Hit Total Coverage
Test: output.info Lines: 0 16 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 3 0.0 %
Legend: Lines: hit / not hit (color coding lost in plain-text export)

          Line data    Source code
       1             : /*
       2             :  *  Copyright 2015 The LibYuv Project Authors. All rights reserved.
       3             :  *
       4             :  *  Use of this source code is governed by a BSD-style license
       5             :  *  that can be found in the LICENSE file in the root of the source
       6             :  *  tree. An additional intellectual property rights grant can be found
       7             :  *  in the file PATENTS. All contributing project authors may
       8             :  *  be found in the AUTHORS file in the root of the source tree.
       9             :  */
      10             : 
      11             : #include "libyuv/row.h"
      12             : #include "libyuv/rotate_row.h"
      13             : 
      14             : #ifdef __cplusplus
      15             : namespace libyuv {
      16             : extern "C" {
      17             : #endif
      18             : 
      19             : // This module is for GCC x86 and x64.
      20             : #if !defined(LIBYUV_DISABLE_X86) && \
      21             :     (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
      22             : 
      23             : // Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
      24             : #if defined(HAS_TRANSPOSEWX8_SSSE3)
       25           0 : void TransposeWx8_SSSE3(const uint8* src,
       26             :                         int src_stride,
       27             :                         uint8* dst,
       28             :                         int dst_stride,
       29             :                         int width) {
                      :   // Transposes a (width x 8)-byte tile: reads 8 rows of src (rows
                      :   // src_stride bytes apart) and writes them transposed into dst (rows
                      :   // dst_stride bytes apart), handling 8 columns per loop pass
                      :   // ("sub $0x8,%2" ... "jg 1b").
                      :   // asm operands: %0=src, %1=dst, %2=width, %3=src_stride, %4=dst_stride.
                      :   // NOTE(review): the loop structure assumes width is a positive
                      :   // multiple of 8 -- confirm against callers.
       30             :   asm volatile(
       31             :       // Read in the data from the source pointer.
       32             :       // First round of bit swap.
       33             :       LABELALIGN
       34             :       "1:                                          \n"
       35             :       "movq       (%0),%%xmm0                      \n"
       36             :       "movq       (%0,%3),%%xmm1                   \n"
       37             :       "lea        (%0,%3,2),%0                     \n"
       38             :       "punpcklbw  %%xmm1,%%xmm0                    \n"
       39             :       "movq       (%0),%%xmm2                      \n"
       40             :       "movdqa     %%xmm0,%%xmm1                    \n"
       41             :       "palignr    $0x8,%%xmm1,%%xmm1               \n"
       42             :       "movq       (%0,%3),%%xmm3                   \n"
       43             :       "lea        (%0,%3,2),%0                     \n"
       44             :       "punpcklbw  %%xmm3,%%xmm2                    \n"
       45             :       "movdqa     %%xmm2,%%xmm3                    \n"
       46             :       "movq       (%0),%%xmm4                      \n"
       47             :       "palignr    $0x8,%%xmm3,%%xmm3               \n"
       48             :       "movq       (%0,%3),%%xmm5                   \n"
       49             :       "lea        (%0,%3,2),%0                     \n"
       50             :       "punpcklbw  %%xmm5,%%xmm4                    \n"
       51             :       "movdqa     %%xmm4,%%xmm5                    \n"
       52             :       "movq       (%0),%%xmm6                      \n"
       53             :       "palignr    $0x8,%%xmm5,%%xmm5               \n"
       54             :       "movq       (%0,%3),%%xmm7                   \n"
       55             :       "lea        (%0,%3,2),%0                     \n"
       56             :       "punpcklbw  %%xmm7,%%xmm6                    \n"
       57             :       "neg        %3                               \n"
       58             :       "movdqa     %%xmm6,%%xmm7                    \n"
                      :       // Rewind src by the 8 rows just consumed (stride was negated
                      :       // above) and advance 8 columns; stride is restored right after.
       59             :       "lea        0x8(%0,%3,8),%0                  \n"
       60             :       "palignr    $0x8,%%xmm7,%%xmm7               \n"
       61             :       "neg        %3                               \n"
       62             :       // Second round of bit swap.
       63             :       "punpcklwd  %%xmm2,%%xmm0                    \n"
       64             :       "punpcklwd  %%xmm3,%%xmm1                    \n"
       65             :       "movdqa     %%xmm0,%%xmm2                    \n"
       66             :       "movdqa     %%xmm1,%%xmm3                    \n"
       67             :       "palignr    $0x8,%%xmm2,%%xmm2               \n"
       68             :       "palignr    $0x8,%%xmm3,%%xmm3               \n"
       69             :       "punpcklwd  %%xmm6,%%xmm4                    \n"
       70             :       "punpcklwd  %%xmm7,%%xmm5                    \n"
       71             :       "movdqa     %%xmm4,%%xmm6                    \n"
       72             :       "movdqa     %%xmm5,%%xmm7                    \n"
       73             :       "palignr    $0x8,%%xmm6,%%xmm6               \n"
       74             :       "palignr    $0x8,%%xmm7,%%xmm7               \n"
       75             :       // Third round of bit swap.
       76             :       // Write to the destination pointer.
       77             :       "punpckldq  %%xmm4,%%xmm0                    \n"
       78             :       "movq       %%xmm0,(%1)                      \n"
       79             :       "movdqa     %%xmm0,%%xmm4                    \n"
       80             :       "palignr    $0x8,%%xmm4,%%xmm4               \n"
       81             :       "movq       %%xmm4,(%1,%4)                   \n"
       82             :       "lea        (%1,%4,2),%1                     \n"
       83             :       "punpckldq  %%xmm6,%%xmm2                    \n"
       84             :       "movdqa     %%xmm2,%%xmm6                    \n"
       85             :       "movq       %%xmm2,(%1)                      \n"
       86             :       "palignr    $0x8,%%xmm6,%%xmm6               \n"
       87             :       "punpckldq  %%xmm5,%%xmm1                    \n"
       88             :       "movq       %%xmm6,(%1,%4)                   \n"
       89             :       "lea        (%1,%4,2),%1                     \n"
       90             :       "movdqa     %%xmm1,%%xmm5                    \n"
       91             :       "movq       %%xmm1,(%1)                      \n"
       92             :       "palignr    $0x8,%%xmm5,%%xmm5               \n"
       93             :       "movq       %%xmm5,(%1,%4)                   \n"
       94             :       "lea        (%1,%4,2),%1                     \n"
       95             :       "punpckldq  %%xmm7,%%xmm3                    \n"
       96             :       "movq       %%xmm3,(%1)                      \n"
       97             :       "movdqa     %%xmm3,%%xmm7                    \n"
       98             :       "palignr    $0x8,%%xmm7,%%xmm7               \n"
                      :       // Decrement the column count by the 8 columns handled this pass;
                      :       // the trailing jg loops while the count stays positive.
       99             :       "sub        $0x8,%2                          \n"
      100             :       "movq       %%xmm7,(%1,%4)                   \n"
      101             :       "lea        (%1,%4,2),%1                     \n"
      102             :       "jg         1b                               \n"
      103             :       : "+r"(src),                    // %0
      104             :         "+r"(dst),                    // %1
      105             :         "+r"(width)                   // %2
      106           0 :       : "r"((intptr_t)(src_stride)),  // %3
      107           0 :         "r"((intptr_t)(dst_stride))   // %4
      108             :       : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
      109           0 :         "xmm7");
      110           0 : }
     111             : #endif  // defined(HAS_TRANSPOSEWX8_SSSE3)
     112             : 
     113             : // Transpose 16x8. 64 bit
     114             : #if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
      115           0 : void TransposeWx8_Fast_SSSE3(const uint8* src,
      116             :                              int src_stride,
      117             :                              uint8* dst,
      118             :                              int dst_stride,
      119             :                              int width) {
                      :   // Same contract as TransposeWx8_SSSE3 but processes 16 columns per
                      :   // loop pass ("sub $0x10,%2"), using xmm8-xmm15 to hold the high
                      :   // halves of full 16-byte row loads (movdqu). Because it clobbers
                      :   // xmm8-xmm15 this build is 64-bit only, per the guard comment above.
                      :   // asm operands: %0=src, %1=dst, %2=width, %3=src_stride, %4=dst_stride.
                      :   // NOTE(review): the loop structure assumes width is a positive
                      :   // multiple of 16 -- confirm against callers.
      120             :   asm volatile(
      121             :       // Read in the data from the source pointer.
      122             :       // First round of bit swap.
      123             :       LABELALIGN
      124             :       "1:                                          \n"
      125             :       "movdqu     (%0),%%xmm0                      \n"
      126             :       "movdqu     (%0,%3),%%xmm1                   \n"
      127             :       "lea        (%0,%3,2),%0                     \n"
      128             :       "movdqa     %%xmm0,%%xmm8                    \n"
      129             :       "punpcklbw  %%xmm1,%%xmm0                    \n"
      130             :       "punpckhbw  %%xmm1,%%xmm8                    \n"
      131             :       "movdqu     (%0),%%xmm2                      \n"
      132             :       "movdqa     %%xmm0,%%xmm1                    \n"
      133             :       "movdqa     %%xmm8,%%xmm9                    \n"
      134             :       "palignr    $0x8,%%xmm1,%%xmm1               \n"
      135             :       "palignr    $0x8,%%xmm9,%%xmm9               \n"
      136             :       "movdqu     (%0,%3),%%xmm3                   \n"
      137             :       "lea        (%0,%3,2),%0                     \n"
      138             :       "movdqa     %%xmm2,%%xmm10                   \n"
      139             :       "punpcklbw  %%xmm3,%%xmm2                    \n"
      140             :       "punpckhbw  %%xmm3,%%xmm10                   \n"
      141             :       "movdqa     %%xmm2,%%xmm3                    \n"
      142             :       "movdqa     %%xmm10,%%xmm11                  \n"
      143             :       "movdqu     (%0),%%xmm4                      \n"
      144             :       "palignr    $0x8,%%xmm3,%%xmm3               \n"
      145             :       "palignr    $0x8,%%xmm11,%%xmm11             \n"
      146             :       "movdqu     (%0,%3),%%xmm5                   \n"
      147             :       "lea        (%0,%3,2),%0                     \n"
      148             :       "movdqa     %%xmm4,%%xmm12                   \n"
      149             :       "punpcklbw  %%xmm5,%%xmm4                    \n"
      150             :       "punpckhbw  %%xmm5,%%xmm12                   \n"
      151             :       "movdqa     %%xmm4,%%xmm5                    \n"
      152             :       "movdqa     %%xmm12,%%xmm13                  \n"
      153             :       "movdqu     (%0),%%xmm6                      \n"
      154             :       "palignr    $0x8,%%xmm5,%%xmm5               \n"
      155             :       "palignr    $0x8,%%xmm13,%%xmm13             \n"
      156             :       "movdqu     (%0,%3),%%xmm7                   \n"
      157             :       "lea        (%0,%3,2),%0                     \n"
      158             :       "movdqa     %%xmm6,%%xmm14                   \n"
      159             :       "punpcklbw  %%xmm7,%%xmm6                    \n"
      160             :       "punpckhbw  %%xmm7,%%xmm14                   \n"
      161             :       "neg        %3                               \n"
      162             :       "movdqa     %%xmm6,%%xmm7                    \n"
      163             :       "movdqa     %%xmm14,%%xmm15                  \n"
                      :       // Rewind src by the 8 rows just consumed (stride was negated
                      :       // above) and advance 16 columns; stride is restored right after.
      164             :       "lea        0x10(%0,%3,8),%0                 \n"
      165             :       "palignr    $0x8,%%xmm7,%%xmm7               \n"
      166             :       "palignr    $0x8,%%xmm15,%%xmm15             \n"
      167             :       "neg        %3                               \n"
      168             :       // Second round of bit swap.
      169             :       "punpcklwd  %%xmm2,%%xmm0                    \n"
      170             :       "punpcklwd  %%xmm3,%%xmm1                    \n"
      171             :       "movdqa     %%xmm0,%%xmm2                    \n"
      172             :       "movdqa     %%xmm1,%%xmm3                    \n"
      173             :       "palignr    $0x8,%%xmm2,%%xmm2               \n"
      174             :       "palignr    $0x8,%%xmm3,%%xmm3               \n"
      175             :       "punpcklwd  %%xmm6,%%xmm4                    \n"
      176             :       "punpcklwd  %%xmm7,%%xmm5                    \n"
      177             :       "movdqa     %%xmm4,%%xmm6                    \n"
      178             :       "movdqa     %%xmm5,%%xmm7                    \n"
      179             :       "palignr    $0x8,%%xmm6,%%xmm6               \n"
      180             :       "palignr    $0x8,%%xmm7,%%xmm7               \n"
      181             :       "punpcklwd  %%xmm10,%%xmm8                   \n"
      182             :       "punpcklwd  %%xmm11,%%xmm9                   \n"
      183             :       "movdqa     %%xmm8,%%xmm10                   \n"
      184             :       "movdqa     %%xmm9,%%xmm11                   \n"
      185             :       "palignr    $0x8,%%xmm10,%%xmm10             \n"
      186             :       "palignr    $0x8,%%xmm11,%%xmm11             \n"
      187             :       "punpcklwd  %%xmm14,%%xmm12                  \n"
      188             :       "punpcklwd  %%xmm15,%%xmm13                  \n"
      189             :       "movdqa     %%xmm12,%%xmm14                  \n"
      190             :       "movdqa     %%xmm13,%%xmm15                  \n"
      191             :       "palignr    $0x8,%%xmm14,%%xmm14             \n"
      192             :       "palignr    $0x8,%%xmm15,%%xmm15             \n"
      193             :       // Third round of bit swap.
      194             :       // Write to the destination pointer.
      195             :       "punpckldq  %%xmm4,%%xmm0                    \n"
      196             :       "movq       %%xmm0,(%1)                      \n"
      197             :       "movdqa     %%xmm0,%%xmm4                    \n"
      198             :       "palignr    $0x8,%%xmm4,%%xmm4               \n"
      199             :       "movq       %%xmm4,(%1,%4)                   \n"
      200             :       "lea        (%1,%4,2),%1                     \n"
      201             :       "punpckldq  %%xmm6,%%xmm2                    \n"
      202             :       "movdqa     %%xmm2,%%xmm6                    \n"
      203             :       "movq       %%xmm2,(%1)                      \n"
      204             :       "palignr    $0x8,%%xmm6,%%xmm6               \n"
      205             :       "punpckldq  %%xmm5,%%xmm1                    \n"
      206             :       "movq       %%xmm6,(%1,%4)                   \n"
      207             :       "lea        (%1,%4,2),%1                     \n"
      208             :       "movdqa     %%xmm1,%%xmm5                    \n"
      209             :       "movq       %%xmm1,(%1)                      \n"
      210             :       "palignr    $0x8,%%xmm5,%%xmm5               \n"
      211             :       "movq       %%xmm5,(%1,%4)                   \n"
      212             :       "lea        (%1,%4,2),%1                     \n"
      213             :       "punpckldq  %%xmm7,%%xmm3                    \n"
      214             :       "movq       %%xmm3,(%1)                      \n"
      215             :       "movdqa     %%xmm3,%%xmm7                    \n"
      216             :       "palignr    $0x8,%%xmm7,%%xmm7               \n"
      217             :       "movq       %%xmm7,(%1,%4)                   \n"
      218             :       "lea        (%1,%4,2),%1                     \n"
      219             :       "punpckldq  %%xmm12,%%xmm8                   \n"
      220             :       "movq       %%xmm8,(%1)                      \n"
      221             :       "movdqa     %%xmm8,%%xmm12                   \n"
      222             :       "palignr    $0x8,%%xmm12,%%xmm12             \n"
      223             :       "movq       %%xmm12,(%1,%4)                  \n"
      224             :       "lea        (%1,%4,2),%1                     \n"
      225             :       "punpckldq  %%xmm14,%%xmm10                  \n"
      226             :       "movdqa     %%xmm10,%%xmm14                  \n"
      227             :       "movq       %%xmm10,(%1)                     \n"
      228             :       "palignr    $0x8,%%xmm14,%%xmm14             \n"
      229             :       "punpckldq  %%xmm13,%%xmm9                   \n"
      230             :       "movq       %%xmm14,(%1,%4)                  \n"
      231             :       "lea        (%1,%4,2),%1                     \n"
      232             :       "movdqa     %%xmm9,%%xmm13                   \n"
      233             :       "movq       %%xmm9,(%1)                      \n"
      234             :       "palignr    $0x8,%%xmm13,%%xmm13             \n"
      235             :       "movq       %%xmm13,(%1,%4)                  \n"
      236             :       "lea        (%1,%4,2),%1                     \n"
      237             :       "punpckldq  %%xmm15,%%xmm11                  \n"
      238             :       "movq       %%xmm11,(%1)                     \n"
      239             :       "movdqa     %%xmm11,%%xmm15                  \n"
      240             :       "palignr    $0x8,%%xmm15,%%xmm15             \n"
                      :       // Decrement the column count by the 16 columns handled this pass;
                      :       // the trailing jg loops while the count stays positive.
      241             :       "sub        $0x10,%2                         \n"
      242             :       "movq       %%xmm15,(%1,%4)                  \n"
      243             :       "lea        (%1,%4,2),%1                     \n"
      244             :       "jg         1b                               \n"
      245             :       : "+r"(src),                    // %0
      246             :         "+r"(dst),                    // %1
      247             :         "+r"(width)                   // %2
      248           0 :       : "r"((intptr_t)(src_stride)),  // %3
      249           0 :         "r"((intptr_t)(dst_stride))   // %4
      250             :       : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
      251             :         "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
      252           0 :         "xmm15");
      253           0 : }
     254             : #endif  // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
     255             : 
     256             : // Transpose UV 8x8.  64 bit.
     257             : #if defined(HAS_TRANSPOSEUVWX8_SSE2)
      258           0 : void TransposeUVWx8_SSE2(const uint8* src,
      259             :                          int src_stride,
      260             :                          uint8* dst_a,
      261             :                          int dst_stride_a,
      262             :                          uint8* dst_b,
      263             :                          int dst_stride_b,
      264             :                          int width) {
                      :   // Transposes 8 rows of interleaved UV byte pairs, splitting the two
                      :   // channels into separate destinations: the low 8 bytes of each
                      :   // transposed row go to dst_a via movlpd (U, per the "Write back U
                      :   // channel" store below) and the high 8 bytes to dst_b via movhpd (V).
                      :   // Processes 8 columns per loop pass ("sub $0x8,%3"). Uses xmm8/xmm9
                      :   // as scratch, so 64-bit only, per the guard comment above.
                      :   // asm operands: %0=src, %1=dst_a, %2=dst_b, %3=width,
                      :   //               %4=src_stride, %5=dst_stride_a, %6=dst_stride_b.
                      :   // NOTE(review): the loop structure assumes width is a positive
                      :   // multiple of 8 -- confirm against callers.
      265             :   asm volatile(
      266             :       // Read in the data from the source pointer.
      267             :       // First round of bit swap.
      268             :       LABELALIGN
      269             :       "1:                                          \n"
      270             :       "movdqu     (%0),%%xmm0                      \n"
      271             :       "movdqu     (%0,%4),%%xmm1                   \n"
      272             :       "lea        (%0,%4,2),%0                     \n"
      273             :       "movdqa     %%xmm0,%%xmm8                    \n"
      274             :       "punpcklbw  %%xmm1,%%xmm0                    \n"
      275             :       "punpckhbw  %%xmm1,%%xmm8                    \n"
      276             :       "movdqa     %%xmm8,%%xmm1                    \n"
      277             :       "movdqu     (%0),%%xmm2                      \n"
      278             :       "movdqu     (%0,%4),%%xmm3                   \n"
      279             :       "lea        (%0,%4,2),%0                     \n"
      280             :       "movdqa     %%xmm2,%%xmm8                    \n"
      281             :       "punpcklbw  %%xmm3,%%xmm2                    \n"
      282             :       "punpckhbw  %%xmm3,%%xmm8                    \n"
      283             :       "movdqa     %%xmm8,%%xmm3                    \n"
      284             :       "movdqu     (%0),%%xmm4                      \n"
      285             :       "movdqu     (%0,%4),%%xmm5                   \n"
      286             :       "lea        (%0,%4,2),%0                     \n"
      287             :       "movdqa     %%xmm4,%%xmm8                    \n"
      288             :       "punpcklbw  %%xmm5,%%xmm4                    \n"
      289             :       "punpckhbw  %%xmm5,%%xmm8                    \n"
      290             :       "movdqa     %%xmm8,%%xmm5                    \n"
      291             :       "movdqu     (%0),%%xmm6                      \n"
      292             :       "movdqu     (%0,%4),%%xmm7                   \n"
      293             :       "lea        (%0,%4,2),%0                     \n"
      294             :       "movdqa     %%xmm6,%%xmm8                    \n"
      295             :       "punpcklbw  %%xmm7,%%xmm6                    \n"
      296             :       "neg        %4                               \n"
                      :       // Rewind src by the 8 rows just consumed (stride negated above)
                      :       // and advance 16 bytes = 8 UV pairs; stride is restored below.
      297             :       "lea        0x10(%0,%4,8),%0                 \n"
      298             :       "punpckhbw  %%xmm7,%%xmm8                    \n"
      299             :       "movdqa     %%xmm8,%%xmm7                    \n"
      300             :       "neg        %4                               \n"
      301             :       // Second round of bit swap.
      302             :       "movdqa     %%xmm0,%%xmm8                    \n"
      303             :       "movdqa     %%xmm1,%%xmm9                    \n"
      304             :       "punpckhwd  %%xmm2,%%xmm8                    \n"
      305             :       "punpckhwd  %%xmm3,%%xmm9                    \n"
      306             :       "punpcklwd  %%xmm2,%%xmm0                    \n"
      307             :       "punpcklwd  %%xmm3,%%xmm1                    \n"
      308             :       "movdqa     %%xmm8,%%xmm2                    \n"
      309             :       "movdqa     %%xmm9,%%xmm3                    \n"
      310             :       "movdqa     %%xmm4,%%xmm8                    \n"
      311             :       "movdqa     %%xmm5,%%xmm9                    \n"
      312             :       "punpckhwd  %%xmm6,%%xmm8                    \n"
      313             :       "punpckhwd  %%xmm7,%%xmm9                    \n"
      314             :       "punpcklwd  %%xmm6,%%xmm4                    \n"
      315             :       "punpcklwd  %%xmm7,%%xmm5                    \n"
      316             :       "movdqa     %%xmm8,%%xmm6                    \n"
      317             :       "movdqa     %%xmm9,%%xmm7                    \n"
      318             :       // Third round of bit swap.
      319             :       // Write to the destination pointer.
      320             :       "movdqa     %%xmm0,%%xmm8                    \n"
      321             :       "punpckldq  %%xmm4,%%xmm0                    \n"
      322             :       "movlpd     %%xmm0,(%1)                      \n"  // Write back U channel
      323             :       "movhpd     %%xmm0,(%2)                      \n"  // Write back V channel
      324             :       "punpckhdq  %%xmm4,%%xmm8                    \n"
      325             :       "movlpd     %%xmm8,(%1,%5)                   \n"
      326             :       "lea        (%1,%5,2),%1                     \n"
      327             :       "movhpd     %%xmm8,(%2,%6)                   \n"
      328             :       "lea        (%2,%6,2),%2                     \n"
      329             :       "movdqa     %%xmm2,%%xmm8                    \n"
      330             :       "punpckldq  %%xmm6,%%xmm2                    \n"
      331             :       "movlpd     %%xmm2,(%1)                      \n"
      332             :       "movhpd     %%xmm2,(%2)                      \n"
      333             :       "punpckhdq  %%xmm6,%%xmm8                    \n"
      334             :       "movlpd     %%xmm8,(%1,%5)                   \n"
      335             :       "lea        (%1,%5,2),%1                     \n"
      336             :       "movhpd     %%xmm8,(%2,%6)                   \n"
      337             :       "lea        (%2,%6,2),%2                     \n"
      338             :       "movdqa     %%xmm1,%%xmm8                    \n"
      339             :       "punpckldq  %%xmm5,%%xmm1                    \n"
      340             :       "movlpd     %%xmm1,(%1)                      \n"
      341             :       "movhpd     %%xmm1,(%2)                      \n"
      342             :       "punpckhdq  %%xmm5,%%xmm8                    \n"
      343             :       "movlpd     %%xmm8,(%1,%5)                   \n"
      344             :       "lea        (%1,%5,2),%1                     \n"
      345             :       "movhpd     %%xmm8,(%2,%6)                   \n"
      346             :       "lea        (%2,%6,2),%2                     \n"
      347             :       "movdqa     %%xmm3,%%xmm8                    \n"
      348             :       "punpckldq  %%xmm7,%%xmm3                    \n"
      349             :       "movlpd     %%xmm3,(%1)                      \n"
      350             :       "movhpd     %%xmm3,(%2)                      \n"
      351             :       "punpckhdq  %%xmm7,%%xmm8                    \n"
                      :       // Decrement the column count by the 8 columns handled this pass;
                      :       // the trailing jg loops while the count stays positive.
      352             :       "sub        $0x8,%3                          \n"
      353             :       "movlpd     %%xmm8,(%1,%5)                   \n"
      354             :       "lea        (%1,%5,2),%1                     \n"
      355             :       "movhpd     %%xmm8,(%2,%6)                   \n"
      356             :       "lea        (%2,%6,2),%2                     \n"
      357             :       "jg         1b                               \n"
      358             :       : "+r"(src),                      // %0
      359             :         "+r"(dst_a),                    // %1
      360             :         "+r"(dst_b),                    // %2
      361             :         "+r"(width)                     // %3
      362           0 :       : "r"((intptr_t)(src_stride)),    // %4
      363           0 :         "r"((intptr_t)(dst_stride_a)),  // %5
      364           0 :         "r"((intptr_t)(dst_stride_b))   // %6
      365             :       : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
      366           0 :         "xmm7", "xmm8", "xmm9");
      367           0 : }
     368             : #endif  // defined(HAS_TRANSPOSEUVWX8_SSE2)
     369             : #endif  // defined(__x86_64__) || defined(__i386__)
     370             : 
     371             : #ifdef __cplusplus
     372             : }  // extern "C"
     373             : }  // namespace libyuv
     374             : #endif

Generated by: LCOV version 1.13