LCOV - code coverage report
Current view: top level - gfx/2d - ssse3-scaler.c (source / functions) Hit Total Coverage
Test: output.info Lines: 0 162 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 7 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright © 2013 Soren Sandmann Pedersen
       3             :  * Copyright © 2013 Red Hat, Inc.
       4             :  * Copyright © 2016 Mozilla Foundation
       5             :  *
       6             :  * Permission is hereby granted, free of charge, to any person obtaining a
       7             :  * copy of this software and associated documentation files (the "Software"),
       8             :  * to deal in the Software without restriction, including without limitation
       9             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      10             :  * and/or sell copies of the Software, and to permit persons to whom the
      11             :  * Software is furnished to do so, subject to the following conditions:
      12             :  *
      13             :  * The above copyright notice and this permission notice (including the next
      14             :  * paragraph) shall be included in all copies or substantial portions of the
      15             :  * Software.
      16             :  *
      17             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      18             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      19             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      20             :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      21             :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      22             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      23             :  * DEALINGS IN THE SOFTWARE.
      24             :  *
      25             :  * Author: Soren Sandmann (soren.sandmann@gmail.com)
      26             :  *         Jeff Muizelaar (jmuizelaar@mozilla.com)
      27             :  */
      28             : 
      29             : /* This has been adapted from the ssse3 code from pixman. It's currently
      30             :  * a mess as I want to try it out in practice before finalizing the details.
      31             :  */
      32             : 
      33             : #include <stdlib.h>
      34             : #include <mmintrin.h>
      35             : #include <xmmintrin.h>
      36             : #include <emmintrin.h>
      37             : #include <tmmintrin.h>
      38             : #include <stdint.h>
      39             : #include <assert.h>
      40             : /* Fixed-point scalar types and conversion macros (pixman naming) used throughout this file. */
      41             : typedef int32_t                 pixman_fixed_16_16_t;
      42             : typedef pixman_fixed_16_16_t    pixman_fixed_t;
      43             : #define pixman_fixed_1                  (pixman_int_to_fixed(1))   /* 1.0, i.e. 1 << 16 */
      44             : #define pixman_fixed_to_int(f)          ((int) ((f) >> 16))   /* drops the 16 fraction bits */
      45             : #define pixman_int_to_fixed(i)          ((pixman_fixed_t) ((i) << 16))   /* NOTE(review): left-shifting a negative int is undefined in ISO C */
      46             : #define pixman_double_to_fixed(d)       ((pixman_fixed_t) ((d) * 65536.0))   /* scale by 2^16, then truncate in the cast */
      47             : typedef struct pixman_vector pixman_vector_t;
      48             : 
      49             : typedef int pixman_bool_t;
      50             : typedef int64_t                 pixman_fixed_32_32_t;
      51             : typedef pixman_fixed_32_32_t    pixman_fixed_48_16_t;   /* 64-bit value; the transform code below shifts by 16 to reach its integer part */
      52             : typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;   /* wide counterpart of struct pixman_vector */
      53             : /* Three-component vector of pixman_fixed_t (16.16 fixed-point) values. */
      54             : struct pixman_vector
      55             : {
      56             :     pixman_fixed_t      vector[3];
      57             : };
      58             : typedef struct pixman_transform pixman_transform_t;
      59             : /* 3x3 matrix of 16.16 fixed-point coefficients, indexed as matrix[row][column]. */
      60             : struct pixman_transform
      61             : {
      62             :     pixman_fixed_t      matrix[3][3];
      63             : };
      64             : /* force_inline expands to the compiler-specific always-inline spelling. */
      65             : #ifdef _MSC_VER
      66             : #define force_inline __forceinline
      67             : #else
      68             : #define force_inline __inline__ __attribute__((always_inline))
      69             : #endif
      70             : /* Number of fractional bits kept for each bilinear interpolation weight. */
      71             : #define BILINEAR_INTERPOLATION_BITS 6
      72             : /* Return the top BILINEAR_INTERPOLATION_BITS bits of the 16-bit fraction of x, i.e. a weight in [0, (1 << BILINEAR_INTERPOLATION_BITS) - 1]. */
      73             : static force_inline int
      74             : pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
      75             : {
      76           0 :     return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
      77             :                                ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
      78             : }
      79             : /* Multiply the 3x3 matrix *t by the vector *v and store the product in *result. result may alias v: all three rows are accumulated in tmp before anything is written. */
      80             : static void
      81           0 : pixman_transform_point_31_16_3d (const pixman_transform_t    *t,
      82             :                                  const pixman_vector_48_16_t *v,
      83             :                                  pixman_vector_48_16_t       *result)
      84             : {
      85             :     int i;
      86             :     int64_t tmp[3][2]; /* per row: [0] sums products with the integer parts of v, [1] sums products with its 16-bit fractions */
      87             : 
      88             :     /* input vector values must have no more than 31 bits (including sign)
      89             :      * in the integer part */
      90           0 :     assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
      91           0 :     assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
      92           0 :     assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
      93           0 :     assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
      94           0 :     assert (v->v[2] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
      95           0 :     assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
      96             :     /* The range checks above are assert()s, so they vanish when NDEBUG is defined. */
      97           0 :     for (i = 0; i < 3; i++)
      98             :     {
      99           0 :         tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
     100           0 :         tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
     101           0 :         tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
     102           0 :         tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
     103           0 :         tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
     104           0 :         tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
     105             :     }
     106             :     /* The fraction products carry 16 extra fraction bits: add 0x8000 to round to nearest, shift them away, then add the integer-part sum. */
     107           0 :     result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
     108           0 :     result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
     109           0 :     result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16);
     110           0 : }
     111             : /* Transform *vector in place by *transform. Returns nonzero when every component survived the narrowing from 64 bits back to pixman_fixed_t unchanged, zero otherwise (the narrowed values are stored either way). */
     112             : static pixman_bool_t
     113           0 : pixman_transform_point_3d (const struct pixman_transform *transform,
     114             :                            struct pixman_vector *         vector)
     115             : {
     116             :     pixman_vector_48_16_t tmp;
     117           0 :     tmp.v[0] = vector->vector[0];
     118           0 :     tmp.v[1] = vector->vector[1];
     119           0 :     tmp.v[2] = vector->vector[2];
     120             :     /* tmp is passed as both input and output; the callee writes its result only after reading all inputs. */
     121           0 :     pixman_transform_point_31_16_3d (transform, &tmp, &tmp);
     122             : 
     123           0 :     vector->vector[0] = tmp.v[0];
     124           0 :     vector->vector[1] = tmp.v[1];
     125           0 :     vector->vector[2] = tmp.v[2];
     126             :     /* The stores above narrow 64-bit values to 32 bits; comparing against the wide values detects overflow. */
     127           0 :     return vector->vector[0] == tmp.v[0] &&
     128           0 :            vector->vector[1] == tmp.v[1] &&
     129           0 :            vector->vector[2] == tmp.v[2];
     130             : }
     131             : 
     132             : /* Cut-down source image description: pixel storage, row pitch and the transform applied to destination coordinates. */
     133             : struct bits_image_t
     134             : {
     135             :     uint32_t *                 bits;      /* first source pixel; rows are addressed as bits + y * rowstride */
     136             :     int                        rowstride; /* distance between rows, in uint32_t units */
     137             :     pixman_transform_t *transform;        /* matrix[0][0] is used as the x step and matrix[1][1] as the y step per destination pixel */
     138             : };
     139             : 
     140             : typedef struct bits_image_t bits_image_t;
     141             : typedef struct {
     142             :     int unused; /* placeholder member; this type is not referenced by the code shown in this file */
     143             : } pixman_iter_info_t;
     144             : /* Cut-down iterator state shared by the init, fetch and fini functions below. */
     145             : typedef struct pixman_iter_t pixman_iter_t;
     146             : typedef void      (* pixman_iter_fini_t)         (pixman_iter_t *iter);
     147             : 
     148             : struct pixman_iter_t
     149             : {
     150             :     int x, y;                             /* destination pixel whose centre is transformed to find the starting source position */
     151             :     pixman_iter_fini_t          fini;     /* cleanup callback installed by init; NULL after a failed init */
     152             :     bits_image_t *image;                  /* source image */
     153             :     uint32_t *                  buffer;   /* where the fetch function writes the current destination row */
     154             :     int width;                            /* destination pixels per row */
     155             :     int height;                           /* used to clamp the lower source row; ssse3_scale_data stores src_height here */
     156             :     void *                      data;     /* bilinear_info_t allocated by init */
     157             : };
     158             : /* line_t caches one horizontally interpolated source row; bilinear_info_t holds two of them plus the current source position. */
     159             : typedef struct
     160             : {
     161             :     int         y;      /* source row held in buffer; -1 means nothing has been fetched yet */
     162             :     uint64_t *  buffer; /* 16-byte aligned storage, four 16-bit channels per pixel, written two pixels per 16-byte store */
     163             : } line_t;
     164             : 
     165             : typedef struct
     166             : {
     167             :     line_t              lines[2]; /* indexed by the parity of the source row number */
     168             :     pixman_fixed_t      y;        /* source y of the current destination row; advanced after every fetch */
     169             :     pixman_fixed_t      x;        /* source x of the first destination pixel */
     170             :     uint64_t            data[1];  /* start of the over-allocated area that both line buffers point into */
     171             : } bilinear_info_t;
     172             : /* Horizontally interpolate n pixels of source row y into line->buffer and record y in line->y. x is the 16.16 source position of the first pixel, ux the step per destination pixel. Pixels are produced two at a time with one aligned 16-byte store per pair, so an odd n still writes a whole 16 bytes. NOTE(review): outside HACKY_PADDING the 8-byte source loads are not bounds checked. */
     173             : static void
     174           0 : ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
     175             :                         int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
     176             : {
     177           0 :     uint32_t *bits = image->bits + y * image->rowstride;
     178           0 :     __m128i vx = _mm_set_epi16 ( /* lanes are listed from highest to lowest; each keeps only the low 16 bits (the fraction) of its value */
     179             :         - (x + 1), x, - (x + 1), x,
     180           0 :         - (x + ux + 1), x + ux,  - (x + ux + 1), x + ux);
     181           0 :     __m128i vux = _mm_set_epi16 ( /* per-pass increment: every loop pass advances two pixels */
     182             :         - 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
     183             :         - 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
     184           0 :     __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0); /* +1 on the negated lanes makes them (1 << BILINEAR_INTERPOLATION_BITS) - w */
     185           0 :     __m128i *b = (__m128i *)line->buffer;
     186             :     __m128i vrl0, vrl1;
     187             : 
     188           0 :     while ((n -= 2) >= 0) /* two pixels per pass; n ends at -2 for an even count and -1 for an odd one */
     189             :     {
     190             :         __m128i vw, vr, s;
     191             : #ifdef HACKY_PADDING /* debug variant with clamped loads; it calls printf, yet <stdio.h> is not among the includes shown in this file */
     192             :         if (pixman_fixed_to_int(x + ux) >= image->rowstride) {
     193             :             vrl1 = _mm_setzero_si128();
     194             :             printf("overread 2loop\n");
     195             :          } else {
     196             :                  if (pixman_fixed_to_int(x + ux) < 0)
     197             :                          printf("underflow\n");
     198             :         vrl1 = _mm_loadl_epi64(
     199             :             (__m128i *)(bits + (pixman_fixed_to_int(x + ux) < 0 ? 0 : pixman_fixed_to_int(x + ux))));
     200             :         }
     201             : #else
     202           0 :         vrl1 = _mm_loadl_epi64(
     203           0 :             (__m128i *)(bits + pixman_fixed_to_int(x + ux)));
     204             : #endif
     205             :         /* vrl1: R1, L1 */
     206             : 
     207             :     final_pixel: /* also entered by the goto after the loop, with vrl1 zeroed, to handle an odd trailing pixel */
     208             : #ifdef HACKY_PADDING
     209             :         vrl0 = _mm_loadl_epi64 (
     210             :             (__m128i *)(bits + (pixman_fixed_to_int (x) < 0 ? 0 : pixman_fixed_to_int (x))));
     211             : #else
     212           0 :         vrl0 = _mm_loadl_epi64 (
     213           0 :             (__m128i *)(bits + pixman_fixed_to_int (x)));
     214             : #endif
     215             :         /* vrl0: R0, L0 */
     216             : 
     217             :         /* The weights are based on vx which is a vector of
     218             :          *
     219             :          *    - (x + 1), x, - (x + 1), x,
     220             :          *          - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
     221             :          *
     222             :          * so the 16 bit weights end up like this:
     223             :          *
     224             :          *    iw0, w0, iw0, w0, iw1, w1, iw1, w1
     225             :          *
     226             :          * and after shifting and packing, we get these bytes:
     227             :          *
     228             :          *    iw0, w0, iw0, w0, iw1, w1, iw1, w1,
     229             :          *        iw0, w0, iw0, w0, iw1, w1, iw1, w1,
     230             :          *
     231             :          * which means the first and the second input pixel
     232             :          * have to be interleaved like this:
     233             :          *
     234             :          *    la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
     235             :          *        lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
     236             :          *
     237             :          * before maddubsw can be used.
     238             :          */
     239             : 
     240           0 :         vw = _mm_add_epi16 (
     241             :             vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
     242             :         /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1
     243             :          */
     244             : 
     245           0 :         vw = _mm_packus_epi16 (vw, vw);
     246             :         /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
     247             :          *         iw0, w0, iw0, w0, iw1, w1, iw1, w1
     248             :          */
     249           0 :         vx = _mm_add_epi16 (vx, vux);
     250             : 
     251           0 :         x += 2 * ux;
     252             : 
     253           0 :         vr = _mm_unpacklo_epi16 (vrl1, vrl0);
     254             :         /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */
     255             : 
     256           0 :         s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
     257             :         /* s:  lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */
     258             : 
     259           0 :         vr = _mm_unpackhi_epi8 (vr, s);
     260             :         /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
     261             :          *         lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
     262             :          */
     263             : 
     264           0 :         vr = _mm_maddubs_epi16 (vr, vw);
     265             : 
     266             :         /* With 7-bit weights, a weight of 0 gives an inverse weight of
     267             :          * 128 which can't be represented in a signed byte.
     268             :          * As a result maddubsw would compute the following:
     269             :          *
     270             :          *     r = l * -128 + r * 0
     271             :          *
     272             :          * rather than the desired
     273             :          *
     274             :          *     r = l * 128 + r * 0
     275             :          *
     276             :          * which is fixable by taking the absolute value of the
     277             :          * result.
     278             :          */
     279             :         // No absolute value is taken here: with BILINEAR_INTERPOLATION_BITS == 6 the inverse weight never exceeds 64, which fits in a signed byte.
     280             : 
     281           0 :         vr = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (2, 0, 3, 1));
     282             :         /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */
     283           0 :         _mm_store_si128 (b++, vr);
     284             :     }
     285             :     /* Odd count: one pixel is left. Re-enter the loop body with a zero second pixel; afterwards the loop test fails because n drops to -3. */
     286           0 :     if (n == -1)
     287             :     {
     288           0 :         vrl1 = _mm_setzero_si128();
     289           0 :         goto final_pixel;
     290             :     }
     291             : 
     292           0 :     line->y = y;
     293           0 : }
     294             : /* Produce one destination row of iter->width pixels in iter->buffer by blending two horizontally interpolated source rows, then advance info->y by transform->matrix[1][1]. Returns iter->buffer. The mask argument is unused. */
     295             : // scale a line of destination pixels
     296             : static uint32_t *
     297           0 : ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
     298             : {
     299             :     pixman_fixed_t fx, ux;
     300           0 :     bilinear_info_t *info = iter->data;
     301             :     line_t *line0, *line1;
     302             :     int y0, y1;
     303             :     int32_t dist_y;
     304             :     __m128i vw, uvw;
     305             :     int i;
     306             : 
     307           0 :     fx = info->x;
     308           0 :     ux = iter->image->transform->matrix[0][0];
     309             : 
     310           0 :     y0 = pixman_fixed_to_int (info->y);
     311           0 :     if (y0 < 0)
     312           0 :         *(volatile char*)0 = 9; /* deliberate crash through a null write; NOTE(review): this is undefined behaviour, abort() would be the defined equivalent */
     313           0 :     y1 = y0 + 1;
     314             : 
     315             :     // clamping in y direction
     316           0 :     if (y1 >= iter->height) {
     317           0 :         y1 = iter->height - 1;
     318             :     }
     319             :     /* The two cached rows are selected by row parity; a row is refetched only when the slot holds a different y. */
     320           0 :     line0 = &info->lines[y0 & 0x01];
     321           0 :     line1 = &info->lines[y1 & 0x01];
     322             : 
     323           0 :     if (line0->y != y0)
     324             :     {
     325           0 :         ssse3_fetch_horizontal (
     326             :             iter->image, line0, y0, fx, ux, iter->width);
     327             :     }
     328             : 
     329           0 :     if (line1->y != y1)
     330             :     {
     331           0 :         ssse3_fetch_horizontal (
     332             :             iter->image, line1, y1, fx, ux, iter->width);
     333             :     }
     334             : 
     335             : #ifdef PIXMAN_STYLE_INTERPOLATION /* alternative formula; this branch never assigns uvw */
     336             :     dist_y = pixman_fixed_to_bilinear_weight (info->y);
     337             :     dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS);
     338             : 
     339             :     vw = _mm_set_epi16 (
     340             :         dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
     341             : 
     342             : #else
     343             :     // setup the weights for the top (vw) and bottom (uvw) lines
     344           0 :     dist_y = pixman_fixed_to_bilinear_weight (info->y);
     345             :     // we use 15 instead of 16 because we need an extra bit to handle when the weights are 0 and 1
     346           0 :     dist_y <<= (15 - BILINEAR_INTERPOLATION_BITS);
     347             : 
     348           0 :     vw = _mm_set_epi16 ( /* multiplies the line1 (y1) values in the loops below */
     349             :         dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
     350             : 
     351             : 
     352           0 :     dist_y = (1 << BILINEAR_INTERPOLATION_BITS) - pixman_fixed_to_bilinear_weight (info->y);
     353           0 :     dist_y <<= (15 - BILINEAR_INTERPOLATION_BITS);
     354           0 :     uvw = _mm_set_epi16 ( /* complementary weight; multiplies the line0 (y0) values */
     355             :         dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
     356             : #endif
     357             :     /* Main loop: four destination pixels per pass. */
     358           0 :     for (i = 0; i + 3 < iter->width; i += 4)
     359             :     {
     360           0 :         __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
     361           0 :         __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
     362           0 :         __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2));
     363           0 :         __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2));
     364             : #ifdef PIXMAN_STYLE_INTERPOLATION
     365             :         __m128i r0, r1, tmp, p;
     366             : 
     367             :         r0 = _mm_mulhi_epu16 (
     368             :             _mm_sub_epi16 (bot0, top0), vw);
     369             :         tmp = _mm_cmplt_epi16 (bot0, top0);
     370             :         tmp = _mm_and_si128 (tmp, vw);
     371             :         r0 = _mm_sub_epi16 (r0, tmp);
     372             :         r0 = _mm_add_epi16 (r0, top0);
     373             :         r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
     374             :         /* r0:  A0 R0 A1 R1 G0 B0 G1 B1 */
     375             :         //r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
     376             :         /* r0:  A1 R1 G1 B1 A0 R0 G0 B0 */
     377             : 
     378             :         // tmp = bot1 < top1 ? vw : 0;
     379             :         // r1 = (bot1 - top1)*vw + top1 - tmp
     380             :         // r1 = bot1*vw - vw*top1 + top1 - tmp
     381             :         // r1 = bot1*vw + top1 - vw*top1 - tmp
     382             :         // r1 = bot1*vw + top1*(1 - vw) - tmp
     383             :         r1 = _mm_mulhi_epu16 (
     384             :             _mm_sub_epi16 (bot1, top1), vw);
     385             :         tmp = _mm_cmplt_epi16 (bot1, top1);
     386             :         tmp = _mm_and_si128 (tmp, vw);
     387             :         r1 = _mm_sub_epi16 (r1, tmp);
     388             :         r1 = _mm_add_epi16 (r1, top1);
     389             :         r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS);
     390             :         //r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1));
     391             :         /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */
     392             : #else
     393             :         __m128i r0, r1, p;
     394           0 :         top0 = _mm_mulhi_epu16 (top0, uvw);
     395           0 :         bot0 = _mm_mulhi_epu16 (bot0, vw);
     396           0 :         r0 = _mm_add_epi16(top0, bot0);
     397           0 :         r0 = _mm_srli_epi16(r0, BILINEAR_INTERPOLATION_BITS-1);
     398             : 
     399           0 :         top1 = _mm_mulhi_epu16 (top1, uvw);
     400           0 :         bot1 = _mm_mulhi_epu16 (bot1, vw);
     401           0 :         r1 = _mm_add_epi16(top1, bot1);
     402           0 :         r1 = _mm_srli_epi16(r1, BILINEAR_INTERPOLATION_BITS-1);
     403             : #endif
     404             :         /* Pack the 16-bit channels of r0 and r1 down to bytes (with unsigned saturation) and store 16 bytes, unaligned. */
     405           0 :         p = _mm_packus_epi16 (r0, r1);
     406           0 :         _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p);
     407             :     }
     408             :     /* Tail: up to three remaining pixels, handled two at a time; the loads still read a full 16 bytes from each line buffer. */
     409           0 :     while (i < iter->width)
     410             :     {
     411           0 :         __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
     412           0 :         __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
     413             : 
     414             : #ifdef PIXMAN_STYLE_INTERPOLATION
     415             :         __m128i r0, tmp, p;
     416             :         r0 = _mm_mulhi_epu16 (
     417             :             _mm_sub_epi16 (bot0, top0), vw);
     418             :         tmp = _mm_cmplt_epi16 (bot0, top0);
     419             :         tmp = _mm_and_si128 (tmp, vw);
     420             :         r0 = _mm_sub_epi16 (r0, tmp);
     421             :         r0 = _mm_add_epi16 (r0, top0);
     422             :         r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
     423             :         /* r0:  A0 R0 A1 R1 G0 B0 G1 B1 */
     424             :         r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
     425             :         /* r0:  A1 R1 G1 B1 A0 R0 G0 B0 */
     426             : #else
     427             :         __m128i r0, p;
     428           0 :         top0 = _mm_mulhi_epu16 (top0, uvw);
     429           0 :         bot0 = _mm_mulhi_epu16 (bot0, vw);
     430           0 :         r0 = _mm_add_epi16(top0, bot0);
     431           0 :         r0 = _mm_srli_epi16(r0, BILINEAR_INTERPOLATION_BITS-1);
     432             : #endif
     433             : 
     434           0 :         p = _mm_packus_epi16 (r0, r0);
     435             :         /* Store exactly what remains: 4 bytes for a single pixel, otherwise 8 bytes for two. */
     436           0 :         if (iter->width - i == 1)
     437             :         {
     438           0 :             *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p);
     439           0 :             i++;
     440             :         }
     441             :         else
     442             :         {
     443           0 :             _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p);
     444           0 :             i += 2;
     445             :         }
     446             :     }
     447             :     /* Step the source position to the next destination row. */
     448           0 :     info->y += iter->image->transform->matrix[1][1];
     449             : 
     450           0 :     return iter->buffer;
     451             : }
     452             : /* Free the state block stored in iter->data. The pointer itself is not cleared. */
     453             : static void
     454           0 : ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter)
     455             : {
     456           0 :     free (iter->data);
     457           0 : }
     458             : /* Prepare iter for row-by-row fetching: transform the centre of destination pixel (iter->x, iter->y), allocate the bilinear_info_t together with both line buffers, and install the fini callback. On failure iter->fini is set to NULL and iter->data is left untouched, so the caller must have initialised it. */
     459             : static void
     460           0 : ssse3_bilinear_cover_iter_init (pixman_iter_t *iter)
     461             : {
     462           0 :     int width = iter->width;
     463             :     bilinear_info_t *info;
     464             :     pixman_vector_t v;
     465             : 
     466             :     /* Reference point is the center of the pixel */
     467           0 :     v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
     468           0 :     v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
     469           0 :     v.vector[2] = pixman_fixed_1;
     470             : 
     471           0 :     if (!pixman_transform_point_3d (iter->image->transform, &v))
     472           0 :         goto fail;
     473             :     /* One block holds the struct and both line buffers: data[1] plus 2 * width - 1 more uint64_t, with 64 spare bytes for the ALIGN round-ups below. */
     474           0 :     info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64);
     475           0 :     if (!info)
     476           0 :         goto fail;
     477             :     /* Subtract the half-pixel offset again; the fetch code uses the integer part as a pixel index and the fraction as the interpolation weight. */
     478           0 :     info->x = v.vector[0] - pixman_fixed_1 / 2;
     479           0 :     info->y = v.vector[1] - pixman_fixed_1 / 2;
     480             :     /* ALIGN rounds an address up to the next multiple of 16; the fetch code uses aligned 16-byte loads and stores on the line buffers. */
     481             : #define ALIGN(addr)                                                     \
     482             :     ((void *)((((uintptr_t)(addr)) + 15) & (~15)))
     483             : 
     484             :     /* It is safe to set the y coordinates to -1 initially
     485             :      * because COVER_CLIP_BILINEAR ensures that we will only
     486             :      * be asked to fetch lines in the [0, height) interval
     487             :      */
     488           0 :     info->lines[0].y = -1;
     489           0 :     info->lines[0].buffer = ALIGN (&(info->data[0]));
     490           0 :     info->lines[1].y = -1;
     491           0 :     info->lines[1].buffer = ALIGN (info->lines[0].buffer + width);
     492             : 
     493           0 :     iter->fini = ssse3_bilinear_cover_iter_fini;
     494             : 
     495           0 :     iter->data = info;
     496           0 :     return;
     497             : 
     498             : fail:
     499             :     /* Something went wrong, either a bad matrix or OOM; in such cases,
     500             :      * we don't guarantee any particular rendering.
     501             :      */
     502           0 :     iter->fini = NULL;
     503             : }
     504             : /* Public entry point. dest must already point at the first pixel to be written: x and y only select the starting source position, and the height output rows are stored at dest, dest + dest_stride, and so on. */
     505             : /* scale the src from src_width/height to dest_width/height drawn
     506             :  * into the rectangle x,y width,height
     507             :  * src_stride and dst_stride are 4 byte units */
     508           0 : void ssse3_scale_data(uint32_t *src, int src_width, int src_height, int src_stride,
     509             :                       uint32_t *dest, int dest_width, int dest_height,
     510             :                       int dest_stride,
     511             :                       int x, int y,
     512             :                       int width, int height)
     513             : {
     514             :     //XXX: assert(src_width > 1)
     515           0 :     pixman_transform_t transform = {
     516             :         { { pixman_fixed_1, 0, 0 },
     517             :             { 0, pixman_fixed_1, 0 },
     518             :             { 0, 0, pixman_fixed_1 } }
     519             :     };
     520           0 :     double width_scale = ((double)src_width)/dest_width; /* source pixels per destination pixel; NOTE(review): dest_width == 0 is not guarded */
     521           0 :     double height_scale = ((double)src_height)/dest_height; /* NOTE(review): dest_height == 0 is not guarded either */
     522             : #define AVOID_PADDING
     523             : #ifdef AVOID_PADDING
     524             :     // scale up by enough that we don't read outside of the bounds of the source surface
     525             :     // currently this is required to avoid reading out of bounds.
     526           0 :     if (width_scale < 1) {
     527           0 :         width_scale = (double)(src_width-1)/dest_width;
     528           0 :         transform.matrix[0][2] = pixman_fixed_1/2;
     529             :     }
     530           0 :     if (height_scale < 1) {
     531           0 :         height_scale = (double)(src_height-1)/dest_height;
     532           0 :         transform.matrix[1][2] = pixman_fixed_1/2;
     533             :     }
     534             : #endif
     535           0 :     transform.matrix[0][0] = pixman_double_to_fixed(width_scale);
     536           0 :     transform.matrix[1][1] = pixman_double_to_fixed(height_scale);
     537           0 :     transform.matrix[2][2] = pixman_fixed_1;
     538             :     /* Wrap the arguments in the cut-down image and iterator structs that the helpers above operate on. */
     539             :     bits_image_t image;
     540           0 :     image.bits = src;
     541           0 :     image.transform = &transform;
     542           0 :     image.rowstride = src_stride;
     543             : 
     544             :     pixman_iter_t iter;
     545           0 :     iter.image = &image;
     546           0 :     iter.x = x;
     547           0 :     iter.y = y;
     548           0 :     iter.width = width;
     549           0 :     iter.height = src_height; /* source height: the fetch function clamps the lower sample row against it */
     550           0 :     iter.buffer = dest;
     551           0 :     iter.data = NULL;
     552             :     /* iter.data stays NULL when init fails, in which case nothing is drawn. */
     553           0 :     ssse3_bilinear_cover_iter_init(&iter);
     554           0 :     if (iter.data) {
     555           0 :         for (int iy = 0; iy < height; iy++) {
     556           0 :             ssse3_fetch_bilinear_cover(&iter, NULL);
     557           0 :             iter.buffer += dest_stride;
     558             :         }
     559           0 :         ssse3_bilinear_cover_iter_fini(&iter);
     560             :     }
     561           0 : }

Generated by: LCOV version 1.13