Line data Source code
1 : /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 : /*
3 : * Copyright © 2000 SuSE, Inc.
4 : * Copyright © 2007 Red Hat, Inc.
5 : *
6 : * Permission to use, copy, modify, distribute, and sell this software and its
7 : * documentation for any purpose is hereby granted without fee, provided that
8 : * the above copyright notice appear in all copies and that both that
9 : * copyright notice and this permission notice appear in supporting
10 : * documentation, and that the name of SuSE not be used in advertising or
11 : * publicity pertaining to distribution of the software without specific,
12 : * written prior permission. SuSE makes no representations about the
13 : * suitability of this software for any purpose. It is provided "as is"
14 : * without express or implied warranty.
15 : *
16 : * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18 : * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 : * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 : * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 : * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 : *
23 : * Author: Keith Packard, SuSE, Inc.
24 : */
25 :
26 : #ifndef PIXMAN_FAST_PATH_H__
27 : #define PIXMAN_FAST_PATH_H__
28 :
29 : #include "pixman-private.h"
30 :
/*
 * Pseudo repeat mode for the scaling templates in this header, which paste
 * their 'repeat_mode' argument onto PIXMAN_REPEAT_ and compare the result
 * with the NORMAL/PAD/NONE modes.  Presumably -1 was chosen because it
 * matches no real pixman_repeat_t value -- confirm against pixman.h.
 *
 * The value is parenthesised so the macro is a single primary expression
 * wherever it is expanded.
 */
#define PIXMAN_REPEAT_COVER (-1)
32 :
/*
 * Flags passed to the fast path macro templates to describe their inputs.
 * A set flag means either "property X is available, so the template may
 * use it" or "property X has to be handled by the template".
 *
 * FLAG_NONE
 *      No flag set.
 *
 * FLAG_HAVE_SOLID_MASK
 *      The input mask is solid and the template has to handle it.
 *
 * FLAG_HAVE_NON_SOLID_MASK
 *      The input mask is a bits mask and the template has to handle it.
 *
 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
 * exclusive: turning both on at once is not allowed.
 */
#define FLAG_NONE                (0)
#define FLAG_HAVE_SOLID_MASK     (1 << 1)
#define FLAG_HAVE_NON_SOLID_MASK (1 << 2)
50 :
/*
 * Source scanlines narrower than this many pixels are extended, so that
 * the scanline function is not called repeatedly for very short runs.
 */
#define REPEAT_NORMAL_MIN_WIDTH 64
55 :
56 : static force_inline pixman_bool_t
57 : repeat (pixman_repeat_t repeat, int *c, int size)
58 : {
59 0 : if (repeat == PIXMAN_REPEAT_NONE)
60 : {
61 0 : if (*c < 0 || *c >= size)
62 0 : return FALSE;
63 : }
64 0 : else if (repeat == PIXMAN_REPEAT_NORMAL)
65 : {
66 0 : while (*c >= size)
67 0 : *c -= size;
68 0 : while (*c < 0)
69 0 : *c += size;
70 : }
71 0 : else if (repeat == PIXMAN_REPEAT_PAD)
72 : {
73 0 : *c = CLIP (*c, 0, size - 1);
74 : }
75 : else /* REFLECT */
76 : {
77 0 : *c = MOD (*c, size * 2);
78 0 : if (*c >= size)
79 0 : *c = size * 2 - *c - 1;
80 : }
81 0 : return TRUE;
82 : }
83 :
84 : static force_inline int
85 : pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
86 : {
87 0 : return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
88 : ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
89 : }
90 :
91 : #if BILINEAR_INTERPOLATION_BITS <= 4
92 : /* Inspired by Filter_32_opaque from Skia */
93 : static force_inline uint32_t
94 : bilinear_interpolation (uint32_t tl, uint32_t tr,
95 : uint32_t bl, uint32_t br,
96 : int distx, int disty)
97 : {
98 : int distxy, distxiy, distixy, distixiy;
99 : uint32_t lo, hi;
100 :
101 : distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
102 : disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
103 :
104 : distxy = distx * disty;
105 : distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
106 : distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
107 : distixiy =
108 : 16 * 16 - (disty << 4) -
109 : (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
110 :
111 : lo = (tl & 0xff00ff) * distixiy;
112 : hi = ((tl >> 8) & 0xff00ff) * distixiy;
113 :
114 : lo += (tr & 0xff00ff) * distxiy;
115 : hi += ((tr >> 8) & 0xff00ff) * distxiy;
116 :
117 : lo += (bl & 0xff00ff) * distixy;
118 : hi += ((bl >> 8) & 0xff00ff) * distixy;
119 :
120 : lo += (br & 0xff00ff) * distxy;
121 : hi += ((br >> 8) & 0xff00ff) * distxy;
122 :
123 : return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
124 : }
125 :
126 : #else
127 : #if SIZEOF_LONG > 4
128 :
129 : static force_inline uint32_t
130 : bilinear_interpolation (uint32_t tl, uint32_t tr,
131 : uint32_t bl, uint32_t br,
132 : int distx, int disty)
133 : {
134 : uint64_t distxy, distxiy, distixy, distixiy;
135 : uint64_t tl64, tr64, bl64, br64;
136 : uint64_t f, r;
137 :
138 : distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
139 : disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
140 :
141 : distxy = distx * disty;
142 : distxiy = distx * (256 - disty);
143 : distixy = (256 - distx) * disty;
144 : distixiy = (256 - distx) * (256 - disty);
145 :
146 : /* Alpha and Blue */
147 : tl64 = tl & 0xff0000ff;
148 : tr64 = tr & 0xff0000ff;
149 : bl64 = bl & 0xff0000ff;
150 : br64 = br & 0xff0000ff;
151 :
152 : f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
153 : r = f & 0x0000ff0000ff0000ull;
154 :
155 : /* Red and Green */
156 : tl64 = tl;
157 : tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
158 :
159 : tr64 = tr;
160 : tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
161 :
162 : bl64 = bl;
163 : bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
164 :
165 : br64 = br;
166 : br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
167 :
168 : f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
169 : r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
170 :
171 : return (uint32_t)(r >> 16);
172 : }
173 :
174 : #else
175 :
176 : #ifdef LOW_QUALITY_INTERPOLATION
177 : /* Based on Filter_32_opaque_portable from Skia */
178 : static force_inline uint32_t
179 : bilinear_interpolation(uint32_t a00, uint32_t a01,
180 : uint32_t a10, uint32_t a11,
181 : int x, int y)
182 : {
183 : int xy = x * y;
184 : static const uint32_t mask = 0xff00ff;
185 :
186 : int scale = 256 - 16*y - 16*x + xy;
187 : uint32_t lo = (a00 & mask) * scale;
188 : uint32_t hi = ((a00 >> 8) & mask) * scale;
189 :
190 : scale = 16*x - xy;
191 : lo += (a01 & mask) * scale;
192 : hi += ((a01 >> 8) & mask) * scale;
193 :
194 : scale = 16*y - xy;
195 : lo += (a10 & mask) * scale;
196 : hi += ((a10 >> 8) & mask) * scale;
197 :
198 : lo += (a11 & mask) * xy;
199 : hi += ((a11 >> 8) & mask) * xy;
200 :
201 : return ((lo >> 8) & mask) | (hi & ~mask);
202 : }
203 : #else
204 : static force_inline uint32_t
205 : bilinear_interpolation (uint32_t tl, uint32_t tr,
206 : uint32_t bl, uint32_t br,
207 : int distx, int disty)
208 : {
209 : int distxy, distxiy, distixy, distixiy;
210 : uint32_t f, r;
211 :
212 0 : distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
213 0 : disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
214 :
215 0 : distxy = distx * disty;
216 0 : distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
217 0 : distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
218 0 : distixiy =
219 0 : 256 * 256 - (disty << 8) -
220 0 : (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */
221 :
222 : /* Blue */
223 0 : r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
224 0 : + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
225 :
226 : /* Green */
227 0 : f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
228 0 : + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
229 0 : r |= f & 0xff000000;
230 :
231 0 : tl >>= 16;
232 0 : tr >>= 16;
233 0 : bl >>= 16;
234 0 : br >>= 16;
235 0 : r >>= 16;
236 :
237 : /* Red */
238 0 : f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
239 0 : + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
240 0 : r |= f & 0x00ff0000;
241 :
242 : /* Alpha */
243 0 : f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
244 0 : + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
245 0 : r |= f & 0xff000000;
246 :
247 0 : return r;
248 : }
249 : #endif
250 : #endif
251 : #endif // BILINEAR_INTERPOLATION_BITS <= 4
252 :
253 : /*
254 : * For each scanline fetched from source image with PAD repeat:
255 : * - calculate how many pixels need to be padded on the left side
256 : * - calculate how many pixels need to be padded on the right side
257 : * - update width to only count pixels which are fetched from the image
258 : * All this information is returned via 'width', 'left_pad', 'right_pad'
259 : * arguments. The code is assuming that 'unit_x' is positive.
260 : *
261 : * Note: 64-bit math is used in order to avoid potential overflows, which
262 : * is probably excessive in many cases. This particular function
263 : * may need its own correctness test and performance tuning.
264 : */
265 : static force_inline void
266 : pad_repeat_get_scanline_bounds (int32_t source_image_width,
267 : pixman_fixed_t vx,
268 : pixman_fixed_t unit_x,
269 : int32_t * width,
270 : int32_t * left_pad,
271 : int32_t * right_pad)
272 : {
273 0 : int64_t max_vx = (int64_t) source_image_width << 16;
274 : int64_t tmp;
275 0 : if (vx < 0)
276 : {
277 0 : tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
278 0 : if (tmp > *width)
279 : {
280 0 : *left_pad = *width;
281 0 : *width = 0;
282 : }
283 : else
284 : {
285 0 : *left_pad = (int32_t) tmp;
286 0 : *width -= (int32_t) tmp;
287 : }
288 : }
289 : else
290 : {
291 0 : *left_pad = 0;
292 : }
293 0 : tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
294 0 : if (tmp < 0)
295 : {
296 0 : *right_pad = *width;
297 0 : *width = 0;
298 : }
299 0 : else if (tmp >= *width)
300 : {
301 0 : *right_pad = 0;
302 : }
303 : else
304 : {
305 0 : *right_pad = *width - (int32_t) tmp;
306 0 : *width = (int32_t) tmp;
307 : }
308 : }
309 :
310 : /* A macroified version of specialized nearest scalers for some
311 : * common 8888 and 565 formats. It supports SRC and OVER ops.
312 : *
313 : * There are two repeat versions, one that handles repeat normal,
314 : * and one without repeat handling that only works if the src region
315 : * used is completely covered by the pre-repeated source samples.
316 : *
317 : * The loops are unrolled to process two pixels per iteration for better
318 : * performance on most CPU architectures (superscalar processors
319 : * can issue several operations simultaneously, other processors can hide
320 : * instructions latencies by pipelining operations). Unrolling more
321 : * does not make much sense because the compiler will start running out
322 : * of spare registers soon.
323 : */
324 :
/*
 * Alpha extraction helpers, selected by FAST_NEAREST_SCANLINE through
 * GET_ ## SRC_FORMAT ## _ALPHA.
 */

/* 8888: alpha is the top byte of the pixel */
#define GET_8888_ALPHA(s) ((s) >> 24)
/* 0565: never actually used, because there is no OVER operation with a
 * 565 source, but the macro has to exist for the template to build. */
#define GET_0565_ALPHA(s) 0xff
/* x888: always reported as 0xff */
#define GET_x888_ALPHA(s) 0xff
330 :
/*
 * FAST_NEAREST_SCANLINE - define a nearest-neighbour scanline function.
 *
 * The generated function 'scanline_func_name' writes 'w' destination
 * pixels.  For each one it reads the source pixel at index
 * pixman_fixed_to_int (vx) relative to 'src' and then advances vx by
 * unit_x.  For NORMAL repeat vx is kept negative by subtracting
 * src_width_fixed (callers pass 'src' pointing past the scanline and a
 * matching negative vx).
 *
 * Only the SRC and OVER operators are supported; any other OP aborts.
 * With OVER, a call with fully_transparent_src set returns without
 * touching the destination, and source pixels equal to 0 are skipped.
 *
 * Pixels are handled in pairs, followed by one optional trailing pixel.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
                              src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t       *dst, \
                    const src_type_t *src, \
                    int32_t           w, \
                    pixman_fixed_t    vx, \
                    pixman_fixed_t    unit_x, \
                    pixman_fixed_t    src_width_fixed, \
                    pixman_bool_t     fully_transparent_src) \
{ \
    uint32_t   dst_8888; \
    src_type_t pix_a, pix_b; \
    uint8_t    alpha_a, alpha_b; \
    int        idx_a, idx_b; \
    \
    /* This template only implements SRC and OVER */ \
    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
        abort(); \
    \
    /* OVER with a fully transparent source leaves the destination as it is */ \
    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
        return; \
    \
    /* Two destination pixels per iteration */ \
    for (w -= 2; w >= 0; w -= 2) \
    { \
        idx_a = pixman_fixed_to_int (vx); \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* unit_x is known to be positive, so vx only needs wrapping downwards */ \
            while (vx >= 0) \
                vx -= src_width_fixed; \
        } \
        pix_a = src[idx_a]; \
        \
        idx_b = pixman_fixed_to_int (vx); \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* unit_x is known to be positive, so vx only needs wrapping downwards */ \
            while (vx >= 0) \
                vx -= src_width_fixed; \
        } \
        pix_b = src[idx_b]; \
        \
        if (PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
        { \
            /* PIXMAN_OP_SRC: plain format conversion */ \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_a); \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_b); \
        } \
        else \
        { \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA(pix_a); \
            alpha_b = GET_ ## SRC_FORMAT ## _ALPHA(pix_b); \
            \
            /* First pixel: opaque -> copy, zero -> skip, otherwise blend */ \
            if (alpha_a == 0xff) \
            { \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_a); \
            } \
            else if (pix_a) \
            { \
                dst_8888 = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
                pix_a = convert_ ## SRC_FORMAT ## _to_8888 (pix_a); \
                alpha_a ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (dst_8888, alpha_a, pix_a); \
                *dst = convert_8888_to_ ## DST_FORMAT (dst_8888); \
            } \
            dst++; \
            \
            /* Second pixel: same treatment */ \
            if (alpha_b == 0xff) \
            { \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_b); \
            } \
            else if (pix_b) \
            { \
                dst_8888 = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
                pix_b = convert_ ## SRC_FORMAT ## _to_8888 (pix_b); \
                alpha_b ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (dst_8888, alpha_b, pix_b); \
                *dst = convert_8888_to_ ## DST_FORMAT (dst_8888); \
            } \
            dst++; \
        } \
    } \
    \
    /* The loop leaves w at -1 when one pixel remains and at -2 otherwise */ \
    if (w & 1) \
    { \
        idx_a = pixman_fixed_to_int (vx); \
        pix_a = src[idx_a]; \
        \
        if (PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
        { \
            /* PIXMAN_OP_SRC */ \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_a); \
        } \
        else \
        { \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA(pix_a); \
            \
            if (alpha_a == 0xff) \
            { \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix_a); \
            } \
            else if (pix_a) \
            { \
                dst_8888 = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
                pix_a = convert_ ## SRC_FORMAT ## _to_8888 (pix_a); \
                alpha_a ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (dst_8888, alpha_a, pix_a); \
                *dst = convert_8888_to_ ## DST_FORMAT (dst_8888); \
            } \
            dst++; \
        } \
    } \
}
444 :
/*
 * FAST_NEAREST_MAINLOOP_INT - define the composite function
 * fast_composite_scaled_nearest<scale_func_name>().
 *
 * The generated function transforms the centre of the first destination
 * pixel into source space, takes matrix[0][0] and matrix[1][1] of the
 * source transform as the per-pixel steps, and then calls 'scanline_func'
 * for each destination row as
 *
 *     scanline_func (mask, dst, src, w, vx, unit_x, src_width_fixed,
 *                    fully_transparent_src)
 *
 * Source scanlines are passed as a pointer one image width past the start
 * of the row together with a vx reduced by the image width.
 *
 * For PAD and NONE repeat every row is split into a left padding run, a
 * run that samples the image and a right padding run (see
 * pad_repeat_get_scanline_bounds).  With PAD the padding runs replicate
 * the first/last pixel of the row; with NONE they, and whole rows outside
 * the image, read a single zero pixel and are flagged as fully
 * transparent.
 *
 * have_mask / mask_is_solid select whether a mask is fetched and whether
 * it is a single solid value or a per-pixel scanline.
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                  dst_type_t, repeat_mode, have_mask, mask_is_solid) \
static void \
fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
                                                  pixman_composite_info_t *info) \
{ \
    PIXMAN_COMPOSITE_ARGS (info); \
    dst_type_t *dst_next_row; \
    mask_type_t *mask_next_row; \
    src_type_t *src_origin; \
    int src_row_idx; \
    pixman_fixed_t width_fixed = pixman_int_to_fixed (src_image->bits.width); \
    pixman_fixed_t height_fixed; \
    pixman_vector_t start; \
    pixman_fixed_t pos_x, pos_y; \
    pixman_fixed_t step_x, step_y; \
    int32_t pad_left, pad_right; \
    \
    src_type_t *src_row; \
    dst_type_t *dst_row; \
    mask_type_t solid_value; \
    const mask_type_t *mask_row = &solid_value; \
    int src_stride, mask_stride, dst_stride; \
    \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_next_row, 1); \
    if (have_mask) \
    { \
        if (mask_is_solid) \
        { \
            solid_value = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
        } \
        else \
        { \
            PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
                                   mask_stride, mask_next_row, 1); \
        } \
    } \
    /* 0 is passed instead of src_x and src_y, because those first have to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_origin, 1); \
    \
    /* The reference point is the centre of the pixel */ \
    start.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    start.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    start.vector[2] = pixman_fixed_1; \
    \
    if (!pixman_transform_point_3d (src_image->common.transform, &start)) \
        return; \
    \
    step_x = src_image->common.transform->matrix[0][0]; \
    step_y = src_image->common.transform->matrix[1][1]; \
    \
    /* Round down to the closest integer, making sure 0.5 rounds to 0, not 1 */ \
    pos_x = start.vector[0] - pixman_fixed_e; \
    pos_y = start.vector[1] - pixman_fixed_e; \
    \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
    { \
        height_fixed = pixman_int_to_fixed (src_image->bits.height); \
        \
        /* Wrap the start position into the actual samples */ \
        repeat (PIXMAN_REPEAT_NORMAL, &pos_x, width_fixed); \
        repeat (PIXMAN_REPEAT_NORMAL, &pos_y, height_fixed); \
    } \
    \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
    { \
        /* Shrinks 'width' to the part of the row that samples the image */ \
        pad_repeat_get_scanline_bounds (src_image->bits.width, pos_x, step_x, \
                                        &width, &pad_left, &pad_right); \
        pos_x += pad_left * step_x; \
    } \
    \
    while (--height >= 0) \
    { \
        dst_row = dst_next_row; \
        dst_next_row += dst_stride; \
        if (have_mask && !mask_is_solid) \
        { \
            mask_row = mask_next_row; \
            mask_next_row += mask_stride; \
        } \
        \
        src_row_idx = pixman_fixed_to_int (pos_y); \
        pos_y += step_y; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
            repeat (PIXMAN_REPEAT_NORMAL, &pos_y, height_fixed); \
        \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
        { \
            repeat (PIXMAN_REPEAT_PAD, &src_row_idx, src_image->bits.height); \
            src_row = src_origin + src_stride * src_row_idx; \
            if (pad_left > 0) \
            { \
                /* The pointer is src_row + 1; with vx = -pixman_fixed_e and a \
                 * zero step, every fetch presumably lands on the first pixel */ \
                scanline_func (mask_row, dst_row, \
                               src_row + src_image->bits.width - src_image->bits.width + 1, \
                               pad_left, -pixman_fixed_e, 0, width_fixed, FALSE); \
            } \
            if (width > 0) \
            { \
                scanline_func (mask_row + (mask_is_solid ? 0 : pad_left), \
                               dst_row + pad_left, src_row + src_image->bits.width, width, \
                               pos_x - width_fixed, step_x, width_fixed, FALSE); \
            } \
            if (pad_right > 0) \
            { \
                /* Same trick at the other end: replicates the last pixel */ \
                scanline_func (mask_row + (mask_is_solid ? 0 : pad_left + width), \
                               dst_row + pad_left + width, src_row + src_image->bits.width, \
                               pad_right, -pixman_fixed_e, 0, width_fixed, FALSE); \
            } \
        } \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
        { \
            static const src_type_t zero[1] = { 0 }; \
            if (src_row_idx < 0 || src_row_idx >= src_image->bits.height) \
            { \
                /* Row lies entirely outside the image: all zero pixels */ \
                scanline_func (mask_row, dst_row, zero + 1, pad_left + width + pad_right, \
                               -pixman_fixed_e, 0, width_fixed, TRUE); \
                continue; \
            } \
            src_row = src_origin + src_stride * src_row_idx; \
            if (pad_left > 0) \
            { \
                scanline_func (mask_row, dst_row, zero + 1, pad_left, \
                               -pixman_fixed_e, 0, width_fixed, TRUE); \
            } \
            if (width > 0) \
            { \
                scanline_func (mask_row + (mask_is_solid ? 0 : pad_left), \
                               dst_row + pad_left, src_row + src_image->bits.width, width, \
                               pos_x - width_fixed, step_x, width_fixed, FALSE); \
            } \
            if (pad_right > 0) \
            { \
                scanline_func (mask_row + (mask_is_solid ? 0 : pad_left + width), \
                               dst_row + pad_left + width, zero + 1, pad_right, \
                               -pixman_fixed_e, 0, width_fixed, TRUE); \
            } \
        } \
        else \
        { \
            /* NORMAL or COVER: a single call handles the whole row */ \
            src_row = src_origin + src_stride * src_row_idx; \
            scanline_func (mask_row, dst_row, src_row + src_image->bits.width, width, \
                           pos_x - width_fixed, step_x, width_fixed, FALSE); \
        } \
    } \
}
590 :
/*
 * Forwards to FAST_NEAREST_MAINLOOP_INT with '_' pasted in front of
 * scale_func_name.  This extra level is a workaround for old Sun Studio
 * compilers, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764
 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, \
                                     mask_type_t, dst_type_t, repeat_mode, \
                                     have_mask, mask_is_solid) \
    FAST_NEAREST_MAINLOOP_INT (_ ## scale_func_name, scanline_func, src_type_t, \
                               mask_type_t, dst_type_t, repeat_mode, \
                               have_mask, mask_is_solid)
596 :
/*
 * Main loop for scanline functions that take no mask.
 *
 * FAST_NEAREST_MAINLOOP_INT always passes a mask pointer as the first
 * argument, so this macro first defines the adapter
 * <scanline_func><scale_func_name>_wrapper, which drops that argument and
 * forwards the rest, and then instantiates the main loop on the adapter
 * with a uint8_t mask type and have_mask = mask_is_solid = FALSE.
 */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                                     repeat_mode) \
    static force_inline void \
    scanline_func##scale_func_name##_wrapper (const uint8_t    *ignored_mask, \
                                              dst_type_t       *dst_px, \
                                              const src_type_t *src_px, \
                                              int32_t           count, \
                                              pixman_fixed_t    start_vx, \
                                              pixman_fixed_t    step_vx, \
                                              pixman_fixed_t    limit_vx, \
                                              pixman_bool_t     src_is_transparent) \
    { \
        scanline_func (dst_px, src_px, count, start_vx, step_vx, limit_vx, \
                       src_is_transparent); \
    } \
    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
                               src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
614 :
/*
 * Unmasked main loop: same as FAST_NEAREST_MAINLOOP_NOMASK, with '_'
 * pasted in front of scale_func_name.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                              repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK (_ ## scale_func_name, scanline_func, \
                                  src_type_t, dst_type_t, repeat_mode)
619 :
/*
 * Define a complete unmasked nearest scaler for one format/operator/repeat
 * combination: the scanline function
 * scaled_nearest_scanline_<scale_func_name>_<OP> and, on top of it, the
 * composite function fast_composite_scaled_nearest_<scale_func_name>_<OP>.
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
                     src_type_t, dst_type_t, OP, repeat_mode) \
    FAST_NEAREST_SCANLINE (scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                           SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
                           OP, repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK (_ ## scale_func_name ## _ ## OP, \
                                  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                                  src_type_t, dst_type_t, repeat_mode)
628 :
629 :
/* Source image flags shared by every scaled nearest fast path entry below */
#define SCALED_NEAREST_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_NEAREST_FILTER  | FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_NARROW_FORMAT)
636 :
/*
 * Table entry for an unmasked nearest scaler with NORMAL repeat.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_normal_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op, \
    }
647 :
/*
 * Table entry for an unmasked nearest scaler with PAD repeat.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_pad_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op, \
    }
658 :
/*
 * Table entry for an unmasked nearest scaler with NONE repeat.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_none_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op, \
    }
669 :
/*
 * Table entry for an unmasked nearest scaler that requires
 * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead of a repeat mode.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op, \
    }
678 :
/*
 * Table entry for a nearest scaler with an a8 mask and NORMAL repeat.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_normal_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op, \
    }
689 :
/*
 * Table entry for a nearest scaler with an a8 mask and PAD repeat.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_pad_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op, \
    }
700 :
/*
 * Table entry for a nearest scaler with an a8 mask and NONE repeat.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_none_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op, \
    }
711 :
/*
 * Table entry for a nearest scaler with an a8 mask that requires
 * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead of a repeat mode.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST), \
        PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op, \
    }
720 :
/*
 * Table entry for a nearest scaler with a solid mask and NORMAL repeat.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_normal_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op, \
    }
731 :
/*
 * Table entry for a nearest scaler with a solid mask and PAD repeat.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_pad_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op, \
    }
742 :
/*
 * Table entry for a nearest scaler with a solid mask and NONE repeat.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_none_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op, \
    }
753 :
/*
 * Table entry for a nearest scaler with a solid mask that requires
 * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead of a repeat mode.
 * Expands to a brace-enclosed initializer (presumably for a
 * pixman_fast_path_t -- confirm against pixman-private.h) that names
 * fast_composite_scaled_nearest_<func>_cover_<op>.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST), \
        PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op, \
    }
762 :
/*
 * All four unmasked table entries for one operator/format combination.
 * The 'cover' variant is listed first because it is the faster one and
 * should therefore be preferred.
 */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_FAST_PATH_COVER  (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NONE   (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_PAD    (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
769 :
/*
 * The a8-mask table entries for one operator/format combination, 'cover'
 * first.  Only COVER, NONE and PAD are listed; the NORMAL entry is not
 * part of this group.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE  (op,s,d,func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD   (op,s,d,func)
774 :
/*
 * The solid-mask table entries for one operator/format combination,
 * 'cover' first.  Only COVER, NONE and PAD are listed; the NORMAL entry is
 * not part of this group.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE  (op,s,d,func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD   (op,s,d,func)
779 :
780 : /*****************************************************************************/
781 :
782 : /*
783 : * Identify 5 zones in each scanline for bilinear scaling. Depending on
784 : * whether 2 pixels to be interpolated are fetched from the image itself,
785 : * from the padding area around it or from both image and padding area.
786 : */
787 : static force_inline void
788 : bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
789 : pixman_fixed_t vx,
790 : pixman_fixed_t unit_x,
791 : int32_t * left_pad,
792 : int32_t * left_tz,
793 : int32_t * width,
794 : int32_t * right_tz,
795 : int32_t * right_pad)
796 : {
797 0 : int width1 = *width, left_pad1, right_pad1;
798 0 : int width2 = *width, left_pad2, right_pad2;
799 :
800 : pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
801 : &width1, &left_pad1, &right_pad1);
802 0 : pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
803 : unit_x, &width2, &left_pad2, &right_pad2);
804 :
805 0 : *left_pad = left_pad2;
806 0 : *left_tz = left_pad1 - left_pad2;
807 0 : *right_tz = right_pad2 - right_pad1;
808 0 : *right_pad = right_pad1;
809 0 : *width -= *left_pad + *left_tz + *right_tz + *right_pad;
810 : }
811 :
812 : /*
813 : * Main loop template for single pass bilinear scaling. It needs to be
814 : * provided with 'scanline_func' which should do the compositing operation.
815 : * The needed function has the following prototype:
816 : *
817 : * scanline_func (dst_type_t * dst,
818 : * const mask_type_t * mask,
819 : * const src_type_t * src_top,
820 : * const src_type_t * src_bottom,
821 : * int32_t width,
822 : * int weight_top,
823 : * int weight_bottom,
824 : * pixman_fixed_t vx,
825 : * pixman_fixed_t unit_x,
826 : * pixman_fixed_t max_vx,
827 : * pixman_bool_t zero_src)
828 : *
829 : * Where:
830 : * dst - destination scanline buffer for storing results
831 : * mask - mask buffer (or single value for solid mask)
832 : * src_top, src_bottom - two source scanlines
833 : * width - number of pixels to process
834 : * weight_top - weight of the top row for interpolation
835 : * weight_bottom - weight of the bottom row for interpolation
836 : * vx - initial position for fetching the first pair of
837 : * pixels from the source buffer
838 : * unit_x - position increment needed to move to the next pair
839 : * of pixels
840 : * max_vx - image size as a fixed point value, can be used for
841 : * implementing NORMAL repeat (when it is supported)
842 : * zero_src - boolean hint variable, which is set to TRUE when
843 : * all source pixels are fetched from zero padding
844 : * zone for NONE repeat
845 : *
846 : * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
847 : * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
848 : * for NONE repeat when handling fuzzy antialiased top or bottom image
849 : * edges. Also, both the top and the bottom weight are guaranteed to
850 : * be less than BILINEAR_INTERPOLATION_RANGE. This means that,
851 : * for example, the weights fit into an unsigned byte and can be used
852 : * with 8-bit SIMD multiplication instructions for 8-bit interpolation
853 : * precision.
854 : */
855 :
/*
 * Replaces a single "scanline_func" with a "fetch_func" / "op_func" pair
 * to allow optional two stage processing:
 *
 *  - op_func != NULL: fetch_func performs the bilinear fetch into the
 *    temporary buffer 'scanline_buf'; op_func, cast to
 *    void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int),
 *    then combines that buffer with 'dst' (unscaled).
 *  - op_func == NULL: the old single stage behaviour is kept and
 *    fetch_func writes directly to 'dst'.
 *
 * This is ugly and gcc issues some warnings, but it works.
 *
 * The value arguments are parenthesised wherever they are used, so that
 * passing an expression (for example an offset buffer pointer) expands as
 * intended.  fetch_func is deliberately left bare so that it may also be
 * a function-like macro.
 *
 * A piece of advice: clang has much better error reporting than gcc for
 * deeply nested macros.
 */
#define scanline_func(dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
                      scanline_buf, mask, src_top, src_bottom, width, \
                      weight_top, weight_bottom, vx, unit_x, max_vx, zero_src) \
    do { \
        if ((op_func) != NULL) \
        { \
            fetch_func ((void *)(scanline_buf), (mask), (src_top), (src_bottom), (width), \
                        (weight_top), (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
            ((void (*)(dst_type_t *, const mask_type_t *, const src_type_t *, int)) (op_func)) \
                ((dst), (mask), (src_type_t *)(scanline_buf), (width)); \
        } \
        else \
        { \
            fetch_func ((void *)(dst), (mask), (src_top), (src_bottom), (width), (weight_top), \
                        (weight_bottom), (vx), (unit_x), (max_vx), (zero_src)); \
        } \
    } while (0)
881 :
882 :
/*
 * Number of elements in the on-stack temporary scanline buffer that
 * FAST_BILINEAR_MAINLOOP_INT declares (an array of uint64_t).
 */
#define SCANLINE_BUFFER_LENGTH 3072
884 :
885 : #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, fetch_func, op_func, src_type_t, \
886 : mask_type_t, dst_type_t, repeat_mode, flags) \
887 : static void \
888 : fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
889 : pixman_composite_info_t *info) \
890 : { \
891 : PIXMAN_COMPOSITE_ARGS (info); \
892 : dst_type_t *dst_line; \
893 : mask_type_t *mask_line; \
894 : src_type_t *src_first_line; \
895 : int y1, y2; \
896 : pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
897 : pixman_vector_t v; \
898 : pixman_fixed_t vx, vy; \
899 : pixman_fixed_t unit_x, unit_y; \
900 : int32_t left_pad, left_tz, right_tz, right_pad; \
901 : \
902 : dst_type_t *dst; \
903 : mask_type_t solid_mask; \
904 : const mask_type_t *mask = &solid_mask; \
905 : int src_stride, mask_stride, dst_stride; \
906 : \
907 : int src_width; \
908 : pixman_fixed_t src_width_fixed; \
909 : int max_x; \
910 : pixman_bool_t need_src_extension; \
911 : \
912 : uint64_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH]; \
913 : uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; \
914 : \
915 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
916 : if (flags & FLAG_HAVE_SOLID_MASK) \
917 : { \
918 : solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
919 : mask_stride = 0; \
920 : } \
921 : else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
922 : { \
923 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
924 : mask_stride, mask_line, 1); \
925 : } \
926 : \
927 : /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
928 : * transformed from destination space to source space */ \
929 : PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
930 : \
931 : /* reference point is the center of the pixel */ \
932 : v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
933 : v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
934 : v.vector[2] = pixman_fixed_1; \
935 : \
936 : if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
937 : return; \
938 : \
939 : unit_x = src_image->common.transform->matrix[0][0]; \
940 : unit_y = src_image->common.transform->matrix[1][1]; \
941 : \
942 : v.vector[0] -= pixman_fixed_1 / 2; \
943 : v.vector[1] -= pixman_fixed_1 / 2; \
944 : \
945 : vy = v.vector[1]; \
946 : \
947 : if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
948 : PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
949 : { \
950 : bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
951 : &left_pad, &left_tz, &width, &right_tz, &right_pad); \
952 : if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
953 : { \
954 : /* PAD repeat does not need special handling for 'transition zones' and */ \
955 : /* they can be combined with 'padding zones' safely */ \
956 : left_pad += left_tz; \
957 : right_pad += right_tz; \
958 : left_tz = right_tz = 0; \
959 : } \
960 : v.vector[0] += left_pad * unit_x; \
961 : } \
962 : \
963 : if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
964 : { \
965 : vx = v.vector[0]; \
966 : repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
967 : max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \
968 : \
969 : if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
970 : { \
971 : src_width = 0; \
972 : \
973 : while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
974 : src_width += src_image->bits.width; \
975 : \
976 : need_src_extension = TRUE; \
977 : } \
978 : else \
979 : { \
980 : src_width = src_image->bits.width; \
981 : need_src_extension = FALSE; \
982 : } \
983 : \
984 : src_width_fixed = pixman_int_to_fixed (src_width); \
985 : } \
986 : \
987 : if (op_func != NULL && width * sizeof(src_type_t) > sizeof(stack_scanline_buffer)) \
988 : { \
989 : scanline_buffer = pixman_malloc_ab (width, sizeof(src_type_t)); \
990 : \
991 : if (!scanline_buffer) \
992 : return; \
993 : } \
994 : \
995 : while (--height >= 0) \
996 : { \
997 : int weight1, weight2; \
998 : dst = dst_line; \
999 : dst_line += dst_stride; \
1000 : vx = v.vector[0]; \
1001 : if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1002 : { \
1003 : mask = mask_line; \
1004 : mask_line += mask_stride; \
1005 : } \
1006 : \
1007 : y1 = pixman_fixed_to_int (vy); \
1008 : weight2 = pixman_fixed_to_bilinear_weight (vy); \
1009 : if (weight2) \
1010 : { \
1011 : /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \
1012 : y2 = y1 + 1; \
1013 : weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \
1014 : } \
1015 : else \
1016 : { \
1017 : /* set both top and bottom row to the same scanline and tweak weights */ \
1018 : y2 = y1; \
1019 : weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \
1020 : } \
1021 : vy += unit_y; \
1022 : if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
1023 : { \
1024 : src_type_t *src1, *src2; \
1025 : src_type_t buf1[2]; \
1026 : src_type_t buf2[2]; \
1027 : repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
1028 : repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
1029 : src1 = src_first_line + src_stride * y1; \
1030 : src2 = src_first_line + src_stride * y2; \
1031 : \
1032 : if (left_pad > 0) \
1033 : { \
1034 : buf1[0] = buf1[1] = src1[0]; \
1035 : buf2[0] = buf2[1] = src2[0]; \
1036 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1037 : scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
1038 : 0, 0, 0, FALSE); \
1039 : dst += left_pad; \
1040 : if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1041 : mask += left_pad; \
1042 : } \
1043 : if (width > 0) \
1044 : { \
1045 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1046 : scanline_buffer, mask, src1, src2, width, weight1, weight2, \
1047 : vx, unit_x, 0, FALSE); \
1048 : dst += width; \
1049 : if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1050 : mask += width; \
1051 : } \
1052 : if (right_pad > 0) \
1053 : { \
1054 : buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
1055 : buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
1056 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1057 : scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
1058 : 0, 0, 0, FALSE); \
1059 : } \
1060 : } \
1061 : else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
1062 : { \
1063 : src_type_t *src1, *src2; \
1064 : src_type_t buf1[2]; \
1065 : src_type_t buf2[2]; \
1066 : /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
1067 : if (y1 < 0) \
1068 : { \
1069 : weight1 = 0; \
1070 : y1 = 0; \
1071 : } \
1072 : if (y1 >= src_image->bits.height) \
1073 : { \
1074 : weight1 = 0; \
1075 : y1 = src_image->bits.height - 1; \
1076 : } \
1077 : if (y2 < 0) \
1078 : { \
1079 : weight2 = 0; \
1080 : y2 = 0; \
1081 : } \
1082 : if (y2 >= src_image->bits.height) \
1083 : { \
1084 : weight2 = 0; \
1085 : y2 = src_image->bits.height - 1; \
1086 : } \
1087 : src1 = src_first_line + src_stride * y1; \
1088 : src2 = src_first_line + src_stride * y2; \
1089 : \
1090 : if (left_pad > 0) \
1091 : { \
1092 : buf1[0] = buf1[1] = 0; \
1093 : buf2[0] = buf2[1] = 0; \
1094 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1095 : scanline_buffer, mask, buf1, buf2, left_pad, weight1, weight2, \
1096 : 0, 0, 0, TRUE); \
1097 : dst += left_pad; \
1098 : if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1099 : mask += left_pad; \
1100 : } \
1101 : if (left_tz > 0) \
1102 : { \
1103 : buf1[0] = 0; \
1104 : buf1[1] = src1[0]; \
1105 : buf2[0] = 0; \
1106 : buf2[1] = src2[0]; \
1107 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1108 : scanline_buffer, mask, buf1, buf2, left_tz, weight1, weight2, \
1109 : pixman_fixed_frac (vx), unit_x, 0, FALSE); \
1110 : dst += left_tz; \
1111 : if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1112 : mask += left_tz; \
1113 : vx += left_tz * unit_x; \
1114 : } \
1115 : if (width > 0) \
1116 : { \
1117 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1118 : scanline_buffer, mask, src1, src2, width, weight1, weight2, \
1119 : vx, unit_x, 0, FALSE); \
1120 : dst += width; \
1121 : if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1122 : mask += width; \
1123 : vx += width * unit_x; \
1124 : } \
1125 : if (right_tz > 0) \
1126 : { \
1127 : buf1[0] = src1[src_image->bits.width - 1]; \
1128 : buf1[1] = 0; \
1129 : buf2[0] = src2[src_image->bits.width - 1]; \
1130 : buf2[1] = 0; \
1131 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1132 : scanline_buffer, mask, buf1, buf2, right_tz, weight1, weight2, \
1133 : pixman_fixed_frac (vx), unit_x, 0, FALSE); \
1134 : dst += right_tz; \
1135 : if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1136 : mask += right_tz; \
1137 : } \
1138 : if (right_pad > 0) \
1139 : { \
1140 : buf1[0] = buf1[1] = 0; \
1141 : buf2[0] = buf2[1] = 0; \
1142 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1143 : scanline_buffer, mask, buf1, buf2, right_pad, weight1, weight2, \
1144 : 0, 0, 0, TRUE); \
1145 : } \
1146 : } \
1147 : else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
1148 : { \
1149 : int32_t num_pixels; \
1150 : int32_t width_remain; \
1151 : src_type_t * src_line_top; \
1152 : src_type_t * src_line_bottom; \
1153 : src_type_t buf1[2]; \
1154 : src_type_t buf2[2]; \
1155 : src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
1156 : src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
1157 : int i, j; \
1158 : \
1159 : repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
1160 : repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
1161 : src_line_top = src_first_line + src_stride * y1; \
1162 : src_line_bottom = src_first_line + src_stride * y2; \
1163 : \
1164 : if (need_src_extension) \
1165 : { \
1166 : for (i=0; i<src_width;) \
1167 : { \
1168 : for (j=0; j<src_image->bits.width; j++, i++) \
1169 : { \
1170 : extended_src_line0[i] = src_line_top[j]; \
1171 : extended_src_line1[i] = src_line_bottom[j]; \
1172 : } \
1173 : } \
1174 : \
1175 : src_line_top = &extended_src_line0[0]; \
1176 : src_line_bottom = &extended_src_line1[0]; \
1177 : } \
1178 : \
1179 : /* Top & Bottom wrap around buffer */ \
1180 : buf1[0] = src_line_top[src_width - 1]; \
1181 : buf1[1] = src_line_top[0]; \
1182 : buf2[0] = src_line_bottom[src_width - 1]; \
1183 : buf2[1] = src_line_bottom[0]; \
1184 : \
1185 : width_remain = width; \
1186 : \
1187 : while (width_remain > 0) \
1188 : { \
1189 : /* We use src_width_fixed because it can make vx in original source range */ \
1190 : repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
1191 : \
1192 : /* Wrap around part */ \
1193 : if (pixman_fixed_to_int (vx) == src_width - 1) \
1194 : { \
1195 : /* for positive unit_x \
1196 : * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
1197 : * \
1198 : * vx is in range [0, src_width_fixed - pixman_fixed_e] \
1199 : * So we are safe from overflow. \
1200 : */ \
1201 : num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
1202 : \
1203 : if (num_pixels > width_remain) \
1204 : num_pixels = width_remain; \
1205 : \
1206 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
1207 : dst, scanline_buffer, mask, buf1, buf2, num_pixels, \
1208 : weight1, weight2, pixman_fixed_frac(vx), \
1209 : unit_x, src_width_fixed, FALSE); \
1210 : \
1211 : width_remain -= num_pixels; \
1212 : vx += num_pixels * unit_x; \
1213 : dst += num_pixels; \
1214 : \
1215 : if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1216 : mask += num_pixels; \
1217 : \
1218 : repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
1219 : } \
1220 : \
1221 : /* Normal scanline composite */ \
1222 : if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
1223 : { \
1224 : /* for positive unit_x \
1225 : * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
1226 : * \
1227 : * vx is in range [0, src_width_fixed - pixman_fixed_e] \
1228 : * So we are safe from overflow here. \
1229 : */ \
1230 : num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
1231 : / unit_x) + 1; \
1232 : \
1233 : if (num_pixels > width_remain) \
1234 : num_pixels = width_remain; \
1235 : \
1236 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, \
1237 : dst, scanline_buffer, mask, src_line_top, src_line_bottom, \
1238 : num_pixels, weight1, weight2, vx, unit_x, src_width_fixed, \
1239 : FALSE); \
1240 : \
1241 : width_remain -= num_pixels; \
1242 : vx += num_pixels * unit_x; \
1243 : dst += num_pixels; \
1244 : \
1245 : if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1246 : mask += num_pixels; \
1247 : } \
1248 : } \
1249 : } \
1250 : else \
1251 : { \
1252 : scanline_func (dst_type_t, mask_type_t, src_type_t, fetch_func, op_func, dst, \
1253 : scanline_buffer, mask, \
1254 : src_first_line + src_stride * y1, \
1255 : src_first_line + src_stride * y2, width, \
1256 : weight1, weight2, vx, unit_x, max_vx, FALSE); \
1257 : } \
1258 : } \
1259 : if (scanline_buffer != (uint8_t *) stack_scanline_buffer) \
1260 : free (scanline_buffer); \
1261 : }
1262 :
/*
 * Public spelling of the main-loop template: prepends '_' to scale_func_name
 * and forwards every argument unchanged to FAST_BILINEAR_MAINLOOP_INT.
 * The extra level of macro is a workaround for old sun studio, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, fetch_func, op_func, src_type_t, mask_type_t, \
                                      dst_type_t, repeat_mode, flags) \
    FAST_BILINEAR_MAINLOOP_INT (_ ## scale_func_name, \
                                fetch_func, op_func, \
                                src_type_t, mask_type_t, dst_type_t, \
                                repeat_mode, flags)
1268 :
/* Flag set OR-ed into the source flags of every SIMPLE_BILINEAR_* table entry. */
#define SCALED_BILINEAR_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM \
     | FAST_PATH_NO_ALPHA_MAP \
     | FAST_PATH_BILINEAR_FILTER \
     | FAST_PATH_NO_ACCESSORS \
     | FAST_PATH_NARROW_FORMAT)
1275 :
/*
 * Brace-enclosed initializer for one fast-path entry (presumably a row of a
 * fast-path table -- confirm at the use site): operator PIXMAN_OP_<op>,
 * source PIXMAN_<s> with PAD repeat and positive x unit, PIXMAN_null / 0 in
 * the mask slots, destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _pad_ ## op, \
    }
1286 :
/*
 * Brace-enclosed initializer for one fast-path entry: operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> with NONE repeat and positive x unit,
 * PIXMAN_null / 0 in the mask slots, destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _none_ ## op, \
    }
1297 :
/*
 * Brace-enclosed initializer for one fast-path entry: operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> flagged
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, PIXMAN_null / 0 in the mask slots,
 * destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _cover_ ## op, \
    }
1306 :
/*
 * Brace-enclosed initializer for one fast-path entry: operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> with NORMAL repeat and positive x unit,
 * PIXMAN_null / 0 in the mask slots, destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _normal_ ## op, \
    }
1317 :
/*
 * Brace-enclosed initializer for one fast-path entry with a PIXMAN_a8 mask
 * (mask flags MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)): operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> with PAD repeat and positive x unit,
 * destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _pad_ ## op, \
    }
1328 :
/*
 * Brace-enclosed initializer for one fast-path entry with a PIXMAN_a8 mask
 * (mask flags MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)): operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> with NONE repeat and positive x unit,
 * destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _none_ ## op, \
    }
1339 :
/*
 * Brace-enclosed initializer for one fast-path entry with a PIXMAN_a8 mask
 * (mask flags MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)): operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> flagged
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, destination PIXMAN_<d>, and the
 * handler fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _cover_ ## op, \
    }
1348 :
/*
 * Brace-enclosed initializer for one fast-path entry with a PIXMAN_a8 mask
 * (mask flags MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA)): operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> with NORMAL repeat and positive x unit,
 * destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_a8, \
        MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _normal_ ## op, \
    }
1359 :
/*
 * Brace-enclosed initializer for one fast-path entry with a PIXMAN_solid
 * mask (mask flags MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)): operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> with PAD repeat and positive x unit,
 * destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _pad_ ## op, \
    }
1370 :
/*
 * Brace-enclosed initializer for one fast-path entry with a PIXMAN_solid
 * mask (mask flags MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)): operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> with NONE repeat and positive x unit,
 * destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _none_ ## op, \
    }
1381 :
/*
 * Brace-enclosed initializer for one fast-path entry with a PIXMAN_solid
 * mask (mask flags MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)): operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> flagged
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, destination PIXMAN_<d>, and the
 * handler fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _cover_ ## op, \
    }
1390 :
/*
 * Brace-enclosed initializer for one fast-path entry with a PIXMAN_solid
 * mask (mask flags MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA)): operator
 * PIXMAN_OP_<op>, source PIXMAN_<s> with NORMAL repeat and positive x unit,
 * destination PIXMAN_<d>, and the handler
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_solid, \
        MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_bilinear_ ## func ## _normal_ ## op, \
    }
1401 :
/*
 * Expands to the four unmasked entries for one (op, s, d, func) combination.
 * The 'cover' variant is listed first so that it is preferred, because it is
 * faster; NONE, PAD and NORMAL follow in that order.
 */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1408 :
/*
 * Expands to the four a8-masked entries for one (op, s, d, func)
 * combination, in the order COVER, NONE, PAD, NORMAL.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1414 :
/*
 * Expands to the four solid-masked entries for one (op, s, d, func)
 * combination, in the order COVER, NONE, PAD, NORMAL.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1420 :
1421 : #endif
|