Line data Source code
1 : /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 : /*
3 : * Copyright © 2000 SuSE, Inc.
4 : * Copyright © 2007 Red Hat, Inc.
5 : *
6 : * Permission to use, copy, modify, distribute, and sell this software and its
7 : * documentation for any purpose is hereby granted without fee, provided that
8 : * the above copyright notice appear in all copies and that both that
9 : * copyright notice and this permission notice appear in supporting
10 : * documentation, and that the name of SuSE not be used in advertising or
11 : * publicity pertaining to distribution of the software without specific,
12 : * written prior permission. SuSE makes no representations about the
13 : * suitability of this software for any purpose. It is provided "as is"
14 : * without express or implied warranty.
15 : *
16 : * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 : * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18 : * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 : * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 : * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 : * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 : *
23 : * Author: Keith Packard, SuSE, Inc.
24 : */
25 :
26 : #ifdef HAVE_CONFIG_H
27 : #include <config.h>
28 : #endif
29 : #include <string.h>
30 : #include <stdlib.h>
31 : #include "pixman-private.h"
32 : #include "pixman-combine32.h"
33 : #include "pixman-inlines.h"
34 :
/*
 * Fetch a 3-byte (0888) pixel at 'a' and return it packed in the low
 * 24 bits of a uint32_t (high byte zero).
 *
 * The 3-byte read is split into one 8-bit and one 16-bit access,
 * chosen by the alignment test so that the uint16_t access always
 * lands on an even address.  The WORDS_BIGENDIAN branches arrange the
 * bytes so the packed result matches what store_24() writes.
 *
 * NOTE(review): the uint16_t casts assume 2-byte-aligned 16-bit loads
 * are permitted on all supported platforms — confirm against pixman's
 * platform requirements.
 */
static force_inline uint32_t
fetch_24 (uint8_t *a)
{
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
	/* odd address: byte 0 is the high 8 bits, bytes 1-2 the low 16 */
	return (*a << 16) | (*(uint16_t *)(a + 1));
#else
	/* odd address: byte 0 is the low 8 bits, bytes 1-2 the high 16 */
	return *a | (*(uint16_t *)(a + 1) << 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
	/* even address: bytes 0-1 are the high 16 bits, byte 2 the low 8 */
	return (*(uint16_t *)a << 8) | *(a + 2);
#else
	/* even address: bytes 0-1 are the low 16 bits, byte 2 the high 8 */
	return *(uint16_t *)a | (*(a + 2) << 16);
#endif
    }
}
55 :
/*
 * Store the low 24 bits of 'v' as a 3-byte (0888) pixel at 'a'.
 *
 * Mirror of fetch_24(): the write is split into one 8-bit and one
 * 16-bit store so the uint16_t store is always on an even address,
 * with byte order chosen per WORDS_BIGENDIAN to match fetch_24().
 */
static force_inline void
store_24 (uint8_t *a,
          uint32_t v)
{
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
	/* odd address: high byte first, then the low 16 bits */
	*a = (uint8_t) (v >> 16);
	*(uint16_t *)(a + 1) = (uint16_t) (v);
#else
	/* odd address: low byte first, then the high 16 bits */
	*a = (uint8_t) (v);
	*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
	/* even address: high 16 bits first, then the low byte */
	*(uint16_t *)a = (uint16_t)(v >> 8);
	*(a + 2) = (uint8_t)v;
#else
	/* even address: low 16 bits first, then the high byte */
	*(uint16_t *)a = (uint16_t)v;
	*(a + 2) = (uint8_t)(v >> 16);
#endif
    }
}
81 :
/*
 * Porter-Duff OVER for premultiplied a8r8g8b8 pixels:
 * returns src + dest * (255 - src_alpha) / 255, per component.
 * UN8x4_MUL_UN8_ADD_UN8x4 performs the multiply-add on 'dest' in place.
 */
static force_inline uint32_t
over (uint32_t src,
      uint32_t dest)
{
    uint32_t a = ~src >> 24;	/* inverse of the source alpha */

    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);

    return dest;
}
92 :
/*
 * Porter-Duff IN: scale every component of the a8r8g8b8 pixel 'x' by
 * the 8-bit mask value 'y' (x * y / 255, component-wise).
 * UN8x4_MUL_UN8 updates 'x' in place.
 */
static force_inline uint32_t
in (uint32_t x,
    uint8_t y)
{
    uint16_t a = y;

    UN8x4_MUL_UN8 (x, a);

    return x;
}
103 :
104 : /*
105 : * Naming convention:
106 : *
107 : * op_src_mask_dest
108 : */
109 : static void
110 0 : fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
111 : pixman_composite_info_t *info)
112 : {
113 0 : PIXMAN_COMPOSITE_ARGS (info);
114 : uint32_t *src, *src_line;
115 : uint32_t *dst, *dst_line;
116 : uint8_t *mask, *mask_line;
117 : int src_stride, mask_stride, dst_stride;
118 : uint8_t m;
119 : uint32_t s, d;
120 : int32_t w;
121 :
122 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
123 0 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
124 0 : PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
125 :
126 0 : while (height--)
127 : {
128 0 : src = src_line;
129 0 : src_line += src_stride;
130 0 : dst = dst_line;
131 0 : dst_line += dst_stride;
132 0 : mask = mask_line;
133 0 : mask_line += mask_stride;
134 :
135 0 : w = width;
136 0 : while (w--)
137 : {
138 0 : m = *mask++;
139 0 : if (m)
140 : {
141 0 : s = *src | 0xff000000;
142 :
143 0 : if (m == 0xff)
144 : {
145 0 : *dst = s;
146 : }
147 : else
148 : {
149 0 : d = in (s, m);
150 0 : *dst = over (d, *dst);
151 : }
152 : }
153 0 : src++;
154 0 : dst++;
155 : }
156 : }
157 0 : }
158 :
159 : static void
160 0 : fast_composite_in_n_8_8 (pixman_implementation_t *imp,
161 : pixman_composite_info_t *info)
162 : {
163 0 : PIXMAN_COMPOSITE_ARGS (info);
164 : uint32_t src, srca;
165 : uint8_t *dst_line, *dst;
166 : uint8_t *mask_line, *mask, m;
167 : int dst_stride, mask_stride;
168 : int32_t w;
169 : uint16_t t;
170 :
171 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
172 :
173 0 : srca = src >> 24;
174 :
175 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
176 0 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
177 :
178 0 : if (srca == 0xff)
179 : {
180 0 : while (height--)
181 : {
182 0 : dst = dst_line;
183 0 : dst_line += dst_stride;
184 0 : mask = mask_line;
185 0 : mask_line += mask_stride;
186 0 : w = width;
187 :
188 0 : while (w--)
189 : {
190 0 : m = *mask++;
191 :
192 0 : if (m == 0)
193 0 : *dst = 0;
194 0 : else if (m != 0xff)
195 0 : *dst = MUL_UN8 (m, *dst, t);
196 :
197 0 : dst++;
198 : }
199 : }
200 : }
201 : else
202 : {
203 0 : while (height--)
204 : {
205 0 : dst = dst_line;
206 0 : dst_line += dst_stride;
207 0 : mask = mask_line;
208 0 : mask_line += mask_stride;
209 0 : w = width;
210 :
211 0 : while (w--)
212 : {
213 0 : m = *mask++;
214 0 : m = MUL_UN8 (m, srca, t);
215 :
216 0 : if (m == 0)
217 0 : *dst = 0;
218 0 : else if (m != 0xff)
219 0 : *dst = MUL_UN8 (m, *dst, t);
220 :
221 0 : dst++;
222 : }
223 : }
224 : }
225 0 : }
226 :
227 : static void
228 0 : fast_composite_in_8_8 (pixman_implementation_t *imp,
229 : pixman_composite_info_t *info)
230 : {
231 0 : PIXMAN_COMPOSITE_ARGS (info);
232 : uint8_t *dst_line, *dst;
233 : uint8_t *src_line, *src;
234 : int dst_stride, src_stride;
235 : int32_t w;
236 : uint8_t s;
237 : uint16_t t;
238 :
239 0 : PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
240 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
241 :
242 0 : while (height--)
243 : {
244 0 : dst = dst_line;
245 0 : dst_line += dst_stride;
246 0 : src = src_line;
247 0 : src_line += src_stride;
248 0 : w = width;
249 :
250 0 : while (w--)
251 : {
252 0 : s = *src++;
253 :
254 0 : if (s == 0)
255 0 : *dst = 0;
256 0 : else if (s != 0xff)
257 0 : *dst = MUL_UN8 (s, *dst, t);
258 :
259 0 : dst++;
260 : }
261 : }
262 0 : }
263 :
264 : static void
265 0 : fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
266 : pixman_composite_info_t *info)
267 : {
268 0 : PIXMAN_COMPOSITE_ARGS (info);
269 : uint32_t src, srca;
270 : uint32_t *dst_line, *dst, d;
271 : uint8_t *mask_line, *mask, m;
272 : int dst_stride, mask_stride;
273 : int32_t w;
274 :
275 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
276 :
277 0 : srca = src >> 24;
278 0 : if (src == 0)
279 0 : return;
280 :
281 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
282 0 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
283 :
284 0 : while (height--)
285 : {
286 0 : dst = dst_line;
287 0 : dst_line += dst_stride;
288 0 : mask = mask_line;
289 0 : mask_line += mask_stride;
290 0 : w = width;
291 :
292 0 : while (w--)
293 : {
294 0 : m = *mask++;
295 0 : if (m == 0xff)
296 : {
297 0 : if (srca == 0xff)
298 0 : *dst = src;
299 : else
300 0 : *dst = over (src, *dst);
301 : }
302 0 : else if (m)
303 : {
304 0 : d = in (src, m);
305 0 : *dst = over (d, *dst);
306 : }
307 0 : dst++;
308 : }
309 : }
310 : }
311 :
312 : static void
313 0 : fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
314 : pixman_composite_info_t *info)
315 : {
316 0 : PIXMAN_COMPOSITE_ARGS (info);
317 : uint32_t src, s;
318 : uint32_t *dst_line, *dst, d;
319 : uint32_t *mask_line, *mask, ma;
320 : int dst_stride, mask_stride;
321 : int32_t w;
322 :
323 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
324 :
325 0 : if (src == 0)
326 0 : return;
327 :
328 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
329 0 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
330 :
331 0 : while (height--)
332 : {
333 0 : dst = dst_line;
334 0 : dst_line += dst_stride;
335 0 : mask = mask_line;
336 0 : mask_line += mask_stride;
337 0 : w = width;
338 :
339 0 : while (w--)
340 : {
341 0 : ma = *mask++;
342 :
343 0 : if (ma)
344 : {
345 0 : d = *dst;
346 0 : s = src;
347 :
348 0 : UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
349 :
350 0 : *dst = s;
351 : }
352 :
353 0 : dst++;
354 : }
355 : }
356 : }
357 :
358 : static void
359 0 : fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
360 : pixman_composite_info_t *info)
361 : {
362 0 : PIXMAN_COMPOSITE_ARGS (info);
363 : uint32_t src, srca, s;
364 : uint32_t *dst_line, *dst, d;
365 : uint32_t *mask_line, *mask, ma;
366 : int dst_stride, mask_stride;
367 : int32_t w;
368 :
369 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
370 :
371 0 : srca = src >> 24;
372 0 : if (src == 0)
373 0 : return;
374 :
375 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
376 0 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
377 :
378 0 : while (height--)
379 : {
380 0 : dst = dst_line;
381 0 : dst_line += dst_stride;
382 0 : mask = mask_line;
383 0 : mask_line += mask_stride;
384 0 : w = width;
385 :
386 0 : while (w--)
387 : {
388 0 : ma = *mask++;
389 0 : if (ma == 0xffffffff)
390 : {
391 0 : if (srca == 0xff)
392 0 : *dst = src;
393 : else
394 0 : *dst = over (src, *dst);
395 : }
396 0 : else if (ma)
397 : {
398 0 : d = *dst;
399 0 : s = src;
400 :
401 0 : UN8x4_MUL_UN8x4 (s, ma);
402 0 : UN8x4_MUL_UN8 (ma, srca);
403 0 : ma = ~ma;
404 0 : UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
405 :
406 0 : *dst = d;
407 : }
408 :
409 0 : dst++;
410 : }
411 : }
412 : }
413 :
414 : static void
415 0 : fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
416 : pixman_composite_info_t *info)
417 : {
418 0 : PIXMAN_COMPOSITE_ARGS (info);
419 : uint32_t src, srca;
420 : uint8_t *dst_line, *dst;
421 : uint32_t d;
422 : uint8_t *mask_line, *mask, m;
423 : int dst_stride, mask_stride;
424 : int32_t w;
425 :
426 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
427 :
428 0 : srca = src >> 24;
429 0 : if (src == 0)
430 0 : return;
431 :
432 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
433 0 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
434 :
435 0 : while (height--)
436 : {
437 0 : dst = dst_line;
438 0 : dst_line += dst_stride;
439 0 : mask = mask_line;
440 0 : mask_line += mask_stride;
441 0 : w = width;
442 :
443 0 : while (w--)
444 : {
445 0 : m = *mask++;
446 0 : if (m == 0xff)
447 : {
448 0 : if (srca == 0xff)
449 : {
450 0 : d = src;
451 : }
452 : else
453 : {
454 0 : d = fetch_24 (dst);
455 0 : d = over (src, d);
456 : }
457 : store_24 (dst, d);
458 : }
459 0 : else if (m)
460 : {
461 0 : d = over (in (src, m), fetch_24 (dst));
462 : store_24 (dst, d);
463 : }
464 0 : dst += 3;
465 : }
466 : }
467 : }
468 :
469 : static void
470 0 : fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
471 : pixman_composite_info_t *info)
472 : {
473 0 : PIXMAN_COMPOSITE_ARGS (info);
474 : uint32_t src, srca;
475 : uint16_t *dst_line, *dst;
476 : uint32_t d;
477 : uint8_t *mask_line, *mask, m;
478 : int dst_stride, mask_stride;
479 : int32_t w;
480 :
481 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
482 :
483 0 : srca = src >> 24;
484 0 : if (src == 0)
485 0 : return;
486 :
487 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
488 0 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
489 :
490 0 : while (height--)
491 : {
492 0 : dst = dst_line;
493 0 : dst_line += dst_stride;
494 0 : mask = mask_line;
495 0 : mask_line += mask_stride;
496 0 : w = width;
497 :
498 0 : while (w--)
499 : {
500 0 : m = *mask++;
501 0 : if (m == 0xff)
502 : {
503 0 : if (srca == 0xff)
504 : {
505 0 : d = src;
506 : }
507 : else
508 : {
509 0 : d = *dst;
510 0 : d = over (src, convert_0565_to_0888 (d));
511 : }
512 0 : *dst = convert_8888_to_0565 (d);
513 : }
514 0 : else if (m)
515 : {
516 0 : d = *dst;
517 0 : d = over (in (src, m), convert_0565_to_0888 (d));
518 0 : *dst = convert_8888_to_0565 (d);
519 : }
520 0 : dst++;
521 : }
522 : }
523 : }
524 :
525 : static void
526 0 : fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
527 : pixman_composite_info_t *info)
528 : {
529 0 : PIXMAN_COMPOSITE_ARGS (info);
530 : uint32_t src, srca, s;
531 : uint16_t src16;
532 : uint16_t *dst_line, *dst;
533 : uint32_t d;
534 : uint32_t *mask_line, *mask, ma;
535 : int dst_stride, mask_stride;
536 : int32_t w;
537 :
538 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
539 :
540 0 : srca = src >> 24;
541 0 : if (src == 0)
542 0 : return;
543 :
544 0 : src16 = convert_8888_to_0565 (src);
545 :
546 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
547 0 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
548 :
549 0 : while (height--)
550 : {
551 0 : dst = dst_line;
552 0 : dst_line += dst_stride;
553 0 : mask = mask_line;
554 0 : mask_line += mask_stride;
555 0 : w = width;
556 :
557 0 : while (w--)
558 : {
559 0 : ma = *mask++;
560 0 : if (ma == 0xffffffff)
561 : {
562 0 : if (srca == 0xff)
563 : {
564 0 : *dst = src16;
565 : }
566 : else
567 : {
568 0 : d = *dst;
569 0 : d = over (src, convert_0565_to_0888 (d));
570 0 : *dst = convert_8888_to_0565 (d);
571 : }
572 : }
573 0 : else if (ma)
574 : {
575 0 : d = *dst;
576 0 : d = convert_0565_to_0888 (d);
577 :
578 0 : s = src;
579 :
580 0 : UN8x4_MUL_UN8x4 (s, ma);
581 0 : UN8x4_MUL_UN8 (ma, srca);
582 0 : ma = ~ma;
583 0 : UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
584 :
585 0 : *dst = convert_8888_to_0565 (d);
586 : }
587 0 : dst++;
588 : }
589 : }
590 : }
591 :
592 : static void
593 0 : fast_composite_over_8888_8888 (pixman_implementation_t *imp,
594 : pixman_composite_info_t *info)
595 : {
596 0 : PIXMAN_COMPOSITE_ARGS (info);
597 : uint32_t *dst_line, *dst;
598 : uint32_t *src_line, *src, s;
599 : int dst_stride, src_stride;
600 : uint8_t a;
601 : int32_t w;
602 :
603 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
604 0 : PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
605 :
606 0 : while (height--)
607 : {
608 0 : dst = dst_line;
609 0 : dst_line += dst_stride;
610 0 : src = src_line;
611 0 : src_line += src_stride;
612 0 : w = width;
613 :
614 0 : while (w--)
615 : {
616 0 : s = *src++;
617 0 : a = s >> 24;
618 0 : if (a == 0xff)
619 0 : *dst = s;
620 0 : else if (s)
621 0 : *dst = over (s, *dst);
622 0 : dst++;
623 : }
624 : }
625 0 : }
626 :
627 : static void
628 0 : fast_composite_src_x888_8888 (pixman_implementation_t *imp,
629 : pixman_composite_info_t *info)
630 : {
631 0 : PIXMAN_COMPOSITE_ARGS (info);
632 : uint32_t *dst_line, *dst;
633 : uint32_t *src_line, *src;
634 : int dst_stride, src_stride;
635 : int32_t w;
636 :
637 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
638 0 : PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
639 :
640 0 : while (height--)
641 : {
642 0 : dst = dst_line;
643 0 : dst_line += dst_stride;
644 0 : src = src_line;
645 0 : src_line += src_stride;
646 0 : w = width;
647 :
648 0 : while (w--)
649 0 : *dst++ = (*src++) | 0xff000000;
650 : }
651 0 : }
652 :
#if 0
/* Disabled: OVER from a8r8g8b8 onto a 3-byte (0888) destination,
 * using fetch_24/store_24 for the unaligned pixel accesses.  Kept
 * for reference; not wired into any fast-path table in this file. */
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
			       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint32_t    *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    a = s >> 24;
	    if (a)
	    {
		if (a == 0xff)
		    d = s;
		else
		    d = over (s, fetch_24 (dst));

		store_24 (dst, d);
	    }
	    dst += 3;
	}
    }
}
#endif
695 :
696 : static void
697 0 : fast_composite_over_8888_0565 (pixman_implementation_t *imp,
698 : pixman_composite_info_t *info)
699 : {
700 0 : PIXMAN_COMPOSITE_ARGS (info);
701 : uint16_t *dst_line, *dst;
702 : uint32_t d;
703 : uint32_t *src_line, *src, s;
704 : uint8_t a;
705 : int dst_stride, src_stride;
706 : int32_t w;
707 :
708 0 : PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
709 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
710 :
711 0 : while (height--)
712 : {
713 0 : dst = dst_line;
714 0 : dst_line += dst_stride;
715 0 : src = src_line;
716 0 : src_line += src_stride;
717 0 : w = width;
718 :
719 0 : while (w--)
720 : {
721 0 : s = *src++;
722 0 : a = s >> 24;
723 0 : if (s)
724 : {
725 0 : if (a == 0xff)
726 : {
727 0 : d = s;
728 : }
729 : else
730 : {
731 0 : d = *dst;
732 0 : d = over (s, convert_0565_to_0888 (d));
733 : }
734 0 : *dst = convert_8888_to_0565 (d);
735 : }
736 0 : dst++;
737 : }
738 : }
739 0 : }
740 :
741 : static void
742 0 : fast_composite_add_8_8 (pixman_implementation_t *imp,
743 : pixman_composite_info_t *info)
744 : {
745 0 : PIXMAN_COMPOSITE_ARGS (info);
746 : uint8_t *dst_line, *dst;
747 : uint8_t *src_line, *src;
748 : int dst_stride, src_stride;
749 : int32_t w;
750 : uint8_t s, d;
751 : uint16_t t;
752 :
753 0 : PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
754 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
755 :
756 0 : while (height--)
757 : {
758 0 : dst = dst_line;
759 0 : dst_line += dst_stride;
760 0 : src = src_line;
761 0 : src_line += src_stride;
762 0 : w = width;
763 :
764 0 : while (w--)
765 : {
766 0 : s = *src++;
767 0 : if (s)
768 : {
769 0 : if (s != 0xff)
770 : {
771 0 : d = *dst;
772 0 : t = d + s;
773 0 : s = t | (0 - (t >> 8));
774 : }
775 0 : *dst = s;
776 : }
777 0 : dst++;
778 : }
779 : }
780 0 : }
781 :
782 : static void
783 0 : fast_composite_add_0565_0565 (pixman_implementation_t *imp,
784 : pixman_composite_info_t *info)
785 : {
786 0 : PIXMAN_COMPOSITE_ARGS (info);
787 : uint16_t *dst_line, *dst;
788 : uint32_t d;
789 : uint16_t *src_line, *src;
790 : uint32_t s;
791 : int dst_stride, src_stride;
792 : int32_t w;
793 :
794 0 : PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
795 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
796 :
797 0 : while (height--)
798 : {
799 0 : dst = dst_line;
800 0 : dst_line += dst_stride;
801 0 : src = src_line;
802 0 : src_line += src_stride;
803 0 : w = width;
804 :
805 0 : while (w--)
806 : {
807 0 : s = *src++;
808 0 : if (s)
809 : {
810 0 : d = *dst;
811 0 : s = convert_0565_to_8888 (s);
812 0 : if (d)
813 : {
814 0 : d = convert_0565_to_8888 (d);
815 0 : UN8x4_ADD_UN8x4 (s, d);
816 : }
817 0 : *dst = convert_8888_to_0565 (s);
818 : }
819 0 : dst++;
820 : }
821 : }
822 0 : }
823 :
824 : static void
825 0 : fast_composite_add_8888_8888 (pixman_implementation_t *imp,
826 : pixman_composite_info_t *info)
827 : {
828 0 : PIXMAN_COMPOSITE_ARGS (info);
829 : uint32_t *dst_line, *dst;
830 : uint32_t *src_line, *src;
831 : int dst_stride, src_stride;
832 : int32_t w;
833 : uint32_t s, d;
834 :
835 0 : PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
836 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
837 :
838 0 : while (height--)
839 : {
840 0 : dst = dst_line;
841 0 : dst_line += dst_stride;
842 0 : src = src_line;
843 0 : src_line += src_stride;
844 0 : w = width;
845 :
846 0 : while (w--)
847 : {
848 0 : s = *src++;
849 0 : if (s)
850 : {
851 0 : if (s != 0xffffffff)
852 : {
853 0 : d = *dst;
854 0 : if (d)
855 0 : UN8x4_ADD_UN8x4 (s, d);
856 : }
857 0 : *dst = s;
858 : }
859 0 : dst++;
860 : }
861 : }
862 0 : }
863 :
864 : static void
865 0 : fast_composite_add_n_8_8 (pixman_implementation_t *imp,
866 : pixman_composite_info_t *info)
867 : {
868 0 : PIXMAN_COMPOSITE_ARGS (info);
869 : uint8_t *dst_line, *dst;
870 : uint8_t *mask_line, *mask;
871 : int dst_stride, mask_stride;
872 : int32_t w;
873 : uint32_t src;
874 : uint8_t sa;
875 :
876 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
877 0 : PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
878 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
879 0 : sa = (src >> 24);
880 :
881 0 : while (height--)
882 : {
883 0 : dst = dst_line;
884 0 : dst_line += dst_stride;
885 0 : mask = mask_line;
886 0 : mask_line += mask_stride;
887 0 : w = width;
888 :
889 0 : while (w--)
890 : {
891 : uint16_t tmp;
892 : uint16_t a;
893 : uint32_t m, d;
894 : uint32_t r;
895 :
896 0 : a = *mask++;
897 0 : d = *dst;
898 :
899 0 : m = MUL_UN8 (sa, a, tmp);
900 0 : r = ADD_UN8 (m, d, tmp);
901 :
902 0 : *dst++ = r;
903 : }
904 : }
905 0 : }
906 :
/*
 * Helpers for walking a1 (one bit per pixel) images.  Bits are packed
 * into 32-bit words; the bit order within each word depends on the
 * platform endianness, so CREATE_BITMASK/UPDATE_BITMASK abstract both
 * the starting mask for bit 'n' and the direction of advance.
 */
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000 >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1 << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif

/* Test/set bit 'n' of the bit array starting at word pointer 'p'. */
#define TEST_BIT(p, n)					\
    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
/* Fixed: the do { } while (0) wrapper must not carry its own trailing
 * semicolon, otherwise `if (c) SET_BIT (p, n); else ...` fails to
 * compile.  Callers supply the semicolon themselves. */
#define SET_BIT(p, n)						\
    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0)
919 :
920 : static void
921 0 : fast_composite_add_1_1 (pixman_implementation_t *imp,
922 : pixman_composite_info_t *info)
923 : {
924 0 : PIXMAN_COMPOSITE_ARGS (info);
925 : uint32_t *dst_line, *dst;
926 : uint32_t *src_line, *src;
927 : int dst_stride, src_stride;
928 : int32_t w;
929 :
930 0 : PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
931 : src_stride, src_line, 1);
932 0 : PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
933 : dst_stride, dst_line, 1);
934 :
935 0 : while (height--)
936 : {
937 0 : dst = dst_line;
938 0 : dst_line += dst_stride;
939 0 : src = src_line;
940 0 : src_line += src_stride;
941 0 : w = width;
942 :
943 0 : while (w--)
944 : {
945 : /*
946 : * TODO: improve performance by processing uint32_t data instead
947 : * of individual bits
948 : */
949 0 : if (TEST_BIT (src, src_x + w))
950 0 : SET_BIT (dst, dest_x + w);
951 : }
952 : }
953 0 : }
954 :
955 : static void
956 0 : fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
957 : pixman_composite_info_t *info)
958 : {
959 0 : PIXMAN_COMPOSITE_ARGS (info);
960 : uint32_t src, srca;
961 : uint32_t *dst, *dst_line;
962 : uint32_t *mask, *mask_line;
963 : int mask_stride, dst_stride;
964 : uint32_t bitcache, bitmask;
965 : int32_t w;
966 :
967 0 : if (width <= 0)
968 0 : return;
969 :
970 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
971 0 : srca = src >> 24;
972 0 : if (src == 0)
973 0 : return;
974 :
975 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
976 : dst_stride, dst_line, 1);
977 0 : PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
978 : mask_stride, mask_line, 1);
979 0 : mask_line += mask_x >> 5;
980 :
981 0 : if (srca == 0xff)
982 : {
983 0 : while (height--)
984 : {
985 0 : dst = dst_line;
986 0 : dst_line += dst_stride;
987 0 : mask = mask_line;
988 0 : mask_line += mask_stride;
989 0 : w = width;
990 :
991 0 : bitcache = *mask++;
992 0 : bitmask = CREATE_BITMASK (mask_x & 31);
993 :
994 0 : while (w--)
995 : {
996 0 : if (bitmask == 0)
997 : {
998 0 : bitcache = *mask++;
999 0 : bitmask = CREATE_BITMASK (0);
1000 : }
1001 0 : if (bitcache & bitmask)
1002 0 : *dst = src;
1003 0 : bitmask = UPDATE_BITMASK (bitmask);
1004 0 : dst++;
1005 : }
1006 : }
1007 : }
1008 : else
1009 : {
1010 0 : while (height--)
1011 : {
1012 0 : dst = dst_line;
1013 0 : dst_line += dst_stride;
1014 0 : mask = mask_line;
1015 0 : mask_line += mask_stride;
1016 0 : w = width;
1017 :
1018 0 : bitcache = *mask++;
1019 0 : bitmask = CREATE_BITMASK (mask_x & 31);
1020 :
1021 0 : while (w--)
1022 : {
1023 0 : if (bitmask == 0)
1024 : {
1025 0 : bitcache = *mask++;
1026 0 : bitmask = CREATE_BITMASK (0);
1027 : }
1028 0 : if (bitcache & bitmask)
1029 0 : *dst = over (src, *dst);
1030 0 : bitmask = UPDATE_BITMASK (bitmask);
1031 0 : dst++;
1032 : }
1033 : }
1034 : }
1035 : }
1036 :
1037 : static void
1038 0 : fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
1039 : pixman_composite_info_t *info)
1040 : {
1041 0 : PIXMAN_COMPOSITE_ARGS (info);
1042 : uint32_t src, srca;
1043 : uint16_t *dst, *dst_line;
1044 : uint32_t *mask, *mask_line;
1045 : int mask_stride, dst_stride;
1046 : uint32_t bitcache, bitmask;
1047 : int32_t w;
1048 : uint32_t d;
1049 : uint16_t src565;
1050 :
1051 0 : if (width <= 0)
1052 0 : return;
1053 :
1054 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1055 0 : srca = src >> 24;
1056 0 : if (src == 0)
1057 0 : return;
1058 :
1059 0 : PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
1060 : dst_stride, dst_line, 1);
1061 0 : PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
1062 : mask_stride, mask_line, 1);
1063 0 : mask_line += mask_x >> 5;
1064 :
1065 0 : if (srca == 0xff)
1066 : {
1067 0 : src565 = convert_8888_to_0565 (src);
1068 0 : while (height--)
1069 : {
1070 0 : dst = dst_line;
1071 0 : dst_line += dst_stride;
1072 0 : mask = mask_line;
1073 0 : mask_line += mask_stride;
1074 0 : w = width;
1075 :
1076 0 : bitcache = *mask++;
1077 0 : bitmask = CREATE_BITMASK (mask_x & 31);
1078 :
1079 0 : while (w--)
1080 : {
1081 0 : if (bitmask == 0)
1082 : {
1083 0 : bitcache = *mask++;
1084 0 : bitmask = CREATE_BITMASK (0);
1085 : }
1086 0 : if (bitcache & bitmask)
1087 0 : *dst = src565;
1088 0 : bitmask = UPDATE_BITMASK (bitmask);
1089 0 : dst++;
1090 : }
1091 : }
1092 : }
1093 : else
1094 : {
1095 0 : while (height--)
1096 : {
1097 0 : dst = dst_line;
1098 0 : dst_line += dst_stride;
1099 0 : mask = mask_line;
1100 0 : mask_line += mask_stride;
1101 0 : w = width;
1102 :
1103 0 : bitcache = *mask++;
1104 0 : bitmask = CREATE_BITMASK (mask_x & 31);
1105 :
1106 0 : while (w--)
1107 : {
1108 0 : if (bitmask == 0)
1109 : {
1110 0 : bitcache = *mask++;
1111 0 : bitmask = CREATE_BITMASK (0);
1112 : }
1113 0 : if (bitcache & bitmask)
1114 : {
1115 0 : d = over (src, convert_0565_to_0888 (*dst));
1116 0 : *dst = convert_8888_to_0565 (d);
1117 : }
1118 0 : bitmask = UPDATE_BITMASK (bitmask);
1119 0 : dst++;
1120 : }
1121 : }
1122 : }
1123 : }
1124 :
1125 : /*
1126 : * Simple bitblt
1127 : */
1128 :
1129 : static void
1130 0 : fast_composite_solid_fill (pixman_implementation_t *imp,
1131 : pixman_composite_info_t *info)
1132 : {
1133 0 : PIXMAN_COMPOSITE_ARGS (info);
1134 : uint32_t src;
1135 :
1136 0 : src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
1137 :
1138 0 : if (dest_image->bits.format == PIXMAN_a1)
1139 : {
1140 0 : src = src >> 31;
1141 : }
1142 0 : else if (dest_image->bits.format == PIXMAN_a8)
1143 : {
1144 0 : src = src >> 24;
1145 : }
1146 0 : else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
1147 0 : dest_image->bits.format == PIXMAN_b5g6r5)
1148 : {
1149 0 : src = convert_8888_to_0565 (src);
1150 : }
1151 :
1152 0 : pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
1153 0 : PIXMAN_FORMAT_BPP (dest_image->bits.format),
1154 : dest_x, dest_y,
1155 : width, height,
1156 : src);
1157 0 : }
1158 :
1159 : static void
1160 0 : fast_composite_src_memcpy (pixman_implementation_t *imp,
1161 : pixman_composite_info_t *info)
1162 : {
1163 0 : PIXMAN_COMPOSITE_ARGS (info);
1164 0 : int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
1165 0 : uint32_t n_bytes = width * bpp;
1166 : int dst_stride, src_stride;
1167 : uint8_t *dst;
1168 : uint8_t *src;
1169 :
1170 0 : src_stride = src_image->bits.rowstride * 4;
1171 0 : dst_stride = dest_image->bits.rowstride * 4;
1172 :
1173 0 : src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
1174 0 : dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
1175 :
1176 0 : while (height--)
1177 : {
1178 0 : memcpy (dst, src, n_bytes);
1179 :
1180 0 : dst += dst_stride;
1181 0 : src += src_stride;
1182 : }
1183 0 : }
1184 :
/* Instantiate nearest-neighbour scaling fast paths (FAST_NEAREST,
 * defined in pixman-inlines.h) for the source/destination format,
 * operator and repeat-mode combinations below. */
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
1205 :
1206 : static force_inline void
1207 : scaled_bilinear_scanline_8888_565_OVER (uint16_t * dst,
1208 : const uint32_t * mask,
1209 : const uint32_t * src_top,
1210 : const uint32_t * src_bottom,
1211 : int32_t w,
1212 : int wt,
1213 : int wb,
1214 : pixman_fixed_t vx,
1215 : pixman_fixed_t unit_x,
1216 : pixman_fixed_t max_vx,
1217 : pixman_bool_t zero_src)
1218 : {
1219 0 : while ((w -= 1) >= 0)
1220 : {
1221 0 : uint32_t tl = src_top [pixman_fixed_to_int (vx)];
1222 0 : uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
1223 0 : uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
1224 0 : uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
1225 : uint32_t src, result;
1226 : uint16_t d;
1227 0 : d = *dst;
1228 0 : src = bilinear_interpolation (tl, tr,
1229 : bl, br,
1230 : pixman_fixed_to_bilinear_weight(vx),
1231 : wb);
1232 0 : vx += unit_x;
1233 0 : result = over (src, convert_0565_to_0888 (d));
1234 0 : *dst++ = convert_8888_to_0565 (result);
1235 : }
1236 : }
1237 :
1238 : static force_inline void
1239 : scaled_bilinear_scanline_8888_8888_OVER (uint32_t * dst,
1240 : const uint32_t * mask,
1241 : const uint32_t * src_top,
1242 : const uint32_t * src_bottom,
1243 : int32_t w,
1244 : int wt,
1245 : int wb,
1246 : pixman_fixed_t vx,
1247 : pixman_fixed_t unit_x,
1248 : pixman_fixed_t max_vx,
1249 : pixman_bool_t zero_src)
1250 : {
1251 0 : while ((w -= 1) >= 0)
1252 : {
1253 0 : uint32_t tl = src_top [pixman_fixed_to_int (vx)];
1254 0 : uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];
1255 0 : uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];
1256 0 : uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
1257 : uint32_t src;
1258 : uint32_t d;
1259 : uint32_t result;
1260 0 : d = *dst;
1261 0 : src = bilinear_interpolation (tl, tr,
1262 : bl, br,
1263 : pixman_fixed_to_bilinear_weight(vx),
1264 : wb);
1265 0 : vx += unit_x;
1266 0 : *dst++ = over (src, d);
1267 : }
1268 : }
1269 :
1270 : #ifndef LOWER_QUALITY_INTERPOLATION
1271 :
1272 : static force_inline void
1273 : scaled_bilinear_scanline_565_565_SRC (uint16_t * dst,
1274 : const uint32_t * mask,
1275 : const uint16_t * src_top,
1276 : const uint16_t * src_bottom,
1277 : int32_t w,
1278 : int wt,
1279 : int wb,
1280 : pixman_fixed_t vx,
1281 : pixman_fixed_t unit_x,
1282 : pixman_fixed_t max_vx,
1283 : pixman_bool_t zero_src)
1284 : {
1285 0 : while ((w -= 1) >= 0)
1286 : {
1287 0 : uint16_t tl = src_top [pixman_fixed_to_int (vx)];
1288 0 : uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
1289 0 : uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
1290 0 : uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
1291 : uint32_t d;
1292 0 : d = bilinear_interpolation(convert_0565_to_8888 (tl),
1293 : convert_0565_to_8888 (tr),
1294 : convert_0565_to_8888 (bl),
1295 : convert_0565_to_8888 (br),
1296 : pixman_fixed_to_bilinear_weight (vx),
1297 : wb);
1298 0 : vx += unit_x;
1299 0 : *dst++ = convert_8888_to_0565 (d);
1300 : }
1301 : }
1302 :
1303 : #else
1304 :
1305 : /* This is a clever low resolution bilinear interpolation inspired by the code
1306 : in Skia */
1307 :
1308 : /* This takes the green component from the 565 representation and moves it:
1309 : 00000000 00000000 rrrrrggg gggbbbbb
1310 :
1311 : 00000ggg ggg00000 rrrrr000 000bbbbb
1312 :
1313 : This gives us 5 extra bits of space before each component to let us do
1314 : SWAR style optimizations
1315 : */
1316 :
/* The six green bits, in their native 565 position (bits 5..10). */
#define GREEN_MASK (((1 << 6) - 1) << 5)

/* Lift green 16 bits above the red/blue pair:
 *   00000000 00000000 rrrrrggg gggbbbbb
 * becomes
 *   00000ggg ggg00000 rrrrr000 000bbbbb
 * so every component has >= 5 spare bits above it for SWAR math. */
static inline uint32_t
expand_rgb_565 (uint16_t c) {
    uint32_t v = c;
    return ((v & GREEN_MASK) << 16) | (v & ~GREEN_MASK);
}

/* Inverse of expand_rgb_565: fold green back into bits 5..10.
 * Truncation to uint16_t discards the weight residue above bit 15. */
static inline uint16_t
compact_rgb_565 (uint32_t c) {
    return (uint16_t) (((c >> 16) & GREEN_MASK) | (c & ~GREEN_MASK));
}

/* Reduced-precision bilinear blend of four 565 pixels, all three
 * components at once in one 32-bit word.  x and y are 0..16 sub-pixel
 * weights; the four weights below always sum to 32, so the >> 5
 * normalizes the result. */
static inline uint16_t
bilinear_interpolation_565(uint16_t tl, uint16_t tr,
			   uint16_t bl, uint16_t br,
			   int x, int y)
{
    uint32_t e_tl = expand_rgb_565 (tl);
    uint32_t e_tr = expand_rgb_565 (tr);
    uint32_t e_bl = expand_rgb_565 (bl);
    uint32_t e_br = expand_rgb_565 (br);
    int xy = (x * y) >> 3;
    uint32_t acc;

    /* weights: (32 - 2x - 2y + xy) + (2x - xy) + (2y - xy) + xy == 32 */
    acc  = e_tl * (32 - 2 * y - 2 * x + xy);
    acc += e_tr * (2 * x - xy);
    acc += e_bl * (2 * y - xy);
    acc += e_br * xy;

    return compact_rgb_565 (acc >> 5);
}
1346 :
1347 : static force_inline void
1348 : scaled_bilinear_scanline_565_565_SRC (uint16_t * dst,
1349 : const uint32_t * mask,
1350 : const uint16_t * src_top,
1351 : const uint16_t * src_bottom,
1352 : int32_t w,
1353 : int wt,
1354 : int wb,
1355 : pixman_fixed_t vx,
1356 : pixman_fixed_t unit_x,
1357 : pixman_fixed_t max_vx,
1358 : pixman_bool_t zero_src)
1359 : {
1360 : while ((w -= 1) >= 0)
1361 : {
1362 : uint16_t tl = src_top [pixman_fixed_to_int (vx)];
1363 : uint16_t tr = src_top [pixman_fixed_to_int (vx) + 1];
1364 : uint16_t bl = src_bottom [pixman_fixed_to_int (vx)];
1365 : uint16_t br = src_bottom [pixman_fixed_to_int (vx) + 1];
1366 :
1367 : uint16_t d = bilinear_interpolation_565 (tl, tr, bl, br,
1368 : pixman_fixed_to_bilinear_weight(vx),
1369 : wb);
1370 : vx += unit_x;
1371 : *dst++ = d;
1372 : }
1373 : }
1374 :
1375 : #endif
1376 :
/* Instantiate the bilinear main loops (pixman-inlines.h) around the
 * scanline workers above, one per repeat mode.  No mask variants are
 * generated (mask scanline argument is NULL, FLAG_NONE). */
FAST_BILINEAR_MAINLOOP_COMMON (565_565_cover_SRC,
			       scaled_bilinear_scanline_565_565_SRC, NULL,
			       uint16_t, uint32_t, uint16_t,
			       COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (565_565_pad_SRC,
			       scaled_bilinear_scanline_565_565_SRC, NULL,
			       uint16_t, uint32_t, uint16_t,
			       PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (565_565_none_SRC,
			       scaled_bilinear_scanline_565_565_SRC, NULL,
			       uint16_t, uint32_t, uint16_t,
			       NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (565_565_normal_SRC,
			       scaled_bilinear_scanline_565_565_SRC, NULL,
			       uint16_t, uint32_t, uint16_t,
			       NORMAL, FLAG_NONE)

FAST_BILINEAR_MAINLOOP_COMMON (8888_565_cover_OVER,
			       scaled_bilinear_scanline_8888_565_OVER, NULL,
			       uint32_t, uint32_t, uint16_t,
			       COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_565_pad_OVER,
			       scaled_bilinear_scanline_8888_565_OVER, NULL,
			       uint32_t, uint32_t, uint16_t,
			       PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_565_none_OVER,
			       scaled_bilinear_scanline_8888_565_OVER, NULL,
			       uint32_t, uint32_t, uint16_t,
			       NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_565_normal_OVER,
			       scaled_bilinear_scanline_8888_565_OVER, NULL,
			       uint32_t, uint32_t, uint16_t,
			       NORMAL, FLAG_NONE)

FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_cover_OVER,
			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
			       uint32_t, uint32_t, uint32_t,
			       COVER, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_pad_OVER,
			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
			       uint32_t, uint32_t, uint32_t,
			       PAD, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_none_OVER,
			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
			       uint32_t, uint32_t, uint32_t,
			       NONE, FLAG_NONE)
FAST_BILINEAR_MAINLOOP_COMMON (8888_8888_normal_OVER,
			       scaled_bilinear_scanline_8888_8888_OVER, NULL,
			       uint32_t, uint32_t, uint32_t,
			       NORMAL, FLAG_NONE)
1427 :
/* Sources narrower than this are pre-replicated into a stack buffer so
 * the delegate composite function works on reasonably wide strips. */
#define REPEAT_MIN_WIDTH 32

/*
 * Composite with a NORMAL (tiled) repeating source by splitting the
 * operation into strips that never cross a tile boundary and delegating
 * each strip to the fastest composite function that covers the samples.
 *
 * For very narrow sources (< REPEAT_MIN_WIDTH pixels, 8/16/32 bpp,
 * non-indexed) the source row is first replicated into a small
 * stack-allocated one-row image so each delegated call handles more
 * pixels at a time.
 */
static void
fast_composite_tiled_repeat (pixman_implementation_t *imp,
			     pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    pixman_composite_func_t func;
    pixman_format_code_t mask_format;
    uint32_t src_flags, mask_flags;
    int32_t sx, sy;
    int32_t width_remain;
    int32_t num_pixels;
    int32_t src_width;
    int32_t i, j;
    pixman_image_t extended_src_image;
    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
    pixman_bool_t need_src_extension;
    uint32_t *src_line;
    int32_t src_stride;
    int32_t src_bpp;
    pixman_composite_info_t info2 = *info;

    /* Each delegated strip stays inside one tile, so drop the repeat
     * flag and claim nearest-sample clip coverage instead. */
    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
	FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;

    if (mask_image)
    {
	mask_format = mask_image->common.extended_format_code;
	mask_flags = info->mask_flags;
    }
    else
    {
	mask_format = PIXMAN_null;
	mask_flags = FAST_PATH_IS_OPAQUE;
    }

    /* Find the delegate that will composite each strip. */
    _pixman_implementation_lookup_composite (
	imp->toplevel, info->op,
	src_image->common.extended_format_code, src_flags,
	mask_format, mask_flags,
	dest_image->common.extended_format_code, info->dest_flags,
	&imp, &func);

    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);

    if (src_image->bits.width < REPEAT_MIN_WIDTH &&
	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8) &&
	!src_image->bits.indexed)
    {
	/* Work out how many whole source tiles must be replicated to
	 * cover both REPEAT_MIN_WIDTH and the initial offset + width. */
	sx = src_x;
	sx = MOD (sx, src_image->bits.width);
	sx += width;
	src_width = 0;

	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
	    src_width += src_image->bits.width;

	/* Stride in uint32_t units, rounded up to whole words. */
	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);

	/* Initialize/validate stack-allocated temporary image */
	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
				 src_width, 1, &extended_src[0], src_stride,
				 FALSE);
	_pixman_image_validate (&extended_src_image);

	info2.src_image = &extended_src_image;
	need_src_extension = TRUE;
    }
    else
    {
	src_width = src_image->bits.width;
	need_src_extension = FALSE;
    }

    sx = src_x;
    sy = src_y;

    while (--height >= 0)
    {
	sx = MOD (sx, src_width);
	sy = MOD (sy, src_image->bits.height);

	if (need_src_extension)
	{
	    /* Replicate the current source row into extended_src,
	     * tile after tile, at the image's pixel size. */
	    if (src_bpp == 32)
	    {
		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			extended_src[i] = src_line[j];
		}
	    }
	    else if (src_bpp == 16)
	    {
		uint16_t *src_line_16;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
				       src_line_16, 1);
		src_line = (uint32_t*)src_line_16;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
		}
	    }
	    else if (src_bpp == 8)
	    {
		uint8_t *src_line_8;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
				       src_line_8, 1);
		src_line = (uint32_t*)src_line_8;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
		}
	    }

	    info2.src_y = 0;
	}
	else
	{
	    info2.src_y = sy;
	}

	width_remain = width;

	while (width_remain > 0)
	{
	    /* Limit each call to the pixels left before the source
	     * wraps around; after the first strip sx restarts at 0. */
	    num_pixels = src_width - sx;

	    if (num_pixels > width_remain)
		num_pixels = width_remain;

	    info2.src_x = sx;
	    info2.width = num_pixels;
	    info2.height = 1;

	    func (imp, &info2);

	    width_remain -= num_pixels;
	    info2.mask_x += num_pixels;
	    info2.dest_x += num_pixels;
	    sx = 0;
	}

	/* Next destination row: reset x positions, advance y. */
	sx = src_x;
	sy++;
	info2.mask_x = info->mask_x;
	info2.mask_y++;
	info2.dest_x = info->dest_x;
	info2.dest_y++;
    }

    if (need_src_extension)
	_pixman_image_fini (&extended_src_image);
}
1591 :
1592 : /* Use more unrolling for src_0565_0565 because it is typically CPU bound */
1593 : static force_inline void
1594 : scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
1595 : const uint16_t * src,
1596 : int32_t w,
1597 : pixman_fixed_t vx,
1598 : pixman_fixed_t unit_x,
1599 : pixman_fixed_t max_vx,
1600 : pixman_bool_t fully_transparent_src)
1601 : {
1602 : uint16_t tmp1, tmp2, tmp3, tmp4;
1603 0 : while ((w -= 4) >= 0)
1604 : {
1605 0 : tmp1 = *(src + pixman_fixed_to_int (vx));
1606 0 : vx += unit_x;
1607 0 : tmp2 = *(src + pixman_fixed_to_int (vx));
1608 0 : vx += unit_x;
1609 0 : tmp3 = *(src + pixman_fixed_to_int (vx));
1610 0 : vx += unit_x;
1611 0 : tmp4 = *(src + pixman_fixed_to_int (vx));
1612 0 : vx += unit_x;
1613 0 : *dst++ = tmp1;
1614 0 : *dst++ = tmp2;
1615 0 : *dst++ = tmp3;
1616 0 : *dst++ = tmp4;
1617 : }
1618 0 : if (w & 2)
1619 : {
1620 0 : tmp1 = *(src + pixman_fixed_to_int (vx));
1621 0 : vx += unit_x;
1622 0 : tmp2 = *(src + pixman_fixed_to_int (vx));
1623 0 : vx += unit_x;
1624 0 : *dst++ = tmp1;
1625 0 : *dst++ = tmp2;
1626 : }
1627 0 : if (w & 1)
1628 0 : *dst = *(src + pixman_fixed_to_int (vx));
1629 : }
1630 :
/* Instantiate the nearest-neighbour main loops around the unrolled
 * 565 scanline above, one per repeat mode. */
FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, COVER)
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, NONE)
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, PAD)
1640 :
1641 : static force_inline uint32_t
1642 : fetch_nearest (pixman_repeat_t src_repeat,
1643 : pixman_format_code_t format,
1644 : uint32_t *src, int x, int src_width)
1645 : {
1646 0 : if (repeat (src_repeat, &x, src_width))
1647 : {
1648 0 : if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
1649 0 : return *(src + x) | 0xff000000;
1650 : else
1651 0 : return *(src + x);
1652 : }
1653 : else
1654 : {
1655 0 : return 0;
1656 : }
1657 : }
1658 :
1659 : static force_inline void
1660 : combine_over (uint32_t s, uint32_t *dst)
1661 : {
1662 0 : if (s)
1663 : {
1664 0 : uint8_t ia = 0xff - (s >> 24);
1665 :
1666 0 : if (ia)
1667 0 : UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
1668 : else
1669 0 : *dst = s;
1670 : }
1671 : }
1672 :
1673 : static force_inline void
1674 : combine_src (uint32_t s, uint32_t *dst)
1675 : {
1676 0 : *dst = s;
1677 : }
1678 :
/*
 * Generic nearest-neighbour scaling for 32-bit formats with a scale
 * (diagonal) transform.  Walks the destination, stepping a fixed-point
 * source coordinate by the transform's diagonal entries, fetching each
 * source pixel through the repeat mode and combining with OVER or SRC.
 */
static void
fast_composite_scaled_nearest (pixman_implementation_t *imp,
			       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t *dst_line;
    uint32_t *src_line;
    int dst_stride, src_stride;
    int src_width, src_height;
    pixman_repeat_t src_repeat;
    pixman_fixed_t unit_x, unit_y;
    pixman_format_code_t src_format;
    pixman_vector_t v;
    pixman_fixed_t vy;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
     * transformed from destination space to source space
     */
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (src_image->common.transform, &v))
	return;

    /* Diagonal scale factors; this path assumes no rotation/shear. */
    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];

    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
    v.vector[0] -= pixman_fixed_e;
    v.vector[1] -= pixman_fixed_e;

    src_height = src_image->bits.height;
    src_width = src_image->bits.width;
    src_repeat = src_image->common.repeat;
    src_format = src_image->bits.format;

    vy = v.vector[1];
    while (height--)
    {
	pixman_fixed_t vx = v.vector[0];
	int y = pixman_fixed_to_int (vy);
	uint32_t *dst = dst_line;

	dst_line += dst_stride;

	/* adjust the y location by a unit vector in the y direction
	 * this is equivalent to transforming y+1 of the destination point to source space */
	vy += unit_y;

	if (!repeat (src_repeat, &y, src_height))
	{
	    /* Row falls outside the source: SRC clears it, OVER is a
	     * no-op (transparent source). */
	    if (op == PIXMAN_OP_SRC)
		memset (dst, 0, sizeof (*dst) * width);
	}
	else
	{
	    int w = width;

	    uint32_t *src = src_line + y * src_stride;

	    /* Two pixels per iteration so fetches can overlap. */
	    while (w >= 2)
	    {
		uint32_t s1, s2;
		int x1, x2;

		x1 = pixman_fixed_to_int (vx);
		vx += unit_x;

		x2 = pixman_fixed_to_int (vx);
		vx += unit_x;

		w -= 2;

		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);

		if (op == PIXMAN_OP_OVER)
		{
		    combine_over (s1, dst++);
		    combine_over (s2, dst++);
		}
		else
		{
		    combine_src (s1, dst++);
		    combine_src (s2, dst++);
		}
	    }

	    /* Remaining 0 or 1 pixel. */
	    while (w--)
	    {
		uint32_t s;
		int x;

		x = pixman_fixed_to_int (vx);
		vx += unit_x;

		s = fetch_nearest (src_repeat, src_format, src, x, src_width);

		if (op == PIXMAN_OP_OVER)
		    combine_over (s, dst++);
		else
		    combine_src (s, dst++);
	    }
	}
    }
}
1790 :
/* assumed destination cache line size, used to pick the tile width below */
#define CACHE_LINE_SIZE 64

/*
 * Template generating 90- and 270-degree rotation blitters for one
 * pixel type.  The *_trivial_* variants do a straightforward
 * column-to-row copy; the cache-aware wrappers split the destination
 * into cache-line-aligned vertical stripes (leading / aligned middle /
 * trailing) so destination writes stay within one cache line per
 * stripe.  The fast_composite_rotate_* entry points recover the source
 * origin from the image transform's translation column.
 */
#define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
                                                                              \
static void                                                                   \
blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
				 int             dst_stride,                  \
				 const pix_type *src,                         \
				 int             src_stride,                  \
				 int             w,                           \
				 int             h)                           \
{                                                                             \
    int x, y;                                                                 \
    for (y = 0; y < h; y++)                                                   \
    {                                                                         \
	const pix_type *s = src + (h - y - 1);                                \
	pix_type *d = dst + dst_stride * y;                                   \
	for (x = 0; x < w; x++)                                               \
	{                                                                     \
	    *d++ = *s;                                                        \
	    s += src_stride;                                                  \
	}                                                                     \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
				  int             dst_stride,                 \
				  const pix_type *src,                        \
				  int             src_stride,                 \
				  int             w,                          \
				  int             h)                          \
{                                                                             \
    int x, y;                                                                 \
    for (y = 0; y < h; y++)                                                   \
    {                                                                         \
	const pix_type *s = src + src_stride * (w - 1) + y;                   \
	pix_type *d = dst + dst_stride * y;                                   \
	for (x = 0; x < w; x++)                                               \
	{                                                                     \
	    *d++ = *s;                                                        \
	    s -= src_stride;                                                  \
	}                                                                     \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
blt_rotated_90_##suffix (pix_type       *dst,                                 \
			 int             dst_stride,                          \
			 const pix_type *src,                                 \
			 int             src_stride,                          \
			 int             W,                                   \
			 int             H)                                   \
{                                                                             \
    int x;                                                                    \
    int leading_pixels = 0, trailing_pixels = 0;                              \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
                                                                              \
    /*                                                                        \
     * split processing into handling destination as TILE_SIZExH cache line   \
     * aligned vertical stripes (optimistically assuming that destination     \
     * stride is a multiple of cache line, if not - it will be just a bit     \
     * slower)                                                                \
     */                                                                       \
                                                                              \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
    {                                                                         \
	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (leading_pixels > W)                                               \
	    leading_pixels = W;                                               \
                                                                              \
	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
	blt_rotated_90_trivial_##suffix (                                     \
	    dst,                                                              \
	    dst_stride,                                                       \
	    src,                                                              \
	    src_stride,                                                       \
	    leading_pixels,                                                   \
	    H);                                                               \
                                                                              \
	dst += leading_pixels;                                                \
	src += leading_pixels * src_stride;                                   \
	W -= leading_pixels;                                                  \
    }                                                                         \
                                                                              \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
    {                                                                         \
	trailing_pixels = (((uintptr_t)(dst + W) &                            \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (trailing_pixels > W)                                              \
	    trailing_pixels = W;                                              \
	W -= trailing_pixels;                                                 \
    }                                                                         \
                                                                              \
    for (x = 0; x < W; x += TILE_SIZE)                                        \
    {                                                                         \
	/* aligned middle part TILE_SIZExH */                                 \
	blt_rotated_90_trivial_##suffix (                                     \
	    dst + x,                                                          \
	    dst_stride,                                                       \
	    src + src_stride * x,                                             \
	    src_stride,                                                       \
	    TILE_SIZE,                                                        \
	    H);                                                               \
    }                                                                         \
                                                                              \
    if (trailing_pixels)                                                      \
    {                                                                         \
	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
	blt_rotated_90_trivial_##suffix (                                     \
	    dst + W,                                                          \
	    dst_stride,                                                       \
	    src + W * src_stride,                                             \
	    src_stride,                                                       \
	    trailing_pixels,                                                  \
	    H);                                                               \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
blt_rotated_270_##suffix (pix_type       *dst,                                \
			  int             dst_stride,                         \
			  const pix_type *src,                                \
			  int             src_stride,                         \
			  int             W,                                  \
			  int             H)                                  \
{                                                                             \
    int x;                                                                    \
    int leading_pixels = 0, trailing_pixels = 0;                              \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
                                                                              \
    /*                                                                        \
     * split processing into handling destination as TILE_SIZExH cache line   \
     * aligned vertical stripes (optimistically assuming that destination     \
     * stride is a multiple of cache line, if not - it will be just a bit     \
     * slower)                                                                \
     */                                                                       \
                                                                              \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
    {                                                                         \
	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (leading_pixels > W)                                               \
	    leading_pixels = W;                                               \
                                                                              \
	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
	blt_rotated_270_trivial_##suffix (                                    \
	    dst,                                                              \
	    dst_stride,                                                       \
	    src + src_stride * (W - leading_pixels),                          \
	    src_stride,                                                       \
	    leading_pixels,                                                   \
	    H);                                                               \
                                                                              \
	dst += leading_pixels;                                                \
	W -= leading_pixels;                                                  \
    }                                                                         \
                                                                              \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
    {                                                                         \
	trailing_pixels = (((uintptr_t)(dst + W) &                            \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
	if (trailing_pixels > W)                                              \
	    trailing_pixels = W;                                              \
	W -= trailing_pixels;                                                 \
	src += trailing_pixels * src_stride;                                  \
    }                                                                         \
                                                                              \
    for (x = 0; x < W; x += TILE_SIZE)                                        \
    {                                                                         \
	/* aligned middle part TILE_SIZExH */                                 \
	blt_rotated_270_trivial_##suffix (                                    \
	    dst + x,                                                          \
	    dst_stride,                                                       \
	    src + src_stride * (W - x - TILE_SIZE),                           \
	    src_stride,                                                       \
	    TILE_SIZE,                                                        \
	    H);                                                               \
    }                                                                         \
                                                                              \
    if (trailing_pixels)                                                      \
    {                                                                         \
	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
	blt_rotated_270_trivial_##suffix (                                    \
	    dst + W,                                                          \
	    dst_stride,                                                       \
	    src - trailing_pixels * src_stride,                               \
	    src_stride,                                                       \
	    trailing_pixels,                                                  \
	    H);                                                               \
    }                                                                         \
}                                                                             \
                                                                              \
static void                                                                   \
fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
				   pixman_composite_info_t *info)             \
{                                                                             \
    PIXMAN_COMPOSITE_ARGS (info);                                             \
    pix_type *dst_line;                                                       \
    pix_type *src_line;                                                       \
    int dst_stride, src_stride;                                               \
    int src_x_t, src_y_t;                                                     \
                                                                              \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
			   dst_stride, dst_line, 1);                          \
    src_x_t = -src_y + pixman_fixed_to_int (                                  \
				src_image->common.transform->matrix[0][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
    src_y_t = src_x + pixman_fixed_to_int (                                   \
				src_image->common.transform->matrix[1][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e);         \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
			   src_stride, src_line, 1);                          \
    blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
			     width, height);                                  \
}                                                                             \
                                                                              \
static void                                                                   \
fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
				    pixman_composite_info_t *info)            \
{                                                                             \
    PIXMAN_COMPOSITE_ARGS (info);                                             \
    pix_type *dst_line;                                                       \
    pix_type *src_line;                                                       \
    int dst_stride, src_stride;                                               \
    int src_x_t, src_y_t;                                                     \
                                                                              \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
			   dst_stride, dst_line, 1);                          \
    src_x_t = src_y + pixman_fixed_to_int (                                   \
				src_image->common.transform->matrix[0][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e);         \
    src_y_t = -src_x + pixman_fixed_to_int (                                  \
				src_image->common.transform->matrix[1][2] +   \
				pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
			   src_stride, src_line, 1);                          \
    blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
			      width, height);                                 \
}

/* Instantiate rotation blitters for 8-, 16- and 32-bit pixels. */
FAST_SIMPLE_ROTATE (8, uint8_t)
FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)
2036 :
/* Fast-path dispatch table for the generic C implementation.  Entries
 * are matched in order against (operator, source format+flags, mask
 * format+flags, destination format+flags); the table is terminated by
 * the PIXMAN_OP_NONE sentinel. */
static const pixman_fast_path_t c_fast_paths[] =
{
    /* Unscaled paths: solid/masked OVER, ADD, SRC fills and copies. */
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
    PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),

    /* Nearest-neighbour scaled paths (FAST_NEAREST instantiations). */
    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),

    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),

    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),

/* Generic nearest-neighbour entry built on fast_composite_scaled_nearest. */
#define NEAREST_FAST_PATH(op,s,d)		\
    {	PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_scaled_nearest,		\
    }

    NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),

/* Entries for the FAST_SIMPLE_ROTATE 90/270-degree blitters. */
#define SIMPLE_ROTATE_FLAGS(angle)		\
    (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|	\
     FAST_PATH_NEAREST_FILTER			|	\
     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	|	\
     FAST_PATH_STANDARD_FLAGS)

#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)	\
    {	PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),	\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_rotate_90_##suffix,	\
    },						\
    {	PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_rotate_270_##suffix,	\
    }

    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),

    /* Simple repeat fast path entry. */
    { PIXMAN_OP_any,
      PIXMAN_any,
      (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
       FAST_PATH_NORMAL_REPEAT),
      PIXMAN_any, 0,
      PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
      fast_composite_tiled_repeat
    },

    /* Bilinear scaled paths (FAST_BILINEAR_MAINLOOP_COMMON instantiations). */
    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),

    { PIXMAN_OP_NONE },	/* end-of-table sentinel */
};
2202 :
/* A1_FILL_MASK(n, offs): mask selecting 'n' consecutive a1 pixels starting
 * 'offs' bits into a 32-bit word.  On big-endian the first pixel of a
 * scanline lives in the most significant bit, so the mask is built from
 * the top of the word down; on little-endian it grows from bit 0 up.
 */
#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
#endif
2208 :
2209 : static force_inline void
2210 : pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
2211 : {
2212 0 : if (offs)
2213 : {
2214 0 : int leading_pixels = 32 - offs;
2215 0 : if (leading_pixels >= width)
2216 : {
2217 0 : if (v)
2218 0 : *dst |= A1_FILL_MASK (width, offs);
2219 : else
2220 0 : *dst &= ~A1_FILL_MASK (width, offs);
2221 : return;
2222 : }
2223 : else
2224 : {
2225 0 : if (v)
2226 0 : *dst++ |= A1_FILL_MASK (leading_pixels, offs);
2227 : else
2228 0 : *dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
2229 0 : width -= leading_pixels;
2230 : }
2231 : }
2232 0 : while (width >= 32)
2233 : {
2234 0 : if (v)
2235 0 : *dst++ = 0xFFFFFFFF;
2236 : else
2237 0 : *dst++ = 0;
2238 0 : width -= 32;
2239 : }
2240 0 : if (width > 0)
2241 : {
2242 0 : if (v)
2243 0 : *dst |= A1_FILL_MASK (width, 0);
2244 : else
2245 0 : *dst &= ~A1_FILL_MASK (width, 0);
2246 : }
2247 : }
2248 :
static void
pixman_fill1 (uint32_t *bits,
	      int       stride,
	      int       x,
	      int       y,
	      int       width,
	      int       height,
	      uint32_t  filler)
{
    /* Fill a width x height rectangle of a1 pixels with the low bit of
     * 'filler'.  'stride' is in uint32_t units, as everywhere in pixman.
     */
    uint32_t *row = bits + y * stride + (x >> 5);
    int offs = x & 31;

    /* Branch on the bit value once, outside the loop, so that the
     * force_inline'd pixman_fill1_line sees 'v' as a compile-time
     * constant in each specialized loop. */
    if (filler & 1)
    {
	while (height--)
	{
	    pixman_fill1_line (row, offs, width, 1);
	    row += stride;
	}
    }
    else
    {
	while (height--)
	{
	    pixman_fill1_line (row, offs, width, 0);
	    row += stride;
	}
    }
}
2278 :
static void
pixman_fill8 (uint32_t *bits,
	      int       stride,
	      int       x,
	      int       y,
	      int       width,
	      int       height,
	      uint32_t  filler)
{
    /* Fill a width x height rectangle of 8bpp pixels with the low byte of
     * 'filler'.  'stride' is measured in uint32_t units, as everywhere in
     * pixman, so convert it to bytes first.
     */
    int byte_stride = stride * (int) sizeof (uint32_t);
    uint8_t *dst = (uint8_t *) bits + y * byte_stride + x;

    /* Non-positive extents mean an empty rectangle; bail out early so the
     * size passed to memset below is always positive. */
    if (width <= 0 || height <= 0)
	return;

    while (height--)
    {
	/* memset is the idiomatic byte fill and is typically vectorized,
	 * unlike the hand-rolled per-byte loop it replaces. */
	memset (dst, (int) (filler & 0xff), (size_t) width);
	dst += byte_stride;
    }
}
2303 :
static void
pixman_fill16 (uint32_t *bits,
	       int       stride,
	       int       x,
	       int       y,
	       int       width,
	       int       height,
	       uint32_t  filler)
{
    /* Fill a width x height rectangle of 16bpp pixels with the low 16
     * bits of 'filler'.  'stride' is in uint32_t units; one uint32_t
     * holds exactly two uint16_t pixels. */
    int stride16 = stride * 2;
    uint16_t *row = (uint16_t *) bits + y * stride16 + x;
    uint16_t v = (uint16_t) filler;
    int i;

    while (height--)
    {
	for (i = 0; i < width; ++i)
	    row[i] = v;

	row += stride16;
    }
}
2329 :
static void
pixman_fill32 (uint32_t *bits,
	       int       stride,
	       int       x,
	       int       y,
	       int       width,
	       int       height,
	       uint32_t  filler)
{
    /* Fill a width x height rectangle of 32bpp pixels with 'filler'.
     * 'stride' is already in uint32_t units, so no conversion needed. */
    uint32_t *row = bits + y * stride + x;
    int i;

    while (height--)
    {
	for (i = width - 1; i >= 0; --i)
	    row[i] = filler;

	row += stride;
    }
}
2351 :
2352 : static pixman_bool_t
2353 0 : fast_path_fill (pixman_implementation_t *imp,
2354 : uint32_t * bits,
2355 : int stride,
2356 : int bpp,
2357 : int x,
2358 : int y,
2359 : int width,
2360 : int height,
2361 : uint32_t filler)
2362 : {
2363 0 : switch (bpp)
2364 : {
2365 : case 1:
2366 0 : pixman_fill1 (bits, stride, x, y, width, height, filler);
2367 0 : break;
2368 :
2369 : case 8:
2370 0 : pixman_fill8 (bits, stride, x, y, width, height, filler);
2371 0 : break;
2372 :
2373 : case 16:
2374 0 : pixman_fill16 (bits, stride, x, y, width, height, filler);
2375 0 : break;
2376 :
2377 : case 32:
2378 0 : pixman_fill32 (bits, stride, x, y, width, height, filler);
2379 0 : break;
2380 :
2381 : default:
2382 0 : return FALSE;
2383 : }
2384 :
2385 0 : return TRUE;
2386 : }
2387 :
2388 : /*****************************************************************************/
2389 :
static uint32_t *
fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
{
    /* Fetch one r5g6b5 scanline, expanded to a8r8g8b8 (alpha forced to
     * 0xFF), into iter->buffer.  'mask' is unused.  Advances iter->bits
     * to the next scanline before returning. */
    int32_t w = iter->width;
    uint32_t *dst = iter->buffer;
    const uint16_t *src = (const uint16_t *)iter->bits;

    iter->bits += iter->stride;

    /* Align the source buffer at 4 bytes boundary */
    if (w > 0 && ((uintptr_t)src & 3))
    {
	*dst++ = convert_0565_to_8888 (*src++);
	w--;
    }
    /* Process two pixels per iteration */
    while ((w -= 2) >= 0)
    {
	uint32_t sr, sb, sg, t0, t1;
	/* Load two 565 pixels with one aligned 32-bit read (alignment
	 * was established by the fixup above). */
	uint32_t s = *(const uint32_t *)src;
	src += 2;
	/* Extract both pixels' red/blue/green fields in parallel, one
	 * per 16-bit half, then replicate the high bits into the low
	 * bits to expand 5/6-bit channels to full 8 bits. */
	sr = (s >> 8) & 0x00F800F8;
	sb = (s << 3) & 0x00F800F8;
	sg = (s >> 3) & 0x00FC00FC;
	sr |= sr >> 5;
	sb |= sb >> 5;
	sg |= sg >> 6;
	/* Reassemble the low-half pixel (t0) and high-half pixel (t1)
	 * as opaque ARGB words. */
	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
	     (sb & 0xFF) | 0xFF000000;
	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
	     (sb >> 16) | 0xFF000000;
	/* Within the 32-bit load, pixel order depends on endianness. */
#ifdef WORDS_BIGENDIAN
	*dst++ = t1;
	*dst++ = t0;
#else
	*dst++ = t0;
	*dst++ = t1;
#endif
    }
    /* After the loop w is -2 or -1; -1 means one odd trailing pixel. */
    if (w & 1)
    {
	*dst = convert_0565_to_8888 (*src);
    }

    return iter->buffer;
}
2436 :
static uint32_t *
fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
{
    /* Destination fetch used when the operator reads neither RGB nor
     * alpha from the destination (see fast_dest_iter_init): just step
     * to the next scanline without reading any pixels. */
    iter->bits += iter->stride;
    return iter->buffer;
}
2443 :
2444 : /* Helper function for a workaround, which tries to ensure that 0x1F001F
2445 : * constant is always allocated in a register on RISC architectures.
2446 : */
2447 : static force_inline uint32_t
2448 : convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
2449 : {
2450 : uint32_t a, b;
2451 0 : a = (s >> 3) & x1F001F;
2452 0 : b = s & 0xFC00;
2453 0 : a |= a >> 5;
2454 0 : a |= b >> 5;
2455 0 : return a;
2456 : }
2457 :
2458 : static void
2459 0 : fast_write_back_r5g6b5 (pixman_iter_t *iter)
2460 : {
2461 0 : int32_t w = iter->width;
2462 0 : uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
2463 0 : const uint32_t *src = iter->buffer;
2464 : /* Workaround to ensure that x1F001F variable is allocated in a register */
2465 : static volatile uint32_t volatile_x1F001F = 0x1F001F;
2466 0 : uint32_t x1F001F = volatile_x1F001F;
2467 :
2468 0 : while ((w -= 4) >= 0)
2469 : {
2470 0 : uint32_t s1 = *src++;
2471 0 : uint32_t s2 = *src++;
2472 0 : uint32_t s3 = *src++;
2473 0 : uint32_t s4 = *src++;
2474 0 : *dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
2475 0 : *dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
2476 0 : *dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
2477 0 : *dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
2478 : }
2479 0 : if (w & 2)
2480 : {
2481 0 : *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
2482 0 : *dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
2483 : }
2484 0 : if (w & 1)
2485 : {
2486 0 : *dst = convert_8888_to_0565_workaround (*src, x1F001F);
2487 : }
2488 0 : }
2489 :
/* Associates a pixel format with its specialized scanline fetcher and
 * (for destination iterators) write-back routine. */
typedef struct
{
    pixman_format_code_t format;
    pixman_iter_get_scanline_t get_scanline;
    pixman_iter_write_back_t write_back;
} fetcher_info_t;

/* Formats that have specialized narrow iterators; terminated by a
 * PIXMAN_null sentinel entry. */
static const fetcher_info_t fetchers[] =
{
    { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
    { PIXMAN_null }
};
2502 :
2503 : static pixman_bool_t
2504 0 : fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
2505 : {
2506 0 : pixman_image_t *image = iter->image;
2507 :
2508 : #define FLAGS \
2509 : (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \
2510 : FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
2511 :
2512 0 : if (iter->iter_flags & ITER_16)
2513 0 : return FALSE;
2514 :
2515 0 : if ((iter->iter_flags & ITER_NARROW) &&
2516 0 : (iter->image_flags & FLAGS) == FLAGS)
2517 : {
2518 : const fetcher_info_t *f;
2519 :
2520 0 : for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
2521 : {
2522 0 : if (image->common.extended_format_code == f->format)
2523 : {
2524 0 : uint8_t *b = (uint8_t *)image->bits.bits;
2525 0 : int s = image->bits.rowstride * 4;
2526 :
2527 0 : iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
2528 0 : iter->stride = s;
2529 :
2530 0 : iter->get_scanline = f->get_scanline;
2531 0 : return TRUE;
2532 : }
2533 : }
2534 : }
2535 :
2536 0 : return FALSE;
2537 : }
2538 :
2539 : static pixman_bool_t
2540 0 : fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
2541 : {
2542 0 : pixman_image_t *image = iter->image;
2543 :
2544 0 : if (iter->iter_flags & ITER_16)
2545 0 : return FALSE;
2546 :
2547 0 : if ((iter->iter_flags & ITER_NARROW) &&
2548 0 : (iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
2549 : {
2550 : const fetcher_info_t *f;
2551 :
2552 0 : for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
2553 : {
2554 0 : if (image->common.extended_format_code == f->format)
2555 : {
2556 0 : uint8_t *b = (uint8_t *)image->bits.bits;
2557 0 : int s = image->bits.rowstride * 4;
2558 :
2559 0 : iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
2560 0 : iter->stride = s;
2561 :
2562 0 : if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
2563 : (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
2564 : {
2565 0 : iter->get_scanline = fast_dest_fetch_noop;
2566 : }
2567 : else
2568 : {
2569 0 : iter->get_scanline = f->get_scanline;
2570 : }
2571 0 : iter->write_back = f->write_back;
2572 0 : return TRUE;
2573 : }
2574 : }
2575 : }
2576 0 : return FALSE;
2577 : }
2578 :
2579 :
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
{
    /* Create the generic C fast-path implementation, registering the
     * c_fast_paths composite table and the fill/iterator entry points
     * defined above.  Operations not handled here are delegated to
     * 'fallback'. */
    pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);

    imp->fill = fast_path_fill;
    imp->src_iter_init = fast_src_iter_init;
    imp->dest_iter_init = fast_dest_iter_init;

    return imp;
}
|