Line data Source code
1 : /* This Source Code Form is subject to the terms of the Mozilla Public
2 : * License, v. 2.0. If a copy of the MPL was not distributed with this
3 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 :
5 : #include "Swizzle.h"
6 : #include "Logging.h"
7 : #include "Tools.h"
8 : #include "mozilla/CheckedInt.h"
9 : #include "mozilla/EndianUtils.h"
10 :
11 : #ifdef BUILD_ARM_NEON
12 : #include "mozilla/arm.h"
13 : #endif
14 :
15 : namespace mozilla {
16 : namespace gfx {
17 :
18 : /**
19 : * Convenience macros for dispatching to various format combinations.
20 : */
21 :
22 : // Hash the formats to a relatively dense value to optimize jump table generation.
23 : // The first 6 formats in SurfaceFormat are the 32-bit BGRA variants and are the most
24 : // common formats dispatched here. Room is reserved in the lowish bits for up to
25 : // these 6 destination formats. If a destination format is >= 6, the 6th bit is set
26 : // to avoid collisions.
#define FORMAT_KEY(aSrcFormat, aDstFormat) \
  (int(aSrcFormat) * 6 + int(aDstFormat) + (int(int(aDstFormat) >= 6) << 6))

// Expands to a switch case for the given format pair. The trailing expression
// is evaluated and the dispatcher returns true to signal the pair was handled.
#define FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, ...) \
  case FORMAT_KEY(aSrcFormat, aDstFormat): \
    __VA_ARGS__; \
    return true;

// Variant of FORMAT_CASE_EXPR that wraps its arguments in FORMAT_CASE_CALL,
// which each dispatcher defines locally to supply the actual call arguments
// (source/destination pointers, row gaps, and size).
#define FORMAT_CASE(aSrcFormat, aDstFormat, ...) \
  FORMAT_CASE_EXPR(aSrcFormat, aDstFormat, FORMAT_CASE_CALL(__VA_ARGS__))
37 :
38 : /**
39 : * Constexpr functions for analyzing format attributes in templates.
40 : */
41 :
42 : // Whether B comes before R in pixel memory layout.
43 : static constexpr bool
44 : IsBGRFormat(SurfaceFormat aFormat)
45 : {
46 : return aFormat == SurfaceFormat::B8G8R8A8 ||
47 : #if MOZ_LITTLE_ENDIAN
48 : aFormat == SurfaceFormat::R5G6B5_UINT16 ||
49 : #endif
50 : aFormat == SurfaceFormat::B8G8R8X8 ||
51 : aFormat == SurfaceFormat::B8G8R8;
52 : }
53 :
54 : // Whether the order of B and R need to be swapped to map from src to dst.
55 : static constexpr bool
56 : ShouldSwapRB(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat)
57 : {
58 : return IsBGRFormat(aSrcFormat) != IsBGRFormat(aDstFormat);
59 : }
60 :
61 : // The starting byte of the RGB components in pixel memory.
62 : static constexpr uint32_t
63 : RGBByteIndex(SurfaceFormat aFormat)
64 : {
65 : return aFormat == SurfaceFormat::A8R8G8B8 ||
66 : aFormat == SurfaceFormat::X8R8G8B8
67 : ? 1 : 0;
68 : }
69 :
70 : // The byte of the alpha component, which just comes after RGB.
71 : static constexpr uint32_t
72 : AlphaByteIndex(SurfaceFormat aFormat)
73 : {
74 : return (RGBByteIndex(aFormat) + 3) % 4;
75 : }
76 :
// The endian-dependent bit shift to access RGB of a UINT32 pixel.
static constexpr uint32_t
RGBBitShift(SurfaceFormat aFormat)
{
#if MOZ_LITTLE_ENDIAN
  // On little-endian, byte index N sits at bit position 8*N of the uint32_t.
  return 8 * RGBByteIndex(aFormat);
#else
  // On big-endian, byte index 0 is the most significant byte.
  return 24 - 8 * RGBByteIndex(aFormat);
#endif
}
87 :
88 : // The endian-dependent bit shift to access alpha of a UINT32 pixel.
89 : static constexpr uint32_t
90 : AlphaBitShift(SurfaceFormat aFormat)
91 : {
92 : return (RGBBitShift(aFormat) + 24) % 32;
93 : }
94 :
95 : // Whether the pixel format should ignore the value of the alpha channel and treat it as opaque.
96 : static constexpr bool
97 : IgnoreAlpha(SurfaceFormat aFormat)
98 : {
99 : return aFormat == SurfaceFormat::B8G8R8X8 ||
100 : aFormat == SurfaceFormat::R8G8B8X8 ||
101 : aFormat == SurfaceFormat::X8R8G8B8;
102 : }
103 :
104 : // Whether to force alpha to opaque to map from src to dst.
105 : static constexpr bool
106 : ShouldForceOpaque(SurfaceFormat aSrcFormat, SurfaceFormat aDstFormat)
107 : {
108 : return IgnoreAlpha(aSrcFormat) != IgnoreAlpha(aDstFormat);
109 : }
110 :
#ifdef USE_SSE2
/**
 * SSE2 optimizations
 */

// Forward declarations of the SSE2 implementations (defined elsewhere).
template<bool aSwapRB, bool aOpaqueAlpha>
void Premultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

// Dispatch case mapping a format pair to the matching Premultiply_SSE2
// instantiation, with swap/opaque flags derived from the formats.
#define PREMULTIPLY_SSE2(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              Premultiply_SSE2 \
                <ShouldSwapRB(aSrcFormat, aDstFormat), \
                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template<bool aSwapRB>
void Unpremultiply_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#define UNPREMULTIPLY_SSE2(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              Unpremultiply_SSE2<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template<bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_SSE2(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#define SWIZZLE_SSE2(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              Swizzle_SSE2 \
                <ShouldSwapRB(aSrcFormat, aDstFormat), \
                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

#endif

#ifdef BUILD_ARM_NEON
/**
 * ARM NEON optimizations
 */

// Forward declarations of the NEON implementations; only selected at runtime
// after mozilla::supports_neon() confirms NEON is available.
template<bool aSwapRB, bool aOpaqueAlpha>
void Premultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#define PREMULTIPLY_NEON(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              Premultiply_NEON \
                <ShouldSwapRB(aSrcFormat, aDstFormat), \
                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

template<bool aSwapRB>
void Unpremultiply_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#define UNPREMULTIPLY_NEON(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              Unpremultiply_NEON<ShouldSwapRB(aSrcFormat, aDstFormat)>)

template<bool aSwapRB, bool aOpaqueAlpha>
void Swizzle_NEON(const uint8_t*, int32_t, uint8_t*, int32_t, IntSize);

#define SWIZZLE_NEON(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              Swizzle_NEON \
                <ShouldSwapRB(aSrcFormat, aDstFormat), \
                 ShouldForceOpaque(aSrcFormat, aDstFormat)>)

#endif
174 :
175 : /**
176 : * Premultiplying
177 : */
178 :
179 : // Fallback premultiply implementation that uses splayed pixel math to reduce the
180 : // multiplications used. That is, the R and B components are isolated from the G and A
181 : // components, which then can be multiplied as if they were two 2-component vectors.
// Otherwise, an approximation of divide-by-255 is used which is faster than an actual
183 : // division. These optimizations are also used for the SSE2 and NEON implementations.
184 : template<bool aSwapRB, bool aOpaqueAlpha,
185 : uint32_t aSrcRGBShift, uint32_t aSrcAShift,
186 : uint32_t aDstRGBShift, uint32_t aDstAShift>
187 : static void
188 0 : PremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
189 : uint8_t* aDst, int32_t aDstGap,
190 : IntSize aSize)
191 : {
192 0 : for (int32_t height = aSize.height; height > 0; height--) {
193 0 : const uint8_t* end = aSrc + 4 * aSize.width;
194 0 : do {
195 : // Load and process 1 entire pixel at a time.
196 0 : uint32_t color = *reinterpret_cast<const uint32_t*>(aSrc);
197 :
198 0 : uint32_t a = aSrcAShift ? color >> aSrcAShift : color & 0xFF;
199 :
200 : // Isolate the R and B components.
201 0 : uint32_t rb = (color >> aSrcRGBShift) & 0x00FF00FF;
202 : // Swap the order of R and B if necessary.
203 : if (aSwapRB) {
204 0 : rb = (rb >> 16) | (rb << 16);
205 : }
206 : // Approximate the multiply by alpha and divide by 255 which is essentially:
207 : // c = c*a + 255; c = (c + (c >> 8)) >> 8;
208 : // However, we omit the final >> 8 to fold it with the final shift into place
209 : // depending on desired output format.
210 0 : rb = rb*a + 0x00FF00FF;
211 0 : rb = (rb + ((rb >> 8) & 0x00FF00FF)) & 0xFF00FF00;
212 :
213 : // Use same approximation as above, but G is shifted 8 bits left.
214 : // Alpha is left out and handled separately.
215 0 : uint32_t g = color & (0xFF00 << aSrcRGBShift);
216 0 : g = g*a + (0xFF00 << aSrcRGBShift);
217 0 : g = (g + (g >> 8)) & (0xFF0000 << aSrcRGBShift);
218 :
219 : // The above math leaves RGB shifted left by 8 bits.
220 : // Shift them right if required for the output format.
221 : // then combine them back together to produce output pixel.
222 : // Add the alpha back on if the output format is not opaque.
223 0 : *reinterpret_cast<uint32_t*>(aDst) =
224 0 : (rb >> (8 - aDstRGBShift)) |
225 0 : (g >> (8 + aSrcRGBShift - aDstRGBShift)) |
226 0 : (aOpaqueAlpha ? 0xFF << aDstAShift : a << aDstAShift);
227 :
228 0 : aSrc += 4;
229 0 : aDst += 4;
230 0 : } while (aSrc < end);
231 :
232 0 : aSrc += aSrcGap;
233 0 : aDst += aDstGap;
234 : }
235 0 : }
236 :
// Dispatch case mapping a format pair to the PremultiplyFallback instantiation
// with the swap/opaque flags and component shifts derived from each format.
#define PREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              PremultiplyFallback \
                <ShouldSwapRB(aSrcFormat, aDstFormat), \
                 ShouldForceOpaque(aSrcFormat, aDstFormat), \
                 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)

// Expands to dispatch cases from the given source format to each of the six
// supported 32-bit destination formats.
#define PREMULTIPLY_FALLBACK(aSrcFormat) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8X8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8X8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8) \
  PREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::X8R8G8B8)
252 :
253 : // If rows are tightly packed, and the size of the total area will fit within
254 : // the precision range of a single row, then process all the data as if it was
255 : // a single row.
256 : static inline IntSize
257 0 : CollapseSize(const IntSize& aSize, int32_t aSrcStride, int32_t aDstStride)
258 : {
259 0 : if (aSrcStride == aDstStride &&
260 0 : aSrcStride == 4 * aSize.width) {
261 0 : CheckedInt32 area = CheckedInt32(aSize.width) * CheckedInt32(aSize.height);
262 0 : if (area.isValid()) {
263 0 : return IntSize(area.value(), 1);
264 : }
265 : }
266 0 : return aSize;
267 : }
268 :
// Premultiplies each pixel's color components by its alpha while converting
// between the given 32-bit formats, preferring SIMD fast paths when available.
// Returns true on success; returns false (asserting in debug builds) if the
// format combination is unsupported.
bool
PremultiplyData(const uint8_t* aSrc, int32_t aSrcStride, SurfaceFormat aSrcFormat,
                uint8_t* aDst, int32_t aDstStride, SurfaceFormat aDstFormat,
                const IntSize& aSize)
{
  if (aSize.IsEmpty()) {
    return true;
  }
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = aSrcStride - BytesPerPixel(aSrcFormat) * aSize.width;
  int32_t dstGap = aDstStride - BytesPerPixel(aDstFormat) * aSize.width;
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);

// Supplies the call arguments used by the FORMAT_CASE dispatch macros below.
#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  // SSE2 fast paths for the common BGRA/RGBA pairs; each case returns true.
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
    PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    PREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
    PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    PREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    default: break;
  }
#endif

#ifdef BUILD_ARM_NEON
  // NEON fast paths, gated on a runtime CPU-capability check.
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
    PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    PREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
    PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    PREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    default: break;
  }
#endif

  // Portable fallback covering all supported source formats.
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    PREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8)
    PREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8)
    PREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8)
    default: break;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported premultiply formats");
  return false;
}
325 :
326 : /**
327 : * Unpremultiplying
328 : */
329 :
// Generate a table of 8.16 fixed-point reciprocals representing 1/alpha.
#define UNPREMULQ(x) (0xFF00FFU / (x))
#define UNPREMULQ_2(x) UNPREMULQ(x), UNPREMULQ((x) + 1)
#define UNPREMULQ_4(x) UNPREMULQ_2(x), UNPREMULQ_2((x) + 2)
#define UNPREMULQ_8(x) UNPREMULQ_4(x), UNPREMULQ_4((x) + 4)
#define UNPREMULQ_16(x) UNPREMULQ_8(x), UNPREMULQ_8((x) + 8)
#define UNPREMULQ_32(x) UNPREMULQ_16(x), UNPREMULQ_16((x) + 16)
// Entry [a] is 0xFF00FF / a (approximately (255 << 16) / a); entry [0] is 0 so
// that fully transparent pixels map to zero rather than dividing by zero.
static const uint32_t sUnpremultiplyTable[256] =
{
  0, UNPREMULQ(1), UNPREMULQ_2(2), UNPREMULQ_4(4),
  UNPREMULQ_8(8), UNPREMULQ_16(16), UNPREMULQ_32(32),
  UNPREMULQ_32(64), UNPREMULQ_32(96), UNPREMULQ_32(128),
  UNPREMULQ_32(160), UNPREMULQ_32(192), UNPREMULQ_32(224)
};
344 :
345 : // Fallback unpremultiply implementation that uses 8.16 fixed-point reciprocal math
346 : // to eliminate any division by the alpha component. This optimization is used for the
347 : // SSE2 and NEON implementations, with some adaptations. This implementation also accesses
348 : // color components using individual byte accesses as this profiles faster than accessing
349 : // the pixel as a uint32_t and shifting/masking to access components.
350 : template<bool aSwapRB,
351 : uint32_t aSrcRGBIndex, uint32_t aSrcAIndex,
352 : uint32_t aDstRGBIndex, uint32_t aDstAIndex>
353 : static void
354 0 : UnpremultiplyFallback(const uint8_t* aSrc, int32_t aSrcGap,
355 : uint8_t* aDst, int32_t aDstGap,
356 : IntSize aSize)
357 : {
358 0 : for (int32_t height = aSize.height; height > 0; height--) {
359 0 : const uint8_t* end = aSrc + 4 * aSize.width;
360 0 : do {
361 0 : uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
362 0 : uint8_t g = aSrc[aSrcRGBIndex + 1];
363 0 : uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];
364 0 : uint8_t a = aSrc[aSrcAIndex];
365 :
366 : // Access the 8.16 reciprocal from the table based on alpha. Multiply by the
367 : // reciprocal and shift off the fraction bits to approximate the division by alpha.
368 0 : uint32_t q = sUnpremultiplyTable[a];
369 0 : aDst[aDstRGBIndex + 0] = (r * q) >> 16;
370 0 : aDst[aDstRGBIndex + 1] = (g * q) >> 16;
371 0 : aDst[aDstRGBIndex + 2] = (b * q) >> 16;
372 0 : aDst[aDstAIndex] = a;
373 :
374 0 : aSrc += 4;
375 0 : aDst += 4;
376 0 : } while (aSrc < end);
377 :
378 0 : aSrc += aSrcGap;
379 0 : aDst += aDstGap;
380 : }
381 0 : }
382 :
// Dispatch case mapping a format pair to the UnpremultiplyFallback
// instantiation with the swap flag and byte indices derived from each format.
#define UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              UnpremultiplyFallback \
                <ShouldSwapRB(aSrcFormat, aDstFormat), \
                 RGBByteIndex(aSrcFormat), AlphaByteIndex(aSrcFormat), \
                 RGBByteIndex(aDstFormat), AlphaByteIndex(aDstFormat)>)

// Expands to dispatch cases from the given source format to each destination
// format that carries a meaningful alpha channel.
#define UNPREMULTIPLY_FALLBACK(aSrcFormat) \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::B8G8R8A8) \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::R8G8B8A8) \
  UNPREMULTIPLY_FALLBACK_CASE(aSrcFormat, SurfaceFormat::A8R8G8B8)
394 :
// Divides out the alpha from each pixel's color components while converting
// between the given 32-bit formats, preferring SIMD fast paths when available.
// Returns true on success; returns false (asserting in debug builds) if the
// format combination is unsupported.
bool
UnpremultiplyData(const uint8_t* aSrc, int32_t aSrcStride, SurfaceFormat aSrcFormat,
                  uint8_t* aDst, int32_t aDstStride, SurfaceFormat aDstFormat,
                  const IntSize& aSize)
{
  if (aSize.IsEmpty()) {
    return true;
  }
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = aSrcStride - BytesPerPixel(aSrcFormat) * aSize.width;
  int32_t dstGap = aDstStride - BytesPerPixel(aDstFormat) * aSize.width;
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);

// Supplies the call arguments used by the FORMAT_CASE dispatch macros below.
#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
    UNPREMULTIPLY_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
    UNPREMULTIPLY_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    default: break;
  }
#endif

#ifdef BUILD_ARM_NEON
  // NEON fast paths, gated on a runtime CPU-capability check.
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8A8)
    UNPREMULTIPLY_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8A8)
    UNPREMULTIPLY_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    default: break;
  }
#endif

  // Portable fallback covering all supported alpha-carrying formats.
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    UNPREMULTIPLY_FALLBACK(SurfaceFormat::B8G8R8A8)
    UNPREMULTIPLY_FALLBACK(SurfaceFormat::R8G8B8A8)
    UNPREMULTIPLY_FALLBACK(SurfaceFormat::A8R8G8B8)
    default: break;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported unpremultiply formats");
  return false;
}
443 :
444 : /**
445 : * Swizzling
446 : */
447 :
448 : // Fallback swizzle implementation that uses shifting and masking to reorder pixels.
449 : template<bool aSwapRB, bool aOpaqueAlpha,
450 : uint32_t aSrcRGBShift, uint32_t aSrcAShift,
451 : uint32_t aDstRGBShift, uint32_t aDstAShift>
452 : static void
453 0 : SwizzleFallback(const uint8_t* aSrc, int32_t aSrcGap,
454 : uint8_t* aDst, int32_t aDstGap,
455 : IntSize aSize)
456 : {
457 0 : for (int32_t height = aSize.height; height > 0; height--) {
458 0 : const uint8_t* end = aSrc + 4 * aSize.width;
459 0 : do {
460 0 : uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
461 :
462 : if (aSwapRB) {
463 : // Handle R and B swaps by exchanging words and masking.
464 0 : uint32_t rb = ((rgba << 16) | (rgba >> 16)) & (0x00FF00FF << aSrcRGBShift);
465 0 : uint32_t ga = rgba & ((0xFF << aSrcAShift) | (0xFF00 << aSrcRGBShift));
466 0 : rgba = rb | ga;
467 : }
468 :
469 : // If src and dst shifts differ, rotate left or right to move RGB into place,
470 : // i.e. ARGB -> RGBA or ARGB -> RGBA.
471 : if (aDstRGBShift > aSrcRGBShift) {
472 0 : rgba = (rgba << 8) | (aOpaqueAlpha ? 0x000000FF : rgba >> 24);
473 : } else if (aSrcRGBShift > aDstRGBShift) {
474 0 : rgba = (rgba >> 8) | (aOpaqueAlpha ? 0xFF000000 : rgba << 24);
475 : } else if (aOpaqueAlpha) {
476 0 : rgba |= 0xFF << aDstAShift;
477 : }
478 :
479 0 : *reinterpret_cast<uint32_t*>(aDst) = rgba;
480 :
481 0 : aSrc += 4;
482 0 : aDst += 4;
483 0 : } while (aSrc < end);
484 :
485 0 : aSrc += aSrcGap;
486 0 : aDst += aDstGap;
487 : }
488 0 : }
489 :
// Dispatch case mapping a format pair to the SwizzleFallback instantiation
// with the swap/opaque flags and component shifts derived from each format.
#define SWIZZLE_FALLBACK(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              SwizzleFallback \
                <ShouldSwapRB(aSrcFormat, aDstFormat), \
                 ShouldForceOpaque(aSrcFormat, aDstFormat), \
                 RGBBitShift(aSrcFormat), AlphaBitShift(aSrcFormat), \
                 RGBBitShift(aDstFormat), AlphaBitShift(aDstFormat)>)
497 :
498 : // Fast-path for matching formats.
499 : static void
500 0 : SwizzleCopy(const uint8_t* aSrc, int32_t aSrcGap,
501 : uint8_t* aDst, int32_t aDstGap,
502 : IntSize aSize, int32_t aBPP)
503 : {
504 0 : if (aSrc != aDst) {
505 0 : int32_t rowLength = aBPP * aSize.width;
506 0 : for (int32_t height = aSize.height; height > 0; height--) {
507 0 : memcpy(aDst, aSrc, rowLength);
508 0 : aSrc += rowLength + aSrcGap;
509 0 : aDst += rowLength + aDstGap;
510 : }
511 : }
512 0 : }
513 :
514 : // Fast-path for conversions that swap all bytes.
515 : template<bool aOpaqueAlpha, uint32_t aSrcAShift, uint32_t aDstAShift>
516 : static void
517 0 : SwizzleSwap(const uint8_t* aSrc, int32_t aSrcGap,
518 : uint8_t* aDst, int32_t aDstGap,
519 : IntSize aSize)
520 : {
521 0 : for (int32_t height = aSize.height; height > 0; height--) {
522 0 : const uint8_t* end = aSrc + 4 * aSize.width;
523 0 : do {
524 : // Use an endian swap to move the bytes, i.e. BGRA -> ARGB.
525 0 : uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
526 : #if MOZ_LITTLE_ENDIAN
527 0 : rgba = NativeEndian::swapToBigEndian(rgba);
528 : #else
529 : rgba = NativeEndian::swapToLittleEndian(rgba);
530 : #endif
531 : if (aOpaqueAlpha) {
532 0 : rgba |= 0xFF << aDstAShift;
533 : }
534 0 : *reinterpret_cast<uint32_t*>(aDst) = rgba;
535 0 : aSrc += 4;
536 0 : aDst += 4;
537 0 : } while (aSrc < end);
538 0 : aSrc += aSrcGap;
539 0 : aDst += aDstGap;
540 : }
541 0 : }
542 :
// Dispatch case mapping a format pair to the SwizzleSwap instantiation with
// the opaque flag and alpha shifts derived from each format.
#define SWIZZLE_SWAP(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              SwizzleSwap \
                <ShouldForceOpaque(aSrcFormat, aDstFormat), \
                 AlphaBitShift(aSrcFormat), AlphaBitShift(aDstFormat)>)
548 :
549 : // Fast-path for conversions that force alpha to opaque.
550 : template<uint32_t aDstAShift>
551 : static void
552 0 : SwizzleOpaque(const uint8_t* aSrc, int32_t aSrcGap,
553 : uint8_t* aDst, int32_t aDstGap,
554 : IntSize aSize)
555 : {
556 0 : if (aSrc == aDst) {
557 : // Modifying in-place, so just write out the alpha.
558 0 : for (int32_t height = aSize.height; height > 0; height--) {
559 0 : const uint8_t* end = aDst + 4 * aSize.width;
560 0 : do {
561 : // ORing directly onto destination memory profiles faster than writing
562 : // individually to the alpha byte and also profiles equivalently to a
563 : // SSE2 implementation.
564 0 : *reinterpret_cast<uint32_t*>(aDst) |= 0xFF << aDstAShift;
565 0 : aDst += 4;
566 0 : } while (aDst < end);
567 0 : aDst += aDstGap;
568 : }
569 : } else {
570 0 : for (int32_t height = aSize.height; height > 0; height--) {
571 0 : const uint8_t* end = aSrc + 4 * aSize.width;
572 0 : do {
573 0 : uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
574 : // Just add on the alpha bits to the source.
575 0 : rgba |= 0xFF << aDstAShift;
576 0 : *reinterpret_cast<uint32_t*>(aDst) = rgba;
577 0 : aSrc += 4;
578 0 : aDst += 4;
579 0 : } while (aSrc < end);
580 0 : aSrc += aSrcGap;
581 0 : aDst += aDstGap;
582 : }
583 : }
584 0 : }
585 :
// Dispatch case mapping a format pair to the SwizzleOpaque instantiation with
// the destination's alpha shift.
#define SWIZZLE_OPAQUE(aSrcFormat, aDstFormat) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              SwizzleOpaque<AlphaBitShift(aDstFormat)>)
589 :
590 : // Packing of 32-bit formats to RGB565.
591 : template<bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
592 : static void
593 0 : PackToRGB565(const uint8_t* aSrc, int32_t aSrcGap,
594 : uint8_t* aDst, int32_t aDstGap,
595 : IntSize aSize)
596 : {
597 0 : for (int32_t height = aSize.height; height > 0; height--) {
598 0 : const uint8_t* end = aSrc + 4 * aSize.width;
599 0 : do {
600 0 : uint32_t rgba = *reinterpret_cast<const uint32_t*>(aSrc);
601 :
602 : // Isolate the R, G, and B components and shift to final endian-dependent locations.
603 : uint16_t rgb565;
604 : if (aSwapRB) {
605 0 : rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) << (8 - aSrcRGBShift)) |
606 0 : ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
607 0 : ((rgba & (0xF80000 << aSrcRGBShift)) >> (19 + aSrcRGBShift));
608 : } else {
609 0 : rgb565 = ((rgba & (0xF8 << aSrcRGBShift)) >> (3 + aSrcRGBShift)) |
610 0 : ((rgba & (0xFC00 << aSrcRGBShift)) >> (5 + aSrcRGBShift)) |
611 0 : ((rgba & (0xF80000 << aSrcRGBShift)) >> (8 + aSrcRGBShift));
612 : }
613 :
614 0 : *reinterpret_cast<uint16_t*>(aDst) = rgb565;
615 :
616 0 : aSrc += 4;
617 0 : aDst += 2;
618 0 : } while (aSrc < end);
619 :
620 0 : aSrc += aSrcGap;
621 0 : aDst += aDstGap;
622 : }
623 0 : }
624 :
625 : // Packing of 32-bit formats to 24-bit formats.
626 : template<bool aSwapRB, uint32_t aSrcRGBShift, uint32_t aSrcRGBIndex>
627 : static void
628 0 : PackToRGB24(const uint8_t* aSrc, int32_t aSrcGap,
629 : uint8_t* aDst, int32_t aDstGap,
630 : IntSize aSize)
631 : {
632 0 : for (int32_t height = aSize.height; height > 0; height--) {
633 0 : const uint8_t* end = aSrc + 4 * aSize.width;
634 0 : do {
635 0 : uint8_t r = aSrc[aSrcRGBIndex + (aSwapRB ? 2 : 0)];
636 0 : uint8_t g = aSrc[aSrcRGBIndex + 1];
637 0 : uint8_t b = aSrc[aSrcRGBIndex + (aSwapRB ? 0 : 2)];
638 :
639 0 : aDst[0] = r;
640 0 : aDst[1] = g;
641 0 : aDst[2] = b;
642 :
643 0 : aSrc += 4;
644 0 : aDst += 3;
645 0 : } while (aSrc < end);
646 :
647 0 : aSrc += aSrcGap;
648 0 : aDst += aDstGap;
649 : }
650 0 : }
651 :
// Dispatch case mapping a format pair to the given packing function with the
// swap flag and RGB shift/index derived from the source format.
#define PACK_RGB_CASE(aSrcFormat, aDstFormat, aPackFunc) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              aPackFunc \
                <ShouldSwapRB(aSrcFormat, aDstFormat), \
                 RGBBitShift(aSrcFormat), RGBByteIndex(aSrcFormat)>)

// Expands to packing cases from each supported 32-bit source format to the
// given packed destination format.
#define PACK_RGB(aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::B8G8R8X8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::R8G8B8X8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc) \
  PACK_RGB_CASE(SurfaceFormat::X8R8G8B8, aDstFormat, aPackFunc)
665 :
666 : // Packing of 32-bit formats to A8.
667 : template<uint32_t aSrcAIndex>
668 : static void
669 0 : PackToA8(const uint8_t* aSrc, int32_t aSrcGap,
670 : uint8_t* aDst, int32_t aDstGap,
671 : IntSize aSize)
672 : {
673 0 : for (int32_t height = aSize.height; height > 0; height--) {
674 0 : const uint8_t* end = aSrc + 4 * aSize.width;
675 0 : do {
676 0 : *aDst++ = aSrc[aSrcAIndex];
677 0 : aSrc += 4;
678 0 : } while (aSrc < end);
679 0 : aSrc += aSrcGap;
680 0 : aDst += aDstGap;
681 : }
682 0 : }
683 :
// Dispatch case mapping a format pair to the given alpha-packing function with
// the source format's alpha byte index.
#define PACK_ALPHA_CASE(aSrcFormat, aDstFormat, aPackFunc) \
  FORMAT_CASE(aSrcFormat, aDstFormat, \
              aPackFunc<AlphaByteIndex(aSrcFormat)>)

// Expands to alpha-packing cases from each alpha-carrying source format.
#define PACK_ALPHA(aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::B8G8R8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::R8G8B8A8, aDstFormat, aPackFunc) \
  PACK_ALPHA_CASE(SurfaceFormat::A8R8G8B8, aDstFormat, aPackFunc)
692 :
// Converts pixel data between the given formats, dispatching to the fastest
// applicable implementation: SIMD fast paths first, then the shift/mask
// fallback, then the byte-swap, force-opaque, and packing specializations, and
// finally a plain row copy when the formats already match. Returns true on
// success; returns false (asserting in debug builds) if the combination is
// unsupported.
bool
SwizzleData(const uint8_t* aSrc, int32_t aSrcStride, SurfaceFormat aSrcFormat,
            uint8_t* aDst, int32_t aDstStride, SurfaceFormat aDstFormat,
            const IntSize& aSize)
{
  if (aSize.IsEmpty()) {
    return true;
  }
  IntSize size = CollapseSize(aSize, aSrcStride, aDstStride);
  // Find gap from end of row to the start of the next row.
  int32_t srcGap = aSrcStride - BytesPerPixel(aSrcFormat) * aSize.width;
  int32_t dstGap = aDstStride - BytesPerPixel(aDstFormat) * aSize.width;
  MOZ_ASSERT(srcGap >= 0 && dstGap >= 0);

// Supplies the call arguments used by the FORMAT_CASE dispatch macros below.
#define FORMAT_CASE_CALL(...) __VA_ARGS__(aSrc, srcGap, aDst, dstGap, size)

#ifdef USE_SSE2
  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_SSE2(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_SSE2(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SSE2(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SSE2(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
    default: break;
  }
#endif

#ifdef BUILD_ARM_NEON
  // NEON fast paths, gated on a runtime CPU-capability check.
  if (mozilla::supports_neon()) switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {
    SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_NEON(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_NEON(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_NEON(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_NEON(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
    default: break;
  }
#endif

  switch (FORMAT_KEY(aSrcFormat, aDstFormat)) {

    // Portable shift/mask swizzles between 32-bit formats.
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::B8G8R8X8, SurfaceFormat::R8G8B8A8)

    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_FALLBACK(SurfaceFormat::R8G8B8X8, SurfaceFormat::X8R8G8B8)

    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::A8R8G8B8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_FALLBACK(SurfaceFormat::X8R8G8B8, SurfaceFormat::R8G8B8A8)

    // Pairs where every byte moves: use the endian-swap fast path.
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8A8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::B8G8R8X8, SurfaceFormat::A8R8G8B8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_SWAP(SurfaceFormat::A8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_SWAP(SurfaceFormat::X8R8G8B8, SurfaceFormat::B8G8R8A8)

    // Pairs that differ only in alpha handling: just force alpha opaque.
    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8A8, SurfaceFormat::B8G8R8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::B8G8R8X8, SurfaceFormat::B8G8R8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8A8, SurfaceFormat::R8G8B8X8)
    SWIZZLE_OPAQUE(SurfaceFormat::R8G8B8X8, SurfaceFormat::R8G8B8A8)
    SWIZZLE_OPAQUE(SurfaceFormat::A8R8G8B8, SurfaceFormat::X8R8G8B8)
    SWIZZLE_OPAQUE(SurfaceFormat::X8R8G8B8, SurfaceFormat::A8R8G8B8)

    // Packing down to 16-bit, 24-bit, or alpha-only formats.
    PACK_RGB(SurfaceFormat::R5G6B5_UINT16, PackToRGB565)
    PACK_RGB(SurfaceFormat::B8G8R8, PackToRGB24)
    PACK_RGB(SurfaceFormat::R8G8B8, PackToRGB24)
    PACK_ALPHA(SurfaceFormat::A8, PackToA8)

    default: break;
  }

  if (aSrcFormat == aDstFormat) {
    // If the formats match, just do a generic copy.
    SwizzleCopy(aSrc, srcGap, aDst, dstGap, size, BytesPerPixel(aSrcFormat));
    return true;
  }

#undef FORMAT_CASE_CALL

  MOZ_ASSERT(false, "Unsupported swizzle formats");
  return false;
}
791 :
792 : } // namespace gfx
793 : } // namespace mozilla
|