Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 : /* This Source Code Form is subject to the terms of the Mozilla Public
4 : * License, v. 2.0. If a copy of the MPL was not distributed with this
5 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 :
7 : #include "Blur.h"
8 :
9 : #include <algorithm>
10 : #include <math.h>
11 : #include <string.h>
12 :
13 : #include "mozilla/CheckedInt.h"
14 :
15 : #include "2D.h"
16 : #include "DataSurfaceHelpers.h"
17 : #include "Tools.h"
18 :
19 : #ifdef BUILD_ARM_NEON
20 : #include "mozilla/arm.h"
21 : #endif
22 :
23 : using namespace std;
24 :
25 : namespace mozilla {
26 : namespace gfx {
27 :
/**
 * Helper function to process each row of the box blur.
 * It takes care of transposing the data on input or output depending
 * on whether we intend a horizontal or vertical blur, and whether we're
 * reading from the initial source or writing to the final destination.
 * It allows starting or ending anywhere within the row to accommodate
 * a skip rect.
 *
 * @tparam aTransposeInput  If true, consecutive input samples are aStride
 *                          bytes apart instead of 1 byte apart.
 * @tparam aTransposeOutput If true, consecutive output samples are aStride
 *                          bytes apart instead of 1 byte apart.
 * @param aInput     First sample of the source row.
 * @param aOutput    First sample of the destination row.
 * @param aLeftLobe  Number of samples averaged to the left of each pixel.
 * @param aRightLobe Number of samples averaged to the right of each pixel.
 * @param aWidth     Number of samples in the row.
 * @param aStride    Step used between samples on a transposed side.
 * @param aStart     Index of the first output sample to write.
 * @param aEnd       One past the index of the last output sample to write.
 */
template<bool aTransposeInput, bool aTransposeOutput>
static inline void
BoxBlurRow(const uint8_t* aInput,
           uint8_t* aOutput,
           int32_t aLeftLobe,
           int32_t aRightLobe,
           int32_t aWidth,
           int32_t aStride,
           int32_t aStart,
           int32_t aEnd)
{
  // If the input or output is transposed, then we will move down a row
  // for each step, instead of moving over a column. Since these values
  // only depend on a template parameter, they will more easily get
  // copy-propagated in the non-transposed case, which is why they
  // are not passed as parameters.
  const int32_t inputStep = aTransposeInput ? aStride : 1;
  const int32_t outputStep = aTransposeOutput ? aStride : 1;

  // We need to sample aLeftLobe pixels to the left and aRightLobe pixels
  // to the right of the current position, then average them. So this is
  // the size of the total width of this filter.
  const int32_t boxSize = aLeftLobe + aRightLobe + 1;

  // Instead of dividing the pixel sum by boxSize to average, we can just
  // compute a scale that will normalize the result so that it can be quickly
  // shifted into the desired range.
  const uint32_t reciprocal = (1 << 24) / boxSize;

  // The shift would normally truncate the result, whereas we would rather
  // prefer to round the result to the closest increment. By adding 0.5 units
  // to the initial sum, we bias the sum so that it will be rounded by the
  // truncation instead.
  uint32_t alphaSum = (boxSize + 1) / 2;

  // We process the row with a moving filter, keeping a sum (alphaSum) of
  // boxSize pixels. As we move over a pixel, we need to add on a pixel
  // from the right extreme of the window that moved into range, and subtract
  // off a pixel from the left extreme of window that moved out of range.
  // But first, we need to initialize alphaSum to the contents of
  // the window before we can get going. If the window moves out of bounds
  // of the row, we clamp each sample to be the closest pixel from within
  // row bounds, so the 0th and aWidth-1th pixel.
  int32_t initLeft = aStart - aLeftLobe;
  if (initLeft < 0) {
    // If the left lobe samples before the row, add in clamped samples.
    alphaSum += -initLeft * aInput[0];
    initLeft = 0;
  }
  int32_t initRight = aStart + boxSize - aLeftLobe;
  if (initRight > aWidth) {
    // If the right lobe samples after the row, add in clamped samples.
    alphaSum += (initRight - aWidth) * aInput[(aWidth - 1) * inputStep];
    initRight = aWidth;
  }
  // Finally, add in all the valid, non-clamped samples to fill up the
  // rest of the window.
  const uint8_t* src = &aInput[initLeft * inputStep];
  const uint8_t* iterEnd = &aInput[initRight * inputStep];

#define INIT_ITER \
  alphaSum += *src; \
  src += inputStep;

  // We unroll the per-pixel loop here substantially. The amount of work
  // done per sample is so small that the cost of a loop condition check
  // and a branch can substantially add to or even dominate the performance
  // of the loop.
  while (src + 16 * inputStep <= iterEnd) {
    INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
    INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
    INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
    INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
  }
  while (src < iterEnd) {
    INIT_ITER;
  }

  // The helper macros are local to this function; undefine each one as soon
  // as it is no longer needed so it cannot leak into the rest of the file.
#undef INIT_ITER

  // Now we start moving the window over the row. We will be accessing
  // pixels from aStart - aLeftLobe up to aEnd + aRightLobe, which may be
  // out of bounds of the row. To avoid having to check within the inner
  // loops if we are in bound, we instead compute the points at which
  // we will move out of bounds of the row on the left side (splitLeft)
  // and right side (splitRight).
  int32_t splitLeft = std::min(std::max(aLeftLobe, aStart), aEnd);
  int32_t splitRight =
    std::min(std::max(aWidth - (boxSize - aLeftLobe), aStart), aEnd);
  // If the filter window is actually larger than the size of the row,
  // there will be a middle area of overlap where the leftmost and rightmost
  // pixel of the filter will both be outside the row. In this case, we need
  // to invert the splits so that splitLeft <= splitRight.
  if (boxSize > aWidth) {
    std::swap(splitLeft, splitRight);
  }

  // Process all pixels up to splitLeft that would sample before the start of
  // the row. Note that because inputStep and outputStep may not be a const 1
  // value, it is more performant to increment pointers here for the source
  // and destination rather than use a loop counter, since doing so would
  // entail an expensive multiplication that significantly slows down the loop.
  uint8_t* dst = &aOutput[aStart * outputStep];
  iterEnd = &aOutput[splitLeft * outputStep];
  src = &aInput[(aStart + boxSize - aLeftLobe) * inputStep];
  uint8_t firstVal = aInput[0];

#define LEFT_ITER \
  *dst = (alphaSum * reciprocal) >> 24; \
  alphaSum += *src - firstVal; \
  dst += outputStep; \
  src += inputStep;

  while (dst + 16 * outputStep <= iterEnd) {
    LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
    LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
    LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
    LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
  }
  while (dst < iterEnd) {
    LEFT_ITER;
  }

#undef LEFT_ITER

  // Process all pixels between splitLeft and splitRight.
  iterEnd = &aOutput[splitRight * outputStep];
  if (boxSize <= aWidth) {
    // The filter window is smaller than the row size, so the leftmost and
    // rightmost samples are both within row bounds.
    src = &aInput[(splitLeft - aLeftLobe) * inputStep];
    int32_t boxStep = boxSize * inputStep;

#define CENTER_ITER \
    *dst = (alphaSum * reciprocal) >> 24; \
    alphaSum += src[boxStep] - *src; \
    dst += outputStep; \
    src += inputStep;

    while (dst + 16 * outputStep <= iterEnd) {
      CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
      CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
      CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
      CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
    }
    while (dst < iterEnd) {
      CENTER_ITER;
    }

#undef CENTER_ITER
  } else {
    // The filter window is larger than the row size, and we're in the area of
    // split overlap. So the leftmost and rightmost samples are both out of
    // bounds and need to be clamped. We can just precompute the difference
    // here consequently.
    int32_t firstLastDiff = aInput[(aWidth - 1) * inputStep] - aInput[0];
    while (dst < iterEnd) {
      *dst = (alphaSum * reciprocal) >> 24;
      alphaSum += firstLastDiff;
      dst += outputStep;
    }
  }

  // Process all remaining pixels after splitRight that would sample after the
  // row end.
  iterEnd = &aOutput[aEnd * outputStep];
  src = &aInput[(splitRight - aLeftLobe) * inputStep];
  uint8_t lastVal = aInput[(aWidth - 1) * inputStep];

#define RIGHT_ITER \
  *dst = (alphaSum * reciprocal) >> 24; \
  alphaSum += lastVal - *src; \
  dst += outputStep; \
  src += inputStep;

  while (dst + 16 * outputStep <= iterEnd) {
    RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
    RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
    RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
    RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
  }
  while (dst < iterEnd) {
    RIGHT_ITER;
  }

#undef RIGHT_ITER
}
212 :
213 : /**
214 : * Box blur involves looking at one pixel, and setting its value to the average
215 : * of its neighbouring pixels. This is meant to provide a 3-pass approximation of a
216 : * Gaussian blur.
217 : * @param aTranspose Whether to transpose the buffer when reading and writing to it.
218 : * @param aData The buffer to be blurred.
219 : * @param aLobes The number of pixels to blend on the left and right for each of 3 passes.
220 : * @param aWidth The number of columns in the buffers.
221 : * @param aRows The number of rows in the buffers.
222 : * @param aStride The stride of the buffer.
223 : */
224 : template<bool aTranspose>
225 : static void
226 0 : BoxBlur(uint8_t* aData,
227 : const int32_t aLobes[3][2],
228 : int32_t aWidth,
229 : int32_t aRows,
230 : int32_t aStride,
231 : IntRect aSkipRect)
232 : {
233 : if (aTranspose) {
234 0 : swap(aWidth, aRows);
235 0 : swap(aSkipRect.x, aSkipRect.y);
236 0 : swap(aSkipRect.width, aSkipRect.height);
237 : }
238 :
239 0 : MOZ_ASSERT(aWidth > 0);
240 :
241 : // All three passes of the box blur that approximate the Gaussian are done
242 : // on each row in turn, so we only need two temporary row buffers to process
243 : // each row, instead of a full-sized buffer. Data moves from the source to the
244 : // first temporary, from the first temporary to the second, then from the second
245 : // back to the destination. This way is more cache-friendly than processing whe
246 : // whole buffer in each pass and thus yields a nice speedup.
247 0 : uint8_t* tmpRow = new (std::nothrow) uint8_t[2 * aWidth];
248 0 : if (!tmpRow) {
249 0 : return;
250 : }
251 0 : uint8_t* tmpRow2 = tmpRow + aWidth;
252 :
253 0 : const int32_t stride = aTranspose ? 1 : aStride;
254 0 : bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
255 0 : aWidth <= aSkipRect.XMost();
256 :
257 0 : for (int32_t y = 0; y < aRows; y++) {
258 : // Check whether the skip rect intersects this row. If the skip
259 : // rect covers the whole surface in this row, we can avoid
260 : // this row entirely (and any others along the skip rect).
261 0 : bool inSkipRectY = y >= aSkipRect.y &&
262 0 : y < aSkipRect.YMost();
263 0 : if (inSkipRectY && skipRectCoversWholeRow) {
264 0 : aData += stride * (aSkipRect.YMost() - y);
265 0 : y = aSkipRect.YMost() - 1;
266 0 : continue;
267 : }
268 :
269 : // Read in data from the source transposed if necessary.
270 0 : BoxBlurRow<aTranspose, false>(aData, tmpRow, aLobes[0][0], aLobes[0][1], aWidth, aStride, 0, aWidth);
271 :
272 : // For the middle pass, the data is already pre-transposed and does not need to be post-transposed yet.
273 0 : BoxBlurRow<false, false>(tmpRow, tmpRow2, aLobes[1][0], aLobes[1][1], aWidth, aStride, 0, aWidth);
274 :
275 : // Write back data to the destination transposed if necessary too.
276 : // Make sure not to overwrite the skip rect by only outputting to the
277 : // destination before and after the skip rect, if requested.
278 0 : int32_t skipStart = inSkipRectY ? min(max(aSkipRect.x, 0), aWidth) : aWidth;
279 0 : int32_t skipEnd = max(skipStart, aSkipRect.XMost());
280 0 : if (skipStart > 0) {
281 0 : BoxBlurRow<false, aTranspose>(tmpRow2, aData, aLobes[2][0], aLobes[2][1], aWidth, aStride, 0, skipStart);
282 : }
283 0 : if (skipEnd < aWidth) {
284 0 : BoxBlurRow<false, aTranspose>(tmpRow2, aData, aLobes[2][0], aLobes[2][1], aWidth, aStride, skipEnd, aWidth);
285 : }
286 :
287 0 : aData += stride;
288 : }
289 :
290 0 : delete[] tmpRow;
291 : }
292 :
293 2 : static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
294 : {
295 : int32_t major, minor, final;
296 :
297 : /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for
298 : * some notes about approximating the Gaussian blur with box-blurs.
299 : * The comments below are in the terminology of that page.
300 : */
301 2 : int32_t z = aRadius / 3;
302 2 : switch (aRadius % 3) {
303 : case 0:
304 : // aRadius = z*3; choose d = 2*z + 1
305 0 : major = minor = final = z;
306 0 : break;
307 : case 1:
308 : // aRadius = z*3 + 1
309 : // This is a tricky case since there is no value of d which will
310 : // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1
311 : // for some integer k, then the radius will be 3*k. If d is even,
312 : // i.e. d=2*k, then the radius will be 3*k - 1.
313 : // So we have to choose values that don't match the standard
314 : // algorithm.
315 0 : major = z + 1;
316 0 : minor = final = z;
317 0 : break;
318 : case 2:
319 : // aRadius = z*3 + 2; choose d = 2*z + 2
320 2 : major = final = z + 1;
321 2 : minor = z;
322 2 : break;
323 : default:
324 : // Mathematical impossibility!
325 0 : MOZ_ASSERT(false);
326 : major = minor = final = 0;
327 : }
328 2 : MOZ_ASSERT(major + minor + final == aRadius);
329 :
330 2 : aLobes[0][0] = major;
331 2 : aLobes[0][1] = minor;
332 2 : aLobes[1][0] = minor;
333 2 : aLobes[1][1] = major;
334 2 : aLobes[2][0] = final;
335 2 : aLobes[2][1] = final;
336 2 : }
337 :
338 : static void
339 0 : SpreadHorizontal(uint8_t* aInput,
340 : uint8_t* aOutput,
341 : int32_t aRadius,
342 : int32_t aWidth,
343 : int32_t aRows,
344 : int32_t aStride,
345 : const IntRect& aSkipRect)
346 : {
347 0 : if (aRadius == 0) {
348 0 : memcpy(aOutput, aInput, aStride * aRows);
349 0 : return;
350 : }
351 :
352 0 : bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
353 0 : aWidth <= aSkipRect.XMost();
354 0 : for (int32_t y = 0; y < aRows; y++) {
355 : // Check whether the skip rect intersects this row. If the skip
356 : // rect covers the whole surface in this row, we can avoid
357 : // this row entirely (and any others along the skip rect).
358 0 : bool inSkipRectY = y >= aSkipRect.y &&
359 0 : y < aSkipRect.YMost();
360 0 : if (inSkipRectY && skipRectCoversWholeRow) {
361 0 : y = aSkipRect.YMost() - 1;
362 0 : continue;
363 : }
364 :
365 0 : for (int32_t x = 0; x < aWidth; x++) {
366 : // Check whether we are within the skip rect. If so, go
367 : // to the next point outside the skip rect.
368 0 : if (inSkipRectY && x >= aSkipRect.x &&
369 0 : x < aSkipRect.XMost()) {
370 0 : x = aSkipRect.XMost();
371 0 : if (x >= aWidth)
372 0 : break;
373 : }
374 :
375 0 : int32_t sMin = max(x - aRadius, 0);
376 0 : int32_t sMax = min(x + aRadius, aWidth - 1);
377 0 : int32_t v = 0;
378 0 : for (int32_t s = sMin; s <= sMax; ++s) {
379 0 : v = max<int32_t>(v, aInput[aStride * y + s]);
380 : }
381 0 : aOutput[aStride * y + x] = v;
382 : }
383 : }
384 : }
385 :
386 : static void
387 0 : SpreadVertical(uint8_t* aInput,
388 : uint8_t* aOutput,
389 : int32_t aRadius,
390 : int32_t aWidth,
391 : int32_t aRows,
392 : int32_t aStride,
393 : const IntRect& aSkipRect)
394 : {
395 0 : if (aRadius == 0) {
396 0 : memcpy(aOutput, aInput, aStride * aRows);
397 0 : return;
398 : }
399 :
400 0 : bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
401 0 : aRows <= aSkipRect.YMost();
402 0 : for (int32_t x = 0; x < aWidth; x++) {
403 0 : bool inSkipRectX = x >= aSkipRect.x &&
404 0 : x < aSkipRect.XMost();
405 0 : if (inSkipRectX && skipRectCoversWholeColumn) {
406 0 : x = aSkipRect.XMost() - 1;
407 0 : continue;
408 : }
409 :
410 0 : for (int32_t y = 0; y < aRows; y++) {
411 : // Check whether we are within the skip rect. If so, go
412 : // to the next point outside the skip rect.
413 0 : if (inSkipRectX && y >= aSkipRect.y &&
414 0 : y < aSkipRect.YMost()) {
415 0 : y = aSkipRect.YMost();
416 0 : if (y >= aRows)
417 0 : break;
418 : }
419 :
420 0 : int32_t sMin = max(y - aRadius, 0);
421 0 : int32_t sMax = min(y + aRadius, aRows - 1);
422 0 : int32_t v = 0;
423 0 : for (int32_t s = sMin; s <= sMax; ++s) {
424 0 : v = max<int32_t>(v, aInput[aStride * s + x]);
425 : }
426 0 : aOutput[aStride * y + x] = v;
427 : }
428 : }
429 : }
430 :
431 : CheckedInt<int32_t>
432 5 : AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal)
433 : {
434 5 : CheckedInt<int32_t> val(aVal);
435 :
436 5 : val += 3;
437 5 : val /= 4;
438 5 : val *= 4;
439 :
440 5 : return val;
441 : }
442 :
443 0 : AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
444 : const IntSize& aSpreadRadius,
445 : const IntSize& aBlurRadius,
446 : const Rect* aDirtyRect,
447 0 : const Rect* aSkipRect)
448 0 : : mSurfaceAllocationSize(0)
449 : {
450 0 : Init(aRect, aSpreadRadius, aBlurRadius, aDirtyRect, aSkipRect);
451 0 : }
452 :
453 11 : AlphaBoxBlur::AlphaBoxBlur()
454 11 : : mSurfaceAllocationSize(0)
455 : {
456 11 : }
457 :
458 : void
459 1 : AlphaBoxBlur::Init(const Rect& aRect,
460 : const IntSize& aSpreadRadius,
461 : const IntSize& aBlurRadius,
462 : const Rect* aDirtyRect,
463 : const Rect* aSkipRect)
464 : {
465 1 : mSpreadRadius = aSpreadRadius;
466 1 : mBlurRadius = aBlurRadius;
467 :
468 1 : Rect rect(aRect);
469 1 : rect.Inflate(Size(aBlurRadius + aSpreadRadius));
470 1 : rect.RoundOut();
471 :
472 1 : if (aDirtyRect) {
473 : // If we get passed a dirty rect from layout, we can minimize the
474 : // shadow size and make painting faster.
475 0 : mHasDirtyRect = true;
476 0 : mDirtyRect = *aDirtyRect;
477 0 : Rect requiredBlurArea = mDirtyRect.Intersect(rect);
478 0 : requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius));
479 0 : rect = requiredBlurArea.Intersect(rect);
480 : } else {
481 1 : mHasDirtyRect = false;
482 : }
483 :
484 1 : mRect = TruncatedToInt(rect);
485 1 : if (mRect.IsEmpty()) {
486 0 : return;
487 : }
488 :
489 1 : if (aSkipRect) {
490 : // If we get passed a skip rect, we can lower the amount of
491 : // blurring/spreading we need to do. We convert it to IntRect to avoid
492 : // expensive int<->float conversions if we were to use Rect instead.
493 0 : Rect skipRect = *aSkipRect;
494 0 : skipRect.Deflate(Size(aBlurRadius + aSpreadRadius));
495 0 : mSkipRect = RoundedIn(skipRect);
496 0 : mSkipRect = mSkipRect.Intersect(mRect);
497 0 : if (mSkipRect.IsEqualInterior(mRect))
498 0 : return;
499 :
500 0 : mSkipRect -= mRect.TopLeft();
501 : } else {
502 1 : mSkipRect = IntRect(0, 0, 0, 0);
503 : }
504 :
505 1 : CheckedInt<int32_t> stride = RoundUpToMultipleOf4(mRect.width);
506 1 : if (stride.isValid()) {
507 1 : mStride = stride.value();
508 :
509 : // We need to leave room for an additional 3 bytes for a potential overrun
510 : // in our blurring code.
511 1 : size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.height, 3);
512 1 : if (size != 0) {
513 1 : mSurfaceAllocationSize = size;
514 : }
515 : }
516 : }
517 :
518 0 : AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
519 : int32_t aStride,
520 : float aSigmaX,
521 0 : float aSigmaY)
522 : : mRect(TruncatedToInt(aRect)),
523 : mSpreadRadius(),
524 0 : mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))),
525 : mStride(aStride),
526 0 : mSurfaceAllocationSize(0)
527 : {
528 0 : IntRect intRect;
529 0 : if (aRect.ToIntRect(&intRect)) {
530 0 : size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.width, intRect.height);
531 0 : if (minDataSize != 0) {
532 0 : mSurfaceAllocationSize = minDataSize;
533 : }
534 : }
535 0 : }
536 :
537 :
538 11 : AlphaBoxBlur::~AlphaBoxBlur()
539 : {
540 11 : }
541 :
542 : IntSize
543 5 : AlphaBoxBlur::GetSize()
544 : {
545 5 : IntSize size(mRect.width, mRect.height);
546 5 : return size;
547 : }
548 :
549 : int32_t
550 2 : AlphaBoxBlur::GetStride()
551 : {
552 2 : return mStride;
553 : }
554 :
555 : IntRect
556 2 : AlphaBoxBlur::GetRect()
557 : {
558 2 : return mRect;
559 : }
560 :
561 : Rect*
562 0 : AlphaBoxBlur::GetDirtyRect()
563 : {
564 0 : if (mHasDirtyRect) {
565 0 : return &mDirtyRect;
566 : }
567 :
568 0 : return nullptr;
569 : }
570 :
571 : size_t
572 1 : AlphaBoxBlur::GetSurfaceAllocationSize() const
573 : {
574 1 : return mSurfaceAllocationSize;
575 : }
576 :
577 : void
578 1 : AlphaBoxBlur::Blur(uint8_t* aData)
579 : {
580 1 : if (!aData) {
581 0 : return;
582 : }
583 :
584 : // no need to do all this if not blurring or spreading
585 1 : if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) {
586 1 : int32_t stride = GetStride();
587 :
588 1 : IntSize size = GetSize();
589 :
590 1 : if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) {
591 : // No need to use CheckedInt here - we have validated it in the constructor.
592 0 : size_t szB = stride * size.height;
593 0 : uint8_t* tmpData = new (std::nothrow) uint8_t[szB];
594 :
595 0 : if (!tmpData) {
596 0 : return;
597 : }
598 :
599 0 : memset(tmpData, 0, szB);
600 :
601 0 : SpreadHorizontal(aData, tmpData, mSpreadRadius.width, size.width, size.height, stride, mSkipRect);
602 0 : SpreadVertical(tmpData, aData, mSpreadRadius.height, size.width, size.height, stride, mSkipRect);
603 :
604 0 : delete [] tmpData;
605 : }
606 :
607 : int32_t horizontalLobes[3][2];
608 1 : ComputeLobes(mBlurRadius.width, horizontalLobes);
609 : int32_t verticalLobes[3][2];
610 1 : ComputeLobes(mBlurRadius.height, verticalLobes);
611 :
612 : // We want to allow for some extra space on the left for alignment reasons.
613 1 : int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value();
614 :
615 1 : IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1],
616 2 : size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1);
617 :
618 1 : if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) {
619 : // Fallback to old blurring code when the surface is so large it may
620 : // overflow our integral image!
621 0 : if (mBlurRadius.width > 0) {
622 0 : BoxBlur<false>(aData, horizontalLobes, size.width, size.height, stride, mSkipRect);
623 : }
624 0 : if (mBlurRadius.height > 0) {
625 0 : BoxBlur<true>(aData, verticalLobes, size.width, size.height, stride, mSkipRect);
626 : }
627 : } else {
628 1 : size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width, 4);
629 1 : if (integralImageStride == 0) {
630 0 : return;
631 : }
632 :
633 : // We need to leave room for an additional 12 bytes for a maximum overrun
634 : // of 3 pixels in the blurring code.
635 1 : size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12);
636 1 : if (bufLen == 0) {
637 0 : return;
638 : }
639 : // bufLen is a byte count, but here we want a multiple of 32-bit ints, so
640 : // we divide by 4.
641 2 : AlignedArray<uint32_t> integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0));
642 :
643 1 : if (!integralImage) {
644 0 : return;
645 : }
646 :
647 : #ifdef USE_SSE2
648 1 : if (Factory::HasSSE2()) {
649 1 : BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
650 1 : verticalLobes[0][1], integralImage, integralImageStride);
651 1 : BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
652 1 : verticalLobes[1][1], integralImage, integralImageStride);
653 1 : BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
654 1 : verticalLobes[2][1], integralImage, integralImageStride);
655 : } else
656 : #endif
657 : #ifdef BUILD_ARM_NEON
658 : if (mozilla::supports_neon()) {
659 : BoxBlur_NEON(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
660 : verticalLobes[0][1], integralImage, integralImageStride);
661 : BoxBlur_NEON(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
662 : verticalLobes[1][1], integralImage, integralImageStride);
663 : BoxBlur_NEON(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
664 : verticalLobes[2][1], integralImage, integralImageStride);
665 : } else
666 : #endif
667 : {
668 : #ifdef _MIPS_ARCH_LOONGSON3A
669 : BoxBlur_LS3(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
670 : verticalLobes[0][1], integralImage, integralImageStride);
671 : BoxBlur_LS3(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
672 : verticalLobes[1][1], integralImage, integralImageStride);
673 : BoxBlur_LS3(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
674 : verticalLobes[2][1], integralImage, integralImageStride);
675 : #else
676 0 : BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
677 0 : verticalLobes[0][1], integralImage, integralImageStride);
678 0 : BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
679 0 : verticalLobes[1][1], integralImage, integralImageStride);
680 0 : BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
681 0 : verticalLobes[2][1], integralImage, integralImageStride);
682 : #endif
683 : }
684 : }
685 : }
686 : }
687 :
688 : MOZ_ALWAYS_INLINE void
689 0 : GenerateIntegralRow(uint32_t *aDest, const uint8_t *aSource, uint32_t *aPreviousRow,
690 : const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation)
691 : {
692 0 : uint32_t currentRowSum = 0;
693 0 : uint32_t pixel = aSource[0];
694 0 : for (uint32_t x = 0; x < aLeftInflation; x++) {
695 0 : currentRowSum += pixel;
696 0 : *aDest++ = currentRowSum + *aPreviousRow++;
697 : }
698 0 : for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) {
699 0 : uint32_t alphaValues = *(uint32_t*)(aSource + (x - aLeftInflation));
700 : #if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__
701 : currentRowSum += (alphaValues >> 24) & 0xff;
702 : *aDest++ = *aPreviousRow++ + currentRowSum;
703 : currentRowSum += (alphaValues >> 16) & 0xff;
704 : *aDest++ = *aPreviousRow++ + currentRowSum;
705 : currentRowSum += (alphaValues >> 8) & 0xff;
706 : *aDest++ = *aPreviousRow++ + currentRowSum;
707 : currentRowSum += alphaValues & 0xff;
708 : *aDest++ = *aPreviousRow++ + currentRowSum;
709 : #else
710 0 : currentRowSum += alphaValues & 0xff;
711 0 : *aDest++ = *aPreviousRow++ + currentRowSum;
712 0 : alphaValues >>= 8;
713 0 : currentRowSum += alphaValues & 0xff;
714 0 : *aDest++ = *aPreviousRow++ + currentRowSum;
715 0 : alphaValues >>= 8;
716 0 : currentRowSum += alphaValues & 0xff;
717 0 : *aDest++ = *aPreviousRow++ + currentRowSum;
718 0 : alphaValues >>= 8;
719 0 : currentRowSum += alphaValues & 0xff;
720 0 : *aDest++ = *aPreviousRow++ + currentRowSum;
721 : #endif
722 : }
723 0 : pixel = aSource[aSourceWidth - 1];
724 0 : for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) {
725 0 : currentRowSum += pixel;
726 0 : *aDest++ = currentRowSum + *aPreviousRow++;
727 : }
728 0 : }
729 :
730 : MOZ_ALWAYS_INLINE void
731 0 : GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation,
732 : int32_t aTopInflation, int32_t aBottomInflation,
733 : uint32_t *aIntegralImage, size_t aIntegralImageStride,
734 : uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize)
735 : {
736 0 : uint32_t stride32bit = aIntegralImageStride / 4;
737 :
738 0 : IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation,
739 0 : aSize.height + aTopInflation + aBottomInflation);
740 :
741 0 : memset(aIntegralImage, 0, aIntegralImageStride);
742 :
743 0 : GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage,
744 0 : aSize.width, aLeftInflation, aRightInflation);
745 0 : for (int y = 1; y < aTopInflation + 1; y++) {
746 0 : GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit,
747 0 : aSize.width, aLeftInflation, aRightInflation);
748 : }
749 :
750 0 : for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) {
751 0 : GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation),
752 0 : aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation);
753 : }
754 :
755 0 : if (aBottomInflation) {
756 0 : for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) {
757 0 : GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride),
758 0 : aIntegralImage + (y - 1) * stride32bit,
759 0 : aSize.width, aLeftInflation, aRightInflation);
760 : }
761 : }
762 0 : }
763 :
764 : /**
765 : * Attempt to do an in-place box blur using an integral image.
766 : */
767 : void
768 0 : AlphaBoxBlur::BoxBlur_C(uint8_t* aData,
769 : int32_t aLeftLobe,
770 : int32_t aRightLobe,
771 : int32_t aTopLobe,
772 : int32_t aBottomLobe,
773 : uint32_t *aIntegralImage,
774 : size_t aIntegralImageStride)
775 : {
776 0 : IntSize size = GetSize();
777 :
778 0 : MOZ_ASSERT(size.width > 0);
779 :
780 : // Our 'left' or 'top' lobe will include the current pixel. i.e. when
781 : // looking at an integral image the value of a pixel at 'x,y' is calculated
782 : // using the value of the integral image values above/below that.
783 0 : aLeftLobe++;
784 0 : aTopLobe++;
785 0 : int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe);
786 :
787 0 : MOZ_ASSERT(boxSize > 0);
788 :
789 0 : if (boxSize == 1) {
790 0 : return;
791 : }
792 :
793 0 : int32_t stride32bit = aIntegralImageStride / 4;
794 :
795 0 : int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value();
796 :
797 0 : GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe,
798 : aIntegralImage, aIntegralImageStride, aData,
799 0 : mStride, size);
800 :
801 0 : uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
802 :
803 0 : uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation;
804 :
805 : // Storing these locally makes this about 30% faster! Presumably the compiler
806 : // can't be sure we're not altering the member variables in this loop.
807 0 : IntRect skipRect = mSkipRect;
808 0 : uint8_t *data = aData;
809 0 : int32_t stride = mStride;
810 0 : for (int32_t y = 0; y < size.height; y++) {
811 0 : bool inSkipRectY = y > skipRect.y && y < skipRect.YMost();
812 :
813 0 : uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe);
814 0 : uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe);
815 0 : uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe);
816 0 : uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe);
817 :
818 0 : for (int32_t x = 0; x < size.width; x++) {
819 0 : if (inSkipRectY && x > skipRect.x && x < skipRect.XMost()) {
820 0 : x = skipRect.XMost() - 1;
821 : // Trigger early jump on coming loop iterations, this will be reset
822 : // next line anyway.
823 0 : inSkipRectY = false;
824 0 : continue;
825 : }
826 0 : int32_t topLeft = topLeftBase[x];
827 0 : int32_t topRight = topRightBase[x];
828 0 : int32_t bottomRight = bottomRightBase[x];
829 0 : int32_t bottomLeft = bottomLeftBase[x];
830 :
831 0 : uint32_t value = bottomRight - topRight - bottomLeft;
832 0 : value += topLeft;
833 :
834 0 : data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32;
835 : }
836 : }
837 : }
838 :
839 : /**
840 : * Compute the box blur size (which we're calling the blur radius) from
841 : * the standard deviation.
842 : *
843 : * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for
844 : * approximating a Gaussian using box blurs. This yields quite a good
845 : * approximation for a Gaussian. Then we multiply this by 1.5 since our
846 : * code wants the radius of the entire triple-box-blur kernel instead of
847 : * the diameter of an individual box blur. For more details, see:
848 : * http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
849 : * https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
850 : */
851 : static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5);
852 :
853 : IntSize
854 102 : AlphaBoxBlur::CalculateBlurRadius(const Point& aStd)
855 : {
856 102 : IntSize size(static_cast<int32_t>(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5f)),
857 204 : static_cast<int32_t>(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5f)));
858 :
859 102 : return size;
860 : }
861 :
862 : Float
863 0 : AlphaBoxBlur::CalculateBlurSigma(int32_t aBlurRadius)
864 : {
865 0 : return aBlurRadius / GAUSSIAN_SCALE_FACTOR;
866 : }
867 :
868 : } // namespace gfx
869 : } // namespace mozilla
|