LCOV - code coverage report
Current view: top level - gfx/2d - Blur.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 81 364 22.3 %
Date: 2017-07-14 16:53:18 Functions: 11 25 44.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* vim: set ts=8 sts=2 et sw=2 tw=80: */
       3             : /* This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : #include "Blur.h"
       8             : 
       9             : #include <algorithm>
      10             : #include <math.h>
      11             : #include <string.h>
      12             : 
      13             : #include "mozilla/CheckedInt.h"
      14             : 
      15             : #include "2D.h"
      16             : #include "DataSurfaceHelpers.h"
      17             : #include "Tools.h"
      18             : 
      19             : #ifdef BUILD_ARM_NEON
      20             : #include "mozilla/arm.h"
      21             : #endif
      22             : 
      23             : using namespace std;
      24             : 
      25             : namespace mozilla {
      26             : namespace gfx {
      27             : 
      28             : /**
      29             :  * Helper function to process each row of the box blur.
      30             :  * It takes care of transposing the data on input or output depending
      31             :  * on whether we intend a horizontal or vertical blur, and whether we're
      32             :  * reading from the initial source or writing to the final destination.
      33             :  * It allows starting or ending anywhere within the row to accomodate
      34             :  * a skip rect.
      35             :  */
      36             : template<bool aTransposeInput, bool aTransposeOutput>
      37             : static inline void
      38           0 : BoxBlurRow(const uint8_t* aInput,
      39             :            uint8_t* aOutput,
      40             :            int32_t aLeftLobe,
      41             :            int32_t aRightLobe,
      42             :            int32_t aWidth,
      43             :            int32_t aStride,
      44             :            int32_t aStart,
      45             :            int32_t aEnd)
      46             : {
      47             :   // If the input or output is transposed, then we will move down a row
      48             :   // for each step, instead of moving over a column. Since these values
      49             :   // only depend on a template parameter, they will more easily get
      50             :   // copy-propagated in the non-transposed case, which is why they
      51             :   // are not passed as parameters.
      52           0 :   const int32_t inputStep = aTransposeInput ? aStride : 1;
      53           0 :   const int32_t outputStep = aTransposeOutput ? aStride : 1;
      54             : 
      55             :   // We need to sample aLeftLobe pixels to the left and aRightLobe pixels
      56             :   // to the right of the current position, then average them. So this is
      57             :   // the size of the total width of this filter.
      58           0 :   const int32_t boxSize = aLeftLobe + aRightLobe + 1;
      59             : 
      60             :   // Instead of dividing the pixel sum by boxSize to average, we can just
      61             :   // compute a scale that will normalize the result so that it can be quickly
      62             :   // shifted into the desired range.
      63           0 :   const uint32_t reciprocal = (1 << 24) / boxSize;
      64             : 
      65             :   // The shift would normally truncate the result, whereas we would rather
      66             :   // prefer to round the result to the closest increment. By adding 0.5 units
      67             :   // to the initial sum, we bias the sum so that it will be rounded by the
      68             :   // truncation instead.
      69           0 :   uint32_t alphaSum = (boxSize + 1) / 2;
      70             : 
      71             :   // We process the row with a moving filter, keeping a sum (alphaSum) of
      72             :   // boxSize pixels. As we move over a pixel, we need to add on a pixel
      73             :   // from the right extreme of the window that moved into range, and subtract
      74             :   // off a pixel from the left extreme of window that moved out of range.
      75             :   // But first, we need to initialization alphaSum to the contents of
      76             :   // the window before we can get going. If the window moves out of bounds
      77             :   // of the row, we clamp each sample to be the closest pixel from within
      78             :   // row bounds, so the 0th and aWidth-1th pixel.
      79           0 :   int32_t initLeft = aStart - aLeftLobe;
      80           0 :   if (initLeft < 0) {
      81             :     // If the left lobe samples before the row, add in clamped samples.
      82           0 :     alphaSum += -initLeft * aInput[0];
      83           0 :     initLeft = 0;
      84             :   }
      85           0 :   int32_t initRight = aStart + boxSize - aLeftLobe;
      86           0 :   if (initRight > aWidth) {
      87             :     // If the right lobe samples after the row, add in clamped samples.
      88           0 :     alphaSum += (initRight - aWidth) * aInput[(aWidth - 1) * inputStep];
      89           0 :     initRight = aWidth;
      90             :   }
      91             :   // Finally, add in all the valid, non-clamped samples to fill up the
      92             :   // rest of the window.
      93           0 :   const uint8_t* src = &aInput[initLeft * inputStep];
      94           0 :   const uint8_t* iterEnd = &aInput[initRight * inputStep];
      95             : 
      96             :   #define INIT_ITER \
      97             :     alphaSum += *src; \
      98             :     src += inputStep;
      99             : 
     100             :   // We unroll the per-pixel loop here substantially. The amount of work
     101             :   // done per sample is so small that the cost of a loop condition check
     102             :   // and a branch can substantially add to or even dominate the performance
     103             :   // of the loop.
     104           0 :   while (src + 16 * inputStep <= iterEnd) {
     105           0 :     INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
     106           0 :     INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
     107           0 :     INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
     108           0 :     INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
     109             :   }
     110           0 :   while (src < iterEnd) {
     111           0 :     INIT_ITER;
     112             :   }
     113             : 
     114             :   // Now we start moving the window over the row. We will be accessing
     115             :   // pixels form aStart - aLeftLobe up to aEnd + aRightLobe, which may be
     116             :   // out of bounds of the row. To avoid having to check within the inner
     117             :   // loops if we are in bound, we instead compute the points at which
     118             :   // we will move out of bounds of the row on the left side (splitLeft)
     119             :   // and right side (splitRight).
     120           0 :   int32_t splitLeft = min(max(aLeftLobe, aStart), aEnd);
     121           0 :   int32_t splitRight = min(max(aWidth - (boxSize - aLeftLobe), aStart), aEnd);
     122             :   // If the filter window is actually large than the size of the row,
     123             :   // there will be a middle area of overlap where the leftmost and rightmost
     124             :   // pixel of the filter will both be outside the row. In this case, we need
     125             :   // to invert the splits so that splitLeft <= splitRight.
     126           0 :   if (boxSize > aWidth) {
     127           0 :     swap(splitLeft, splitRight);
     128             :   }
     129             : 
     130             :   // Process all pixels up to splitLeft that would sample before the start of the row.
     131             :   // Note that because inputStep and outputStep may not be a const 1 value, it is more
     132             :   // performant to increment pointers here for the source and destination rather than
     133             :   // use a loop counter, since doing so would entail an expensive multiplication that
     134             :   // significantly slows down the loop.
     135           0 :   uint8_t* dst = &aOutput[aStart * outputStep];
     136           0 :   iterEnd = &aOutput[splitLeft * outputStep];
     137           0 :   src = &aInput[(aStart + boxSize - aLeftLobe) * inputStep];
     138           0 :   uint8_t firstVal = aInput[0];
     139             : 
     140             :   #define LEFT_ITER \
     141             :     *dst = (alphaSum * reciprocal) >> 24; \
     142             :     alphaSum += *src - firstVal; \
     143             :     dst += outputStep; \
     144             :     src += inputStep;
     145             : 
     146           0 :   while (dst + 16 * outputStep <= iterEnd) {
     147           0 :     LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
     148           0 :     LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
     149           0 :     LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
     150           0 :     LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
     151             :   }
     152           0 :   while (dst < iterEnd) {
     153           0 :     LEFT_ITER;
     154             :   }
     155             : 
     156             :   // Process all pixels between splitLeft and splitRight.
     157           0 :   iterEnd = &aOutput[splitRight * outputStep];
     158           0 :   if (boxSize <= aWidth) {
     159             :     // The filter window is smaller than the row size, so the leftmost and rightmost
     160             :     // samples are both within row bounds.
     161           0 :     src = &aInput[(splitLeft - aLeftLobe) * inputStep];
     162           0 :     int32_t boxStep = boxSize * inputStep;
     163             : 
     164             :     #define CENTER_ITER \
     165             :       *dst = (alphaSum * reciprocal) >> 24; \
     166             :       alphaSum += src[boxStep] - *src; \
     167             :       dst += outputStep; \
     168             :       src += inputStep;
     169             : 
     170           0 :     while (dst +  16 * outputStep <= iterEnd) {
     171           0 :       CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
     172           0 :       CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
     173           0 :       CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
     174           0 :       CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
     175             :     }
     176           0 :     while (dst < iterEnd) {
     177           0 :       CENTER_ITER;
     178             :     }
     179             :   } else {
     180             :     // The filter window is larger than the row size, and we're in the area of split
     181             :     // overlap. So the leftmost and rightmost samples are both out of bounds and need
     182             :     // to be clamped. We can just precompute the difference here consequently.
     183           0 :     int32_t firstLastDiff = aInput[(aWidth -1) * inputStep] - aInput[0];
     184           0 :     while (dst < iterEnd) {
     185           0 :       *dst = (alphaSum * reciprocal) >> 24;
     186           0 :       alphaSum += firstLastDiff;
     187           0 :       dst += outputStep;
     188             :     }
     189             :   }
     190             : 
     191             :   // Process all remaining pixels after splitRight that would sample after the row end.
     192           0 :   iterEnd = &aOutput[aEnd * outputStep];
     193           0 :   src = &aInput[(splitRight - aLeftLobe) * inputStep];
     194           0 :   uint8_t lastVal = aInput[(aWidth - 1) * inputStep];
     195             : 
     196             :   #define RIGHT_ITER \
     197             :     *dst = (alphaSum * reciprocal) >> 24; \
     198             :     alphaSum += lastVal - *src; \
     199             :     dst += outputStep; \
     200             :     src += inputStep;
     201             : 
     202           0 :   while (dst + 16 * outputStep <= iterEnd) {
     203           0 :     RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
     204           0 :     RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
     205           0 :     RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
     206           0 :     RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
     207             :   }
     208           0 :   while (dst < iterEnd) {
     209           0 :     RIGHT_ITER;
     210             :   }
     211           0 : }
     212             : 
     213             : /**
     214             :  * Box blur involves looking at one pixel, and setting its value to the average
     215             :  * of its neighbouring pixels. This is meant to provide a 3-pass approximation of a
     216             :  * Gaussian blur.
     217             :  * @param aTranspose Whether to transpose the buffer when reading and writing to it.
     218             :  * @param aData The buffer to be blurred.
     219             :  * @param aLobes The number of pixels to blend on the left and right for each of 3 passes.
     220             :  * @param aWidth The number of columns in the buffers.
     221             :  * @param aRows The number of rows in the buffers.
     222             :  * @param aStride The stride of the buffer.
     223             :  */
     224             : template<bool aTranspose>
     225             : static void
     226           0 : BoxBlur(uint8_t* aData,
     227             :         const int32_t aLobes[3][2],
     228             :         int32_t aWidth,
     229             :         int32_t aRows,
     230             :         int32_t aStride,
     231             :         IntRect aSkipRect)
     232             : {
     233             :   if (aTranspose) {
     234           0 :     swap(aWidth, aRows);
     235           0 :     swap(aSkipRect.x, aSkipRect.y);
     236           0 :     swap(aSkipRect.width, aSkipRect.height);
     237             :   }
     238             : 
     239           0 :   MOZ_ASSERT(aWidth > 0);
     240             : 
     241             :   // All three passes of the box blur that approximate the Gaussian are done
     242             :   // on each row in turn, so we only need two temporary row buffers to process
     243             :   // each row, instead of a full-sized buffer. Data moves from the source to the
     244             :   // first temporary, from the first temporary to the second, then from the second
     245             :   // back to the destination. This way is more cache-friendly than processing whe
     246             :   // whole buffer in each pass and thus yields a nice speedup.
     247           0 :   uint8_t* tmpRow = new (std::nothrow) uint8_t[2 * aWidth];
     248           0 :   if (!tmpRow) {
     249           0 :     return;
     250             :   }
     251           0 :   uint8_t* tmpRow2 = tmpRow + aWidth;
     252             : 
     253           0 :   const int32_t stride = aTranspose ? 1 : aStride;
     254           0 :   bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
     255           0 :                                 aWidth <= aSkipRect.XMost();
     256             : 
     257           0 :   for (int32_t y = 0; y < aRows; y++) {
     258             :     // Check whether the skip rect intersects this row. If the skip
     259             :     // rect covers the whole surface in this row, we can avoid
     260             :     // this row entirely (and any others along the skip rect).
     261           0 :     bool inSkipRectY = y >= aSkipRect.y &&
     262           0 :                        y < aSkipRect.YMost();
     263           0 :     if (inSkipRectY && skipRectCoversWholeRow) {
     264           0 :       aData += stride * (aSkipRect.YMost() - y);
     265           0 :       y = aSkipRect.YMost() - 1;
     266           0 :       continue;
     267             :     }
     268             : 
     269             :     // Read in data from the source transposed if necessary.
     270           0 :     BoxBlurRow<aTranspose, false>(aData, tmpRow, aLobes[0][0], aLobes[0][1], aWidth, aStride, 0, aWidth);
     271             : 
     272             :     // For the middle pass, the data is already pre-transposed and does not need to be post-transposed yet.
     273           0 :     BoxBlurRow<false, false>(tmpRow, tmpRow2, aLobes[1][0], aLobes[1][1], aWidth, aStride, 0, aWidth);
     274             : 
     275             :     // Write back data to the destination transposed if necessary too.
     276             :     // Make sure not to overwrite the skip rect by only outputting to the
     277             :     // destination before and after the skip rect, if requested.
     278           0 :     int32_t skipStart = inSkipRectY ? min(max(aSkipRect.x, 0), aWidth) : aWidth;
     279           0 :     int32_t skipEnd = max(skipStart, aSkipRect.XMost());
     280           0 :     if (skipStart > 0) {
     281           0 :       BoxBlurRow<false, aTranspose>(tmpRow2, aData, aLobes[2][0], aLobes[2][1], aWidth, aStride, 0, skipStart);
     282             :     }
     283           0 :     if (skipEnd < aWidth) {
     284           0 :       BoxBlurRow<false, aTranspose>(tmpRow2, aData, aLobes[2][0], aLobes[2][1], aWidth, aStride, skipEnd, aWidth);
     285             :     }
     286             : 
     287           0 :     aData += stride;
     288             :   }
     289             : 
     290           0 :   delete[] tmpRow;
     291             : }
     292             : 
     293           2 : static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
     294             : {
     295             :     int32_t major, minor, final;
     296             : 
     297             :     /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for
     298             :      * some notes about approximating the Gaussian blur with box-blurs.
     299             :      * The comments below are in the terminology of that page.
     300             :      */
     301           2 :     int32_t z = aRadius / 3;
     302           2 :     switch (aRadius % 3) {
     303             :     case 0:
     304             :         // aRadius = z*3; choose d = 2*z + 1
     305           0 :         major = minor = final = z;
     306           0 :         break;
     307             :     case 1:
     308             :         // aRadius = z*3 + 1
     309             :         // This is a tricky case since there is no value of d which will
     310             :         // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1
     311             :         // for some integer k, then the radius will be 3*k. If d is even,
     312             :         // i.e. d=2*k, then the radius will be 3*k - 1.
     313             :         // So we have to choose values that don't match the standard
     314             :         // algorithm.
     315           0 :         major = z + 1;
     316           0 :         minor = final = z;
     317           0 :         break;
     318             :     case 2:
     319             :         // aRadius = z*3 + 2; choose d = 2*z + 2
     320           2 :         major = final = z + 1;
     321           2 :         minor = z;
     322           2 :         break;
     323             :     default:
     324             :         // Mathematical impossibility!
     325           0 :         MOZ_ASSERT(false);
     326             :         major = minor = final = 0;
     327             :     }
     328           2 :     MOZ_ASSERT(major + minor + final == aRadius);
     329             : 
     330           2 :     aLobes[0][0] = major;
     331           2 :     aLobes[0][1] = minor;
     332           2 :     aLobes[1][0] = minor;
     333           2 :     aLobes[1][1] = major;
     334           2 :     aLobes[2][0] = final;
     335           2 :     aLobes[2][1] = final;
     336           2 : }
     337             : 
     338             : static void
     339           0 : SpreadHorizontal(uint8_t* aInput,
     340             :                  uint8_t* aOutput,
     341             :                  int32_t aRadius,
     342             :                  int32_t aWidth,
     343             :                  int32_t aRows,
     344             :                  int32_t aStride,
     345             :                  const IntRect& aSkipRect)
     346             : {
     347           0 :     if (aRadius == 0) {
     348           0 :         memcpy(aOutput, aInput, aStride * aRows);
     349           0 :         return;
     350             :     }
     351             : 
     352           0 :     bool skipRectCoversWholeRow = 0 >= aSkipRect.x &&
     353           0 :                                     aWidth <= aSkipRect.XMost();
     354           0 :     for (int32_t y = 0; y < aRows; y++) {
     355             :         // Check whether the skip rect intersects this row. If the skip
     356             :         // rect covers the whole surface in this row, we can avoid
     357             :         // this row entirely (and any others along the skip rect).
     358           0 :         bool inSkipRectY = y >= aSkipRect.y &&
     359           0 :                              y < aSkipRect.YMost();
     360           0 :         if (inSkipRectY && skipRectCoversWholeRow) {
     361           0 :             y = aSkipRect.YMost() - 1;
     362           0 :             continue;
     363             :         }
     364             : 
     365           0 :         for (int32_t x = 0; x < aWidth; x++) {
     366             :             // Check whether we are within the skip rect. If so, go
     367             :             // to the next point outside the skip rect.
     368           0 :             if (inSkipRectY && x >= aSkipRect.x &&
     369           0 :                 x < aSkipRect.XMost()) {
     370           0 :                 x = aSkipRect.XMost();
     371           0 :                 if (x >= aWidth)
     372           0 :                     break;
     373             :             }
     374             : 
     375           0 :             int32_t sMin = max(x - aRadius, 0);
     376           0 :             int32_t sMax = min(x + aRadius, aWidth - 1);
     377           0 :             int32_t v = 0;
     378           0 :             for (int32_t s = sMin; s <= sMax; ++s) {
     379           0 :                 v = max<int32_t>(v, aInput[aStride * y + s]);
     380             :             }
     381           0 :             aOutput[aStride * y + x] = v;
     382             :         }
     383             :     }
     384             : }
     385             : 
     386             : static void
     387           0 : SpreadVertical(uint8_t* aInput,
     388             :                uint8_t* aOutput,
     389             :                int32_t aRadius,
     390             :                int32_t aWidth,
     391             :                int32_t aRows,
     392             :                int32_t aStride,
     393             :                const IntRect& aSkipRect)
     394             : {
     395           0 :     if (aRadius == 0) {
     396           0 :         memcpy(aOutput, aInput, aStride * aRows);
     397           0 :         return;
     398             :     }
     399             : 
     400           0 :     bool skipRectCoversWholeColumn = 0 >= aSkipRect.y &&
     401           0 :                                      aRows <= aSkipRect.YMost();
     402           0 :     for (int32_t x = 0; x < aWidth; x++) {
     403           0 :         bool inSkipRectX = x >= aSkipRect.x &&
     404           0 :                            x < aSkipRect.XMost();
     405           0 :         if (inSkipRectX && skipRectCoversWholeColumn) {
     406           0 :             x = aSkipRect.XMost() - 1;
     407           0 :             continue;
     408             :         }
     409             : 
     410           0 :         for (int32_t y = 0; y < aRows; y++) {
     411             :             // Check whether we are within the skip rect. If so, go
     412             :             // to the next point outside the skip rect.
     413           0 :             if (inSkipRectX && y >= aSkipRect.y &&
     414           0 :                 y < aSkipRect.YMost()) {
     415           0 :                 y = aSkipRect.YMost();
     416           0 :                 if (y >= aRows)
     417           0 :                     break;
     418             :             }
     419             : 
     420           0 :             int32_t sMin = max(y - aRadius, 0);
     421           0 :             int32_t sMax = min(y + aRadius, aRows - 1);
     422           0 :             int32_t v = 0;
     423           0 :             for (int32_t s = sMin; s <= sMax; ++s) {
     424           0 :                 v = max<int32_t>(v, aInput[aStride * s + x]);
     425             :             }
     426           0 :             aOutput[aStride * y + x] = v;
     427             :         }
     428             :     }
     429             : }
     430             : 
     431             : CheckedInt<int32_t>
     432           5 : AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal)
     433             : {
     434           5 :   CheckedInt<int32_t> val(aVal);
     435             : 
     436           5 :   val += 3;
     437           5 :   val /= 4;
     438           5 :   val *= 4;
     439             : 
     440           5 :   return val;
     441             : }
     442             : 
     443           0 : AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
     444             :                            const IntSize& aSpreadRadius,
     445             :                            const IntSize& aBlurRadius,
     446             :                            const Rect* aDirtyRect,
     447           0 :                            const Rect* aSkipRect)
     448           0 :   : mSurfaceAllocationSize(0)
     449             : {
     450           0 :   Init(aRect, aSpreadRadius, aBlurRadius, aDirtyRect, aSkipRect);
     451           0 : }
     452             : 
     453          11 : AlphaBoxBlur::AlphaBoxBlur()
     454          11 :   : mSurfaceAllocationSize(0)
     455             : {
     456          11 : }
     457             : 
     458             : void
     459           1 : AlphaBoxBlur::Init(const Rect& aRect,
     460             :                    const IntSize& aSpreadRadius,
     461             :                    const IntSize& aBlurRadius,
     462             :                    const Rect* aDirtyRect,
     463             :                    const Rect* aSkipRect)
     464             : {
     465           1 :   mSpreadRadius = aSpreadRadius;
     466           1 :   mBlurRadius = aBlurRadius;
     467             : 
     468           1 :   Rect rect(aRect);
     469           1 :   rect.Inflate(Size(aBlurRadius + aSpreadRadius));
     470           1 :   rect.RoundOut();
     471             : 
     472           1 :   if (aDirtyRect) {
     473             :     // If we get passed a dirty rect from layout, we can minimize the
     474             :     // shadow size and make painting faster.
     475           0 :     mHasDirtyRect = true;
     476           0 :     mDirtyRect = *aDirtyRect;
     477           0 :     Rect requiredBlurArea = mDirtyRect.Intersect(rect);
     478           0 :     requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius));
     479           0 :     rect = requiredBlurArea.Intersect(rect);
     480             :   } else {
     481           1 :     mHasDirtyRect = false;
     482             :   }
     483             : 
     484           1 :   mRect = TruncatedToInt(rect);
     485           1 :   if (mRect.IsEmpty()) {
     486           0 :     return;
     487             :   }
     488             : 
     489           1 :   if (aSkipRect) {
     490             :     // If we get passed a skip rect, we can lower the amount of
     491             :     // blurring/spreading we need to do. We convert it to IntRect to avoid
     492             :     // expensive int<->float conversions if we were to use Rect instead.
     493           0 :     Rect skipRect = *aSkipRect;
     494           0 :     skipRect.Deflate(Size(aBlurRadius + aSpreadRadius));
     495           0 :     mSkipRect = RoundedIn(skipRect);
     496           0 :     mSkipRect = mSkipRect.Intersect(mRect);
     497           0 :     if (mSkipRect.IsEqualInterior(mRect))
     498           0 :       return;
     499             : 
     500           0 :     mSkipRect -= mRect.TopLeft();
     501             :   } else {
     502           1 :     mSkipRect = IntRect(0, 0, 0, 0);
     503             :   }
     504             : 
     505           1 :   CheckedInt<int32_t> stride = RoundUpToMultipleOf4(mRect.width);
     506           1 :   if (stride.isValid()) {
     507           1 :     mStride = stride.value();
     508             : 
     509             :     // We need to leave room for an additional 3 bytes for a potential overrun
     510             :     // in our blurring code.
     511           1 :     size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.height, 3);
     512           1 :     if (size != 0) {
     513           1 :       mSurfaceAllocationSize = size;
     514             :     }
     515             :   }
     516             : }
     517             : 
     518           0 : AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
     519             :                            int32_t aStride,
     520             :                            float aSigmaX,
     521           0 :                            float aSigmaY)
     522             :   : mRect(TruncatedToInt(aRect)),
     523             :     mSpreadRadius(),
     524           0 :     mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))),
     525             :     mStride(aStride),
     526           0 :     mSurfaceAllocationSize(0)
     527             : {
     528           0 :   IntRect intRect;
     529           0 :   if (aRect.ToIntRect(&intRect)) {
     530           0 :     size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.width, intRect.height);
     531           0 :     if (minDataSize != 0) {
     532           0 :       mSurfaceAllocationSize = minDataSize;
     533             :     }
     534             :   }
     535           0 : }
     536             : 
     537             : 
     538          11 : AlphaBoxBlur::~AlphaBoxBlur()
     539             : {
     540          11 : }
     541             : 
     542             : IntSize
     543           5 : AlphaBoxBlur::GetSize()
     544             : {
     545           5 :   IntSize size(mRect.width, mRect.height);
     546           5 :   return size;
     547             : }
     548             : 
/**
 * Returns the stride stored in mStride.
 */
int32_t
AlphaBoxBlur::GetStride()
{
  return mStride;
}
     554             : 
/**
 * Returns a copy of the integer rectangle stored in mRect.
 */
IntRect
AlphaBoxBlur::GetRect()
{
  return mRect;
}
     560             : 
     561             : Rect*
     562           0 : AlphaBoxBlur::GetDirtyRect()
     563             : {
     564           0 :   if (mHasDirtyRect) {
     565           0 :     return &mDirtyRect;
     566             :   }
     567             : 
     568           0 :   return nullptr;
     569             : }
     570             : 
/**
 * Returns mSurfaceAllocationSize. The constructors start it at 0 and only
 * overwrite it with a non-zero size, so 0 means no size was computed.
 */
size_t
AlphaBoxBlur::GetSurfaceAllocationSize() const
{
  return mSurfaceAllocationSize;
}
     576             : 
     577             : void
     578           1 : AlphaBoxBlur::Blur(uint8_t* aData)
     579             : {
     580           1 :   if (!aData) {
     581           0 :     return;
     582             :   }
     583             : 
     584             :   // no need to do all this if not blurring or spreading
     585           1 :   if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) {
     586           1 :     int32_t stride = GetStride();
     587             : 
     588           1 :     IntSize size = GetSize();
     589             : 
     590           1 :     if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) {
     591             :       // No need to use CheckedInt here - we have validated it in the constructor.
     592           0 :       size_t szB = stride * size.height;
     593           0 :       uint8_t* tmpData = new (std::nothrow) uint8_t[szB];
     594             : 
     595           0 :       if (!tmpData) {
     596           0 :         return;
     597             :       }
     598             : 
     599           0 :       memset(tmpData, 0, szB);
     600             : 
     601           0 :       SpreadHorizontal(aData, tmpData, mSpreadRadius.width, size.width, size.height, stride, mSkipRect);
     602           0 :       SpreadVertical(tmpData, aData, mSpreadRadius.height, size.width, size.height, stride, mSkipRect);
     603             : 
     604           0 :       delete [] tmpData;
     605             :     }
     606             : 
     607             :     int32_t horizontalLobes[3][2];
     608           1 :     ComputeLobes(mBlurRadius.width, horizontalLobes);
     609             :     int32_t verticalLobes[3][2];
     610           1 :     ComputeLobes(mBlurRadius.height, verticalLobes);
     611             : 
     612             :     // We want to allow for some extra space on the left for alignment reasons.
     613           1 :     int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value();
     614             : 
     615           1 :     IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1],
     616           2 :                               size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1);
     617             : 
     618           1 :     if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) {
     619             :       // Fallback to old blurring code when the surface is so large it may
     620             :       // overflow our integral image!
     621           0 :       if (mBlurRadius.width > 0) {
     622           0 :         BoxBlur<false>(aData, horizontalLobes, size.width, size.height, stride, mSkipRect);
     623             :       }
     624           0 :       if (mBlurRadius.height > 0) {
     625           0 :         BoxBlur<true>(aData, verticalLobes, size.width, size.height, stride, mSkipRect);
     626             :       }
     627             :     } else {
     628           1 :       size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width, 4);
     629           1 :       if (integralImageStride == 0) {
     630           0 :         return;
     631             :       }
     632             : 
     633             :       // We need to leave room for an additional 12 bytes for a maximum overrun
     634             :       // of 3 pixels in the blurring code.
     635           1 :       size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12);
     636           1 :       if (bufLen == 0) {
     637           0 :         return;
     638             :       }
     639             :       // bufLen is a byte count, but here we want a multiple of 32-bit ints, so
     640             :       // we divide by 4.
     641           2 :       AlignedArray<uint32_t> integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0));
     642             : 
     643           1 :       if (!integralImage) {
     644           0 :         return;
     645             :       }
     646             : 
     647             : #ifdef USE_SSE2
     648           1 :       if (Factory::HasSSE2()) {
     649           1 :         BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
     650           1 :                      verticalLobes[0][1], integralImage, integralImageStride);
     651           1 :         BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
     652           1 :                      verticalLobes[1][1], integralImage, integralImageStride);
     653           1 :         BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
     654           1 :                      verticalLobes[2][1], integralImage, integralImageStride);
     655             :       } else
     656             : #endif
     657             : #ifdef BUILD_ARM_NEON
     658             :       if (mozilla::supports_neon()) {
     659             :         BoxBlur_NEON(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
     660             :                      verticalLobes[0][1], integralImage, integralImageStride);
     661             :         BoxBlur_NEON(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
     662             :                      verticalLobes[1][1], integralImage, integralImageStride);
     663             :         BoxBlur_NEON(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
     664             :                      verticalLobes[2][1], integralImage, integralImageStride);
     665             :       } else
     666             : #endif
     667             :       {
     668             : #ifdef _MIPS_ARCH_LOONGSON3A
     669             :         BoxBlur_LS3(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
     670             :                      verticalLobes[0][1], integralImage, integralImageStride);
     671             :         BoxBlur_LS3(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
     672             :                      verticalLobes[1][1], integralImage, integralImageStride);
     673             :         BoxBlur_LS3(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
     674             :                      verticalLobes[2][1], integralImage, integralImageStride);
     675             : #else
     676           0 :         BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
     677           0 :                   verticalLobes[0][1], integralImage, integralImageStride);
     678           0 :         BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
     679           0 :                   verticalLobes[1][1], integralImage, integralImageStride);
     680           0 :         BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
     681           0 :                   verticalLobes[2][1], integralImage, integralImageStride);
     682             : #endif
     683             :       }
     684             :     }
     685             :   }
     686             : }
     687             : 
     688             : MOZ_ALWAYS_INLINE void
     689           0 : GenerateIntegralRow(uint32_t  *aDest, const uint8_t *aSource, uint32_t *aPreviousRow,
     690             :                     const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation)
     691             : {
     692           0 :   uint32_t currentRowSum = 0;
     693           0 :   uint32_t pixel = aSource[0];
     694           0 :   for (uint32_t x = 0; x < aLeftInflation; x++) {
     695           0 :     currentRowSum += pixel;
     696           0 :     *aDest++ = currentRowSum + *aPreviousRow++;
     697             :   }
     698           0 :   for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) {
     699           0 :       uint32_t alphaValues = *(uint32_t*)(aSource + (x - aLeftInflation));
     700             : #if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__
     701             :       currentRowSum += (alphaValues >> 24) & 0xff;
     702             :       *aDest++ = *aPreviousRow++ + currentRowSum;
     703             :       currentRowSum += (alphaValues >> 16) & 0xff;
     704             :       *aDest++ = *aPreviousRow++ + currentRowSum;
     705             :       currentRowSum += (alphaValues >> 8) & 0xff;
     706             :       *aDest++ = *aPreviousRow++ + currentRowSum;
     707             :       currentRowSum += alphaValues & 0xff;
     708             :       *aDest++ = *aPreviousRow++ + currentRowSum;
     709             : #else
     710           0 :       currentRowSum += alphaValues & 0xff;
     711           0 :       *aDest++ = *aPreviousRow++ + currentRowSum;
     712           0 :       alphaValues >>= 8;
     713           0 :       currentRowSum += alphaValues & 0xff;
     714           0 :       *aDest++ = *aPreviousRow++ + currentRowSum;
     715           0 :       alphaValues >>= 8;
     716           0 :       currentRowSum += alphaValues & 0xff;
     717           0 :       *aDest++ = *aPreviousRow++ + currentRowSum;
     718           0 :       alphaValues >>= 8;
     719           0 :       currentRowSum += alphaValues & 0xff;
     720           0 :       *aDest++ = *aPreviousRow++ + currentRowSum;
     721             : #endif
     722             :   }
     723           0 :   pixel = aSource[aSourceWidth - 1];
     724           0 :   for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) {
     725           0 :     currentRowSum += pixel;
     726           0 :     *aDest++ = currentRowSum + *aPreviousRow++;
     727             :   }
     728           0 : }
     729             : 
     730             : MOZ_ALWAYS_INLINE void
     731           0 : GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation,
     732             :                         int32_t aTopInflation, int32_t aBottomInflation,
     733             :                         uint32_t *aIntegralImage, size_t aIntegralImageStride,
     734             :                         uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize)
     735             : {
     736           0 :   uint32_t stride32bit = aIntegralImageStride / 4;
     737             : 
     738           0 :   IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation,
     739           0 :                             aSize.height + aTopInflation + aBottomInflation);
     740             : 
     741           0 :   memset(aIntegralImage, 0, aIntegralImageStride);
     742             : 
     743           0 :   GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage,
     744           0 :                       aSize.width, aLeftInflation, aRightInflation);
     745           0 :   for (int y = 1; y < aTopInflation + 1; y++) {
     746           0 :     GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit,
     747           0 :                         aSize.width, aLeftInflation, aRightInflation);
     748             :   }
     749             : 
     750           0 :   for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) {
     751           0 :     GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation),
     752           0 :                         aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation);
     753             :   }
     754             : 
     755           0 :   if (aBottomInflation) {
     756           0 :     for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) {
     757           0 :       GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride),
     758           0 :                           aIntegralImage + (y - 1) * stride32bit,
     759           0 :                           aSize.width, aLeftInflation, aRightInflation);
     760             :     }
     761             :   }
     762           0 : }
     763             : 
     764             : /**
     765             :  * Attempt to do an in-place box blur using an integral image.
     766             :  */
     767             : void
     768           0 : AlphaBoxBlur::BoxBlur_C(uint8_t* aData,
     769             :                         int32_t aLeftLobe,
     770             :                         int32_t aRightLobe,
     771             :                         int32_t aTopLobe,
     772             :                         int32_t aBottomLobe,
     773             :                         uint32_t *aIntegralImage,
     774             :                         size_t aIntegralImageStride)
     775             : {
     776           0 :   IntSize size = GetSize();
     777             : 
     778           0 :   MOZ_ASSERT(size.width > 0);
     779             : 
     780             :   // Our 'left' or 'top' lobe will include the current pixel. i.e. when
     781             :   // looking at an integral image the value of a pixel at 'x,y' is calculated
     782             :   // using the value of the integral image values above/below that.
     783           0 :   aLeftLobe++;
     784           0 :   aTopLobe++;
     785           0 :   int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe);
     786             : 
     787           0 :   MOZ_ASSERT(boxSize > 0);
     788             : 
     789           0 :   if (boxSize == 1) {
     790           0 :       return;
     791             :   }
     792             : 
     793           0 :   int32_t stride32bit = aIntegralImageStride / 4;
     794             : 
     795           0 :   int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value();
     796             : 
     797           0 :   GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe,
     798             :                           aIntegralImage, aIntegralImageStride, aData,
     799           0 :                           mStride, size);
     800             : 
     801           0 :   uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
     802             : 
     803           0 :   uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation;
     804             : 
     805             :   // Storing these locally makes this about 30% faster! Presumably the compiler
     806             :   // can't be sure we're not altering the member variables in this loop.
     807           0 :   IntRect skipRect = mSkipRect;
     808           0 :   uint8_t *data = aData;
     809           0 :   int32_t stride = mStride;
     810           0 :   for (int32_t y = 0; y < size.height; y++) {
     811           0 :     bool inSkipRectY = y > skipRect.y && y < skipRect.YMost();
     812             : 
     813           0 :     uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe);
     814           0 :     uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe);
     815           0 :     uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe);
     816           0 :     uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe);
     817             : 
     818           0 :     for (int32_t x = 0; x < size.width; x++) {
     819           0 :       if (inSkipRectY && x > skipRect.x && x < skipRect.XMost()) {
     820           0 :         x = skipRect.XMost() - 1;
     821             :         // Trigger early jump on coming loop iterations, this will be reset
     822             :         // next line anyway.
     823           0 :         inSkipRectY = false;
     824           0 :         continue;
     825             :       }
     826           0 :       int32_t topLeft = topLeftBase[x];
     827           0 :       int32_t topRight = topRightBase[x];
     828           0 :       int32_t bottomRight = bottomRightBase[x];
     829           0 :       int32_t bottomLeft = bottomLeftBase[x];
     830             : 
     831           0 :       uint32_t value = bottomRight - topRight - bottomLeft;
     832           0 :       value += topLeft;
     833             : 
     834           0 :       data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32;
     835             :     }
     836             :   }
     837             : }
     838             : 
     839             : /**
     840             :  * Compute the box blur size (which we're calling the blur radius) from
     841             :  * the standard deviation.
     842             :  *
     843             :  * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for
     844             :  * approximating a Gaussian using box blurs.  This yields quite a good
     845             :  * approximation for a Gaussian.  Then we multiply this by 1.5 since our
     846             :  * code wants the radius of the entire triple-box-blur kernel instead of
     847             :  * the diameter of an individual box blur.  For more details, see:
     848             :  *   http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
     849             :  *   https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
     850             :  */
     851             : static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5);
     852             : 
     853             : IntSize
     854         102 : AlphaBoxBlur::CalculateBlurRadius(const Point& aStd)
     855             : {
     856         102 :     IntSize size(static_cast<int32_t>(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5f)),
     857         204 :                  static_cast<int32_t>(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5f)));
     858             : 
     859         102 :     return size;
     860             : }
     861             : 
     862             : Float
     863           0 : AlphaBoxBlur::CalculateBlurSigma(int32_t aBlurRadius)
     864             : {
     865           0 :   return aBlurRadius / GAUSSIAN_SCALE_FACTOR;
     866             : }
     867             : 
     868             : } // namespace gfx
     869             : } // namespace mozilla

Generated by: LCOV version 1.13