Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/gfx/2d/Blur.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#include "Blur.h"
8
9
#include <algorithm>
10
#include <math.h>
11
#include <string.h>
12
13
#include "mozilla/CheckedInt.h"
14
15
#include "2D.h"
16
#include "DataSurfaceHelpers.h"
17
#include "Tools.h"
18
19
#ifdef BUILD_ARM_NEON
20
#include "mozilla/arm.h"
21
#endif
22
23
using namespace std;
24
25
namespace mozilla {
26
namespace gfx {
27
28
/**
29
 * Helper function to process each row of the box blur.
30
 * It takes care of transposing the data on input or output depending
31
 * on whether we intend a horizontal or vertical blur, and whether we're
32
 * reading from the initial source or writing to the final destination.
33
 * It allows starting or ending anywhere within the row to accomodate
34
 * a skip rect.
35
 */
36
template<bool aTransposeInput, bool aTransposeOutput>
37
static inline void
38
BoxBlurRow(const uint8_t* aInput,
39
           uint8_t* aOutput,
40
           int32_t aLeftLobe,
41
           int32_t aRightLobe,
42
           int32_t aWidth,
43
           int32_t aStride,
44
           int32_t aStart,
45
           int32_t aEnd)
46
0
{
47
0
  // If the input or output is transposed, then we will move down a row
48
0
  // for each step, instead of moving over a column. Since these values
49
0
  // only depend on a template parameter, they will more easily get
50
0
  // copy-propagated in the non-transposed case, which is why they
51
0
  // are not passed as parameters.
52
0
  const int32_t inputStep = aTransposeInput ? aStride : 1;
53
0
  const int32_t outputStep = aTransposeOutput ? aStride : 1;
54
0
55
0
  // We need to sample aLeftLobe pixels to the left and aRightLobe pixels
56
0
  // to the right of the current position, then average them. So this is
57
0
  // the size of the total width of this filter.
58
0
  const int32_t boxSize = aLeftLobe + aRightLobe + 1;
59
0
60
0
  // Instead of dividing the pixel sum by boxSize to average, we can just
61
0
  // compute a scale that will normalize the result so that it can be quickly
62
0
  // shifted into the desired range.
63
0
  const uint32_t reciprocal = (1 << 24) / boxSize;
64
0
65
0
  // The shift would normally truncate the result, whereas we would rather
66
0
  // prefer to round the result to the closest increment. By adding 0.5 units
67
0
  // to the initial sum, we bias the sum so that it will be rounded by the
68
0
  // truncation instead.
69
0
  uint32_t alphaSum = (boxSize + 1) / 2;
70
0
71
0
  // We process the row with a moving filter, keeping a sum (alphaSum) of
72
0
  // boxSize pixels. As we move over a pixel, we need to add on a pixel
73
0
  // from the right extreme of the window that moved into range, and subtract
74
0
  // off a pixel from the left extreme of window that moved out of range.
75
0
  // But first, we need to initialization alphaSum to the contents of
76
0
  // the window before we can get going. If the window moves out of bounds
77
0
  // of the row, we clamp each sample to be the closest pixel from within
78
0
  // row bounds, so the 0th and aWidth-1th pixel.
79
0
  int32_t initLeft = aStart - aLeftLobe;
80
0
  if (initLeft < 0) {
81
0
    // If the left lobe samples before the row, add in clamped samples.
82
0
    alphaSum += -initLeft * aInput[0];
83
0
    initLeft = 0;
84
0
  }
85
0
  int32_t initRight = aStart + boxSize - aLeftLobe;
86
0
  if (initRight > aWidth) {
87
0
    // If the right lobe samples after the row, add in clamped samples.
88
0
    alphaSum += (initRight - aWidth) * aInput[(aWidth - 1) * inputStep];
89
0
    initRight = aWidth;
90
0
  }
91
0
  // Finally, add in all the valid, non-clamped samples to fill up the
92
0
  // rest of the window.
93
0
  const uint8_t* src = &aInput[initLeft * inputStep];
94
0
  const uint8_t* iterEnd = &aInput[initRight * inputStep];
95
0
96
0
  #define INIT_ITER \
97
0
    alphaSum += *src; \
98
0
    src += inputStep;
99
0
100
0
  // We unroll the per-pixel loop here substantially. The amount of work
101
0
  // done per sample is so small that the cost of a loop condition check
102
0
  // and a branch can substantially add to or even dominate the performance
103
0
  // of the loop.
104
0
  while (src + 16 * inputStep <= iterEnd) {
105
0
    INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
106
0
    INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
107
0
    INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
108
0
    INIT_ITER; INIT_ITER; INIT_ITER; INIT_ITER;
109
0
  }
110
0
  while (src < iterEnd) {
111
0
    INIT_ITER;
112
0
  }
113
0
114
0
  // Now we start moving the window over the row. We will be accessing
115
0
  // pixels form aStart - aLeftLobe up to aEnd + aRightLobe, which may be
116
0
  // out of bounds of the row. To avoid having to check within the inner
117
0
  // loops if we are in bound, we instead compute the points at which
118
0
  // we will move out of bounds of the row on the left side (splitLeft)
119
0
  // and right side (splitRight).
120
0
  int32_t splitLeft = min(max(aLeftLobe, aStart), aEnd);
121
0
  int32_t splitRight = min(max(aWidth - (boxSize - aLeftLobe), aStart), aEnd);
122
0
  // If the filter window is actually large than the size of the row,
123
0
  // there will be a middle area of overlap where the leftmost and rightmost
124
0
  // pixel of the filter will both be outside the row. In this case, we need
125
0
  // to invert the splits so that splitLeft <= splitRight.
126
0
  if (boxSize > aWidth) {
127
0
    swap(splitLeft, splitRight);
128
0
  }
129
0
130
0
  // Process all pixels up to splitLeft that would sample before the start of the row.
131
0
  // Note that because inputStep and outputStep may not be a const 1 value, it is more
132
0
  // performant to increment pointers here for the source and destination rather than
133
0
  // use a loop counter, since doing so would entail an expensive multiplication that
134
0
  // significantly slows down the loop.
135
0
  uint8_t* dst = &aOutput[aStart * outputStep];
136
0
  iterEnd = &aOutput[splitLeft * outputStep];
137
0
  src = &aInput[(aStart + boxSize - aLeftLobe) * inputStep];
138
0
  uint8_t firstVal = aInput[0];
139
0
140
0
  #define LEFT_ITER \
141
0
    *dst = (alphaSum * reciprocal) >> 24; \
142
0
    alphaSum += *src - firstVal; \
143
0
    dst += outputStep; \
144
0
    src += inputStep;
145
0
146
0
  while (dst + 16 * outputStep <= iterEnd) {
147
0
    LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
148
0
    LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
149
0
    LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
150
0
    LEFT_ITER; LEFT_ITER; LEFT_ITER; LEFT_ITER;
151
0
  }
152
0
  while (dst < iterEnd) {
153
0
    LEFT_ITER;
154
0
  }
155
0
156
0
  // Process all pixels between splitLeft and splitRight.
157
0
  iterEnd = &aOutput[splitRight * outputStep];
158
0
  if (boxSize <= aWidth) {
159
0
    // The filter window is smaller than the row size, so the leftmost and rightmost
160
0
    // samples are both within row bounds.
161
0
    src = &aInput[(splitLeft - aLeftLobe) * inputStep];
162
0
    int32_t boxStep = boxSize * inputStep;
163
0
164
0
    #define CENTER_ITER \
165
0
      *dst = (alphaSum * reciprocal) >> 24; \
166
0
      alphaSum += src[boxStep] - *src; \
167
0
      dst += outputStep; \
168
0
      src += inputStep;
169
0
170
0
    while (dst +  16 * outputStep <= iterEnd) {
171
0
      CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
172
0
      CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
173
0
      CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
174
0
      CENTER_ITER; CENTER_ITER; CENTER_ITER; CENTER_ITER;
175
0
    }
176
0
    while (dst < iterEnd) {
177
0
      CENTER_ITER;
178
0
    }
179
0
  } else {
180
0
    // The filter window is larger than the row size, and we're in the area of split
181
0
    // overlap. So the leftmost and rightmost samples are both out of bounds and need
182
0
    // to be clamped. We can just precompute the difference here consequently.
183
0
    int32_t firstLastDiff = aInput[(aWidth -1) * inputStep] - aInput[0];
184
0
    while (dst < iterEnd) {
185
0
      *dst = (alphaSum * reciprocal) >> 24;
186
0
      alphaSum += firstLastDiff;
187
0
      dst += outputStep;
188
0
    }
189
0
  }
190
0
191
0
  // Process all remaining pixels after splitRight that would sample after the row end.
192
0
  iterEnd = &aOutput[aEnd * outputStep];
193
0
  src = &aInput[(splitRight - aLeftLobe) * inputStep];
194
0
  uint8_t lastVal = aInput[(aWidth - 1) * inputStep];
195
0
196
0
  #define RIGHT_ITER \
197
0
    *dst = (alphaSum * reciprocal) >> 24; \
198
0
    alphaSum += lastVal - *src; \
199
0
    dst += outputStep; \
200
0
    src += inputStep;
201
0
202
0
  while (dst + 16 * outputStep <= iterEnd) {
203
0
    RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
204
0
    RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
205
0
    RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
206
0
    RIGHT_ITER; RIGHT_ITER; RIGHT_ITER; RIGHT_ITER;
207
0
  }
208
0
  while (dst < iterEnd) {
209
0
    RIGHT_ITER;
210
0
  }
211
0
}
Unexecuted instantiation: Unified_cpp_gfx_2d0.cpp:void mozilla::gfx::BoxBlurRow<false, false>(unsigned char const*, unsigned char*, int, int, int, int, int, int)
Unexecuted instantiation: Unified_cpp_gfx_2d0.cpp:void mozilla::gfx::BoxBlurRow<true, false>(unsigned char const*, unsigned char*, int, int, int, int, int, int)
Unexecuted instantiation: Unified_cpp_gfx_2d0.cpp:void mozilla::gfx::BoxBlurRow<false, true>(unsigned char const*, unsigned char*, int, int, int, int, int, int)
212
213
/**
214
 * Box blur involves looking at one pixel, and setting its value to the average
215
 * of its neighbouring pixels. This is meant to provide a 3-pass approximation of a
216
 * Gaussian blur.
217
 * @param aTranspose Whether to transpose the buffer when reading and writing to it.
218
 * @param aData The buffer to be blurred.
219
 * @param aLobes The number of pixels to blend on the left and right for each of 3 passes.
220
 * @param aWidth The number of columns in the buffers.
221
 * @param aRows The number of rows in the buffers.
222
 * @param aStride The stride of the buffer.
223
 */
224
template<bool aTranspose>
225
static void
226
BoxBlur(uint8_t* aData,
227
        const int32_t aLobes[3][2],
228
        int32_t aWidth,
229
        int32_t aRows,
230
        int32_t aStride,
231
        IntRect aSkipRect)
232
0
{
233
0
  if (aTranspose) {
234
0
    swap(aWidth, aRows);
235
0
    aSkipRect.Swap();
236
0
  }
237
0
238
0
  MOZ_ASSERT(aWidth > 0);
239
0
240
0
  // All three passes of the box blur that approximate the Gaussian are done
241
0
  // on each row in turn, so we only need two temporary row buffers to process
242
0
  // each row, instead of a full-sized buffer. Data moves from the source to the
243
0
  // first temporary, from the first temporary to the second, then from the second
244
0
  // back to the destination. This way is more cache-friendly than processing whe
245
0
  // whole buffer in each pass and thus yields a nice speedup.
246
0
  uint8_t* tmpRow = new (std::nothrow) uint8_t[2 * aWidth];
247
0
  if (!tmpRow) {
248
0
    return;
249
0
  }
250
0
  uint8_t* tmpRow2 = tmpRow + aWidth;
251
0
252
0
  const int32_t stride = aTranspose ? 1 : aStride;
253
0
  bool skipRectCoversWholeRow = 0 >= aSkipRect.X() &&
254
0
                                aWidth <= aSkipRect.XMost();
255
0
256
0
  for (int32_t y = 0; y < aRows; y++) {
257
0
    // Check whether the skip rect intersects this row. If the skip
258
0
    // rect covers the whole surface in this row, we can avoid
259
0
    // this row entirely (and any others along the skip rect).
260
0
    bool inSkipRectY = aSkipRect.ContainsY(y);
261
0
    if (inSkipRectY && skipRectCoversWholeRow) {
262
0
      aData += stride * (aSkipRect.YMost() - y);
263
0
      y = aSkipRect.YMost() - 1;
264
0
      continue;
265
0
    }
266
0
267
0
    // Read in data from the source transposed if necessary.
268
0
    BoxBlurRow<aTranspose, false>(aData, tmpRow, aLobes[0][0], aLobes[0][1], aWidth, aStride, 0, aWidth);
269
0
270
0
    // For the middle pass, the data is already pre-transposed and does not need to be post-transposed yet.
271
0
    BoxBlurRow<false, false>(tmpRow, tmpRow2, aLobes[1][0], aLobes[1][1], aWidth, aStride, 0, aWidth);
272
0
273
0
    // Write back data to the destination transposed if necessary too.
274
0
    // Make sure not to overwrite the skip rect by only outputting to the
275
0
    // destination before and after the skip rect, if requested.
276
0
    int32_t skipStart = inSkipRectY ? min(max(aSkipRect.X(), 0), aWidth) : aWidth;
277
0
    int32_t skipEnd = max(skipStart, aSkipRect.XMost());
278
0
    if (skipStart > 0) {
279
0
      BoxBlurRow<false, aTranspose>(tmpRow2, aData, aLobes[2][0], aLobes[2][1], aWidth, aStride, 0, skipStart);
280
0
    }
281
0
    if (skipEnd < aWidth) {
282
0
      BoxBlurRow<false, aTranspose>(tmpRow2, aData, aLobes[2][0], aLobes[2][1], aWidth, aStride, skipEnd, aWidth);
283
0
    }
284
0
285
0
    aData += stride;
286
0
  }
287
0
288
0
  delete[] tmpRow;
289
0
}
Unexecuted instantiation: Unified_cpp_gfx_2d0.cpp:void mozilla::gfx::BoxBlur<false>(unsigned char*, int const (*) [2], int, int, int, mozilla::gfx::IntRectTyped<mozilla::gfx::UnknownUnits>)
Unexecuted instantiation: Unified_cpp_gfx_2d0.cpp:void mozilla::gfx::BoxBlur<true>(unsigned char*, int const (*) [2], int, int, int, mozilla::gfx::IntRectTyped<mozilla::gfx::UnknownUnits>)
290
291
static void ComputeLobes(int32_t aRadius, int32_t aLobes[3][2])
292
0
{
293
0
    int32_t major, minor, final;
294
0
295
0
    /* See http://www.w3.org/TR/SVG/filters.html#feGaussianBlur for
296
0
     * some notes about approximating the Gaussian blur with box-blurs.
297
0
     * The comments below are in the terminology of that page.
298
0
     */
299
0
    int32_t z = aRadius / 3;
300
0
    switch (aRadius % 3) {
301
0
    case 0:
302
0
        // aRadius = z*3; choose d = 2*z + 1
303
0
        major = minor = final = z;
304
0
        break;
305
0
    case 1:
306
0
        // aRadius = z*3 + 1
307
0
        // This is a tricky case since there is no value of d which will
308
0
        // yield a radius of exactly aRadius. If d is odd, i.e. d=2*k + 1
309
0
        // for some integer k, then the radius will be 3*k. If d is even,
310
0
        // i.e. d=2*k, then the radius will be 3*k - 1.
311
0
        // So we have to choose values that don't match the standard
312
0
        // algorithm.
313
0
        major = z + 1;
314
0
        minor = final = z;
315
0
        break;
316
0
    case 2:
317
0
        // aRadius = z*3 + 2; choose d = 2*z + 2
318
0
        major = final = z + 1;
319
0
        minor = z;
320
0
        break;
321
0
    default:
322
0
        // Mathematical impossibility!
323
0
        MOZ_ASSERT(false);
324
0
        major = minor = final = 0;
325
0
    }
326
0
    MOZ_ASSERT(major + minor + final == aRadius);
327
0
328
0
    aLobes[0][0] = major;
329
0
    aLobes[0][1] = minor;
330
0
    aLobes[1][0] = minor;
331
0
    aLobes[1][1] = major;
332
0
    aLobes[2][0] = final;
333
0
    aLobes[2][1] = final;
334
0
}
335
336
static void
337
SpreadHorizontal(uint8_t* aInput,
338
                 uint8_t* aOutput,
339
                 int32_t aRadius,
340
                 int32_t aWidth,
341
                 int32_t aRows,
342
                 int32_t aStride,
343
                 const IntRect& aSkipRect)
344
0
{
345
0
    if (aRadius == 0) {
346
0
        memcpy(aOutput, aInput, aStride * aRows);
347
0
        return;
348
0
    }
349
0
350
0
    bool skipRectCoversWholeRow = 0 >= aSkipRect.X() &&
351
0
                                    aWidth <= aSkipRect.XMost();
352
0
    for (int32_t y = 0; y < aRows; y++) {
353
0
        // Check whether the skip rect intersects this row. If the skip
354
0
        // rect covers the whole surface in this row, we can avoid
355
0
        // this row entirely (and any others along the skip rect).
356
0
        bool inSkipRectY = aSkipRect.ContainsY(y);
357
0
        if (inSkipRectY && skipRectCoversWholeRow) {
358
0
            y = aSkipRect.YMost() - 1;
359
0
            continue;
360
0
        }
361
0
362
0
        for (int32_t x = 0; x < aWidth; x++) {
363
0
            // Check whether we are within the skip rect. If so, go
364
0
            // to the next point outside the skip rect.
365
0
            if (inSkipRectY && aSkipRect.ContainsX(x)) {
366
0
                x = aSkipRect.XMost();
367
0
                if (x >= aWidth)
368
0
                    break;
369
0
            }
370
0
371
0
            int32_t sMin = max(x - aRadius, 0);
372
0
            int32_t sMax = min(x + aRadius, aWidth - 1);
373
0
            int32_t v = 0;
374
0
            for (int32_t s = sMin; s <= sMax; ++s) {
375
0
                v = max<int32_t>(v, aInput[aStride * y + s]);
376
0
            }
377
0
            aOutput[aStride * y + x] = v;
378
0
        }
379
0
    }
380
0
}
381
382
static void
383
SpreadVertical(uint8_t* aInput,
384
               uint8_t* aOutput,
385
               int32_t aRadius,
386
               int32_t aWidth,
387
               int32_t aRows,
388
               int32_t aStride,
389
               const IntRect& aSkipRect)
390
0
{
391
0
    if (aRadius == 0) {
392
0
        memcpy(aOutput, aInput, aStride * aRows);
393
0
        return;
394
0
    }
395
0
396
0
    bool skipRectCoversWholeColumn = 0 >= aSkipRect.Y() &&
397
0
                                     aRows <= aSkipRect.YMost();
398
0
    for (int32_t x = 0; x < aWidth; x++) {
399
0
        bool inSkipRectX = aSkipRect.ContainsX(x);
400
0
        if (inSkipRectX && skipRectCoversWholeColumn) {
401
0
            x = aSkipRect.XMost() - 1;
402
0
            continue;
403
0
        }
404
0
405
0
        for (int32_t y = 0; y < aRows; y++) {
406
0
            // Check whether we are within the skip rect. If so, go
407
0
            // to the next point outside the skip rect.
408
0
            if (inSkipRectX && aSkipRect.ContainsY(y)) {
409
0
                y = aSkipRect.YMost();
410
0
                if (y >= aRows)
411
0
                    break;
412
0
            }
413
0
414
0
            int32_t sMin = max(y - aRadius, 0);
415
0
            int32_t sMax = min(y + aRadius, aRows - 1);
416
0
            int32_t v = 0;
417
0
            for (int32_t s = sMin; s <= sMax; ++s) {
418
0
                v = max<int32_t>(v, aInput[aStride * s + x]);
419
0
            }
420
0
            aOutput[aStride * y + x] = v;
421
0
        }
422
0
    }
423
0
}
424
425
CheckedInt<int32_t>
426
AlphaBoxBlur::RoundUpToMultipleOf4(int32_t aVal)
427
0
{
428
0
  CheckedInt<int32_t> val(aVal);
429
0
430
0
  val += 3;
431
0
  val /= 4;
432
0
  val *= 4;
433
0
434
0
  return val;
435
0
}
436
437
AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
438
                           const IntSize& aSpreadRadius,
439
                           const IntSize& aBlurRadius,
440
                           const Rect* aDirtyRect,
441
                           const Rect* aSkipRect)
442
  : mStride(0),
443
    mSurfaceAllocationSize(0)
444
0
{
445
0
  Init(aRect, aSpreadRadius, aBlurRadius, aDirtyRect, aSkipRect);
446
0
}
447
448
AlphaBoxBlur::AlphaBoxBlur()
449
  : mStride(0),
450
    mSurfaceAllocationSize(0),
451
    mHasDirtyRect(false)
452
0
{
453
0
}
454
455
void
456
AlphaBoxBlur::Init(const Rect& aRect,
457
                   const IntSize& aSpreadRadius,
458
                   const IntSize& aBlurRadius,
459
                   const Rect* aDirtyRect,
460
                   const Rect* aSkipRect)
461
0
{
462
0
  mSpreadRadius = aSpreadRadius;
463
0
  mBlurRadius = aBlurRadius;
464
0
465
0
  Rect rect(aRect);
466
0
  rect.Inflate(Size(aBlurRadius + aSpreadRadius));
467
0
  rect.RoundOut();
468
0
469
0
  if (aDirtyRect) {
470
0
    // If we get passed a dirty rect from layout, we can minimize the
471
0
    // shadow size and make painting faster.
472
0
    mHasDirtyRect = true;
473
0
    mDirtyRect = *aDirtyRect;
474
0
    Rect requiredBlurArea = mDirtyRect.Intersect(rect);
475
0
    requiredBlurArea.Inflate(Size(aBlurRadius + aSpreadRadius));
476
0
    rect = requiredBlurArea.Intersect(rect);
477
0
  } else {
478
0
    mHasDirtyRect = false;
479
0
  }
480
0
481
0
  mRect = TruncatedToInt(rect);
482
0
  if (mRect.IsEmpty()) {
483
0
    return;
484
0
  }
485
0
486
0
  if (aSkipRect) {
487
0
    // If we get passed a skip rect, we can lower the amount of
488
0
    // blurring/spreading we need to do. We convert it to IntRect to avoid
489
0
    // expensive int<->float conversions if we were to use Rect instead.
490
0
    Rect skipRect = *aSkipRect;
491
0
    skipRect.Deflate(Size(aBlurRadius + aSpreadRadius));
492
0
    mSkipRect = RoundedIn(skipRect);
493
0
    mSkipRect = mSkipRect.Intersect(mRect);
494
0
    if (mSkipRect.IsEqualInterior(mRect))
495
0
      return;
496
0
497
0
    mSkipRect -= mRect.TopLeft();
498
0
  } else {
499
0
    mSkipRect = IntRect(0, 0, 0, 0);
500
0
  }
501
0
502
0
  CheckedInt<int32_t> stride = RoundUpToMultipleOf4(mRect.Width());
503
0
  if (stride.isValid()) {
504
0
    mStride = stride.value();
505
0
506
0
    // We need to leave room for an additional 3 bytes for a potential overrun
507
0
    // in our blurring code.
508
0
    size_t size = BufferSizeFromStrideAndHeight(mStride, mRect.Height(), 3);
509
0
    if (size != 0) {
510
0
      mSurfaceAllocationSize = size;
511
0
    }
512
0
  }
513
0
}
514
515
AlphaBoxBlur::AlphaBoxBlur(const Rect& aRect,
516
                           int32_t aStride,
517
                           float aSigmaX,
518
                           float aSigmaY)
519
  : mRect(TruncatedToInt(aRect)),
520
    mSpreadRadius(),
521
    mBlurRadius(CalculateBlurRadius(Point(aSigmaX, aSigmaY))),
522
    mStride(aStride),
523
    mSurfaceAllocationSize(0),
524
    mHasDirtyRect(false)
525
0
{
526
0
  IntRect intRect;
527
0
  if (aRect.ToIntRect(&intRect)) {
528
0
    size_t minDataSize = BufferSizeFromStrideAndHeight(intRect.Width(), intRect.Height());
529
0
    if (minDataSize != 0) {
530
0
      mSurfaceAllocationSize = minDataSize;
531
0
    }
532
0
  }
533
0
}
534
535
536
AlphaBoxBlur::~AlphaBoxBlur()
537
0
{
538
0
}
539
540
IntSize
541
AlphaBoxBlur::GetSize() const
542
0
{
543
0
  IntSize size(mRect.Width(), mRect.Height());
544
0
  return size;
545
0
}
546
547
int32_t
548
AlphaBoxBlur::GetStride() const
549
0
{
550
0
  return mStride;
551
0
}
552
553
IntRect
554
AlphaBoxBlur::GetRect() const
555
0
{
556
0
  return mRect;
557
0
}
558
559
Rect*
560
AlphaBoxBlur::GetDirtyRect()
561
0
{
562
0
  if (mHasDirtyRect) {
563
0
    return &mDirtyRect;
564
0
  }
565
0
566
0
  return nullptr;
567
0
}
568
569
size_t
570
AlphaBoxBlur::GetSurfaceAllocationSize() const
571
0
{
572
0
  return mSurfaceAllocationSize;
573
0
}
574
575
void
576
AlphaBoxBlur::Blur(uint8_t* aData) const
577
0
{
578
0
  if (!aData) {
579
0
    return;
580
0
  }
581
0
582
0
  // no need to do all this if not blurring or spreading
583
0
  if (mBlurRadius != IntSize(0,0) || mSpreadRadius != IntSize(0,0)) {
584
0
    int32_t stride = GetStride();
585
0
586
0
    IntSize size = GetSize();
587
0
588
0
    if (mSpreadRadius.width > 0 || mSpreadRadius.height > 0) {
589
0
      // No need to use CheckedInt here - we have validated it in the constructor.
590
0
      size_t szB = stride * size.height;
591
0
      uint8_t* tmpData = new (std::nothrow) uint8_t[szB];
592
0
593
0
      if (!tmpData) {
594
0
        return;
595
0
      }
596
0
597
0
      memset(tmpData, 0, szB);
598
0
599
0
      SpreadHorizontal(aData, tmpData, mSpreadRadius.width, size.width, size.height, stride, mSkipRect);
600
0
      SpreadVertical(tmpData, aData, mSpreadRadius.height, size.width, size.height, stride, mSkipRect);
601
0
602
0
      delete [] tmpData;
603
0
    }
604
0
605
0
    int32_t horizontalLobes[3][2];
606
0
    ComputeLobes(mBlurRadius.width, horizontalLobes);
607
0
    int32_t verticalLobes[3][2];
608
0
    ComputeLobes(mBlurRadius.height, verticalLobes);
609
0
610
0
    // We want to allow for some extra space on the left for alignment reasons.
611
0
    int32_t maxLeftLobe = RoundUpToMultipleOf4(horizontalLobes[0][0] + 1).value();
612
0
613
0
    IntSize integralImageSize(size.width + maxLeftLobe + horizontalLobes[1][1],
614
0
                              size.height + verticalLobes[0][0] + verticalLobes[1][1] + 1);
615
0
616
0
    if ((integralImageSize.width * integralImageSize.height) > (1 << 24)) {
617
0
      // Fallback to old blurring code when the surface is so large it may
618
0
      // overflow our integral image!
619
0
      if (mBlurRadius.width > 0) {
620
0
        BoxBlur<false>(aData, horizontalLobes, size.width, size.height, stride, mSkipRect);
621
0
      }
622
0
      if (mBlurRadius.height > 0) {
623
0
        BoxBlur<true>(aData, verticalLobes, size.width, size.height, stride, mSkipRect);
624
0
      }
625
0
    } else {
626
0
      size_t integralImageStride = GetAlignedStride<16>(integralImageSize.width, 4);
627
0
      if (integralImageStride == 0) {
628
0
        return;
629
0
      }
630
0
631
0
      // We need to leave room for an additional 12 bytes for a maximum overrun
632
0
      // of 3 pixels in the blurring code.
633
0
      size_t bufLen = BufferSizeFromStrideAndHeight(integralImageStride, integralImageSize.height, 12);
634
0
      if (bufLen == 0) {
635
0
        return;
636
0
      }
637
0
      // bufLen is a byte count, but here we want a multiple of 32-bit ints, so
638
0
      // we divide by 4.
639
0
      AlignedArray<uint32_t> integralImage((bufLen / 4) + ((bufLen % 4) ? 1 : 0));
640
0
641
0
      if (!integralImage) {
642
0
        return;
643
0
      }
644
0
645
0
#ifdef USE_SSE2
646
0
      if (Factory::HasSSE2()) {
647
0
        BoxBlur_SSE2(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
648
0
                     verticalLobes[0][1], integralImage, integralImageStride);
649
0
        BoxBlur_SSE2(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
650
0
                     verticalLobes[1][1], integralImage, integralImageStride);
651
0
        BoxBlur_SSE2(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
652
0
                     verticalLobes[2][1], integralImage, integralImageStride);
653
0
      } else
654
0
#endif
655
#ifdef BUILD_ARM_NEON
656
      if (mozilla::supports_neon()) {
657
        BoxBlur_NEON(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
658
                     verticalLobes[0][1], integralImage, integralImageStride);
659
        BoxBlur_NEON(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
660
                     verticalLobes[1][1], integralImage, integralImageStride);
661
        BoxBlur_NEON(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
662
                     verticalLobes[2][1], integralImage, integralImageStride);
663
      } else
664
#endif
665
0
      {
666
#ifdef _MIPS_ARCH_LOONGSON3A
667
        BoxBlur_LS3(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
668
                     verticalLobes[0][1], integralImage, integralImageStride);
669
        BoxBlur_LS3(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
670
                     verticalLobes[1][1], integralImage, integralImageStride);
671
        BoxBlur_LS3(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
672
                     verticalLobes[2][1], integralImage, integralImageStride);
673
#else
674
        BoxBlur_C(aData, horizontalLobes[0][0], horizontalLobes[0][1], verticalLobes[0][0],
675
0
                  verticalLobes[0][1], integralImage, integralImageStride);
676
0
        BoxBlur_C(aData, horizontalLobes[1][0], horizontalLobes[1][1], verticalLobes[1][0],
677
0
                  verticalLobes[1][1], integralImage, integralImageStride);
678
0
        BoxBlur_C(aData, horizontalLobes[2][0], horizontalLobes[2][1], verticalLobes[2][0],
679
0
                  verticalLobes[2][1], integralImage, integralImageStride);
680
0
#endif
681
0
      }
682
0
    }
683
0
  }
684
0
}
685
686
MOZ_ALWAYS_INLINE void
687
GenerateIntegralRow(uint32_t  *aDest, const uint8_t *aSource, uint32_t *aPreviousRow,
688
                    const uint32_t &aSourceWidth, const uint32_t &aLeftInflation, const uint32_t &aRightInflation)
689
0
{
690
0
  uint32_t currentRowSum = 0;
691
0
  uint32_t pixel = aSource[0];
692
0
  for (uint32_t x = 0; x < aLeftInflation; x++) {
693
0
    currentRowSum += pixel;
694
0
    *aDest++ = currentRowSum + *aPreviousRow++;
695
0
  }
696
0
  for (uint32_t x = aLeftInflation; x < (aSourceWidth + aLeftInflation); x += 4) {
697
0
      uint32_t alphaValues = *(uint32_t*)(aSource + (x - aLeftInflation));
698
#if defined WORDS_BIGENDIAN || defined IS_BIG_ENDIAN || defined __BIG_ENDIAN__
699
      currentRowSum += (alphaValues >> 24) & 0xff;
700
      *aDest++ = *aPreviousRow++ + currentRowSum;
701
      currentRowSum += (alphaValues >> 16) & 0xff;
702
      *aDest++ = *aPreviousRow++ + currentRowSum;
703
      currentRowSum += (alphaValues >> 8) & 0xff;
704
      *aDest++ = *aPreviousRow++ + currentRowSum;
705
      currentRowSum += alphaValues & 0xff;
706
      *aDest++ = *aPreviousRow++ + currentRowSum;
707
#else
708
      currentRowSum += alphaValues & 0xff;
709
0
      *aDest++ = *aPreviousRow++ + currentRowSum;
710
0
      alphaValues >>= 8;
711
0
      currentRowSum += alphaValues & 0xff;
712
0
      *aDest++ = *aPreviousRow++ + currentRowSum;
713
0
      alphaValues >>= 8;
714
0
      currentRowSum += alphaValues & 0xff;
715
0
      *aDest++ = *aPreviousRow++ + currentRowSum;
716
0
      alphaValues >>= 8;
717
0
      currentRowSum += alphaValues & 0xff;
718
0
      *aDest++ = *aPreviousRow++ + currentRowSum;
719
0
#endif
720
0
  }
721
0
  pixel = aSource[aSourceWidth - 1];
722
0
  for (uint32_t x = (aSourceWidth + aLeftInflation); x < (aSourceWidth + aLeftInflation + aRightInflation); x++) {
723
0
    currentRowSum += pixel;
724
0
    *aDest++ = currentRowSum + *aPreviousRow++;
725
0
  }
726
0
}
727
728
MOZ_ALWAYS_INLINE void
729
GenerateIntegralImage_C(int32_t aLeftInflation, int32_t aRightInflation,
730
                        int32_t aTopInflation, int32_t aBottomInflation,
731
                        uint32_t *aIntegralImage, size_t aIntegralImageStride,
732
                        uint8_t *aSource, int32_t aSourceStride, const IntSize &aSize)
733
0
{
734
0
  uint32_t stride32bit = aIntegralImageStride / 4;
735
0
736
0
  IntSize integralImageSize(aSize.width + aLeftInflation + aRightInflation,
737
0
                            aSize.height + aTopInflation + aBottomInflation);
738
0
739
0
  memset(aIntegralImage, 0, aIntegralImageStride);
740
0
741
0
  GenerateIntegralRow(aIntegralImage, aSource, aIntegralImage,
742
0
                      aSize.width, aLeftInflation, aRightInflation);
743
0
  for (int y = 1; y < aTopInflation + 1; y++) {
744
0
    GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource, aIntegralImage + (y - 1) * stride32bit,
745
0
                        aSize.width, aLeftInflation, aRightInflation);
746
0
  }
747
0
748
0
  for (int y = aTopInflation + 1; y < (aSize.height + aTopInflation); y++) {
749
0
    GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + aSourceStride * (y - aTopInflation),
750
0
                        aIntegralImage + (y - 1) * stride32bit, aSize.width, aLeftInflation, aRightInflation);
751
0
  }
752
0
753
0
  if (aBottomInflation) {
754
0
    for (int y = (aSize.height + aTopInflation); y < integralImageSize.height; y++) {
755
0
      GenerateIntegralRow(aIntegralImage + (y * stride32bit), aSource + ((aSize.height - 1) * aSourceStride),
756
0
                          aIntegralImage + (y - 1) * stride32bit,
757
0
                          aSize.width, aLeftInflation, aRightInflation);
758
0
    }
759
0
  }
760
0
}
761
762
/**
763
 * Attempt to do an in-place box blur using an integral image.
764
 */
765
void
766
AlphaBoxBlur::BoxBlur_C(uint8_t* aData,
767
                        int32_t aLeftLobe,
768
                        int32_t aRightLobe,
769
                        int32_t aTopLobe,
770
                        int32_t aBottomLobe,
771
                        uint32_t *aIntegralImage,
772
                        size_t aIntegralImageStride) const
773
0
{
774
0
  IntSize size = GetSize();
775
0
776
0
  MOZ_ASSERT(size.width > 0);
777
0
778
0
  // Our 'left' or 'top' lobe will include the current pixel. i.e. when
779
0
  // looking at an integral image the value of a pixel at 'x,y' is calculated
780
0
  // using the value of the integral image values above/below that.
781
0
  aLeftLobe++;
782
0
  aTopLobe++;
783
0
  int32_t boxSize = (aLeftLobe + aRightLobe) * (aTopLobe + aBottomLobe);
784
0
785
0
  MOZ_ASSERT(boxSize > 0);
786
0
787
0
  if (boxSize == 1) {
788
0
      return;
789
0
  }
790
0
791
0
  int32_t stride32bit = aIntegralImageStride / 4;
792
0
793
0
  int32_t leftInflation = RoundUpToMultipleOf4(aLeftLobe).value();
794
0
795
0
  GenerateIntegralImage_C(leftInflation, aRightLobe, aTopLobe, aBottomLobe,
796
0
                          aIntegralImage, aIntegralImageStride, aData,
797
0
                          mStride, size);
798
0
799
0
  uint32_t reciprocal = uint32_t((uint64_t(1) << 32) / boxSize);
800
0
801
0
  uint32_t *innerIntegral = aIntegralImage + (aTopLobe * stride32bit) + leftInflation;
802
0
803
0
  // Storing these locally makes this about 30% faster! Presumably the compiler
804
0
  // can't be sure we're not altering the member variables in this loop.
805
0
  IntRect skipRect = mSkipRect;
806
0
  uint8_t *data = aData;
807
0
  int32_t stride = mStride;
808
0
  for (int32_t y = 0; y < size.height; y++) {
809
0
    // Not using ContainsY(y) because we do not skip y == skipRect.Y()
810
0
    // although that may not be done on purpose
811
0
    bool inSkipRectY = y > skipRect.Y() && y < skipRect.YMost();
812
0
813
0
    uint32_t *topLeftBase = innerIntegral + ((y - aTopLobe) * stride32bit - aLeftLobe);
814
0
    uint32_t *topRightBase = innerIntegral + ((y - aTopLobe) * stride32bit + aRightLobe);
815
0
    uint32_t *bottomRightBase = innerIntegral + ((y + aBottomLobe) * stride32bit + aRightLobe);
816
0
    uint32_t *bottomLeftBase = innerIntegral + ((y + aBottomLobe) * stride32bit - aLeftLobe);
817
0
818
0
    for (int32_t x = 0; x < size.width; x++) {
819
0
      // Not using ContainsX(x) because we do not skip x == skipRect.X()
820
0
      // although that may not be done on purpose
821
0
      if (inSkipRectY && x > skipRect.X() && x < skipRect.XMost()) {
822
0
        x = skipRect.XMost() - 1;
823
0
        // Trigger early jump on coming loop iterations, this will be reset
824
0
        // next line anyway.
825
0
        inSkipRectY = false;
826
0
        continue;
827
0
      }
828
0
      int32_t topLeft = topLeftBase[x];
829
0
      int32_t topRight = topRightBase[x];
830
0
      int32_t bottomRight = bottomRightBase[x];
831
0
      int32_t bottomLeft = bottomLeftBase[x];
832
0
833
0
      uint32_t value = bottomRight - topRight - bottomLeft;
834
0
      value += topLeft;
835
0
836
0
      data[stride * y + x] = (uint64_t(reciprocal) * value + (uint64_t(1) << 31)) >> 32;
837
0
    }
838
0
  }
839
0
}
840
841
/**
842
 * Compute the box blur size (which we're calling the blur radius) from
843
 * the standard deviation.
844
 *
845
 * Much of this, the 3 * sqrt(2 * pi) / 4, is the known value for
846
 * approximating a Gaussian using box blurs.  This yields quite a good
847
 * approximation for a Gaussian.  Then we multiply this by 1.5 since our
848
 * code wants the radius of the entire triple-box-blur kernel instead of
849
 * the diameter of an individual box blur.  For more details, see:
850
 *   http://www.w3.org/TR/SVG11/filters.html#feGaussianBlurElement
851
 *   https://bugzilla.mozilla.org/show_bug.cgi?id=590039#c19
852
 */
853
static const Float GAUSSIAN_SCALE_FACTOR = Float((3 * sqrt(2 * M_PI) / 4) * 1.5);
854
855
IntSize
856
AlphaBoxBlur::CalculateBlurRadius(const Point& aStd)
857
0
{
858
0
    IntSize size(static_cast<int32_t>(floor(aStd.x * GAUSSIAN_SCALE_FACTOR + 0.5f)),
859
0
                 static_cast<int32_t>(floor(aStd.y * GAUSSIAN_SCALE_FACTOR + 0.5f)));
860
0
861
0
    return size;
862
0
}
863
864
Float
865
AlphaBoxBlur::CalculateBlurSigma(int32_t aBlurRadius)
866
0
{
867
0
  return aBlurRadius / GAUSSIAN_SCALE_FACTOR;
868
0
}
869
870
} // namespace gfx
871
} // namespace mozilla