Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/gfx/2d/ssse3-scaler.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright © 2013 Soren Sandmann Pedersen
3
 * Copyright © 2013 Red Hat, Inc.
4
 * Copyright © 2016 Mozilla Foundation
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice (including the next
14
 * paragraph) shall be included in all copies or substantial portions of the
15
 * Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
 * DEALINGS IN THE SOFTWARE.
24
 *
25
 * Author: Soren Sandmann (soren.sandmann@gmail.com)
26
 *         Jeff Muizelaar (jmuizelaar@mozilla.com)
27
 */
28
29
/* This has been adapted from the ssse3 code from pixman. It's currently
30
 * a mess as I want to try it out in practice before finalizing the details.
31
 */
32
33
#include <stdlib.h>
34
#include <mmintrin.h>
35
#include <xmmintrin.h>
36
#include <emmintrin.h>
37
#include <tmmintrin.h>
38
#include <stdint.h>
39
#include <assert.h>
40
#include "ssse3-scaler.h"
41
42
/* 16.16 signed fixed-point number: the upper 16 bits hold the integer part,
 * the lower 16 bits the fraction.
 */
typedef int32_t                 pixman_fixed_16_16_t;
typedef pixman_fixed_16_16_t    pixman_fixed_t;

/* The value 1.0 in 16.16 fixed point. */
#define pixman_fixed_1                  (pixman_int_to_fixed(1))

/* Integer part of a fixed-point value. */
#define pixman_fixed_to_int(f)          ((int) ((f) >> 16))

/* Integer -> fixed point.  The shift is performed on an unsigned value
 * because left-shifting a negative signed integer is undefined behaviour
 * in C; the result is then converted back to the signed fixed type.
 */
#define pixman_int_to_fixed(i)          ((pixman_fixed_t) ((uint32_t) (i) << 16))

/* Double -> fixed point (scales by 2^16 and converts to the fixed type). */
#define pixman_double_to_fixed(d)       ((pixman_fixed_t) ((d) * 65536.0))

/* Largest / smallest integer that fits in the integer part of a 16.16
 * value.  Parenthesised so the negative constant is safe in any expression.
 */
#define PIXMAN_FIXED_INT_MAX (32767)
#define PIXMAN_FIXED_INT_MIN (-32768)
50
typedef struct pixman_vector pixman_vector_t;
51
52
typedef int pixman_bool_t;
53
typedef int64_t                 pixman_fixed_32_32_t;
54
typedef pixman_fixed_32_32_t    pixman_fixed_48_16_t;
55
typedef struct { pixman_fixed_48_16_t v[3]; } pixman_vector_48_16_t;
56
57
struct pixman_vector
58
{
59
    pixman_fixed_t      vector[3];
60
};
61
typedef struct pixman_transform pixman_transform_t;
62
63
struct pixman_transform
64
{
65
    pixman_fixed_t      matrix[3][3];
66
};
67
68
/* Compiler-specific spelling of "always inline this function". */
#if defined(_MSC_VER)
#  define force_inline __forceinline
#else
#  define force_inline __inline__ __attribute__((always_inline))
#endif

/* Number of bits kept for each bilinear interpolation weight. */
#define BILINEAR_INTERPOLATION_BITS 6
75
76
static force_inline int
77
pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
78
0
{
79
0
    return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
80
0
                               ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
81
0
}
82
83
static void
84
pixman_transform_point_31_16_3d (const pixman_transform_t    *t,
85
                                 const pixman_vector_48_16_t *v,
86
                                 pixman_vector_48_16_t       *result)
87
0
{
88
0
    int i;
89
0
    int64_t tmp[3][2];
90
0
91
0
    /* input vector values must have no more than 31 bits (including sign)
92
0
     * in the integer part */
93
0
    assert (v->v[0] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
94
0
    assert (v->v[0] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
95
0
    assert (v->v[1] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
96
0
    assert (v->v[1] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
97
0
    assert (v->v[2] <   ((pixman_fixed_48_16_t)1 << (30 + 16)));
98
0
    assert (v->v[2] >= -((pixman_fixed_48_16_t)1 << (30 + 16)));
99
0
100
0
    for (i = 0; i < 3; i++)
101
0
    {
102
0
        tmp[i][0] = (int64_t)t->matrix[i][0] * (v->v[0] >> 16);
103
0
        tmp[i][1] = (int64_t)t->matrix[i][0] * (v->v[0] & 0xFFFF);
104
0
        tmp[i][0] += (int64_t)t->matrix[i][1] * (v->v[1] >> 16);
105
0
        tmp[i][1] += (int64_t)t->matrix[i][1] * (v->v[1] & 0xFFFF);
106
0
        tmp[i][0] += (int64_t)t->matrix[i][2] * (v->v[2] >> 16);
107
0
        tmp[i][1] += (int64_t)t->matrix[i][2] * (v->v[2] & 0xFFFF);
108
0
    }
109
0
110
0
    result->v[0] = tmp[0][0] + ((tmp[0][1] + 0x8000) >> 16);
111
0
    result->v[1] = tmp[1][0] + ((tmp[1][1] + 0x8000) >> 16);
112
0
    result->v[2] = tmp[2][0] + ((tmp[2][1] + 0x8000) >> 16);
113
0
}
114
115
static pixman_bool_t
116
pixman_transform_point_3d (const struct pixman_transform *transform,
117
                           struct pixman_vector *         vector)
118
0
{
119
0
    pixman_vector_48_16_t tmp;
120
0
    tmp.v[0] = vector->vector[0];
121
0
    tmp.v[1] = vector->vector[1];
122
0
    tmp.v[2] = vector->vector[2];
123
0
124
0
    pixman_transform_point_31_16_3d (transform, &tmp, &tmp);
125
0
126
0
    vector->vector[0] = tmp.v[0];
127
0
    vector->vector[1] = tmp.v[1];
128
0
    vector->vector[2] = tmp.v[2];
129
0
130
0
    return vector->vector[0] == tmp.v[0] &&
131
0
           vector->vector[1] == tmp.v[1] &&
132
0
           vector->vector[2] == tmp.v[2];
133
0
}
134
135
136
struct bits_image_t
137
{
138
    uint32_t *                 bits;
139
    int                        rowstride;
140
    pixman_transform_t *transform;
141
};
142
143
typedef struct bits_image_t bits_image_t;
144
typedef struct {
145
    int unused;
146
} pixman_iter_info_t;
147
148
typedef struct pixman_iter_t pixman_iter_t;
149
typedef void      (* pixman_iter_fini_t)         (pixman_iter_t *iter);
150
151
struct pixman_iter_t
152
{
153
    int x, y;
154
    pixman_iter_fini_t          fini;
155
    bits_image_t *image;
156
    uint32_t *                  buffer;
157
    int width;
158
    int height;
159
    void *                      data;
160
};
161
162
typedef struct
163
{
164
    int   y;
165
    uint64_t *  buffer;
166
} line_t;
167
168
typedef struct
169
{
170
    line_t    lines[2];
171
    pixman_fixed_t  y;
172
    pixman_fixed_t  x;
173
    uint64_t    data[1];
174
} bilinear_info_t;
175
176
static void
177
ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
178
      int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
179
0
{
180
0
    uint32_t *bits = image->bits + y * image->rowstride;
181
0
    __m128i vx = _mm_set_epi16 (
182
0
  - (x + 1), x, - (x + 1), x,
183
0
  - (x + ux + 1), x + ux,  - (x + ux + 1), x + ux);
184
0
    __m128i vux = _mm_set_epi16 (
185
0
  - 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
186
0
  - 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
187
0
    __m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
188
0
    __m128i *b = (__m128i *)line->buffer;
189
0
    __m128i vrl0, vrl1;
190
0
191
0
    while ((n -= 2) >= 0)
192
0
    {
193
0
        __m128i vw, vr, s;
194
#ifdef HACKY_PADDING
195
        if (pixman_fixed_to_int(x + ux) >= image->rowstride) {
196
            vrl1 = _mm_setzero_si128();
197
            printf("overread 2loop\n");
198
         } else {
199
                 if (pixman_fixed_to_int(x + ux) < 0)
200
                         printf("underflow\n");
201
        vrl1 = _mm_loadl_epi64(
202
            (__m128i *)(bits + (pixman_fixed_to_int(x + ux) < 0 ? 0 : pixman_fixed_to_int(x + ux))));
203
        }
204
#else
205
        vrl1 = _mm_loadl_epi64(
206
0
            (__m128i *)(bits + pixman_fixed_to_int(x + ux)));
207
0
#endif
208
0
  /* vrl1: R1, L1 */
209
0
210
0
    final_pixel:
211
#ifdef HACKY_PADDING
212
  vrl0 = _mm_loadl_epi64 (
213
      (__m128i *)(bits + (pixman_fixed_to_int (x) < 0 ? 0 : pixman_fixed_to_int (x))));
214
#else
215
        vrl0 = _mm_loadl_epi64 (
216
0
      (__m128i *)(bits + pixman_fixed_to_int (x)));
217
0
#endif
218
0
        /* vrl0: R0, L0 */
219
0
220
0
  /* The weights are based on vx which is a vector of 
221
0
   *
222
0
   *    - (x + 1), x, - (x + 1), x,
223
0
   *          - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
224
0
   *
225
0
   * so the 16 bit weights end up like this:
226
0
   *
227
0
   *    iw0, w0, iw0, w0, iw1, w1, iw1, w1
228
0
   *
229
0
   * and after shifting and packing, we get these bytes:
230
0
   *
231
0
   *    iw0, w0, iw0, w0, iw1, w1, iw1, w1,
232
0
   *        iw0, w0, iw0, w0, iw1, w1, iw1, w1,
233
0
   *
234
0
   * which means the first and the second input pixel 
235
0
   * have to be interleaved like this:
236
0
   *
237
0
   *    la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
238
0
   *        lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
239
0
   *
240
0
   * before maddubsw can be used.
241
0
   */
242
0
243
0
  vw = _mm_add_epi16 (
244
0
      vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
245
0
  /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1
246
0
   */
247
0
248
0
  vw = _mm_packus_epi16 (vw, vw);
249
0
  /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
250
0
   *         iw0, w0, iw0, w0, iw1, w1, iw1, w1
251
0
   */
252
0
  vx = _mm_add_epi16 (vx, vux);
253
0
254
0
  x += 2 * ux;
255
0
256
0
  vr = _mm_unpacklo_epi16 (vrl1, vrl0);
257
0
  /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */
258
0
259
0
  s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
260
0
  /* s:  lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */
261
0
262
0
  vr = _mm_unpackhi_epi8 (vr, s);
263
0
  /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
264
0
   *         lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
265
0
   */
266
0
267
0
  vr = _mm_maddubs_epi16 (vr, vw);
268
0
269
0
  /* When the weight is 0, the inverse weight is
270
0
   * 128 which can't be represented in a signed byte.
271
0
   * As a result maddubsw computes the following:
272
0
   *
273
0
   *     r = l * -128 + r * 0
274
0
   *
275
0
   * rather than the desired
276
0
   *
277
0
   *     r = l * 128 + r * 0
278
0
   *
279
0
   * We fix this by taking the absolute value of the
280
0
   * result.
281
0
   */
282
0
        // we can drop this if we use lower precision
283
0
284
0
  vr = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (2, 0, 3, 1));
285
0
  /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */
286
0
  _mm_store_si128 (b++, vr);
287
0
    }
288
0
289
0
    if (n == -1)
290
0
    {
291
0
  vrl1 = _mm_setzero_si128();
292
0
  goto final_pixel;
293
0
    }
294
0
295
0
    line->y = y;
296
0
}
297
298
// scale a line of destination pixels
299
static uint32_t *
300
ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
301
0
{
302
0
    pixman_fixed_t fx, ux;
303
0
    bilinear_info_t *info = iter->data;
304
0
    line_t *line0, *line1;
305
0
    int y0, y1;
306
0
    int32_t dist_y;
307
0
    __m128i vw, uvw;
308
0
    int i;
309
0
310
0
    fx = info->x;
311
0
    ux = iter->image->transform->matrix[0][0];
312
0
313
0
    y0 = pixman_fixed_to_int (info->y);
314
0
    if (y0 < 0)
315
0
        *(volatile char*)0 = 9;
316
0
    y1 = y0 + 1;
317
0
318
0
    // clamping in y direction
319
0
    if (y1 >= iter->height) {
320
0
        y1 = iter->height - 1;
321
0
    }
322
0
323
0
    line0 = &info->lines[y0 & 0x01];
324
0
    line1 = &info->lines[y1 & 0x01];
325
0
326
0
    if (line0->y != y0)
327
0
    {
328
0
  ssse3_fetch_horizontal (
329
0
      iter->image, line0, y0, fx, ux, iter->width);
330
0
    }
331
0
332
0
    if (line1->y != y1)
333
0
    {
334
0
  ssse3_fetch_horizontal (
335
0
      iter->image, line1, y1, fx, ux, iter->width);
336
0
    }
337
0
338
#ifdef PIXMAN_STYLE_INTERPOLATION
339
    dist_y = pixman_fixed_to_bilinear_weight (info->y);
340
    dist_y <<= (16 - BILINEAR_INTERPOLATION_BITS);
341
342
    vw = _mm_set_epi16 (
343
  dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
344
345
#else
346
    // setup the weights for the top (vw) and bottom (uvw) lines
347
0
    dist_y = pixman_fixed_to_bilinear_weight (info->y);
348
0
    // we use 15 instead of 16 because we need an extra bit to handle when the weights are 0 and 1
349
0
    dist_y <<= (15 - BILINEAR_INTERPOLATION_BITS);
350
0
351
0
    vw = _mm_set_epi16 (
352
0
  dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
353
0
354
0
355
0
    dist_y = (1 << BILINEAR_INTERPOLATION_BITS) - pixman_fixed_to_bilinear_weight (info->y);
356
0
    dist_y <<= (15 - BILINEAR_INTERPOLATION_BITS);
357
0
    uvw = _mm_set_epi16 (
358
0
  dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y, dist_y);
359
0
#endif
360
0
361
0
    for (i = 0; i + 3 < iter->width; i += 4)
362
0
    {
363
0
  __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
364
0
  __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
365
0
  __m128i top1 = _mm_load_si128 ((__m128i *)(line0->buffer + i + 2));
366
0
  __m128i bot1 = _mm_load_si128 ((__m128i *)(line1->buffer + i + 2));
367
#ifdef PIXMAN_STYLE_INTERPOLATION
368
  __m128i r0, r1, tmp, p;
369
370
        r0 = _mm_mulhi_epu16 (
371
      _mm_sub_epi16 (bot0, top0), vw);
372
  tmp = _mm_cmplt_epi16 (bot0, top0);
373
  tmp = _mm_and_si128 (tmp, vw);
374
  r0 = _mm_sub_epi16 (r0, tmp);
375
  r0 = _mm_add_epi16 (r0, top0);
376
  r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
377
  /* r0:  A0 R0 A1 R1 G0 B0 G1 B1 */
378
        //r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
379
  /* r0:  A1 R1 G1 B1 A0 R0 G0 B0 */
380
381
        // tmp = bot1 < top1 ? vw : 0;
382
        // r1 = (bot1 - top1)*vw + top1 - tmp
383
        // r1 = bot1*vw - vw*top1 + top1 - tmp
384
        // r1 = bot1*vw + top1 - vw*top1 - tmp
385
        // r1 = bot1*vw + top1*(1 - vw) - tmp
386
  r1 = _mm_mulhi_epu16 (
387
      _mm_sub_epi16 (bot1, top1), vw);
388
  tmp = _mm_cmplt_epi16 (bot1, top1);
389
  tmp = _mm_and_si128 (tmp, vw);
390
  r1 = _mm_sub_epi16 (r1, tmp);
391
  r1 = _mm_add_epi16 (r1, top1);
392
  r1 = _mm_srli_epi16 (r1, BILINEAR_INTERPOLATION_BITS);
393
  //r1 = _mm_shuffle_epi32 (r1, _MM_SHUFFLE (2, 0, 3, 1));
394
  /* r1: A3 R3 G3 B3 A2 R2 G2 B2 */
395
#else
396
  __m128i r0, r1, p;
397
0
        top0 = _mm_mulhi_epu16 (top0, uvw);
398
0
        bot0 = _mm_mulhi_epu16 (bot0, vw);
399
0
        r0 = _mm_add_epi16(top0, bot0);
400
0
        r0 = _mm_srli_epi16(r0, BILINEAR_INTERPOLATION_BITS-1);
401
0
402
0
        top1 = _mm_mulhi_epu16 (top1, uvw);
403
0
        bot1 = _mm_mulhi_epu16 (bot1, vw);
404
0
        r1 = _mm_add_epi16(top1, bot1);
405
0
        r1 = _mm_srli_epi16(r1, BILINEAR_INTERPOLATION_BITS-1);
406
0
#endif
407
0
408
0
  p = _mm_packus_epi16 (r0, r1);
409
0
  _mm_storeu_si128 ((__m128i *)(iter->buffer + i), p);
410
0
    }
411
0
412
0
    while (i < iter->width)
413
0
    {
414
0
  __m128i top0 = _mm_load_si128 ((__m128i *)(line0->buffer + i));
415
0
  __m128i bot0 = _mm_load_si128 ((__m128i *)(line1->buffer + i));
416
0
417
#ifdef PIXMAN_STYLE_INTERPOLATION
418
  __m128i r0, tmp, p;
419
  r0 = _mm_mulhi_epu16 (
420
      _mm_sub_epi16 (bot0, top0), vw);
421
  tmp = _mm_cmplt_epi16 (bot0, top0);
422
  tmp = _mm_and_si128 (tmp, vw);
423
  r0 = _mm_sub_epi16 (r0, tmp);
424
  r0 = _mm_add_epi16 (r0, top0);
425
  r0 = _mm_srli_epi16 (r0, BILINEAR_INTERPOLATION_BITS);
426
  /* r0:  A0 R0 A1 R1 G0 B0 G1 B1 */
427
  r0 = _mm_shuffle_epi32 (r0, _MM_SHUFFLE (2, 0, 3, 1));
428
  /* r0:  A1 R1 G1 B1 A0 R0 G0 B0 */
429
#else
430
  __m128i r0, p;
431
0
        top0 = _mm_mulhi_epu16 (top0, uvw);
432
0
        bot0 = _mm_mulhi_epu16 (bot0, vw);
433
0
        r0 = _mm_add_epi16(top0, bot0);
434
0
        r0 = _mm_srli_epi16(r0, BILINEAR_INTERPOLATION_BITS-1);
435
0
#endif
436
0
437
0
  p = _mm_packus_epi16 (r0, r0);
438
0
439
0
  if (iter->width - i == 1)
440
0
  {
441
0
      *(uint32_t *)(iter->buffer + i) = _mm_cvtsi128_si32 (p);
442
0
      i++;
443
0
  }
444
0
  else
445
0
  {
446
0
      _mm_storel_epi64 ((__m128i *)(iter->buffer + i), p);
447
0
      i += 2;
448
0
  }
449
0
    }
450
0
451
0
    info->y += iter->image->transform->matrix[1][1];
452
0
453
0
    return iter->buffer;
454
0
}
455
456
static void
457
ssse3_bilinear_cover_iter_fini (pixman_iter_t *iter)
458
0
{
459
0
    free (iter->data);
460
0
}
461
462
static void
463
ssse3_bilinear_cover_iter_init (pixman_iter_t *iter)
464
0
{
465
0
    int width = iter->width;
466
0
    bilinear_info_t *info;
467
0
    pixman_vector_t v;
468
0
469
0
    if (iter->x > PIXMAN_FIXED_INT_MAX ||
470
0
        iter->x < PIXMAN_FIXED_INT_MIN ||
471
0
        iter->y > PIXMAN_FIXED_INT_MAX ||
472
0
        iter->y < PIXMAN_FIXED_INT_MIN)
473
0
      goto fail;
474
0
475
0
    /* Reference point is the center of the pixel */
476
0
    v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
477
0
    v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
478
0
    v.vector[2] = pixman_fixed_1;
479
0
480
0
    if (!pixman_transform_point_3d (iter->image->transform, &v))
481
0
  goto fail;
482
0
483
0
    info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t) + 64);
484
0
    if (!info)
485
0
  goto fail;
486
0
487
0
    info->x = v.vector[0] - pixman_fixed_1 / 2;
488
0
    info->y = v.vector[1] - pixman_fixed_1 / 2;
489
0
490
0
#define ALIGN(addr)             \
491
0
    ((void *)((((uintptr_t)(addr)) + 15) & (~15)))
492
0
493
0
    /* It is safe to set the y coordinates to -1 initially
494
0
     * because COVER_CLIP_BILINEAR ensures that we will only
495
0
     * be asked to fetch lines in the [0, height) interval
496
0
     */
497
0
    info->lines[0].y = -1;
498
0
    info->lines[0].buffer = ALIGN (&(info->data[0]));
499
0
    info->lines[1].y = -1;
500
0
    info->lines[1].buffer = ALIGN (info->lines[0].buffer + width);
501
0
502
0
    iter->fini = ssse3_bilinear_cover_iter_fini;
503
0
504
0
    iter->data = info;
505
0
    return;
506
0
507
0
fail:
508
0
    /* Something went wrong, either a bad matrix or OOM; in such cases,
509
0
     * we don't guarantee any particular rendering.
510
0
     */
511
0
    iter->fini = NULL;
512
0
}
513
514
/* scale the src from src_width/height to dest_width/height drawn
515
 * into the rectangle x,y width,height
516
 * src_stride and dst_stride are 4 byte units */
517
bool ssse3_scale_data(uint32_t *src, int src_width, int src_height, int src_stride,
518
                      uint32_t *dest, int dest_width, int dest_height,
519
                      int dest_stride,
520
                      int x, int y,
521
                      int width, int height)
522
0
{
523
0
    //XXX: assert(src_width > 1)
524
0
    pixman_transform_t transform = {
525
0
        { { pixman_fixed_1, 0, 0 },
526
0
            { 0, pixman_fixed_1, 0 },
527
0
            { 0, 0, pixman_fixed_1 } }
528
0
    };
529
0
    double width_scale = ((double)src_width)/dest_width;
530
0
    double height_scale = ((double)src_height)/dest_height;
531
0
#define AVOID_PADDING
532
0
#ifdef AVOID_PADDING
533
0
    // scale up by enough that we don't read outside of the bounds of the source surface
534
0
    // currently this is required to avoid reading out of bounds.
535
0
    if (width_scale < 1) {
536
0
        width_scale = (double)(src_width-1)/dest_width;
537
0
        transform.matrix[0][2] = pixman_fixed_1/2;
538
0
    }
539
0
    if (height_scale < 1) {
540
0
        height_scale = (double)(src_height-1)/dest_height;
541
0
        transform.matrix[1][2] = pixman_fixed_1/2;
542
0
    }
543
0
#endif
544
0
    transform.matrix[0][0] = pixman_double_to_fixed(width_scale);
545
0
    transform.matrix[1][1] = pixman_double_to_fixed(height_scale);
546
0
    transform.matrix[2][2] = pixman_fixed_1;
547
0
548
0
    bits_image_t image;
549
0
    image.bits = src;
550
0
    image.transform = &transform;
551
0
    image.rowstride = src_stride;
552
0
553
0
    pixman_iter_t iter;
554
0
    iter.image = &image;
555
0
    iter.x = x;
556
0
    iter.y = y;
557
0
    iter.width = width;
558
0
    iter.height = src_height;
559
0
    iter.buffer = dest;
560
0
    iter.data = NULL;
561
0
562
0
    ssse3_bilinear_cover_iter_init(&iter);
563
0
564
0
    if (!iter.fini)
565
0
      return false;
566
0
567
0
    if (iter.data) {
568
0
        for (int iy = 0; iy < height; iy++) {
569
0
            ssse3_fetch_bilinear_cover(&iter, NULL);
570
0
            iter.buffer += dest_stride;
571
0
        }
572
0
        ssse3_bilinear_cover_iter_fini(&iter);
573
0
    }
574
0
    return true;
575
0
}