Coverage Report

Created: 2026-05-30 06:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/astc-encoder/Source/astcenc_image.cpp
Line
Count
Source
1
// SPDX-License-Identifier: Apache-2.0
2
// ----------------------------------------------------------------------------
3
// Copyright 2011-2026 Arm Limited
4
//
5
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6
// use this file except in compliance with the License. You may obtain a copy
7
// of the License at:
8
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
// License for the specific language governing permissions and limitations
15
// under the License.
16
// ----------------------------------------------------------------------------
17
18
/**
19
 * @brief Functions for creating in-memory ASTC image structures.
20
 */
21
22
#include <cassert>
23
#include <cstring>
24
25
#include "astcenc_internal.h"
26
27
/**
28
 * @brief Loader pipeline function type for data fetch from memory.
29
 */
30
using pixel_loader = vfloat4(*)(const void*, int);
31
32
/**
33
 * @brief Loader pipeline function type for swizzling data in a vector.
34
 */
35
using pixel_swizzler = vfloat4(*)(vfloat4, const astcenc_swizzle&);
36
37
/**
38
 * @brief Loader pipeline function type for converting data in a vector to LNS.
39
 */
40
using pixel_converter = vfloat4(*)(vfloat4, vmask4);
41
42
/**
43
 * @brief Load a 8-bit UNORM texel from a data array.
44
 *
45
 * @param data          The data pointer.
46
 * @param base_offset   The index offset to the start of the pixel.
47
 */
48
static vfloat4 load_texel_u8(
49
  const void* data,
50
  int base_offset
51
27.9k
) {
52
27.9k
  const uint8_t* data8 = static_cast<const uint8_t*>(data);
53
27.9k
  return int_to_float(vint4(data8 + base_offset)) / 255.0f;
54
27.9k
}
55
56
/**
57
 * @brief Load a 16-bit fp16 texel from a data array.
58
 *
59
 * @param data          The data pointer.
60
 * @param base_offset   The index offset to the start of the pixel.
61
 */
62
static vfloat4 load_texel_f16(
63
  const void* data,
64
  int base_offset
65
0
) {
66
0
  const uint16_t* data16 = static_cast<const uint16_t*>(data);
67
0
  int r = data16[base_offset    ];
68
0
  int g = data16[base_offset + 1];
69
0
  int b = data16[base_offset + 2];
70
0
  int a = data16[base_offset + 3];
71
0
  return float16_to_float(vint4(r, g, b, a));
72
0
}
73
74
/**
75
 * @brief Load a 32-bit float texel from a data array.
76
 *
77
 * @param data          The data pointer.
78
 * @param base_offset   The index offset to the start of the pixel.
79
 */
80
static vfloat4 load_texel_f32(
81
  const void* data,
82
  int base_offset
83
0
) {
84
0
  const float* data32 = static_cast<const float*>(data);
85
0
  return vfloat4(data32 + base_offset);
86
0
}
87
88
/**
89
 * @brief Dummy no-op swizzle function.
90
 *
91
 * @param data   The source RGBA vector to swizzle.
92
 * @param swz    The swizzle to use.
93
 */
94
static vfloat4 swz_texel_skip(
95
  vfloat4 data,
96
  const astcenc_swizzle& swz
97
27.9k
) {
98
27.9k
  (void)swz;
99
27.9k
  return data;
100
27.9k
}
101
102
/**
103
 * @brief Swizzle a texel into a new arrangement.
104
 *
105
 * @param data   The source RGBA vector to swizzle.
106
 * @param swz    The swizzle to use.
107
 */
108
static vfloat4 swz_texel(
109
  vfloat4 data,
110
  const astcenc_swizzle& swz
111
0
) {
112
0
  ASTCENC_ALIGNAS float datas[6];
113
114
0
  storea(data, datas);
115
0
  datas[ASTCENC_SWZ_0] = 0.0f;
116
0
  datas[ASTCENC_SWZ_1] = 1.0f;
117
118
0
  return vfloat4(datas[swz.r], datas[swz.g], datas[swz.b], datas[swz.a]);
119
0
}
120
121
/**
122
 * @brief Encode a texel that is entirely LDR linear.
123
 *
124
 * @param data       The RGBA data to encode.
125
 * @param lns_mask   The mask for the HDR channels than need LNS encoding.
126
 */
127
static vfloat4 encode_texel_unorm(
128
  vfloat4 data,
129
  vmask4 lns_mask
130
0
) {
131
0
  (void)lns_mask;
132
0
  return data * 65535.0f;
133
0
}
134
135
/**
136
 * @brief Encode a texel that includes at least some HDR LNS texels.
137
 *
138
 * @param data       The RGBA data to encode.
139
 * @param lns_mask   The mask for the HDR channels than need LNS encoding.
140
 */
141
static vfloat4 encode_texel_lns(
142
  vfloat4 data,
143
  vmask4 lns_mask
144
27.9k
) {
145
27.9k
  vfloat4 datav_unorm = data * 65535.0f;
146
27.9k
  vfloat4 datav_lns = float_to_lns(data);
147
27.9k
  return select(datav_unorm, datav_lns, lns_mask);
148
27.9k
}
149
150
/* See header for documentation. */
151
void load_image_block(
152
  astcenc_profile decode_mode,
153
  const astcenc_image& img,
154
  image_block& blk,
155
  const block_size_descriptor& bsd,
156
  unsigned int xpos,
157
  unsigned int ypos,
158
  unsigned int zpos,
159
  const astcenc_swizzle& swz
160
1.12k
) {
161
1.12k
  unsigned int xsize = img.dim_x;
162
1.12k
  unsigned int ysize = img.dim_y;
163
1.12k
  unsigned int zsize = img.dim_z;
164
165
1.12k
  blk.xpos = xpos;
166
1.12k
  blk.ypos = ypos;
167
1.12k
  blk.zpos = zpos;
168
169
  // True if any non-identity swizzle
170
1.12k
  bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
171
1.12k
                   (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);
172
173
1.12k
  int idx = 0;
174
175
1.12k
  vfloat4 data_min(1e38f);
176
1.12k
  vfloat4 data_mean(0.0f);
177
1.12k
  vfloat4 data_mean_scale(1.0f / static_cast<float>(bsd.texel_count));
178
1.12k
  vfloat4 data_max(-1e38f);
179
1.12k
  vmask4 grayscalev(true);
180
181
  // This works because we impose the same choice everywhere during encode
182
1.12k
  uint8_t rgb_lns = (decode_mode == ASTCENC_PRF_HDR) ||
183
1.12k
                    (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A) ? 1 : 0;
184
1.12k
  uint8_t a_lns = decode_mode == ASTCENC_PRF_HDR ? 1 : 0;
185
1.12k
  vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns);
186
1.12k
  vmask4 lns_mask = use_lns != vint4::zero();
187
188
  // Set up the function pointers for loading pipeline as needed
189
1.12k
  pixel_loader loader = load_texel_u8;
190
1.12k
  if (img.data_type == ASTCENC_TYPE_F16)
191
0
  {
192
0
    loader = load_texel_f16;
193
0
  }
194
1.12k
  else if  (img.data_type == ASTCENC_TYPE_F32)
195
0
  {
196
0
    loader = load_texel_f32;
197
0
  }
198
199
1.12k
  pixel_swizzler swizzler = swz_texel_skip;
200
1.12k
  if (needs_swz)
201
0
  {
202
0
    swizzler = swz_texel;
203
0
  }
204
205
1.12k
  pixel_converter converter = encode_texel_unorm;
206
1.12k
  if (any(lns_mask))
207
1.12k
  {
208
1.12k
    converter = encode_texel_lns;
209
1.12k
  }
210
211
2.24k
  for (unsigned int z = 0; z < bsd.zdim; z++)
212
1.12k
  {
213
1.12k
    unsigned int zi = astc::min(zpos + z, zsize - 1);
214
1.12k
    void* plane = img.data[zi];
215
216
6.24k
    for (unsigned int y = 0; y < bsd.ydim; y++)
217
5.11k
    {
218
5.11k
      unsigned int yi = astc::min(ypos + y, ysize - 1);
219
220
33.0k
      for (unsigned int x = 0; x < bsd.xdim; x++)
221
27.9k
      {
222
27.9k
        unsigned int xi = astc::min(xpos + x, xsize - 1);
223
224
27.9k
        vfloat4 datav = loader(plane, (4 * xsize * yi) + (4 * xi));
225
27.9k
        datav = swizzler(datav, swz);
226
27.9k
        datav = converter(datav, lns_mask);
227
228
        // Compute block metadata
229
27.9k
        data_min = min(data_min, datav);
230
27.9k
        data_mean += datav * data_mean_scale;
231
27.9k
        data_max = max(data_max, datav);
232
233
27.9k
        grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());
234
235
27.9k
        blk.data_r[idx] = datav.lane<0>();
236
27.9k
        blk.data_g[idx] = datav.lane<1>();
237
27.9k
        blk.data_b[idx] = datav.lane<2>();
238
27.9k
        blk.data_a[idx] = datav.lane<3>();
239
240
27.9k
        blk.rgb_lns[idx] = rgb_lns;
241
27.9k
        blk.alpha_lns[idx] = a_lns;
242
243
27.9k
        idx++;
244
27.9k
      }
245
5.11k
    }
246
1.12k
  }
247
248
  // Reverse the encoding so we store origin block in the original format
249
1.12k
  vfloat4 data_enc = blk.texel(0);
250
1.12k
  vfloat4 data_enc_unorm = data_enc / 65535.0f;
251
1.12k
  vfloat4 data_enc_lns = vfloat4::zero();
252
253
1.12k
  if (rgb_lns || a_lns)
254
1.12k
  {
255
1.12k
    data_enc_lns = float16_to_float(lns_to_sf16(float_to_int(data_enc)));
256
1.12k
  }
257
258
1.12k
  blk.origin_texel = select(data_enc_unorm, data_enc_lns, lns_mask);
259
260
  // Store block metadata
261
1.12k
  blk.data_min = data_min;
262
1.12k
  blk.data_mean = data_mean;
263
1.12k
  blk.data_max = data_max;
264
1.12k
  blk.grayscale = all(grayscalev);
265
1.12k
}
266
267
/* See header for documentation. */
268
void load_image_block_fast_ldr(
269
  astcenc_profile decode_mode,
270
  const astcenc_image& img,
271
  image_block& blk,
272
  const block_size_descriptor& bsd,
273
  unsigned int xpos,
274
  unsigned int ypos,
275
  unsigned int zpos,
276
  const astcenc_swizzle& swz
277
1.09k
) {
278
1.09k
  (void)swz;
279
1.09k
  (void)decode_mode;
280
281
1.09k
  unsigned int xsize = img.dim_x;
282
1.09k
  unsigned int ysize = img.dim_y;
283
284
1.09k
  blk.xpos = xpos;
285
1.09k
  blk.ypos = ypos;
286
1.09k
  blk.zpos = zpos;
287
288
1.09k
  vfloat4 data_min(1e38f);
289
1.09k
  vfloat4 data_mean = vfloat4::zero();
290
1.09k
  vfloat4 data_max(-1e38f);
291
1.09k
  vmask4 grayscalev(true);
292
1.09k
  int idx = 0;
293
294
1.09k
  const uint8_t* plane = static_cast<const uint8_t*>(img.data[0]);
295
6.22k
  for (unsigned int y = ypos; y < ypos + bsd.ydim; y++)
296
5.13k
  {
297
5.13k
    unsigned int yi = astc::min(y, ysize - 1);
298
299
34.0k
    for (unsigned int x = xpos; x < xpos + bsd.xdim; x++)
300
28.9k
    {
301
28.9k
      unsigned int xi = astc::min(x, xsize - 1);
302
303
28.9k
      vint4 datavi = vint4(plane + (4 * xsize * yi) + (4 * xi));
304
28.9k
      vfloat4 datav = int_to_float(datavi) * (65535.0f / 255.0f);
305
306
      // Compute block metadata
307
28.9k
      data_min = min(data_min, datav);
308
28.9k
      data_mean += datav;
309
28.9k
      data_max = max(data_max, datav);
310
311
28.9k
      grayscalev = grayscalev & (datav.swz<0,0,0,0>() == datav.swz<1,1,2,2>());
312
313
28.9k
      blk.data_r[idx] = datav.lane<0>();
314
28.9k
      blk.data_g[idx] = datav.lane<1>();
315
28.9k
      blk.data_b[idx] = datav.lane<2>();
316
28.9k
      blk.data_a[idx] = datav.lane<3>();
317
318
28.9k
      idx++;
319
28.9k
    }
320
5.13k
  }
321
322
  // Reverse the encoding so we store origin block in the original format
323
1.09k
  blk.origin_texel = blk.texel(0) / 65535.0f;
324
325
  // Store block metadata
326
1.09k
  blk.rgb_lns[0] = 0;
327
1.09k
  blk.alpha_lns[0] = 0;
328
1.09k
  blk.data_min = data_min;
329
1.09k
  blk.data_mean = data_mean / static_cast<float>(bsd.texel_count);
330
1.09k
  blk.data_max = data_max;
331
1.09k
  blk.grayscale = all(grayscalev);
332
1.09k
}
333
334
/* See header for documentation. */
335
void store_image_block(
336
  astcenc_image& img,
337
  const image_block& blk,
338
  const block_size_descriptor& bsd,
339
  unsigned int xpos,
340
  unsigned int ypos,
341
  unsigned int zpos,
342
  const astcenc_swizzle& swz
343
3.95k
) {
344
3.95k
  unsigned int x_size = img.dim_x;
345
3.95k
  unsigned int x_start = xpos;
346
3.95k
  unsigned int x_end = astc::min(x_size, xpos + bsd.xdim);
347
3.95k
  unsigned int x_count = x_end - x_start;
348
3.95k
  unsigned int x_nudge = bsd.xdim - x_count;
349
350
3.95k
  unsigned int y_size = img.dim_y;
351
3.95k
  unsigned int y_start = ypos;
352
3.95k
  unsigned int y_end = astc::min(y_size, ypos + bsd.ydim);
353
3.95k
  unsigned int y_count = y_end - y_start;
354
3.95k
  unsigned int y_nudge = (bsd.ydim - y_count) * bsd.xdim;
355
356
3.95k
  unsigned int z_size = img.dim_z;
357
3.95k
  unsigned int z_start = zpos;
358
3.95k
  unsigned int z_end = astc::min(z_size, zpos + bsd.zdim);
359
360
  // True if any non-identity swizzle
361
3.95k
  bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) ||
362
3.95k
                   (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A);
363
364
  // True if any swizzle uses Z reconstruct
365
3.95k
  bool needs_z = (swz.r == ASTCENC_SWZ_Z) || (swz.g == ASTCENC_SWZ_Z) ||
366
3.95k
                 (swz.b == ASTCENC_SWZ_Z) || (swz.a == ASTCENC_SWZ_Z);
367
368
3.95k
  int idx = 0;
369
3.95k
  if (img.data_type == ASTCENC_TYPE_U8)
370
3.95k
  {
371
7.90k
    for (unsigned int z = z_start; z < z_end; z++)
372
3.95k
    {
373
      // Fetch the image plane
374
3.95k
      uint8_t* data8 = static_cast<uint8_t*>(img.data[z]);
375
376
33.8k
      for (unsigned int y = y_start; y < y_end; y++)
377
29.8k
      {
378
29.8k
        uint8_t* data8_row = data8 + (4 * x_size * y) + (4 * x_start);
379
380
108k
        for (unsigned int x = 0; x < x_count; x += ASTCENC_SIMD_WIDTH)
381
78.9k
        {
382
78.9k
          unsigned int max_texels = ASTCENC_SIMD_WIDTH;
383
78.9k
          unsigned int used_texels = astc::min(x_count - x, max_texels);
384
385
          // Unaligned load as rows are not always SIMD_WIDTH long
386
78.9k
          vfloat data_r(blk.data_r + idx);
387
78.9k
          vfloat data_g(blk.data_g + idx);
388
78.9k
          vfloat data_b(blk.data_b + idx);
389
78.9k
          vfloat data_a(blk.data_a + idx);
390
391
          // Clamp values to [0.0, 1.0] range before unorm conversion
392
          //   - Values > 1.0 are possible for all HDR blocks
393
          //   - Values < 0.0 are possible for HDR void-extent blocks
394
78.9k
          vint data_ri = float_to_int_rtn(clampzo(data_r) * 255.0f);
395
78.9k
          vint data_gi = float_to_int_rtn(clampzo(data_g) * 255.0f);
396
78.9k
          vint data_bi = float_to_int_rtn(clampzo(data_b) * 255.0f);
397
78.9k
          vint data_ai = float_to_int_rtn(clampzo(data_a) * 255.0f);
398
399
78.9k
          if (needs_swz)
400
0
          {
401
0
            vint swizzle_table[7];
402
0
            swizzle_table[ASTCENC_SWZ_0] = vint(0);
403
0
            swizzle_table[ASTCENC_SWZ_1] = vint(255);
404
0
            swizzle_table[ASTCENC_SWZ_R] = data_ri;
405
0
            swizzle_table[ASTCENC_SWZ_G] = data_gi;
406
0
            swizzle_table[ASTCENC_SWZ_B] = data_bi;
407
0
            swizzle_table[ASTCENC_SWZ_A] = data_ai;
408
409
0
            if (needs_z)
410
0
            {
411
0
              vfloat data_x = (data_r * vfloat(2.0f)) - vfloat(1.0f);
412
0
              vfloat data_y = (data_a * vfloat(2.0f)) - vfloat(1.0f);
413
0
              vfloat data_z = vfloat(1.0f) - (data_x * data_x) - (data_y * data_y);
414
0
              data_z = max(data_z, 0.0f);
415
0
              data_z = (sqrt(data_z) * vfloat(0.5f)) + vfloat(0.5f);
416
417
0
              swizzle_table[ASTCENC_SWZ_Z] = float_to_int_rtn(min(data_z, 1.0f) * 255.0f);
418
0
            }
419
420
0
            data_ri = swizzle_table[swz.r];
421
0
            data_gi = swizzle_table[swz.g];
422
0
            data_bi = swizzle_table[swz.b];
423
0
            data_ai = swizzle_table[swz.a];
424
0
          }
425
426
          // Errors are NaN encoded - convert to magenta error color
427
          // Branch is OK here - it is almost never true so predicts well
428
78.9k
          vmask nan_mask = data_r != data_r;
429
78.9k
          if (any(nan_mask))
430
19.8k
          {
431
19.8k
            data_ri = select(data_ri, vint(0xFF), nan_mask);
432
19.8k
            data_gi = select(data_gi, vint(0x00), nan_mask);
433
19.8k
            data_bi = select(data_bi, vint(0xFF), nan_mask);
434
19.8k
            data_ai = select(data_ai, vint(0xFF), nan_mask);
435
19.8k
          }
436
437
78.9k
          vint data_rgbai = interleave_rgba8(data_ri, data_gi, data_bi, data_ai);
438
78.9k
          vmask store_mask = vint::lane_id() < vint(used_texels);
439
78.9k
          store_lanes_masked(data8_row, data_rgbai, store_mask);
440
441
78.9k
          data8_row += ASTCENC_SIMD_WIDTH * 4;
442
78.9k
          idx += used_texels;
443
78.9k
        }
444
29.8k
        idx += x_nudge;
445
29.8k
      }
446
3.95k
      idx += y_nudge;
447
3.95k
    }
448
3.95k
  }
449
0
  else if (img.data_type == ASTCENC_TYPE_F16)
450
0
  {
451
0
    for (unsigned int z = z_start; z < z_end; z++)
452
0
    {
453
      // Fetch the image plane
454
0
      uint16_t* data16 = static_cast<uint16_t*>(img.data[z]);
455
456
0
      for (unsigned int y = y_start; y < y_end; y++)
457
0
      {
458
0
        uint16_t* data16_row = data16 + (4 * x_size * y) + (4 * x_start);
459
460
0
        for (unsigned int x = 0; x < x_count; x++)
461
0
        {
462
0
          vint4 color;
463
464
          // NaNs are handled inline - no need to special case
465
0
          if (needs_swz)
466
0
          {
467
0
            float data[7];
468
0
            data[ASTCENC_SWZ_0] = 0.0f;
469
0
            data[ASTCENC_SWZ_1] = 1.0f;
470
0
            data[ASTCENC_SWZ_R] = blk.data_r[idx];
471
0
            data[ASTCENC_SWZ_G] = blk.data_g[idx];
472
0
            data[ASTCENC_SWZ_B] = blk.data_b[idx];
473
0
            data[ASTCENC_SWZ_A] = blk.data_a[idx];
474
475
0
            if (needs_z)
476
0
            {
477
0
              float xN = (data[0] * 2.0f) - 1.0f;
478
0
              float yN = (data[3] * 2.0f) - 1.0f;
479
0
              float zN = 1.0f - xN * xN - yN * yN;
480
0
              if (zN < 0.0f)
481
0
              {
482
0
                zN = 0.0f;
483
0
              }
484
0
              data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
485
0
            }
486
487
0
            vfloat4 colorf(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
488
0
            color = float_to_float16(colorf);
489
0
          }
490
0
          else
491
0
          {
492
0
            vfloat4 colorf = blk.texel(idx);
493
0
            color = float_to_float16(colorf);
494
0
          }
495
496
          // TODO: Vectorize with store N shorts?
497
0
          data16_row[0] = static_cast<uint16_t>(color.lane<0>());
498
0
          data16_row[1] = static_cast<uint16_t>(color.lane<1>());
499
0
          data16_row[2] = static_cast<uint16_t>(color.lane<2>());
500
0
          data16_row[3] = static_cast<uint16_t>(color.lane<3>());
501
0
          data16_row += 4;
502
0
          idx++;
503
0
        }
504
0
        idx += x_nudge;
505
0
      }
506
0
      idx += y_nudge;
507
0
    }
508
0
  }
509
0
  else // if (img.data_type == ASTCENC_TYPE_F32)
510
0
  {
511
0
    assert(img.data_type == ASTCENC_TYPE_F32);
512
513
0
    for (unsigned int z = z_start; z < z_end; z++)
514
0
    {
515
      // Fetch the image plane
516
0
      float* data32 = static_cast<float*>(img.data[z]);
517
518
0
      for (unsigned int y = y_start; y < y_end; y++)
519
0
      {
520
0
        float* data32_row = data32 + (4 * x_size * y) + (4 * x_start);
521
522
0
        for (unsigned int x = 0; x < x_count; x++)
523
0
        {
524
0
          vfloat4 color = blk.texel(idx);
525
526
          // NaNs are handled inline - no need to special case
527
0
          if (needs_swz)
528
0
          {
529
0
            float data[7];
530
0
            data[ASTCENC_SWZ_0] = 0.0f;
531
0
            data[ASTCENC_SWZ_1] = 1.0f;
532
0
            data[ASTCENC_SWZ_R] = color.lane<0>();
533
0
            data[ASTCENC_SWZ_G] = color.lane<1>();
534
0
            data[ASTCENC_SWZ_B] = color.lane<2>();
535
0
            data[ASTCENC_SWZ_A] = color.lane<3>();
536
537
0
            if (needs_z)
538
0
            {
539
0
              float xN = (data[0] * 2.0f) - 1.0f;
540
0
              float yN = (data[3] * 2.0f) - 1.0f;
541
0
              float zN = 1.0f - xN * xN - yN * yN;
542
0
              if (zN < 0.0f)
543
0
              {
544
0
                zN = 0.0f;
545
0
              }
546
0
              data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f;
547
0
            }
548
549
0
            color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]);
550
0
          }
551
552
0
          store(color, data32_row);
553
0
          data32_row += 4;
554
0
          idx++;
555
0
        }
556
0
        idx += x_nudge;
557
0
      }
558
0
      idx += y_nudge;
559
0
    }
560
0
  }
561
3.95k
}