Coverage Report

Created: 2026-04-01 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/gcore/rasterio.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  GDAL Core
4
 * Purpose:  Contains default implementation of GDALRasterBand::IRasterIO()
5
 *           and supporting functions of broader utility.
6
 * Author:   Frank Warmerdam, warmerdam@pobox.com
7
 *
8
 ******************************************************************************
9
 * Copyright (c) 1998, Frank Warmerdam
10
 * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11
 *
12
 * SPDX-License-Identifier: MIT
13
 ****************************************************************************/
14
15
#include "cpl_port.h"
16
#include "gdal.h"
17
#include "gdal_priv.h"
18
19
#include <cassert>
20
#include <climits>
21
#include <cmath>
22
#include <cstddef>
23
#include <cstdio>
24
#include <cstdlib>
25
#include <cstring>
26
27
#include <algorithm>
28
#include <limits>
29
#include <stdexcept>
30
#include <type_traits>
31
32
#include "cpl_conv.h"
33
#include "cpl_cpu_features.h"
34
#include "cpl_error.h"
35
#include "cpl_float.h"
36
#include "cpl_progress.h"
37
#include "cpl_string.h"
38
#include "cpl_vsi.h"
39
#include "gdal_priv_templates.hpp"
40
#include "gdal_vrt.h"
41
#include "gdalwarper.h"
42
#include "memdataset.h"
43
#include "vrtdataset.h"
44
45
#if defined(__x86_64) || defined(_M_X64)
46
#include <emmintrin.h>
47
#include <immintrin.h>
48
#define HAVE_SSE2
49
// AVX2 dispatch: compile AVX2 code with target attribute, detect at runtime
50
#if (defined(__GNUC__) || defined(__clang__)) &&                               \
51
    defined(HAVE_AVX2_AT_COMPILE_TIME)
52
#define HAVE_AVX2_DISPATCH
53
#elif defined(_MSC_VER)
54
#include <intrin.h>
55
#define HAVE_AVX2_DISPATCH
56
#endif
57
#elif defined(USE_NEON_OPTIMIZATIONS)
58
#include "include_sse2neon.h"
59
#define HAVE_SSE2
60
#endif
61
62
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
63
#include "rasterio_ssse3.h"
64
#ifdef __SSSE3__
65
#include <tmmintrin.h>
66
#endif
67
#endif
68
69
#ifdef __SSE4_1__
70
#include <smmintrin.h>
71
#endif
72
73
#ifdef __GNUC__
74
#define CPL_NOINLINE __attribute__((noinline))
75
#else
76
#define CPL_NOINLINE
77
#endif
78
79
static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
80
                             int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
81
                             int nDstPixelStride, GPtrDiff_t nWordCount);
82
83
/************************************************************************/
84
/*                     DownsamplingIntegerXFactor()                     */
85
/************************************************************************/
86
87
// Copies one pixel out of every nSrcXInc source columns of a single source
// line into pabyDstData, fetching band blocks as the source column moves
// across block boundaries.
//
// Template parameters:
//   bSameDataType   when true, pixels are moved with memcpy() /
//                   GDALFastCopyByte() and DATA_TYPE_SIZE is used as the pixel
//                   size; when false, GDALCopyWords64() converts from
//                   eDataType to eBufType and the pixel size is queried with
//                   GDALGetDataTypeSizeBytes(eDataType).
//   DATA_TYPE_SIZE  pixel size in bytes, only read when bSameDataType is true.
//
// Parameters:
//   poBand         band used for GetLockedBlockRef() and GetXSize().
//   iSrcX          source column of the first pixel to copy.
//   nSrcXInc       step, in source columns, between two output pixels.
//   iSrcOffsetCst  pixel offset added to the in-block column before scaling
//                  by the pixel size (presumably the start of the source row
//                  inside the block -- confirm against the caller).
//   pabyDstData    where the first output pixel is written.
//   nPixelSpace    byte step between two output pixels.
//   nBufXSize      number of output pixels requested.
//   eDataType      source type handed to GDALCopyWords64().
//   eBufType       destination type handed to GDALCopyWords64().
//   nStartBlockX   [in,out] first column of the block held in poBlock;
//                  rewritten each time another block is fetched.
//   nBlockXSize    block width in pixels.
//   poBlock        [in,out] currently locked block. When another block is
//                  needed the held one is unlocked with DropLock() and
//                  replaced by the result of GetLockedBlockRef().
//   nLBlockY       block row index passed to GetLockedBlockRef().
//
// Returns false as soon as GetLockedBlockRef() yields nullptr (poBlock is then
// null), true otherwise. On the success path the function never drops the lock
// on the block left in poBlock -- presumably the caller does; confirm.
template <bool bSameDataType, int DATA_TYPE_SIZE>
88
static bool DownsamplingIntegerXFactor(
89
    GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst,
90
    GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize,
91
    GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX,
92
    int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY)
93
0
{
94
0
    const int nBandDataSize =
95
0
        bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType);
96
0
    // Starts at nBufXSize - 1; it is decremented by the loop test and by the
    // batched copies below, and the final pixel is handled after the loop.
    int nOuterLoopIters = nBufXSize - 1;
97
0
    // Byte distance between two sampled source pixels.
    const int nIncSrcOffset = nSrcXInc * nBandDataSize;
98
0
    const GByte *CPL_RESTRICT pabySrcData;
99
0
    int nEndBlockX = nBlockXSize + nStartBlockX;
100
101
0
    // The first pixel is handled by jumping straight into the loop body,
    // before the while condition is ever evaluated: reuse the block already
    // held when iSrcX is before its end column, otherwise fetch a block.
    if (iSrcX < nEndBlockX)
102
0
    {
103
0
        CPLAssert(poBlock);
104
0
        goto no_reload_block;
105
0
    }
106
0
    goto reload_block;
107
108
    // Don't do the last iteration in the loop, as iSrcX might go beyond
109
    // nRasterXSize - 1
110
0
    while (--nOuterLoopIters >= 1)
111
0
    {
112
0
        iSrcX += nSrcXInc;
113
0
        pabySrcData += nIncSrcOffset;
114
0
        pabyDstData += nPixelSpace;
115
116
        /* --------------------------------------------------------------------
117
         */
118
        /*      Ensure we have the appropriate block loaded. */
119
        /* --------------------------------------------------------------------
120
         */
121
0
        if (iSrcX >= nEndBlockX)
122
0
        {
123
0
        reload_block:
124
0
        {
125
0
            // Find the block column that contains iSrcX and refresh the
            // cached start/end columns accordingly.
            const int nLBlockX = iSrcX / nBlockXSize;
126
0
            nStartBlockX = nLBlockX * nBlockXSize;
127
0
            nEndBlockX = nStartBlockX + nBlockXSize;
128
129
0
            if (poBlock != nullptr)
130
0
                poBlock->DropLock();
131
132
0
            poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
133
0
            if (poBlock == nullptr)
134
0
            {
135
0
                // The previous block was already unlocked above and poBlock
                // is now null, so no lock is held on this path.
                return false;
136
0
            }
137
0
        }
138
139
0
        no_reload_block:
140
0
            // Recompute the source pointer from the block start for the
            // current iSrcX; this overrides the incremental stepping done at
            // the top of the loop body.
            const GByte *pabySrcBlock =
141
0
                static_cast<const GByte *>(poBlock->GetDataRef());
142
0
            GPtrDiff_t iSrcOffset =
143
0
                (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
144
0
            pabySrcData = pabySrcBlock + iSrcOffset;
145
0
        }
146
147
        /* --------------------------------------------------------------------
148
         */
149
        /*      Copy the maximum run of pixels. */
150
        /* --------------------------------------------------------------------
151
         */
152
153
0
        // Number of sampled pixels whose source column is still inside the
        // current block (distance to the block end divided by nSrcXInc,
        // rounded up), capped by the remaining trip count. The cap makes
        // nIters 0 when nOuterLoopIters is 0.
        const int nIters = std::min(
154
0
            (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
155
0
        if (bSameDataType)
156
0
        {
157
0
            // The pixel at the current position is always copied, whatever
            // the value of nIters.
            memcpy(pabyDstData, pabySrcData, nBandDataSize);
158
0
            if (nIters > 1)
159
0
            {
160
0
                if (DATA_TYPE_SIZE == 1)
161
0
                {
162
0
                    // Step past the pixel copied above, hand the remaining
                    // nIters - 1 bytes to GDALFastCopyByte(), then advance by
                    // nIters - 2 more strides so both pointers end on the
                    // last pixel copied.
                    pabySrcData += nIncSrcOffset;
163
0
                    pabyDstData += nPixelSpace;
164
0
                    GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData,
165
0
                                     nPixelSpace, nIters - 1);
166
0
                    pabySrcData +=
167
0
                        static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
168
0
                    pabyDstData +=
169
0
                        static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
170
0
                }
171
0
                else
172
0
                {
173
0
                    for (int i = 0; i < nIters - 1; i++)
174
0
                    {
175
0
                        pabySrcData += nIncSrcOffset;
176
0
                        pabyDstData += nPixelSpace;
177
0
                        memcpy(pabyDstData, pabySrcData, nBandDataSize);
178
0
                    }
179
0
                }
180
0
                // Account for the nIters - 1 extra pixels copied in this pass.
                iSrcX += nSrcXInc * (nIters - 1);
181
0
                nOuterLoopIters -= nIters - 1;
182
0
            }
183
0
        }
184
0
        else
185
0
        {
186
            // Type to type conversion ...
187
0
            // std::max(1, nIters): at least the current pixel is converted
            // even when nIters is 0.
            GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData,
188
0
                            eBufType, nPixelSpace, std::max(1, nIters));
189
0
            if (nIters > 1)
190
0
            {
191
0
                pabySrcData +=
192
0
                    static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
193
0
                pabyDstData +=
194
0
                    static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
195
0
                iSrcX += nSrcXInc * (nIters - 1);
196
0
                nOuterLoopIters -= nIters - 1;
197
0
            }
198
0
        }
199
0
    }
200
201
    // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1
202
0
    if (nOuterLoopIters == 0)
203
0
    {
204
0
        const int nRasterXSize = poBand->GetXSize();
205
0
        // The sum is formed in 64 bits and clamped to the last raster column
        // before being narrowed back to int.
        iSrcX =
206
0
            static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
207
0
                                      static_cast<GInt64>(nRasterXSize - 1)));
208
0
        pabyDstData += nPixelSpace;
209
0
        // Re-enter the loop body for this final pixel. The loop test that
        // follows decrements nOuterLoopIters to -1, so this branch is not
        // taken a second time.
        if (iSrcX < nEndBlockX)
210
0
        {
211
0
            goto no_reload_block;
212
0
        }
213
0
        goto reload_block;
214
0
    }
215
0
    return true;
216
0
}
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 1>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 2>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 4>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 8>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 16>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<false, 0>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
217
218
// Returns a * b, with the result type deduced from the operand types.
// The function is tagged CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW, presumably so
// that an unsigned wrap-around in the product is not reported by the
// unsigned-integer-overflow sanitizer -- confirm against the macro definition.
template <class A, class B>
219
CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW inline auto CPLUnsanitizedMul(A a, B b)
220
0
{
221
0
    return a * b;
222
0
}
223
224
/************************************************************************/
225
/*                             IRasterIO()                              */
226
/*                                                                      */
227
/*      Default internal implementation of RasterIO() ... utilizes      */
228
/*      the Block access methods to satisfy the request.  This would    */
229
/*      normally only be overridden by formats with overviews.          */
230
/************************************************************************/
231
232
CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
233
                                 int nXSize, int nYSize, void *pData,
234
                                 int nBufXSize, int nBufYSize,
235
                                 GDALDataType eBufType, GSpacing nPixelSpace,
236
                                 GSpacing nLineSpace,
237
                                 GDALRasterIOExtraArg *psExtraArg)
238
239
0
{
240
0
    if (eRWFlag == GF_Write && eFlushBlockErr != CE_None)
241
0
    {
242
0
        CPLError(eFlushBlockErr, CPLE_AppDefined,
243
0
                 "An error occurred while writing a dirty block "
244
0
                 "from GDALRasterBand::IRasterIO");
245
0
        CPLErr eErr = eFlushBlockErr;
246
0
        eFlushBlockErr = CE_None;
247
0
        return eErr;
248
0
    }
249
0
    if (nBlockXSize <= 0 || nBlockYSize <= 0)
250
0
    {
251
0
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size");
252
0
        return CE_Failure;
253
0
    }
254
255
0
    const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
256
0
    const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType);
257
0
    GByte dummyBlock[2] = {0, 0};
258
0
    GByte *pabySrcBlock =
259
0
        dummyBlock; /* to avoid Coverity warning about nullptr dereference */
260
0
    GDALRasterBlock *poBlock = nullptr;
261
0
    const bool bUseIntegerRequestCoords =
262
0
        (!psExtraArg->bFloatingPointWindowValidity ||
263
0
         (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
264
0
          nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
265
266
    /* ==================================================================== */
267
    /*      A common case is the data requested with the destination        */
268
    /*      is packed, and the block width is the raster width.             */
269
    /* ==================================================================== */
270
0
    if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize &&
271
0
        nBlockXSize == GetXSize() && nBufXSize == nXSize &&
272
0
        nBufYSize == nYSize && bUseIntegerRequestCoords)
273
0
    {
274
0
        CPLErr eErr = CE_None;
275
0
        int nLBlockY = -1;
276
277
0
        for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
278
0
        {
279
0
            const int iSrcY = iBufYOff + nYOff;
280
281
0
            if (iSrcY < nLBlockY * nBlockYSize ||
282
0
                iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
283
0
            {
284
0
                nLBlockY = iSrcY / nBlockYSize;
285
0
                bool bJustInitialize =
286
0
                    eRWFlag == GF_Write && nXOff == 0 &&
287
0
                    nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
288
0
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
289
290
                // Is this a partial tile at right and/or bottom edges of
291
                // the raster, and that is going to be completely written?
292
                // If so, do not load it from storage, but zero it so that
293
                // the content outsize of the validity area is initialized.
294
0
                bool bMemZeroBuffer = false;
295
0
                if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 &&
296
0
                    nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
297
0
                    nYOff + nYSize == GetYSize() &&
298
0
                    nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)
299
0
                {
300
0
                    bJustInitialize = true;
301
0
                    bMemZeroBuffer = true;
302
0
                }
303
304
0
                if (poBlock)
305
0
                    poBlock->DropLock();
306
307
0
                const GUInt32 nErrorCounter = CPLGetErrorCounter();
308
0
                poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize);
309
0
                if (poBlock == nullptr)
310
0
                {
311
0
                    if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
312
0
                        nullptr)
313
0
                    {
314
0
                        CPLError(CE_Failure, CPLE_AppDefined,
315
0
                                 "GetBlockRef failed at X block offset %d, "
316
0
                                 "Y block offset %d%s",
317
0
                                 0, nLBlockY,
318
0
                                 (nErrorCounter != CPLGetErrorCounter())
319
0
                                     ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
320
0
                                     : "");
321
0
                    }
322
0
                    eErr = CE_Failure;
323
0
                    break;
324
0
                }
325
326
0
                if (eRWFlag == GF_Write)
327
0
                    poBlock->MarkDirty();
328
329
0
                pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
330
0
                if (bMemZeroBuffer)
331
0
                {
332
0
                    memset(pabySrcBlock, 0,
333
0
                           static_cast<GPtrDiff_t>(nBandDataSize) *
334
0
                               nBlockXSize * nBlockYSize);
335
0
                }
336
0
            }
337
338
0
            const auto nSrcByteOffset =
339
0
                (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) *
340
0
                     nBlockXSize +
341
0
                 nXOff) *
342
0
                nBandDataSize;
343
344
0
            if (eDataType == eBufType)
345
0
            {
346
0
                if (eRWFlag == GF_Read)
347
0
                    memcpy(static_cast<GByte *>(pData) +
348
0
                               static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
349
0
                           pabySrcBlock + nSrcByteOffset,
350
0
                           static_cast<size_t>(nLineSpace));
351
0
                else
352
0
                    memcpy(pabySrcBlock + nSrcByteOffset,
353
0
                           static_cast<GByte *>(pData) +
354
0
                               static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
355
0
                           static_cast<size_t>(nLineSpace));
356
0
            }
357
0
            else
358
0
            {
359
                // Type to type conversion.
360
0
                if (eRWFlag == GF_Read)
361
0
                    GDALCopyWords64(
362
0
                        pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize,
363
0
                        static_cast<GByte *>(pData) +
364
0
                            static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
365
0
                        eBufType, static_cast<int>(nPixelSpace), nBufXSize);
366
0
                else
367
0
                    GDALCopyWords64(static_cast<GByte *>(pData) +
368
0
                                        static_cast<GPtrDiff_t>(iBufYOff) *
369
0
                                            nLineSpace,
370
0
                                    eBufType, static_cast<int>(nPixelSpace),
371
0
                                    pabySrcBlock + nSrcByteOffset, eDataType,
372
0
                                    nBandDataSize, nBufXSize);
373
0
            }
374
375
0
            if (psExtraArg->pfnProgress != nullptr &&
376
0
                !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
377
0
                                         psExtraArg->pProgressData))
378
0
            {
379
0
                eErr = CE_Failure;
380
0
                break;
381
0
            }
382
0
        }
383
384
0
        if (poBlock)
385
0
            poBlock->DropLock();
386
387
0
        return eErr;
388
0
    }
389
390
    /* ==================================================================== */
391
    /*      Do we have overviews that would be appropriate to satisfy       */
392
    /*      this request?                                                   */
393
    /* ==================================================================== */
394
0
    if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 &&
395
0
        eRWFlag == GF_Read)
396
0
    {
397
0
        GDALRasterIOExtraArg sExtraArg;
398
0
        GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
399
400
0
        const int nOverview =
401
0
            GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
402
0
                                          nBufXSize, nBufYSize, &sExtraArg);
403
0
        if (nOverview >= 0)
404
0
        {
405
0
            GDALRasterBand *poOverviewBand = GetOverview(nOverview);
406
0
            if (poOverviewBand == nullptr)
407
0
                return CE_Failure;
408
409
0
            return poOverviewBand->RasterIO(
410
0
                eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize,
411
0
                nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg);
412
0
        }
413
0
    }
414
415
0
    if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 &&
416
0
        nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize &&
417
0
        nLineSpace == nPixelSpace * nBufXSize &&
418
0
        CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")))
419
0
    {
420
0
        memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize));
421
0
        return CE_None;
422
0
    }
423
424
    /* ==================================================================== */
425
    /*      The second case when we don't need subsample data but likely    */
426
    /*      need data type conversion.                                      */
427
    /* ==================================================================== */
428
0
    if (  // nPixelSpace == nBufDataSize &&
429
0
        nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
430
0
    {
431
#if DEBUG_VERBOSE
432
        printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/
433
               nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag));
434
#endif
435
436
        /* --------------------------------------------------------------------
437
         */
438
        /*      Loop over buffer computing source locations. */
439
        /* --------------------------------------------------------------------
440
         */
441
        // Calculate starting values out of loop
442
0
        const int nLBlockXStart = nXOff / nBlockXSize;
443
0
        const int nXSpanEnd = nBufXSize + nXOff;
444
445
0
        int nYInc = 0;
446
0
        for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize;
447
0
             iBufYOff += nYInc, iSrcY += nYInc)
448
0
        {
449
0
            GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
450
0
                                    static_cast<GPtrDiff_t>(nLineSpace);
451
0
            int nLBlockY = iSrcY / nBlockYSize;
452
0
            int nLBlockX = nLBlockXStart;
453
0
            int iSrcX = nXOff;
454
0
            while (iSrcX < nXSpanEnd)
455
0
            {
456
0
                int nXSpan = nLBlockX * nBlockXSize;
457
0
                if (nXSpan < INT_MAX - nBlockXSize)
458
0
                    nXSpan += nBlockXSize;
459
0
                else
460
0
                    nXSpan = INT_MAX;
461
0
                const int nXRight = nXSpan;
462
0
                nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX;
463
464
0
                const size_t nXSpanSize =
465
0
                    CPLUnsanitizedMul(nXSpan, static_cast<size_t>(nPixelSpace));
466
467
0
                bool bJustInitialize =
468
0
                    eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
469
0
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
470
0
                    nXOff <= nLBlockX * nBlockXSize &&
471
0
                    nXOff + nXSize >= nXRight;
472
473
                // Is this a partial tile at right and/or bottom edges of
474
                // the raster, and that is going to be completely written?
475
                // If so, do not load it from storage, but zero it so that
476
                // the content outsize of the validity area is initialized.
477
0
                bool bMemZeroBuffer = false;
478
0
                if (eRWFlag == GF_Write && !bJustInitialize &&
479
0
                    nXOff <= nLBlockX * nBlockXSize &&
480
0
                    nYOff <= nLBlockY * nBlockYSize &&
481
0
                    (nXOff + nXSize >= nXRight ||
482
                     // cppcheck-suppress knownConditionTrueFalse
483
0
                     (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) &&
484
0
                    (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
485
0
                     (nYOff + nYSize == GetYSize() &&
486
0
                      nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)))
487
0
                {
488
0
                    bJustInitialize = true;
489
0
                    bMemZeroBuffer = true;
490
0
                }
491
492
                /* --------------------------------------------------------------------
493
                 */
494
                /*      Ensure we have the appropriate block loaded. */
495
                /* --------------------------------------------------------------------
496
                 */
497
0
                const GUInt32 nErrorCounter = CPLGetErrorCounter();
498
0
                poBlock =
499
0
                    GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
500
0
                if (!poBlock)
501
0
                {
502
0
                    if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
503
0
                        nullptr)
504
0
                    {
505
0
                        CPLError(CE_Failure, CPLE_AppDefined,
506
0
                                 "GetBlockRef failed at X block offset %d, "
507
0
                                 "Y block offset %d%s",
508
0
                                 nLBlockX, nLBlockY,
509
0
                                 (nErrorCounter != CPLGetErrorCounter())
510
0
                                     ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
511
0
                                     : "");
512
0
                    }
513
0
                    return (CE_Failure);
514
0
                }
515
516
0
                if (eRWFlag == GF_Write)
517
0
                    poBlock->MarkDirty();
518
519
0
                pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
520
0
                if (bMemZeroBuffer)
521
0
                {
522
0
                    memset(pabySrcBlock, 0,
523
0
                           static_cast<GPtrDiff_t>(nBandDataSize) *
524
0
                               nBlockXSize * nBlockYSize);
525
0
                }
526
                /* --------------------------------------------------------------------
527
                 */
528
                /*      Copy over this chunk of data. */
529
                /* --------------------------------------------------------------------
530
                 */
531
0
                GPtrDiff_t iSrcOffset =
532
0
                    (static_cast<GPtrDiff_t>(iSrcX) -
533
0
                     static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) +
534
0
                     (static_cast<GPtrDiff_t>(iSrcY) -
535
0
                      static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
536
0
                         nBlockXSize) *
537
0
                    nBandDataSize;
538
                // Fill up as many rows as possible for the loaded block.
539
0
                const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize),
540
0
                                          nBufYSize - iBufYOff);
541
0
                for (int k = 0; k < kmax; k++)
542
0
                {
543
0
                    if (eDataType == eBufType && nPixelSpace == nBufDataSize)
544
0
                    {
545
0
                        if (eRWFlag == GF_Read)
546
0
                            memcpy(static_cast<GByte *>(pData) + iBufOffset +
547
0
                                       static_cast<GPtrDiff_t>(k) * nLineSpace,
548
0
                                   pabySrcBlock + iSrcOffset, nXSpanSize);
549
0
                        else
550
0
                            memcpy(pabySrcBlock + iSrcOffset,
551
0
                                   static_cast<GByte *>(pData) + iBufOffset +
552
0
                                       static_cast<GPtrDiff_t>(k) * nLineSpace,
553
0
                                   nXSpanSize);
554
0
                    }
555
0
                    else
556
0
                    {
557
                        /* type to type conversion */
558
0
                        if (eRWFlag == GF_Read)
559
0
                            GDALCopyWords64(
560
0
                                pabySrcBlock + iSrcOffset, eDataType,
561
0
                                nBandDataSize,
562
0
                                static_cast<GByte *>(pData) + iBufOffset +
563
0
                                    static_cast<GPtrDiff_t>(k) * nLineSpace,
564
0
                                eBufType, static_cast<int>(nPixelSpace),
565
0
                                nXSpan);
566
0
                        else
567
0
                            GDALCopyWords64(
568
0
                                static_cast<GByte *>(pData) + iBufOffset +
569
0
                                    static_cast<GPtrDiff_t>(k) * nLineSpace,
570
0
                                eBufType, static_cast<int>(nPixelSpace),
571
0
                                pabySrcBlock + iSrcOffset, eDataType,
572
0
                                nBandDataSize, nXSpan);
573
0
                    }
574
575
0
                    iSrcOffset +=
576
0
                        static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize;
577
0
                }
578
579
0
                iBufOffset =
580
0
                    CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
581
0
                nLBlockX++;
582
0
                iSrcX += nXSpan;
583
584
0
                poBlock->DropLock();
585
0
                poBlock = nullptr;
586
0
            }
587
588
            /* Compute the increment to go on a block boundary */
589
0
            nYInc = nBlockYSize - (iSrcY % nBlockYSize);
590
591
0
            if (psExtraArg->pfnProgress != nullptr &&
592
0
                !psExtraArg->pfnProgress(
593
0
                    1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
594
0
                    psExtraArg->pProgressData))
595
0
            {
596
0
                return CE_Failure;
597
0
            }
598
0
        }
599
600
0
        return CE_None;
601
0
    }
602
603
    /* ==================================================================== */
604
    /*      Loop reading required source blocks to satisfy output           */
605
    /*      request.  This is the most general implementation.              */
606
    /* ==================================================================== */
607
608
0
    double dfXOff = nXOff;
609
0
    double dfYOff = nYOff;
610
0
    double dfXSize = nXSize;
611
0
    double dfYSize = nYSize;
612
0
    if (psExtraArg->bFloatingPointWindowValidity)
613
0
    {
614
0
        dfXOff = psExtraArg->dfXOff;
615
0
        dfYOff = psExtraArg->dfYOff;
616
0
        dfXSize = psExtraArg->dfXSize;
617
0
        dfYSize = psExtraArg->dfYSize;
618
0
    }
619
620
    /* -------------------------------------------------------------------- */
621
    /*      Compute stepping increment.                                     */
622
    /* -------------------------------------------------------------------- */
623
0
    const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
624
0
    const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
625
0
    CPLErr eErr = CE_None;
626
627
0
    if (eRWFlag == GF_Write)
628
0
    {
629
        /* --------------------------------------------------------------------
630
         */
631
        /*    Write case */
632
        /*    Loop over raster window computing source locations in the buffer.
633
         */
634
        /* --------------------------------------------------------------------
635
         */
636
0
        GByte *pabyDstBlock = nullptr;
637
0
        int nLBlockX = -1;
638
0
        int nLBlockY = -1;
639
640
0
        for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++)
641
0
        {
642
0
            const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
643
644
0
            for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++)
645
0
            {
646
0
                const int iBufXOff =
647
0
                    static_cast<int>((iDstX - nXOff) / dfSrcXInc);
648
0
                GPtrDiff_t iBufOffset =
649
0
                    static_cast<GPtrDiff_t>(iBufYOff) *
650
0
                        static_cast<GPtrDiff_t>(nLineSpace) +
651
0
                    iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
652
653
                // FIXME: this code likely doesn't work if the dirty block gets
654
                // flushed to disk before being completely written.
655
                // In the meantime, bJustInitialize should probably be set to
656
                // FALSE even if it is not ideal performance wise, and for
657
                // lossy compression.
658
659
                /* --------------------------------------------------------------------
660
                 */
661
                /*      Ensure we have the appropriate block loaded. */
662
                /* --------------------------------------------------------------------
663
                 */
664
0
                if (iDstX < nLBlockX * nBlockXSize ||
665
0
                    iDstX - nBlockXSize >= nLBlockX * nBlockXSize ||
666
0
                    iDstY < nLBlockY * nBlockYSize ||
667
0
                    iDstY - nBlockYSize >= nLBlockY * nBlockYSize)
668
0
                {
669
0
                    nLBlockX = iDstX / nBlockXSize;
670
0
                    nLBlockY = iDstY / nBlockYSize;
671
672
0
                    const bool bJustInitialize =
673
0
                        nYOff <= nLBlockY * nBlockYSize &&
674
0
                        nYOff + nYSize - nBlockYSize >=
675
0
                            nLBlockY * nBlockYSize &&
676
0
                        nXOff <= nLBlockX * nBlockXSize &&
677
0
                        nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
678
                    /*bool bMemZeroBuffer = FALSE;
679
                    if( !bJustInitialize &&
680
                        nXOff <= nLBlockX * nBlockXSize &&
681
                        nYOff <= nLBlockY * nBlockYSize &&
682
                        (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
683
                         (nXOff + nXSize == GetXSize() &&
684
                         (nLBlockX+1) * nBlockXSize > GetXSize())) &&
685
                        (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
686
                         (nYOff + nYSize == GetYSize() &&
687
                         (nLBlockY+1) * nBlockYSize > GetYSize())) )
688
                    {
689
                        bJustInitialize = TRUE;
690
                        bMemZeroBuffer = TRUE;
691
                    }*/
692
0
                    if (poBlock != nullptr)
693
0
                        poBlock->DropLock();
694
695
0
                    poBlock =
696
0
                        GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
697
0
                    if (poBlock == nullptr)
698
0
                    {
699
0
                        return (CE_Failure);
700
0
                    }
701
702
0
                    poBlock->MarkDirty();
703
704
0
                    pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
705
                    /*if( bMemZeroBuffer )
706
                    {
707
                        memset(pabyDstBlock, 0,
708
                            static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
709
                    * nBlockYSize);
710
                    }*/
711
0
                }
712
713
                // To make Coverity happy. Should not happen by design.
714
0
                if (pabyDstBlock == nullptr)
715
0
                {
716
0
                    CPLAssert(false);
717
0
                    eErr = CE_Failure;
718
0
                    break;
719
0
                }
720
721
                /* --------------------------------------------------------------------
722
                 */
723
                /*      Copy over this pixel of data. */
724
                /* --------------------------------------------------------------------
725
                 */
726
0
                GPtrDiff_t iDstOffset =
727
0
                    (static_cast<GPtrDiff_t>(iDstX) -
728
0
                     static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
729
0
                     (static_cast<GPtrDiff_t>(iDstY) -
730
0
                      static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
731
0
                         nBlockXSize) *
732
0
                    nBandDataSize;
733
734
0
                if (eDataType == eBufType)
735
0
                {
736
0
                    memcpy(pabyDstBlock + iDstOffset,
737
0
                           static_cast<GByte *>(pData) + iBufOffset,
738
0
                           nBandDataSize);
739
0
                }
740
0
                else
741
0
                {
742
                    /* type to type conversion ... ouch, this is expensive way
743
                    of handling single words */
744
0
                    GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset,
745
0
                                    eBufType, 0, pabyDstBlock + iDstOffset,
746
0
                                    eDataType, 0, 1);
747
0
                }
748
0
            }
749
750
0
            if (psExtraArg->pfnProgress != nullptr &&
751
0
                !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
752
0
                                         psExtraArg->pProgressData))
753
0
            {
754
0
                eErr = CE_Failure;
755
0
                break;
756
0
            }
757
0
        }
758
0
    }
759
0
    else
760
0
    {
761
0
        if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
762
0
        {
763
0
            if ((psExtraArg->eResampleAlg == GRIORA_Cubic ||
764
0
                 psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
765
0
                 psExtraArg->eResampleAlg == GRIORA_Bilinear ||
766
0
                 psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
767
0
                GetColorTable() != nullptr)
768
0
            {
769
0
                CPLError(CE_Warning, CPLE_NotSupported,
770
0
                         "Resampling method not supported on paletted band. "
771
0
                         "Falling back to nearest neighbour");
772
0
            }
773
0
            else if (psExtraArg->eResampleAlg == GRIORA_Gauss &&
774
0
                     GDALDataTypeIsComplex(eDataType))
775
0
            {
776
0
                CPLError(CE_Warning, CPLE_NotSupported,
777
0
                         "Resampling method not supported on complex data type "
778
0
                         "band. Falling back to nearest neighbour");
779
0
            }
780
0
            else
781
0
            {
782
0
                return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize,
783
0
                                         pData, nBufXSize, nBufYSize, eBufType,
784
0
                                         nPixelSpace, nLineSpace, psExtraArg);
785
0
            }
786
0
        }
787
788
0
        int nLimitBlockY = 0;
789
0
        const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
790
0
        int nStartBlockX = -nBlockXSize;
791
0
        constexpr double EPS = 1e-10;
792
0
        int nLBlockY = -1;
793
0
        const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
794
0
        const bool bIntegerXFactor =
795
0
            bUseIntegerRequestCoords &&
796
0
            static_cast<int>(dfSrcXInc) == dfSrcXInc &&
797
0
            static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
798
799
        /* --------------------------------------------------------------------
800
         */
801
        /*      Read case */
802
        /*      Loop over buffer computing source locations. */
803
        /* --------------------------------------------------------------------
804
         */
805
0
        for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
806
0
        {
807
            // Add small epsilon to avoid some numeric precision issues.
808
0
            const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
809
0
            const int iSrcY = static_cast<int>(std::min(
810
0
                std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
811
812
0
            GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
813
0
                                    static_cast<GPtrDiff_t>(nLineSpace);
814
815
0
            if (iSrcY >= nLimitBlockY)
816
0
            {
817
0
                nLBlockY = iSrcY / nBlockYSize;
818
0
                nLimitBlockY = nLBlockY * nBlockYSize;
819
0
                if (nLimitBlockY < INT_MAX - nBlockYSize)
820
0
                    nLimitBlockY += nBlockYSize;
821
0
                else
822
0
                    nLimitBlockY = INT_MAX;
823
                // Make sure a new block is loaded.
824
0
                nStartBlockX = -nBlockXSize;
825
0
            }
826
0
            else if (static_cast<int>(dfSrcXStart) < nStartBlockX)
827
0
            {
828
                // Make sure a new block is loaded.
829
0
                nStartBlockX = -nBlockXSize;
830
0
            }
831
832
0
            GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) *
833
0
                                       static_cast<GPtrDiff_t>(nBlockXSize);
834
835
0
            if (bIntegerXFactor)
836
0
            {
837
0
                int iSrcX = static_cast<int>(dfSrcXStart);
838
0
                const int nSrcXInc = static_cast<int>(dfSrcXInc);
839
0
                GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset;
840
0
                bool bRet = false;
841
0
                if (bByteCopy)
842
0
                {
843
0
                    bRet = DownsamplingIntegerXFactor<true, 1>(
844
0
                        this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
845
0
                        static_cast<int>(nPixelSpace), nBufXSize, GDT_UInt8,
846
0
                        GDT_UInt8, nStartBlockX, nBlockXSize, poBlock,
847
0
                        nLBlockY);
848
0
                }
849
0
                else if (eDataType == eBufType)
850
0
                {
851
0
                    switch (nBandDataSize)
852
0
                    {
853
0
                        case 2:
854
0
                            bRet = DownsamplingIntegerXFactor<true, 2>(
855
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
856
0
                                pabyDstData, static_cast<int>(nPixelSpace),
857
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
858
0
                                nBlockXSize, poBlock, nLBlockY);
859
0
                            break;
860
0
                        case 4:
861
0
                            bRet = DownsamplingIntegerXFactor<true, 4>(
862
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
863
0
                                pabyDstData, static_cast<int>(nPixelSpace),
864
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
865
0
                                nBlockXSize, poBlock, nLBlockY);
866
0
                            break;
867
0
                        case 8:
868
0
                            bRet = DownsamplingIntegerXFactor<true, 8>(
869
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
870
0
                                pabyDstData, static_cast<int>(nPixelSpace),
871
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
872
0
                                nBlockXSize, poBlock, nLBlockY);
873
0
                            break;
874
0
                        case 16:
875
0
                            bRet = DownsamplingIntegerXFactor<true, 16>(
876
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
877
0
                                pabyDstData, static_cast<int>(nPixelSpace),
878
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
879
0
                                nBlockXSize, poBlock, nLBlockY);
880
0
                            break;
881
0
                        default:
882
0
                            CPLAssert(false);
883
0
                            break;
884
0
                    }
885
0
                }
886
0
                else
887
0
                {
888
0
                    bRet = DownsamplingIntegerXFactor<false, 0>(
889
0
                        this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
890
0
                        static_cast<int>(nPixelSpace), nBufXSize, eDataType,
891
0
                        eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
892
0
                }
893
0
                if (!bRet)
894
0
                    eErr = CE_Failure;
895
0
            }
896
0
            else
897
0
            {
898
0
                double dfSrcX = dfSrcXStart;
899
0
                for (int iBufXOff = 0; iBufXOff < nBufXSize;
900
0
                     iBufXOff++, dfSrcX += dfSrcXInc)
901
0
                {
902
                    // TODO?: try to avoid the clamping for most iterations
903
0
                    const int iSrcX = static_cast<int>(
904
0
                        std::min(std::max(0.0, dfSrcX),
905
0
                                 static_cast<double>(nRasterXSize - 1)));
906
907
                    /* --------------------------------------------------------------------
908
                     */
909
                    /*      Ensure we have the appropriate block loaded. */
910
                    /* --------------------------------------------------------------------
911
                     */
912
0
                    if (iSrcX >= nBlockXSize + nStartBlockX)
913
0
                    {
914
0
                        const int nLBlockX = iSrcX / nBlockXSize;
915
0
                        nStartBlockX = nLBlockX * nBlockXSize;
916
917
0
                        if (poBlock != nullptr)
918
0
                            poBlock->DropLock();
919
920
0
                        poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
921
0
                        if (poBlock == nullptr)
922
0
                        {
923
0
                            eErr = CE_Failure;
924
0
                            break;
925
0
                        }
926
927
0
                        pabySrcBlock =
928
0
                            static_cast<GByte *>(poBlock->GetDataRef());
929
0
                    }
930
0
                    const GPtrDiff_t nDiffX =
931
0
                        static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
932
933
                    /* --------------------------------------------------------------------
934
                     */
935
                    /*      Copy over this pixel of data. */
936
                    /* --------------------------------------------------------------------
937
                     */
938
939
0
                    if (bByteCopy)
940
0
                    {
941
0
                        GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
942
0
                        static_cast<GByte *>(pData)[iBufOffset] =
943
0
                            pabySrcBlock[iSrcOffset];
944
0
                    }
945
0
                    else if (eDataType == eBufType)
946
0
                    {
947
0
                        GPtrDiff_t iSrcOffset =
948
0
                            (nDiffX + iSrcOffsetCst) * nBandDataSize;
949
0
                        memcpy(static_cast<GByte *>(pData) + iBufOffset,
950
0
                               pabySrcBlock + iSrcOffset, nBandDataSize);
951
0
                    }
952
0
                    else
953
0
                    {
954
                        // Type to type conversion ...
955
0
                        GPtrDiff_t iSrcOffset =
956
0
                            (nDiffX + iSrcOffsetCst) * nBandDataSize;
957
0
                        GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
958
0
                                        static_cast<GByte *>(pData) +
959
0
                                            iBufOffset,
960
0
                                        eBufType, 0, 1);
961
0
                    }
962
963
0
                    iBufOffset += static_cast<int>(nPixelSpace);
964
0
                }
965
0
            }
966
0
            if (eErr == CE_Failure)
967
0
                break;
968
969
0
            if (psExtraArg->pfnProgress != nullptr &&
970
0
                !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
971
0
                                         psExtraArg->pProgressData))
972
0
            {
973
0
                eErr = CE_Failure;
974
0
                break;
975
0
            }
976
0
        }
977
0
    }
978
979
0
    if (poBlock != nullptr)
980
0
        poBlock->DropLock();
981
982
0
    return eErr;
983
0
}
984
985
/************************************************************************/
986
/*                      GDALRasterIOTransformer()                       */
987
/************************************************************************/
988
989
/** Coefficients of the per-axis scale + offset mapping used by
 *  GDALRasterIOTransformer().
 *
 *  Destination -> source:  src = dst * df?RatioDstToSrc + df?Off
 *  Source -> destination:  dst = (src - df?Off) / df?RatioDstToSrc
 *
 *  Members default to the identity mapping so that a default-constructed
 *  instance never carries indeterminate values (the inverse direction
 *  divides by the ratios).
 */
struct GDALRasterIOTransformerStruct
{
    //! Offset added to the scaled X coordinate (destination -> source).
    double dfXOff = 0.0;
    //! Offset added to the scaled Y coordinate (destination -> source).
    double dfYOff = 0.0;
    //! Multiplier applied to a destination X coordinate to get a source X.
    double dfXRatioDstToSrc = 1.0;
    //! Multiplier applied to a destination Y coordinate to get a source Y.
    double dfYRatioDstToSrc = 1.0;
};
996
997
static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
998
                                   int nPointCount, double *x, double *y,
999
                                   double * /* z */, int *panSuccess)
1000
0
{
1001
0
    GDALRasterIOTransformerStruct *psParams =
1002
0
        static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
1003
0
    if (bDstToSrc)
1004
0
    {
1005
0
        for (int i = 0; i < nPointCount; i++)
1006
0
        {
1007
0
            x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
1008
0
            y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
1009
0
            panSuccess[i] = TRUE;
1010
0
        }
1011
0
    }
1012
0
    else
1013
0
    {
1014
0
        for (int i = 0; i < nPointCount; i++)
1015
0
        {
1016
0
            x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
1017
0
            y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
1018
0
            panSuccess[i] = TRUE;
1019
0
        }
1020
0
    }
1021
0
    return TRUE;
1022
0
}
1023
1024
/************************************************************************/
1025
/*                         RasterIOResampled()                          */
1026
/************************************************************************/
1027
1028
//! @cond Doxygen_Suppress
1029
CPLErr GDALRasterBand::RasterIOResampled(
1030
    GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1031
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1032
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
1033
0
{
1034
    // Determine if we use warping resampling or overview resampling
1035
0
    const bool bUseWarp =
1036
0
        (GDALDataTypeIsComplex(eDataType) &&
1037
0
         psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
1038
0
         psExtraArg->eResampleAlg != GRIORA_Mode);
1039
1040
0
    double dfXOff = nXOff;
1041
0
    double dfYOff = nYOff;
1042
0
    double dfXSize = nXSize;
1043
0
    double dfYSize = nYSize;
1044
0
    if (psExtraArg->bFloatingPointWindowValidity)
1045
0
    {
1046
0
        dfXOff = psExtraArg->dfXOff;
1047
0
        dfYOff = psExtraArg->dfYOff;
1048
0
        dfXSize = psExtraArg->dfXSize;
1049
0
        dfYSize = psExtraArg->dfYSize;
1050
0
    }
1051
1052
0
    const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1053
0
    const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1054
1055
    // Determine the coordinates in the "virtual" output raster to see
1056
    // if there are not integers, in which case we will use them as a shift
1057
    // so that subwindow extracts give the exact same results as entire raster
1058
    // scaling.
1059
0
    double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1060
0
    bool bHasXOffVirtual = false;
1061
0
    int nDestXOffVirtual = 0;
1062
0
    if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1063
0
    {
1064
0
        bHasXOffVirtual = true;
1065
0
        dfXOff = nXOff;
1066
0
        nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1067
0
    }
1068
1069
0
    double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1070
0
    bool bHasYOffVirtual = false;
1071
0
    int nDestYOffVirtual = 0;
1072
0
    if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1073
0
    {
1074
0
        bHasYOffVirtual = true;
1075
0
        dfYOff = nYOff;
1076
0
        nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1077
0
    }
1078
1079
    // Create a MEM dataset that wraps the output buffer.
1080
0
    GDALDataset *poMEMDS;
1081
0
    void *pTempBuffer = nullptr;
1082
0
    GSpacing nPSMem = nPixelSpace;
1083
0
    GSpacing nLSMem = nLineSpace;
1084
0
    void *pDataMem = pData;
1085
0
    GDALDataType eDTMem = eBufType;
1086
0
    if (eBufType != eDataType && !GDAL_GET_OPERATE_IN_BUF_TYPE(*psExtraArg))
1087
0
    {
1088
0
        nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1089
0
        nLSMem = nPSMem * nBufXSize;
1090
0
        pTempBuffer =
1091
0
            VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem));
1092
0
        if (pTempBuffer == nullptr)
1093
0
            return CE_Failure;
1094
0
        pDataMem = pTempBuffer;
1095
0
        eDTMem = eDataType;
1096
0
    }
1097
1098
0
    poMEMDS =
1099
0
        MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1100
0
                           nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr);
1101
0
    GByte *pabyData = static_cast<GByte *>(pDataMem) -
1102
0
                      nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual;
1103
0
    GDALRasterBandH hMEMBand = MEMCreateRasterBandEx(
1104
0
        poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false);
1105
0
    poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand));
1106
1107
0
    const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1108
0
    const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0;
1109
0
    if (pszNBITS)
1110
0
        GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem(
1111
0
            "NBITS", pszNBITS, "IMAGE_STRUCTURE");
1112
1113
0
    CPLErr eErr = CE_None;
1114
1115
    // Do the resampling.
1116
0
    if (bUseWarp)
1117
0
    {
1118
0
        int bHasNoData = FALSE;
1119
0
        double dfNoDataValue = GetNoDataValue(&bHasNoData);
1120
1121
0
        VRTDatasetH hVRTDS = nullptr;
1122
0
        GDALRasterBandH hVRTBand = nullptr;
1123
0
        if (GetDataset() == nullptr)
1124
0
        {
1125
            /* Create VRT dataset that wraps the whole dataset */
1126
0
            hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1127
0
            VRTAddBand(hVRTDS, eDataType, nullptr);
1128
0
            hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1129
0
            VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize,
1130
0
                               0, 0, nRasterXSize, nRasterYSize, nullptr,
1131
0
                               VRT_NODATA_UNSET);
1132
1133
            /* Add a mask band if needed */
1134
0
            if (GetMaskFlags() != GMF_ALL_VALID)
1135
0
            {
1136
0
                GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0);
1137
0
                VRTSourcedRasterBand *poVRTMaskBand =
1138
0
                    reinterpret_cast<VRTSourcedRasterBand *>(
1139
0
                        reinterpret_cast<GDALRasterBand *>(hVRTBand)
1140
0
                            ->GetMaskBand());
1141
0
                poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize,
1142
0
                                                 nRasterYSize, 0, 0,
1143
0
                                                 nRasterXSize, nRasterYSize);
1144
0
            }
1145
0
        }
1146
1147
0
        GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions();
1148
0
        switch (psExtraArg->eResampleAlg)
1149
0
        {
1150
0
            case GRIORA_NearestNeighbour:
1151
0
                psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1152
0
                break;
1153
0
            case GRIORA_Bilinear:
1154
0
                psWarpOptions->eResampleAlg = GRA_Bilinear;
1155
0
                break;
1156
0
            case GRIORA_Cubic:
1157
0
                psWarpOptions->eResampleAlg = GRA_Cubic;
1158
0
                break;
1159
0
            case GRIORA_CubicSpline:
1160
0
                psWarpOptions->eResampleAlg = GRA_CubicSpline;
1161
0
                break;
1162
0
            case GRIORA_Lanczos:
1163
0
                psWarpOptions->eResampleAlg = GRA_Lanczos;
1164
0
                break;
1165
0
            case GRIORA_Average:
1166
0
                psWarpOptions->eResampleAlg = GRA_Average;
1167
0
                break;
1168
0
            case GRIORA_RMS:
1169
0
                psWarpOptions->eResampleAlg = GRA_RMS;
1170
0
                break;
1171
0
            case GRIORA_Mode:
1172
0
                psWarpOptions->eResampleAlg = GRA_Mode;
1173
0
                break;
1174
0
            default:
1175
0
                CPLAssert(false);
1176
0
                psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1177
0
                break;
1178
0
        }
1179
0
        psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1180
0
        psWarpOptions->hDstDS = poMEMDS;
1181
0
        psWarpOptions->nBandCount = 1;
1182
0
        int nSrcBandNumber = hVRTDS ? 1 : nBand;
1183
0
        int nDstBandNumber = 1;
1184
0
        psWarpOptions->panSrcBands = &nSrcBandNumber;
1185
0
        psWarpOptions->panDstBands = &nDstBandNumber;
1186
0
        psWarpOptions->pfnProgress = psExtraArg->pfnProgress
1187
0
                                         ? psExtraArg->pfnProgress
1188
0
                                         : GDALDummyProgress;
1189
0
        psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1190
0
        psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1191
0
        if (bHasNoData)
1192
0
        {
1193
0
            psWarpOptions->papszWarpOptions = CSLSetNameValue(
1194
0
                psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA");
1195
0
            if (psWarpOptions->padfSrcNoDataReal == nullptr)
1196
0
            {
1197
0
                psWarpOptions->padfSrcNoDataReal =
1198
0
                    static_cast<double *>(CPLMalloc(sizeof(double)));
1199
0
                psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1200
0
            }
1201
1202
0
            if (psWarpOptions->padfDstNoDataReal == nullptr)
1203
0
            {
1204
0
                psWarpOptions->padfDstNoDataReal =
1205
0
                    static_cast<double *>(CPLMalloc(sizeof(double)));
1206
0
                psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1207
0
            }
1208
0
        }
1209
1210
0
        GDALRasterIOTransformerStruct sTransformer;
1211
0
        sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1212
0
        sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1213
0
        sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1214
0
        sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1215
0
        psWarpOptions->pTransformerArg = &sTransformer;
1216
1217
0
        GDALWarpOperationH hWarpOperation =
1218
0
            GDALCreateWarpOperation(psWarpOptions);
1219
0
        eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual,
1220
0
                                     nDestYOffVirtual, nBufXSize, nBufYSize);
1221
0
        GDALDestroyWarpOperation(hWarpOperation);
1222
1223
0
        psWarpOptions->panSrcBands = nullptr;
1224
0
        psWarpOptions->panDstBands = nullptr;
1225
0
        GDALDestroyWarpOptions(psWarpOptions);
1226
1227
0
        if (hVRTDS)
1228
0
            GDALClose(hVRTDS);
1229
0
    }
1230
0
    else
1231
0
    {
1232
0
        const char *pszResampling =
1233
0
            GDALRasterIOGetResampleAlg(psExtraArg->eResampleAlg);
1234
0
        int nKernelRadius = 0;
1235
0
        GDALResampleFunction pfnResampleFunc =
1236
0
            GDALGetResampleFunction(pszResampling, &nKernelRadius);
1237
0
        CPLAssert(pfnResampleFunc);
1238
0
        GDALDataType eWrkDataType =
1239
0
            GDALGetOvrWorkDataType(pszResampling, eDataType);
1240
0
        int nHasNoData = 0;
1241
0
        double dfNoDataValue = GetNoDataValue(&nHasNoData);
1242
0
        const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
1243
0
        if (!bHasNoData)
1244
0
            dfNoDataValue = 0.0;
1245
1246
0
        int nDstBlockXSize = nBufXSize;
1247
0
        int nDstBlockYSize = nBufYSize;
1248
0
        int nFullResXChunk = 0;
1249
0
        int nFullResYChunk = 0;
1250
0
        while (true)
1251
0
        {
1252
0
            nFullResXChunk =
1253
0
                3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1254
0
            nFullResYChunk =
1255
0
                3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1256
0
            if (nFullResXChunk > nRasterXSize)
1257
0
                nFullResXChunk = nRasterXSize;
1258
0
            if (nFullResYChunk > nRasterYSize)
1259
0
                nFullResYChunk = nRasterYSize;
1260
0
            if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1261
0
                (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1262
0
                 1024 * 1024))
1263
0
                break;
1264
            // When operating on the full width of a raster whose block width is
1265
            // the raster width, prefer doing chunks in height.
1266
0
            if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1267
0
                nDstBlockYSize > 1)
1268
0
                nDstBlockYSize /= 2;
1269
            /* Otherwise cut the maximal dimension */
1270
0
            else if (nDstBlockXSize > 1 &&
1271
0
                     (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1272
0
                nDstBlockXSize /= 2;
1273
0
            else
1274
0
                nDstBlockYSize /= 2;
1275
0
        }
1276
1277
0
        int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1278
0
        int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1279
0
        if (nOvrXFactor == 0)
1280
0
            nOvrXFactor = 1;
1281
0
        if (nOvrYFactor == 0)
1282
0
            nOvrYFactor = 1;
1283
0
        int nFullResXSizeQueried =
1284
0
            nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1285
0
        int nFullResYSizeQueried =
1286
0
            nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1287
1288
0
        if (nFullResXSizeQueried > nRasterXSize)
1289
0
            nFullResXSizeQueried = nRasterXSize;
1290
0
        if (nFullResYSizeQueried > nRasterYSize)
1291
0
            nFullResYSizeQueried = nRasterYSize;
1292
1293
0
        void *pChunk =
1294
0
            VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
1295
0
                                nFullResXSizeQueried, nFullResYSizeQueried);
1296
0
        GByte *pabyChunkNoDataMask = nullptr;
1297
1298
0
        GDALRasterBand *poMaskBand = GetMaskBand();
1299
0
        int l_nMaskFlags = GetMaskFlags();
1300
1301
0
        bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1302
0
        if (bUseNoDataMask)
1303
0
        {
1304
0
            pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1305
0
                nFullResXSizeQueried, nFullResYSizeQueried));
1306
0
        }
1307
0
        if (pChunk == nullptr ||
1308
0
            (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1309
0
        {
1310
0
            GDALClose(poMEMDS);
1311
0
            CPLFree(pChunk);
1312
0
            CPLFree(pabyChunkNoDataMask);
1313
0
            VSIFree(pTempBuffer);
1314
0
            return CE_Failure;
1315
0
        }
1316
1317
0
        const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) *
1318
0
                                 DIV_ROUND_UP(nBufYSize, nDstBlockYSize);
1319
0
        int nBlocksDone = 0;
1320
1321
0
        int nDstYOff;
1322
0
        for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1323
0
             nDstYOff += nDstBlockYSize)
1324
0
        {
1325
0
            int nDstYCount;
1326
0
            if (nDstYOff + nDstBlockYSize <= nBufYSize)
1327
0
                nDstYCount = nDstBlockYSize;
1328
0
            else
1329
0
                nDstYCount = nBufYSize - nDstYOff;
1330
1331
0
            int nChunkYOff =
1332
0
                nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1333
0
            int nChunkYOff2 = nYOff + 1 +
1334
0
                              static_cast<int>(ceil((nDstYOff + nDstYCount) *
1335
0
                                                    dfYRatioDstToSrc));
1336
0
            if (nChunkYOff2 > nRasterYSize)
1337
0
                nChunkYOff2 = nRasterYSize;
1338
0
            int nYCount = nChunkYOff2 - nChunkYOff;
1339
0
            CPLAssert(nYCount <= nFullResYChunk);
1340
1341
0
            int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1342
0
            int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1343
0
            if (nChunkYOffQueried < 0)
1344
0
            {
1345
0
                nChunkYSizeQueried += nChunkYOffQueried;
1346
0
                nChunkYOffQueried = 0;
1347
0
            }
1348
0
            if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1349
0
                nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1350
0
            CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1351
1352
0
            int nDstXOff = 0;
1353
0
            for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1354
0
                 nDstXOff += nDstBlockXSize)
1355
0
            {
1356
0
                int nDstXCount = 0;
1357
0
                if (nDstXOff + nDstBlockXSize <= nBufXSize)
1358
0
                    nDstXCount = nDstBlockXSize;
1359
0
                else
1360
0
                    nDstXCount = nBufXSize - nDstXOff;
1361
1362
0
                int nChunkXOff =
1363
0
                    nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1364
0
                int nChunkXOff2 =
1365
0
                    nXOff + 1 +
1366
0
                    static_cast<int>(
1367
0
                        ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1368
0
                if (nChunkXOff2 > nRasterXSize)
1369
0
                    nChunkXOff2 = nRasterXSize;
1370
0
                int nXCount = nChunkXOff2 - nChunkXOff;
1371
0
                CPLAssert(nXCount <= nFullResXChunk);
1372
1373
0
                int nChunkXOffQueried =
1374
0
                    nChunkXOff - nKernelRadius * nOvrXFactor;
1375
0
                int nChunkXSizeQueried =
1376
0
                    nXCount + 2 * nKernelRadius * nOvrXFactor;
1377
0
                if (nChunkXOffQueried < 0)
1378
0
                {
1379
0
                    nChunkXSizeQueried += nChunkXOffQueried;
1380
0
                    nChunkXOffQueried = 0;
1381
0
                }
1382
0
                if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1383
0
                    nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1384
0
                CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1385
1386
                // Read the source buffers.
1387
0
                eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1388
0
                                nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1389
0
                                nChunkXSizeQueried, nChunkYSizeQueried,
1390
0
                                eWrkDataType, 0, 0, nullptr);
1391
1392
0
                bool bSkipResample = false;
1393
0
                bool bNoDataMaskFullyOpaque = false;
1394
0
                if (eErr == CE_None && bUseNoDataMask)
1395
0
                {
1396
0
                    eErr = poMaskBand->RasterIO(
1397
0
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1398
0
                        nChunkXSizeQueried, nChunkYSizeQueried,
1399
0
                        pabyChunkNoDataMask, nChunkXSizeQueried,
1400
0
                        nChunkYSizeQueried, GDT_UInt8, 0, 0, nullptr);
1401
1402
                    /* Optimizations if mask is fully opaque or transparent */
1403
0
                    int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1404
0
                    GByte bVal = pabyChunkNoDataMask[0];
1405
0
                    int i = 1;
1406
0
                    for (; i < nPixels; i++)
1407
0
                    {
1408
0
                        if (pabyChunkNoDataMask[i] != bVal)
1409
0
                            break;
1410
0
                    }
1411
0
                    if (i == nPixels)
1412
0
                    {
1413
0
                        if (bVal == 0)
1414
0
                        {
1415
0
                            for (int j = 0; j < nDstYCount; j++)
1416
0
                            {
1417
0
                                GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0,
1418
0
                                                static_cast<GByte *>(pDataMem) +
1419
0
                                                    nLSMem * (j + nDstYOff) +
1420
0
                                                    nDstXOff * nPSMem,
1421
0
                                                eDTMem,
1422
0
                                                static_cast<int>(nPSMem),
1423
0
                                                nDstXCount);
1424
0
                            }
1425
0
                            bSkipResample = true;
1426
0
                        }
1427
0
                        else
1428
0
                        {
1429
0
                            bNoDataMaskFullyOpaque = true;
1430
0
                        }
1431
0
                    }
1432
0
                }
1433
1434
0
                if (!bSkipResample && eErr == CE_None)
1435
0
                {
1436
0
                    const bool bPropagateNoData = false;
1437
0
                    void *pDstBuffer = nullptr;
1438
0
                    GDALDataType eDstBufferDataType = GDT_Unknown;
1439
0
                    GDALRasterBand *poMEMBand =
1440
0
                        GDALRasterBand::FromHandle(hMEMBand);
1441
0
                    GDALOverviewResampleArgs args;
1442
0
                    args.eSrcDataType = eDataType;
1443
0
                    args.eOvrDataType = poMEMBand->GetRasterDataType();
1444
0
                    args.nOvrXSize = poMEMBand->GetXSize();
1445
0
                    args.nOvrYSize = poMEMBand->GetYSize();
1446
0
                    args.nOvrNBITS = nNBITS;
1447
0
                    args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1448
0
                    args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1449
0
                    args.dfSrcXDelta =
1450
0
                        dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1451
0
                    args.dfSrcYDelta =
1452
0
                        dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1453
0
                    args.eWrkDataType = eWrkDataType;
1454
0
                    args.pabyChunkNodataMask =
1455
0
                        bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask;
1456
0
                    args.nChunkXOff =
1457
0
                        nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1458
0
                    args.nChunkXSize = nChunkXSizeQueried;
1459
0
                    args.nChunkYOff =
1460
0
                        nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1461
0
                    args.nChunkYSize = nChunkYSizeQueried;
1462
0
                    args.nDstXOff = nDstXOff + nDestXOffVirtual;
1463
0
                    args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount;
1464
0
                    args.nDstYOff = nDstYOff + nDestYOffVirtual;
1465
0
                    args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount;
1466
0
                    args.pszResampling = pszResampling;
1467
0
                    args.bHasNoData = bHasNoData;
1468
0
                    args.dfNoDataValue = dfNoDataValue;
1469
0
                    args.poColorTable = GetColorTable();
1470
0
                    args.bPropagateNoData = bPropagateNoData;
1471
0
                    eErr = pfnResampleFunc(args, pChunk, &pDstBuffer,
1472
0
                                           &eDstBufferDataType);
1473
0
                    if (eErr == CE_None)
1474
0
                    {
1475
0
                        eErr = poMEMBand->RasterIO(
1476
0
                            GF_Write, nDstXOff + nDestXOffVirtual,
1477
0
                            nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount,
1478
0
                            pDstBuffer, nDstXCount, nDstYCount,
1479
0
                            eDstBufferDataType, 0, 0, nullptr);
1480
0
                    }
1481
0
                    CPLFree(pDstBuffer);
1482
0
                }
1483
1484
0
                nBlocksDone++;
1485
0
                if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1486
0
                    !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1487
0
                                             "", psExtraArg->pProgressData))
1488
0
                {
1489
0
                    eErr = CE_Failure;
1490
0
                }
1491
0
            }
1492
0
        }
1493
1494
0
        CPLFree(pChunk);
1495
0
        CPLFree(pabyChunkNoDataMask);
1496
0
    }
1497
1498
0
    if (pTempBuffer)
1499
0
    {
1500
0
        CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(
1501
0
            GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
1502
0
            pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
1503
0
            nullptr));
1504
0
    }
1505
0
    GDALClose(poMEMDS);
1506
0
    VSIFree(pTempBuffer);
1507
1508
0
    return eErr;
1509
0
}
1510
1511
/************************************************************************/
1512
/*                         RasterIOResampled()                          */
1513
/************************************************************************/
1514
1515
CPLErr GDALDataset::RasterIOResampled(
1516
    GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1517
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1518
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
1519
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
1520
1521
0
{
1522
#if 0
1523
    // Determine if we use warping resampling or overview resampling
1524
    bool bUseWarp = false;
1525
    if( GDALDataTypeIsComplex( eDataType ) )
1526
        bUseWarp = true;
1527
#endif
1528
1529
0
    double dfXOff = nXOff;
1530
0
    double dfYOff = nYOff;
1531
0
    double dfXSize = nXSize;
1532
0
    double dfYSize = nYSize;
1533
0
    if (psExtraArg->bFloatingPointWindowValidity)
1534
0
    {
1535
0
        dfXOff = psExtraArg->dfXOff;
1536
0
        dfYOff = psExtraArg->dfYOff;
1537
0
        dfXSize = psExtraArg->dfXSize;
1538
0
        dfYSize = psExtraArg->dfYSize;
1539
0
    }
1540
1541
0
    const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1542
0
    const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1543
1544
    // Determine the coordinates in the "virtual" output raster to see
1545
    // if they are integers, in which case we will use them as a shift
1546
    // so that subwindow extracts give the exact same results as entire raster
1547
    // scaling.
1548
0
    double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1549
0
    bool bHasXOffVirtual = false;
1550
0
    int nDestXOffVirtual = 0;
1551
0
    if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1552
0
    {
1553
0
        bHasXOffVirtual = true;
1554
0
        dfXOff = nXOff;
1555
0
        nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1556
0
    }
1557
1558
0
    double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1559
0
    bool bHasYOffVirtual = false;
1560
0
    int nDestYOffVirtual = 0;
1561
0
    if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1562
0
    {
1563
0
        bHasYOffVirtual = true;
1564
0
        dfYOff = nYOff;
1565
0
        nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1566
0
    }
1567
1568
    // Create a MEM dataset that wraps the output buffer.
1569
0
    std::unique_ptr<void, VSIFreeReleaser> pTempBuffer;
1570
0
    GSpacing nPSMem = nPixelSpace;
1571
0
    GSpacing nLSMem = nLineSpace;
1572
0
    GSpacing nBandSpaceMEM = nBandSpace;
1573
0
    void *pDataMem = pData;
1574
0
    GDALDataType eDTMem = eBufType;
1575
0
    GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]);
1576
0
    const GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1577
0
    if (eBufType != eDataType && !GDAL_GET_OPERATE_IN_BUF_TYPE(*psExtraArg))
1578
0
    {
1579
0
        nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1580
0
        nLSMem = nPSMem * nBufXSize;
1581
0
        nBandSpaceMEM = nLSMem * nBandCount;
1582
0
        pTempBuffer.reset(VSI_MALLOC3_VERBOSE(nBandCount, nBufYSize,
1583
0
                                              static_cast<size_t>(nLSMem)));
1584
0
        if (pTempBuffer == nullptr)
1585
0
            return CE_Failure;
1586
0
        pDataMem = pTempBuffer.get();
1587
0
        eDTMem = eDataType;
1588
0
    }
1589
1590
0
    auto poMEMDS = std::unique_ptr<GDALDataset>(
1591
0
        MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1592
0
                           nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr));
1593
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1594
    std::vector<GDALRasterBand *> apoDstBands(nBandCount);
1595
#endif
1596
0
    int nNBITS = 0;
1597
0
    for (int i = 0; i < nBandCount; i++)
1598
0
    {
1599
0
        GByte *const pBandData = static_cast<GByte *>(pDataMem) -
1600
0
                                 nPSMem * nDestXOffVirtual -
1601
0
                                 nLSMem * nDestYOffVirtual + nBandSpaceMEM * i;
1602
0
        auto poMEMBand = GDALRasterBand::FromHandle(MEMCreateRasterBandEx(
1603
0
            poMEMDS.get(), i + 1, pBandData, eDTMem, nPSMem, nLSMem, false));
1604
0
        poMEMDS->SetBand(i + 1, poMEMBand);
1605
1606
0
        GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]);
1607
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1608
        apoDstBands[i] = poMEMBand;
1609
#endif
1610
0
        const char *pszNBITS =
1611
0
            poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1612
0
        if (pszNBITS)
1613
0
        {
1614
0
            nNBITS = atoi(pszNBITS);
1615
0
            poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS,
1616
0
                                                           "IMAGE_STRUCTURE");
1617
0
        }
1618
0
    }
1619
1620
0
    CPLErr eErr = CE_None;
1621
1622
    // TODO(schwehr): Why disabled?  Why not just delete?
1623
    // Looks like this code was initially added as disabled by copying
1624
    // from RasterIO here:
1625
    // https://trac.osgeo.org/gdal/changeset/29572
1626
#if 0
1627
    // Do the resampling.
1628
    if( bUseWarp )
1629
    {
1630
        VRTDatasetH hVRTDS = nullptr;
1631
        GDALRasterBandH hVRTBand = nullptr;
1632
        if( GetDataset() == nullptr )
1633
        {
1634
            /* Create VRT dataset that wraps the whole dataset */
1635
            hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1636
            VRTAddBand( hVRTDS, eDataType, nullptr );
1637
            hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1638
            VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1639
                                (GDALRasterBandH)this,
1640
                                0, 0,
1641
                                nRasterXSize, nRasterYSize,
1642
                                0, 0,
1643
                                nRasterXSize, nRasterYSize,
1644
                                nullptr, VRT_NODATA_UNSET );
1645
1646
            /* Add a mask band if needed */
1647
            if( GetMaskFlags() != GMF_ALL_VALID )
1648
            {
1649
                ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1650
                VRTSourcedRasterBand* poVRTMaskBand =
1651
                    (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1652
                poVRTMaskBand->
1653
                    AddMaskBandSource( this,
1654
                                    0, 0,
1655
                                    nRasterXSize, nRasterYSize,
1656
                                    0, 0,
1657
                                    nRasterXSize, nRasterYSize);
1658
            }
1659
        }
1660
1661
        GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1662
        psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1663
        psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1664
        psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1665
        psWarpOptions->nBandCount = 1;
1666
        int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1667
        int nDstBandNumber = 1;
1668
        psWarpOptions->panSrcBands = &nSrcBandNumber;
1669
        psWarpOptions->panDstBands = &nDstBandNumber;
1670
        psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1671
                    psExtraArg->pfnProgress : GDALDummyProgress;
1672
        psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1673
        psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1674
        GDALRasterIOTransformerStruct sTransformer;
1675
        sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1676
        sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1677
        sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1678
        sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1679
        psWarpOptions->pTransformerArg = &sTransformer;
1680
1681
        GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1682
        eErr = GDALChunkAndWarpImage( hWarpOperation,
1683
                                      nDestXOffVirtual, nDestYOffVirtual,
1684
                                      nBufXSize, nBufYSize );
1685
        GDALDestroyWarpOperation( hWarpOperation );
1686
1687
        psWarpOptions->panSrcBands = nullptr;
1688
        psWarpOptions->panDstBands = nullptr;
1689
        GDALDestroyWarpOptions( psWarpOptions );
1690
1691
        if( hVRTDS )
1692
            GDALClose(hVRTDS);
1693
    }
1694
    else
1695
#endif
1696
0
    {
1697
0
        const char *pszResampling =
1698
0
            GDALRasterIOGetResampleAlg(psExtraArg->eResampleAlg);
1699
1700
0
        int nBlockXSize, nBlockYSize;
1701
0
        poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1702
1703
0
        int nKernelRadius;
1704
0
        GDALResampleFunction pfnResampleFunc =
1705
0
            GDALGetResampleFunction(pszResampling, &nKernelRadius);
1706
0
        CPLAssert(pfnResampleFunc);
1707
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1708
        GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1709
            GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1710
#endif
1711
0
        GDALDataType eWrkDataType =
1712
0
            GDALGetOvrWorkDataType(pszResampling, eDataType);
1713
1714
0
        int nDstBlockXSize = nBufXSize;
1715
0
        int nDstBlockYSize = nBufYSize;
1716
0
        int nFullResXChunk, nFullResYChunk;
1717
0
        while (true)
1718
0
        {
1719
0
            nFullResXChunk =
1720
0
                3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1721
0
            nFullResYChunk =
1722
0
                3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1723
0
            if (nFullResXChunk > nRasterXSize)
1724
0
                nFullResXChunk = nRasterXSize;
1725
0
            if (nFullResYChunk > nRasterYSize)
1726
0
                nFullResYChunk = nRasterYSize;
1727
0
            if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1728
0
                (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1729
0
                 1024 * 1024))
1730
0
                break;
1731
            // When operating on the full width of a raster whose block width is
1732
            // the raster width, prefer doing chunks in height.
1733
0
            if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1734
0
                nDstBlockYSize > 1)
1735
0
                nDstBlockYSize /= 2;
1736
            /* Otherwise cut the maximal dimension */
1737
0
            else if (nDstBlockXSize > 1 &&
1738
0
                     (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1739
0
                nDstBlockXSize /= 2;
1740
0
            else
1741
0
                nDstBlockYSize /= 2;
1742
0
        }
1743
1744
0
        int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc),
1745
0
                                  static_cast<int>(0.5 + dfYRatioDstToSrc));
1746
0
        if (nOvrFactor == 0)
1747
0
            nOvrFactor = 1;
1748
0
        int nFullResXSizeQueried =
1749
0
            nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1750
0
        int nFullResYSizeQueried =
1751
0
            nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1752
1753
0
        if (nFullResXSizeQueried > nRasterXSize)
1754
0
            nFullResXSizeQueried = nRasterXSize;
1755
0
        if (nFullResYSizeQueried > nRasterYSize)
1756
0
            nFullResYSizeQueried = nRasterYSize;
1757
1758
0
        void *pChunk = VSI_MALLOC3_VERBOSE(
1759
0
            cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) *
1760
0
                              nBandCount),
1761
0
            nFullResXSizeQueried, nFullResYSizeQueried);
1762
0
        GByte *pabyChunkNoDataMask = nullptr;
1763
1764
0
        GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand();
1765
0
        int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1766
1767
0
        bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1768
0
        if (bUseNoDataMask)
1769
0
        {
1770
0
            pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1771
0
                nFullResXSizeQueried, nFullResYSizeQueried));
1772
0
        }
1773
0
        if (pChunk == nullptr ||
1774
0
            (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1775
0
        {
1776
0
            CPLFree(pChunk);
1777
0
            CPLFree(pabyChunkNoDataMask);
1778
0
            return CE_Failure;
1779
0
        }
1780
1781
0
        const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) *
1782
0
                                 DIV_ROUND_UP(nBufYSize, nDstBlockYSize);
1783
0
        int nBlocksDone = 0;
1784
1785
0
        int nDstYOff;
1786
0
        for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1787
0
             nDstYOff += nDstBlockYSize)
1788
0
        {
1789
0
            int nDstYCount;
1790
0
            if (nDstYOff + nDstBlockYSize <= nBufYSize)
1791
0
                nDstYCount = nDstBlockYSize;
1792
0
            else
1793
0
                nDstYCount = nBufYSize - nDstYOff;
1794
1795
0
            int nChunkYOff =
1796
0
                nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1797
0
            int nChunkYOff2 = nYOff + 1 +
1798
0
                              static_cast<int>(ceil((nDstYOff + nDstYCount) *
1799
0
                                                    dfYRatioDstToSrc));
1800
0
            if (nChunkYOff2 > nRasterYSize)
1801
0
                nChunkYOff2 = nRasterYSize;
1802
0
            int nYCount = nChunkYOff2 - nChunkYOff;
1803
0
            CPLAssert(nYCount <= nFullResYChunk);
1804
1805
0
            int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1806
0
            int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1807
0
            if (nChunkYOffQueried < 0)
1808
0
            {
1809
0
                nChunkYSizeQueried += nChunkYOffQueried;
1810
0
                nChunkYOffQueried = 0;
1811
0
            }
1812
0
            if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1813
0
                nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1814
0
            CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1815
1816
0
            int nDstXOff;
1817
0
            for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1818
0
                 nDstXOff += nDstBlockXSize)
1819
0
            {
1820
0
                int nDstXCount;
1821
0
                if (nDstXOff + nDstBlockXSize <= nBufXSize)
1822
0
                    nDstXCount = nDstBlockXSize;
1823
0
                else
1824
0
                    nDstXCount = nBufXSize - nDstXOff;
1825
1826
0
                int nChunkXOff =
1827
0
                    nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1828
0
                int nChunkXOff2 =
1829
0
                    nXOff + 1 +
1830
0
                    static_cast<int>(
1831
0
                        ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1832
0
                if (nChunkXOff2 > nRasterXSize)
1833
0
                    nChunkXOff2 = nRasterXSize;
1834
0
                int nXCount = nChunkXOff2 - nChunkXOff;
1835
0
                CPLAssert(nXCount <= nFullResXChunk);
1836
1837
0
                int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1838
0
                int nChunkXSizeQueried =
1839
0
                    nXCount + 2 * nKernelRadius * nOvrFactor;
1840
0
                if (nChunkXOffQueried < 0)
1841
0
                {
1842
0
                    nChunkXSizeQueried += nChunkXOffQueried;
1843
0
                    nChunkXOffQueried = 0;
1844
0
                }
1845
0
                if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1846
0
                    nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1847
0
                CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1848
1849
0
                bool bSkipResample = false;
1850
0
                bool bNoDataMaskFullyOpaque = false;
1851
0
                if (eErr == CE_None && bUseNoDataMask)
1852
0
                {
1853
0
                    eErr = poMaskBand->RasterIO(
1854
0
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1855
0
                        nChunkXSizeQueried, nChunkYSizeQueried,
1856
0
                        pabyChunkNoDataMask, nChunkXSizeQueried,
1857
0
                        nChunkYSizeQueried, GDT_UInt8, 0, 0, nullptr);
1858
1859
                    /* Optimizations if mask is fully opaque or transparent */
1860
0
                    const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1861
0
                    const GByte bVal = pabyChunkNoDataMask[0];
1862
0
                    int i = 1;  // Used after for.
1863
0
                    for (; i < nPixels; i++)
1864
0
                    {
1865
0
                        if (pabyChunkNoDataMask[i] != bVal)
1866
0
                            break;
1867
0
                    }
1868
0
                    if (i == nPixels)
1869
0
                    {
1870
0
                        if (bVal == 0)
1871
0
                        {
1872
0
                            GByte abyZero[16] = {0};
1873
0
                            for (int iBand = 0; iBand < nBandCount; iBand++)
1874
0
                            {
1875
0
                                for (int j = 0; j < nDstYCount; j++)
1876
0
                                {
1877
0
                                    GDALCopyWords64(
1878
0
                                        abyZero, GDT_UInt8, 0,
1879
0
                                        static_cast<GByte *>(pDataMem) +
1880
0
                                            iBand * nBandSpaceMEM +
1881
0
                                            nLSMem * (j + nDstYOff) +
1882
0
                                            nDstXOff * nPSMem,
1883
0
                                        eBufType, static_cast<int>(nPSMem),
1884
0
                                        nDstXCount);
1885
0
                                }
1886
0
                            }
1887
0
                            bSkipResample = true;
1888
0
                        }
1889
0
                        else
1890
0
                        {
1891
0
                            bNoDataMaskFullyOpaque = true;
1892
0
                        }
1893
0
                    }
1894
0
                }
1895
1896
0
                if (!bSkipResample && eErr == CE_None)
1897
0
                {
1898
                    /* Read the source buffers */
1899
0
                    eErr = RasterIO(
1900
0
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1901
0
                        nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1902
0
                        nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType,
1903
0
                        nBandCount, panBandMap, 0, 0, 0, nullptr);
1904
0
                }
1905
1906
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1907
                if (pfnResampleFuncMultiBands && !bSkipResample &&
1908
                    eErr == CE_None)
1909
                {
1910
                    eErr = pfnResampleFuncMultiBands(
1911
                        dfXRatioDstToSrc, dfYRatioDstToSrc,
1912
                        dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1913
                        dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1914
                        eWrkDataType, (GByte *)pChunk, nBandCount,
1915
                        bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1916
                        nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1917
                        nChunkXSizeQueried,
1918
                        nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1919
                        nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1920
                        nDstXOff + nDestXOffVirtual + nDstXCount,
1921
                        nDstYOff + nDestYOffVirtual,
1922
                        nDstYOff + nDestYOffVirtual + nDstYCount,
1923
                        apoDstBands.data(), pszResampling, FALSE /*bHasNoData*/,
1924
                        0.0 /* dfNoDataValue */, nullptr /* color table*/,
1925
                        eDataType);
1926
                }
1927
                else
1928
#endif
1929
0
                {
1930
0
                    size_t nChunkBandOffset =
1931
0
                        static_cast<size_t>(nChunkXSizeQueried) *
1932
0
                        nChunkYSizeQueried *
1933
0
                        GDALGetDataTypeSizeBytes(eWrkDataType);
1934
0
                    for (int i = 0;
1935
0
                         i < nBandCount && !bSkipResample && eErr == CE_None;
1936
0
                         i++)
1937
0
                    {
1938
0
                        const bool bPropagateNoData = false;
1939
0
                        void *pDstBuffer = nullptr;
1940
0
                        GDALDataType eDstBufferDataType = GDT_Unknown;
1941
0
                        GDALRasterBand *poMEMBand =
1942
0
                            poMEMDS->GetRasterBand(i + 1);
1943
0
                        GDALOverviewResampleArgs args;
1944
0
                        args.eSrcDataType = eDataType;
1945
0
                        args.eOvrDataType = poMEMBand->GetRasterDataType();
1946
0
                        args.nOvrXSize = poMEMBand->GetXSize();
1947
0
                        args.nOvrYSize = poMEMBand->GetYSize();
1948
0
                        args.nOvrNBITS = nNBITS;
1949
0
                        args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1950
0
                        args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1951
0
                        args.dfSrcXDelta =
1952
0
                            dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1953
0
                        args.dfSrcYDelta =
1954
0
                            dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1955
0
                        args.eWrkDataType = eWrkDataType;
1956
0
                        args.pabyChunkNodataMask = bNoDataMaskFullyOpaque
1957
0
                                                       ? nullptr
1958
0
                                                       : pabyChunkNoDataMask;
1959
0
                        args.nChunkXOff =
1960
0
                            nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1961
0
                        args.nChunkXSize = nChunkXSizeQueried;
1962
0
                        args.nChunkYOff =
1963
0
                            nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1964
0
                        args.nChunkYSize = nChunkYSizeQueried;
1965
0
                        args.nDstXOff = nDstXOff + nDestXOffVirtual;
1966
0
                        args.nDstXOff2 =
1967
0
                            nDstXOff + nDestXOffVirtual + nDstXCount;
1968
0
                        args.nDstYOff = nDstYOff + nDestYOffVirtual;
1969
0
                        args.nDstYOff2 =
1970
0
                            nDstYOff + nDestYOffVirtual + nDstYCount;
1971
0
                        args.pszResampling = pszResampling;
1972
0
                        args.bHasNoData = false;
1973
0
                        args.dfNoDataValue = 0.0;
1974
0
                        args.poColorTable = nullptr;
1975
0
                        args.bPropagateNoData = bPropagateNoData;
1976
1977
0
                        eErr =
1978
0
                            pfnResampleFunc(args,
1979
0
                                            reinterpret_cast<GByte *>(pChunk) +
1980
0
                                                i * nChunkBandOffset,
1981
0
                                            &pDstBuffer, &eDstBufferDataType);
1982
0
                        if (eErr == CE_None)
1983
0
                        {
1984
0
                            eErr = poMEMBand->RasterIO(
1985
0
                                GF_Write, nDstXOff + nDestXOffVirtual,
1986
0
                                nDstYOff + nDestYOffVirtual, nDstXCount,
1987
0
                                nDstYCount, pDstBuffer, nDstXCount, nDstYCount,
1988
0
                                eDstBufferDataType, 0, 0, nullptr);
1989
0
                        }
1990
0
                        CPLFree(pDstBuffer);
1991
0
                    }
1992
0
                }
1993
1994
0
                nBlocksDone++;
1995
0
                if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1996
0
                    !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1997
0
                                             "", psExtraArg->pProgressData))
1998
0
                {
1999
0
                    eErr = CE_Failure;
2000
0
                }
2001
0
            }
2002
0
        }
2003
2004
0
        CPLFree(pChunk);
2005
0
        CPLFree(pabyChunkNoDataMask);
2006
0
    }
2007
2008
0
    if (pTempBuffer)
2009
0
    {
2010
0
        CPL_IGNORE_RET_VAL(poMEMDS->RasterIO(
2011
0
            GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
2012
0
            pData, nBufXSize, nBufYSize, eBufType, nBandCount, nullptr,
2013
0
            nPixelSpace, nLineSpace, nBandSpace, nullptr));
2014
0
    }
2015
2016
0
    return eErr;
2017
0
}
2018
2019
//! @endcond
2020
2021
/************************************************************************/
2022
/*                           GDALSwapWords()                            */
2023
/************************************************************************/
2024
2025
/**
2026
 * Byte swap words in-place.
2027
 *
2028
 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2029
 * a memory array.  No assumption is made that the words being swapped are
2030
 * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2031
 * to determine if the current platform is big endian or little endian.  Use
2032
 * the macros like CPL_SWAP32() to byte swap single values without the overhead
2033
 * of a function call.
2034
 *
2035
 * @param pData pointer to start of data buffer.
2036
 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2037
 * @param nWordCount the number of words to be swapped in this call.
2038
 * @param nWordSkip the byte offset from the start of one word to the start of
2039
 * the next. For packed buffers this is the same as nWordSize.
2040
 */
2041
2042
void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount,
2043
                               int nWordSkip)
2044
2045
0
{
2046
0
    // The buffer pointer is only validated when there is at least one word
    // to swap.
    if (nWordCount > 0)
2047
0
        VALIDATE_POINTER0(pData, "GDALSwapWords");
2048
2049
0
    // Byte cursor: nWordSkip is added to it after each word, so the stride
    // is expressed in bytes.
    GByte *pabyData = static_cast<GByte *>(pData);
2050
2051
0
    switch (nWordSize)
2052
0
    {
2053
0
        // Single-byte words: nothing to swap.
        case 1:
2054
0
            break;
2055
2056
0
        case 2:
2057
0
            // A stride smaller than the word size is only accepted when a
            // single word is processed.
            CPLAssert(nWordSkip >= 2 || nWordCount == 1);
2058
0
            for (int i = 0; i < nWordCount; i++)
2059
0
            {
2060
0
                CPL_SWAP16PTR(pabyData);
2061
0
                pabyData += nWordSkip;
2062
0
            }
2063
0
            break;
2064
2065
0
        case 4:
2066
0
            CPLAssert(nWordSkip >= 4 || nWordCount == 1);
2067
0
            // When the start address passes the 4-byte alignment check and
            // the stride is a multiple of 4, every word visited is aligned
            // and is swapped through a direct GUInt32 load/store.
            if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0)
2068
0
            {
2069
0
                for (int i = 0; i < nWordCount; i++)
2070
0
                {
2071
0
                    *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32(
2072
0
                        *reinterpret_cast<const GUInt32 *>(pabyData));
2073
0
                    pabyData += nWordSkip;
2074
0
                }
2075
0
            }
2076
0
            else
2077
0
            {
2078
0
                // Otherwise swap through the byte pointer with
                // CPL_SWAP32PTR.
                for (int i = 0; i < nWordCount; i++)
2079
0
                {
2080
0
                    CPL_SWAP32PTR(pabyData);
2081
0
                    pabyData += nWordSkip;
2082
0
                }
2083
0
            }
2084
0
            break;
2085
2086
0
        case 8:
2087
0
            CPLAssert(nWordSkip >= 8 || nWordCount == 1);
2088
0
            // Same aligned / byte-pointer split as the 4-byte case, using
            // 8-byte alignment and GUInt64 accesses.
            if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0)
2089
0
            {
2090
0
                for (int i = 0; i < nWordCount; i++)
2091
0
                {
2092
0
                    *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64(
2093
0
                        *reinterpret_cast<const GUInt64 *>(pabyData));
2094
0
                    pabyData += nWordSkip;
2095
0
                }
2096
0
            }
2097
0
            else
2098
0
            {
2099
0
                for (int i = 0; i < nWordCount; i++)
2100
0
                {
2101
0
                    CPL_SWAP64PTR(pabyData);
2102
0
                    pabyData += nWordSkip;
2103
0
                }
2104
0
            }
2105
0
            break;
2106
2107
0
        // Any other word size is unsupported: it is only flagged through
        // CPLAssert and the buffer is left untouched.
        // NOTE(review): whether CPLAssert is active depends on the build
        // configuration - confirm.
        default:
2108
0
            CPLAssert(false);
2109
0
    }
2110
0
}
2111
2112
/************************************************************************/
2113
/*                          GDALSwapWordsEx()                           */
2114
/************************************************************************/
2115
2116
/**
2117
 * Byte swap words in-place.
2118
 *
2119
 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2120
 * a memory array.  No assumption is made that the words being swapped are
2121
 * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2122
 * to determine if the current platform is big endian or little endian.  Use
2123
 * the macros like CPL_SWAP32() to byte swap single values without the overhead
2124
 * of a function call.
2125
 *
2126
 * @param pData pointer to start of data buffer.
2127
 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2128
 * @param nWordCount the number of words to be swapped in this call.
2129
 * @param nWordSkip the byte offset from the start of one word to the start of
2130
 * the next. For packed buffers this is the same as nWordSize.
2131
 */
2132
void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount,
2133
                                 int nWordSkip)
2134
0
{
2135
0
    // GDALSwapWords() takes an int word count, so the size_t count received
    // here is consumed in chunks that fit in an int.
    GByte *pabyData = static_cast<GByte *>(pData);
2136
0
    while (nWordCount)
2137
0
    {
2138
        // Cap each chunk at 1 << 30 words (a multiple of 8).
2139
0
        const int nWordCountSmall =
2140
0
            (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
2141
0
        GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
2142
0
        // Advance past the words just handled; the stride is widened to
        // size_t before the multiplication.
        pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
2143
0
        nWordCount -= nWordCountSmall;
2144
0
    }
2145
0
}
2146
2147
// Place the new GDALCopyWords helpers in an anonymous namespace
2148
namespace
2149
{
2150
2151
/************************************************************************/
2152
/*                           GDALCopyWordsT()                           */
2153
/************************************************************************/
2154
/**
2155
 * Template function, used to copy data from pSrcData into buffer
2156
 * pDstData, with stride nSrcPixelStride in the source data and
2157
 * stride nDstPixelStride in the destination data. This template can
2158
 * deal with the case where the input data type is real or complex and
2159
 * the output is real.
2160
 *
2161
 * @param pSrcData the source data buffer
2162
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2163
 *                      of interest.
2164
 * @param pDstData the destination buffer.
2165
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2166
 *                      interest.
2167
 * @param nWordCount the total number of pixel words to copy
2168
 *
2169
 * @code
2170
 * // Assume an input buffer of type GUInt16 named pBufferIn
2171
 * GByte *pBufferOut = new GByte[numBytesOut];
2172
 * GDALCopyWordsT<GUInt16, GByte>(pBufferIn, 2, pBufferOut, 1, numBytesOut);
2173
 * @endcode
2174
 * @note
2175
 * This is a private function, and should not be exposed outside of
2176
 * rasterio.cpp. External users should call the GDALCopyWords driver function.
2177
 */
2178
2179
template <class Tin, class Tout>
2180
static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData,
2181
                                         int nSrcPixelStride,
2182
                                         Tout *const CPL_RESTRICT pDstData,
2183
                                         int nDstPixelStride,
2184
                                         GPtrDiff_t nWordCount)
2185
0
{
2186
0
    // Running byte offset into the destination, incremented by
    // nDstPixelStride after each word.
    decltype(nWordCount) nDstOffset = 0;
2187
2188
0
    // Both buffers are addressed through char pointers, so the two strides
    // are applied as byte counts.
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2189
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2190
0
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2191
0
    {
2192
0
        // The source offset is recomputed from the word index on every
        // iteration (n * nSrcPixelStride).
        const Tin tValue =
2193
0
            *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2194
0
        Tout *const pOutPixel =
2195
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2196
2197
0
        // GDALCopyWord() is defined outside this excerpt; it presumably
        // performs the Tin -> Tout value conversion - confirm.
        GDALCopyWord(tValue, *pOutPixel);
2198
2199
0
        nDstOffset += nDstPixelStride;
2200
0
    }
2201
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, signed char>(unsigned char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned short>(unsigned char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, short>(unsigned char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned int>(unsigned char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, int>(unsigned char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, long>(unsigned char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, float>(unsigned char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, double>(unsigned char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned char>(signed char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, signed char>(signed char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, long>(signed char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned char>(unsigned short const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, signed char>(unsigned short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, short>(unsigned short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, long>(unsigned short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, float>(unsigned short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, double>(unsigned short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned char>(short const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, signed char>(short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned short>(short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned int>(short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned long>(short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, long>(short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, long>(unsigned int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned char>(int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, signed char>(int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned short>(int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned int>(int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned long>(int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, long>(int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned long>(unsigned long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, long>(unsigned long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned char>(long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, signed char>(long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned short>(long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned int>(long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned long>(long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, long>(long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned int>(float const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned long>(float const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, long>(float const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, signed char>(double const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned int>(double const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned long>(double const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, long>(double const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, double>(double const*, int, double*, int, long long)
2202
2203
// Primary GDALCopyWordsT template: forwards all arguments unchanged to
// GDALCopyWordsGenericT(). It is marked CPL_NOINLINE (presumably to keep one
// out-of-line copy per <Tin, Tout> pair - confirm against cpl_port.h).
template <class Tin, class Tout>
2204
static void CPL_NOINLINE GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
2205
                                        int nSrcPixelStride,
2206
                                        Tout *const CPL_RESTRICT pDstData,
2207
                                        int nDstPixelStride,
2208
                                        GPtrDiff_t nWordCount)
2209
0
{
2210
0
    GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
2211
0
                          nWordCount);
2212
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, long>(unsigned char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, signed char>(signed char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, long>(signed char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, signed char>(unsigned short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, long>(unsigned short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned char>(short const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, signed char>(short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned int>(short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned long>(short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, long>(short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, long>(unsigned int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, signed char>(int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned int>(int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned long>(int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, long>(int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned long>(unsigned long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, long>(unsigned long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned char>(long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, signed char>(long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned short>(long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned int>(long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned long>(long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, long>(long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned int>(float const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned long>(float const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, long>(float const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, signed char>(double const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned int>(double const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned long>(double const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, long>(double const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, double>(double const*, int, double*, int, long long)
2213
2214
template <class Tin, class Tout>
2215
static void inline GDALCopyWordsT_8atatime(
2216
    const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2217
    Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2218
    GPtrDiff_t nWordCount)
2219
0
{
2220
0
    decltype(nWordCount) nDstOffset = 0;
2221
2222
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2223
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2224
0
    decltype(nWordCount) n = 0;
2225
0
    if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2226
0
        nDstPixelStride == static_cast<int>(sizeof(Tout)))
2227
0
    {
2228
0
        for (; n < nWordCount - 7; n += 8)
2229
0
        {
2230
0
            const Tin *pInValues = reinterpret_cast<const Tin *>(
2231
0
                pSrcDataPtr + (n * nSrcPixelStride));
2232
0
            Tout *const pOutPixels =
2233
0
                reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2234
2235
0
            GDALCopy8Words(pInValues, pOutPixels);
2236
2237
0
            nDstOffset += 8 * nDstPixelStride;
2238
0
        }
2239
0
    }
2240
0
    for (; n < nWordCount; n++)
2241
0
    {
2242
0
        const Tin tValue =
2243
0
            *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2244
0
        Tout *const pOutPixel =
2245
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2246
2247
0
        GDALCopyWord(tValue, *pOutPixel);
2248
2249
0
        nDstOffset += nDstPixelStride;
2250
0
    }
2251
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned char>(float const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, signed char>(float const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned short>(float const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, short>(float const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, unsigned char>(double const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, unsigned short>(double const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, float>(double const*, int, float*, int, long long)
2252
2253
#ifdef HAVE_SSE2
2254
2255
template <class Tout>
2256
void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
2257
                              int nSrcPixelStride,
2258
                              Tout *const CPL_RESTRICT pDstData,
2259
                              int nDstPixelStride, GPtrDiff_t nWordCount)
2260
0
{
2261
0
    static_assert(std::is_integral<Tout>::value &&
2262
0
                      sizeof(Tout) == sizeof(uint16_t),
2263
0
                  "Bad Tout");
2264
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2265
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2266
0
    {
2267
0
        decltype(nWordCount) n = 0;
2268
0
        const __m128i xmm_zero = _mm_setzero_si128();
2269
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2270
0
            reinterpret_cast<GByte *>(pDstData);
2271
0
        for (; n < nWordCount - 15; n += 16)
2272
0
        {
2273
0
            __m128i xmm = _mm_loadu_si128(
2274
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2275
0
            __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2276
0
            __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2277
0
            _mm_storeu_si128(
2278
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
2279
0
            _mm_storeu_si128(
2280
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
2281
0
        }
2282
0
        for (; n < nWordCount; n++)
2283
0
        {
2284
0
            pDstData[n] = pSrcData[n];
2285
0
        }
2286
0
    }
2287
0
    else
2288
0
    {
2289
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2290
0
                              nDstPixelStride, nWordCount);
2291
0
    }
2292
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<unsigned short>(unsigned char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<short>(unsigned char const*, int, short*, int, long long)
2293
2294
template <>
2295
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2296
                                 int nSrcPixelStride,
2297
                                 GUInt16 *const CPL_RESTRICT pDstData,
2298
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2299
0
{
2300
0
    GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2301
0
                             nDstPixelStride, nWordCount);
2302
0
}
2303
2304
template <>
2305
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2306
                                 int nSrcPixelStride,
2307
                                 GInt16 *const CPL_RESTRICT pDstData,
2308
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2309
0
{
2310
0
    GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2311
0
                             nDstPixelStride, nWordCount);
2312
0
}
2313
2314
template <class Tout>
2315
void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
2316
                              int nSrcPixelStride,
2317
                              Tout *const CPL_RESTRICT pDstData,
2318
                              int nDstPixelStride, GPtrDiff_t nWordCount)
2319
0
{
2320
0
    static_assert(std::is_integral<Tout>::value &&
2321
0
                      sizeof(Tout) == sizeof(uint32_t),
2322
0
                  "Bad Tout");
2323
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2324
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2325
0
    {
2326
0
        decltype(nWordCount) n = 0;
2327
0
        const __m128i xmm_zero = _mm_setzero_si128();
2328
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2329
0
            reinterpret_cast<GByte *>(pDstData);
2330
0
        for (; n < nWordCount - 15; n += 16)
2331
0
        {
2332
0
            __m128i xmm = _mm_loadu_si128(
2333
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2334
0
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2335
0
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2336
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2337
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2338
0
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2339
0
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2340
0
            _mm_storeu_si128(
2341
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
2342
0
            _mm_storeu_si128(
2343
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
2344
0
            _mm_storeu_si128(
2345
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
2346
0
            _mm_storeu_si128(
2347
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
2348
0
        }
2349
0
        for (; n < nWordCount; n++)
2350
0
        {
2351
0
            pDstData[n] = pSrcData[n];
2352
0
        }
2353
0
    }
2354
0
    else
2355
0
    {
2356
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2357
0
                              nDstPixelStride, nWordCount);
2358
0
    }
2359
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<unsigned int>(unsigned char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<int>(unsigned char const*, int, int*, int, long long)
2360
2361
template <>
2362
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2363
                                 int nSrcPixelStride,
2364
                                 GUInt32 *const CPL_RESTRICT pDstData,
2365
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2366
0
{
2367
0
    GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2368
0
                             nDstPixelStride, nWordCount);
2369
0
}
2370
2371
template <>
2372
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2373
                                 int nSrcPixelStride,
2374
                                 GInt32 *const CPL_RESTRICT pDstData,
2375
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2376
0
{
2377
0
    GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2378
0
                             nDstPixelStride, nWordCount);
2379
0
}
2380
2381
template <>
2382
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2383
                                 int nSrcPixelStride,
2384
                                 float *const CPL_RESTRICT pDstData,
2385
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2386
0
{
2387
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2388
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2389
0
    {
2390
0
        decltype(nWordCount) n = 0;
2391
0
        const __m128i xmm_zero = _mm_setzero_si128();
2392
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2393
0
            reinterpret_cast<GByte *>(pDstData);
2394
0
        for (; n < nWordCount - 15; n += 16)
2395
0
        {
2396
0
            __m128i xmm = _mm_loadu_si128(
2397
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2398
0
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2399
0
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2400
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2401
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2402
0
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2403
0
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2404
0
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2405
0
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2406
0
            __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2407
0
            __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2408
0
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2409
0
                          xmm0_f);
2410
0
            _mm_storeu_ps(
2411
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2412
0
            _mm_storeu_ps(
2413
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f);
2414
0
            _mm_storeu_ps(
2415
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f);
2416
0
        }
2417
0
        for (; n < nWordCount; n++)
2418
0
        {
2419
0
            pDstData[n] = pSrcData[n];
2420
0
        }
2421
0
    }
2422
0
    else
2423
0
    {
2424
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2425
0
                              nDstPixelStride, nWordCount);
2426
0
    }
2427
0
}
2428
2429
template <>
2430
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2431
                                 int nSrcPixelStride,
2432
                                 double *const CPL_RESTRICT pDstData,
2433
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2434
0
{
2435
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2436
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2437
0
    {
2438
0
        decltype(nWordCount) n = 0;
2439
0
        const __m128i xmm_zero = _mm_setzero_si128();
2440
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2441
0
            reinterpret_cast<GByte *>(pDstData);
2442
0
        for (; n < nWordCount - 15; n += 16)
2443
0
        {
2444
0
            __m128i xmm = _mm_loadu_si128(
2445
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2446
0
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2447
0
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2448
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2449
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2450
0
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2451
0
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2452
2453
#if defined(__AVX2__) && defined(slightly_slower_than_SSE2)
2454
            _mm256_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2455
                             _mm256_cvtepi32_pd(xmm0));
2456
            _mm256_storeu_pd(
2457
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2458
                _mm256_cvtepi32_pd(xmm1));
2459
            _mm256_storeu_pd(
2460
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2461
                _mm256_cvtepi32_pd(xmm2));
2462
            _mm256_storeu_pd(
2463
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2464
                _mm256_cvtepi32_pd(xmm3));
2465
#else
2466
0
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2467
0
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2468
0
            __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2469
0
            __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2470
0
            xmm0 = _mm_srli_si128(xmm0, 8);
2471
0
            xmm1 = _mm_srli_si128(xmm1, 8);
2472
0
            xmm2 = _mm_srli_si128(xmm2, 8);
2473
0
            xmm3 = _mm_srli_si128(xmm3, 8);
2474
0
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2475
0
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2476
0
            __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2477
0
            __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2478
2479
0
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2480
0
                          xmm0_low_d);
2481
0
            _mm_storeu_pd(
2482
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2483
0
                xmm0_high_d);
2484
0
            _mm_storeu_pd(
2485
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2486
0
                xmm1_low_d);
2487
0
            _mm_storeu_pd(
2488
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2489
0
                xmm1_high_d);
2490
0
            _mm_storeu_pd(
2491
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2492
0
                xmm2_low_d);
2493
0
            _mm_storeu_pd(
2494
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80),
2495
0
                xmm2_high_d);
2496
0
            _mm_storeu_pd(
2497
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2498
0
                xmm3_low_d);
2499
0
            _mm_storeu_pd(
2500
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112),
2501
0
                xmm3_high_d);
2502
0
#endif
2503
0
        }
2504
0
        for (; n < nWordCount; n++)
2505
0
        {
2506
0
            pDstData[n] = pSrcData[n];
2507
0
        }
2508
0
    }
2509
0
    else
2510
0
    {
2511
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2512
0
                              nDstPixelStride, nWordCount);
2513
0
    }
2514
0
}
2515
2516
template <>
2517
CPL_NOINLINE void GDALCopyWordsT(const uint8_t *const CPL_RESTRICT pSrcData,
2518
                                 int nSrcPixelStride,
2519
                                 int8_t *const CPL_RESTRICT pDstData,
2520
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2521
0
{
2522
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2523
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2524
0
    {
2525
0
        decltype(nWordCount) n = 0;
2526
0
        const __m128i xmm_127 = _mm_set1_epi8(127);
2527
0
        for (; n < nWordCount - 31; n += 32)
2528
0
        {
2529
0
            __m128i xmm0 = _mm_loadu_si128(
2530
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2531
0
            __m128i xmm1 = _mm_loadu_si128(
2532
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 16));
2533
0
            xmm0 = _mm_min_epu8(xmm0, xmm_127);
2534
0
            xmm1 = _mm_min_epu8(xmm1, xmm_127);
2535
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2536
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 16),
2537
0
                             xmm1);
2538
0
        }
2539
0
        for (; n < nWordCount; n++)
2540
0
        {
2541
0
            pDstData[n] =
2542
0
                pSrcData[n] >= 127 ? 127 : static_cast<int8_t>(pSrcData[n]);
2543
0
        }
2544
0
    }
2545
0
    else
2546
0
    {
2547
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2548
0
                              nDstPixelStride, nWordCount);
2549
0
    }
2550
0
}
2551
2552
template <>
2553
CPL_NOINLINE void GDALCopyWordsT(const int8_t *const CPL_RESTRICT pSrcData,
2554
                                 int nSrcPixelStride,
2555
                                 uint8_t *const CPL_RESTRICT pDstData,
2556
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2557
0
{
2558
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2559
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2560
0
    {
2561
0
        decltype(nWordCount) n = 0;
2562
0
#if !(defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS))
2563
0
        const __m128i xmm_INT8_to_UINT8 = _mm_set1_epi8(-128);
2564
0
#endif
2565
0
        for (; n < nWordCount - 31; n += 32)
2566
0
        {
2567
0
            __m128i xmm0 = _mm_loadu_si128(
2568
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2569
0
            __m128i xmm1 = _mm_loadu_si128(
2570
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 16));
2571
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2572
            xmm0 = _mm_max_epi8(xmm0, _mm_setzero_si128());
2573
            xmm1 = _mm_max_epi8(xmm1, _mm_setzero_si128());
2574
#else
2575
0
            xmm0 = _mm_add_epi8(xmm0, xmm_INT8_to_UINT8);
2576
0
            xmm1 = _mm_add_epi8(xmm1, xmm_INT8_to_UINT8);
2577
0
            xmm0 = _mm_max_epu8(xmm0, xmm_INT8_to_UINT8);
2578
0
            xmm1 = _mm_max_epu8(xmm1, xmm_INT8_to_UINT8);
2579
0
            xmm0 = _mm_sub_epi8(xmm0, xmm_INT8_to_UINT8);
2580
0
            xmm1 = _mm_sub_epi8(xmm1, xmm_INT8_to_UINT8);
2581
0
#endif
2582
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2583
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 16),
2584
0
                             xmm1);
2585
0
        }
2586
0
        for (; n < nWordCount; n++)
2587
0
        {
2588
0
            pDstData[n] =
2589
0
                pSrcData[n] < 0 ? 0 : static_cast<uint8_t>(pSrcData[n]);
2590
0
        }
2591
0
    }
2592
0
    else
2593
0
    {
2594
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2595
0
                              nDstPixelStride, nWordCount);
2596
0
    }
2597
0
}
2598
2599
template <>
2600
CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData,
2601
                                 int nSrcPixelStride,
2602
                                 uint8_t *const CPL_RESTRICT pDstData,
2603
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2604
0
{
2605
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2606
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2607
0
    {
2608
0
        decltype(nWordCount) n = 0;
2609
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2610
        const auto xmm_MAX_INT16 = _mm_set1_epi16(32767);
2611
#else
2612
        // In SSE2, min_epu16 does not exist, so shift from
2613
        // UInt16 to SInt16 to be able to use min_epi16
2614
0
        const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2615
0
        const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768);
2616
0
#endif
2617
0
        for (; n < nWordCount - 15; n += 16)
2618
0
        {
2619
0
            __m128i xmm0 = _mm_loadu_si128(
2620
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2621
0
            __m128i xmm1 = _mm_loadu_si128(
2622
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 8));
2623
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2624
            xmm0 = _mm_min_epu16(xmm0, xmm_MAX_INT16);
2625
            xmm1 = _mm_min_epu16(xmm1, xmm_MAX_INT16);
2626
#else
2627
0
            xmm0 = _mm_add_epi16(xmm0, xmm_UINT16_to_INT16);
2628
0
            xmm1 = _mm_add_epi16(xmm1, xmm_UINT16_to_INT16);
2629
0
            xmm0 = _mm_min_epi16(xmm0, xmm_m255_shifted);
2630
0
            xmm1 = _mm_min_epi16(xmm1, xmm_m255_shifted);
2631
0
            xmm0 = _mm_sub_epi16(xmm0, xmm_UINT16_to_INT16);
2632
0
            xmm1 = _mm_sub_epi16(xmm1, xmm_UINT16_to_INT16);
2633
0
#endif
2634
0
            xmm0 = _mm_packus_epi16(xmm0, xmm1);
2635
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2636
0
        }
2637
0
        for (; n < nWordCount; n++)
2638
0
        {
2639
0
            pDstData[n] =
2640
0
                pSrcData[n] >= 255 ? 255 : static_cast<uint8_t>(pSrcData[n]);
2641
0
        }
2642
0
    }
2643
0
    else
2644
0
    {
2645
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2646
0
                              nDstPixelStride, nWordCount);
2647
0
    }
2648
0
}
2649
2650
template <>
2651
CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData,
2652
                                 int nSrcPixelStride,
2653
                                 int16_t *const CPL_RESTRICT pDstData,
2654
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2655
0
{
2656
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2657
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2658
0
    {
2659
0
        decltype(nWordCount) n = 0;
2660
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2661
        const __m128i xmm_MAX_INT16 = _mm_set1_epi16(32767);
2662
#else
2663
        // In SSE2, min_epu16 does not exist, so shift from
2664
        // UInt16 to SInt16 to be able to use min_epi16
2665
0
        const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2666
0
        const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768);
2667
0
#endif
2668
0
        for (; n < nWordCount - 15; n += 16)
2669
0
        {
2670
0
            __m128i xmm0 = _mm_loadu_si128(
2671
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2672
0
            __m128i xmm1 = _mm_loadu_si128(
2673
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 8));
2674
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2675
            xmm0 = _mm_min_epu16(xmm0, xmm_MAX_INT16);
2676
            xmm1 = _mm_min_epu16(xmm1, xmm_MAX_INT16);
2677
#else
2678
0
            xmm0 = _mm_add_epi16(xmm0, xmm_UINT16_to_INT16);
2679
0
            xmm1 = _mm_add_epi16(xmm1, xmm_UINT16_to_INT16);
2680
0
            xmm0 = _mm_min_epi16(xmm0, xmm_32767_shifted);
2681
0
            xmm1 = _mm_min_epi16(xmm1, xmm_32767_shifted);
2682
0
            xmm0 = _mm_sub_epi16(xmm0, xmm_UINT16_to_INT16);
2683
0
            xmm1 = _mm_sub_epi16(xmm1, xmm_UINT16_to_INT16);
2684
0
#endif
2685
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2686
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 8),
2687
0
                             xmm1);
2688
0
        }
2689
0
        for (; n < nWordCount; n++)
2690
0
        {
2691
0
            pDstData[n] = pSrcData[n] >= 32767
2692
0
                              ? 32767
2693
0
                              : static_cast<int16_t>(pSrcData[n]);
2694
0
        }
2695
0
    }
2696
0
    else
2697
0
    {
2698
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2699
0
                              nDstPixelStride, nWordCount);
2700
0
    }
2701
0
}
2702
2703
template <>
2704
CPL_NOINLINE void GDALCopyWordsT(const int16_t *const CPL_RESTRICT pSrcData,
2705
                                 int nSrcPixelStride,
2706
                                 uint16_t *const CPL_RESTRICT pDstData,
2707
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2708
0
{
2709
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2710
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2711
0
    {
2712
0
        decltype(nWordCount) n = 0;
2713
0
        const __m128i xmm_zero = _mm_setzero_si128();
2714
0
        for (; n < nWordCount - 15; n += 16)
2715
0
        {
2716
0
            __m128i xmm0 = _mm_loadu_si128(
2717
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2718
0
            __m128i xmm1 = _mm_loadu_si128(
2719
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 8));
2720
0
            xmm0 = _mm_max_epi16(xmm0, xmm_zero);
2721
0
            xmm1 = _mm_max_epi16(xmm1, xmm_zero);
2722
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2723
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 8),
2724
0
                             xmm1);
2725
0
        }
2726
0
        for (; n < nWordCount; n++)
2727
0
        {
2728
0
            pDstData[n] =
2729
0
                pSrcData[n] < 0 ? 0 : static_cast<uint16_t>(pSrcData[n]);
2730
0
        }
2731
0
    }
2732
0
    else
2733
0
    {
2734
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2735
0
                              nDstPixelStride, nWordCount);
2736
0
    }
2737
0
}
2738
2739
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2740
2741
template <>
2742
CPL_NOINLINE void GDALCopyWordsT(const uint32_t *const CPL_RESTRICT pSrcData,
2743
                                 int nSrcPixelStride,
2744
                                 int32_t *const CPL_RESTRICT pDstData,
2745
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2746
{
2747
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2748
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2749
    {
2750
        decltype(nWordCount) n = 0;
2751
        const __m128i xmm_MAX_INT = _mm_set1_epi32(INT_MAX);
2752
        for (; n < nWordCount - 8; n += 7)
2753
        {
2754
            __m128i xmm0 = _mm_loadu_si128(
2755
                reinterpret_cast<const __m128i *>(pSrcData + n));
2756
            __m128i xmm1 = _mm_loadu_si128(
2757
                reinterpret_cast<const __m128i *>(pSrcData + n + 4));
2758
            xmm0 = _mm_min_epu32(xmm0, xmm_MAX_INT);
2759
            xmm1 = _mm_min_epu32(xmm1, xmm_MAX_INT);
2760
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2761
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 4),
2762
                             xmm1);
2763
        }
2764
        for (; n < nWordCount; n++)
2765
        {
2766
            pDstData[n] = pSrcData[n] >= INT_MAX
2767
                              ? INT_MAX
2768
                              : static_cast<int32_t>(pSrcData[n]);
2769
        }
2770
    }
2771
    else
2772
    {
2773
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2774
                              nDstPixelStride, nWordCount);
2775
    }
2776
}
2777
2778
template <>
2779
CPL_NOINLINE void GDALCopyWordsT(const int32_t *const CPL_RESTRICT pSrcData,
2780
                                 int nSrcPixelStride,
2781
                                 uint32_t *const CPL_RESTRICT pDstData,
2782
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2783
{
2784
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2785
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2786
    {
2787
        decltype(nWordCount) n = 0;
2788
        const __m128i xmm_zero = _mm_setzero_si128();
2789
        for (; n < nWordCount - 7; n += 8)
2790
        {
2791
            __m128i xmm0 = _mm_loadu_si128(
2792
                reinterpret_cast<const __m128i *>(pSrcData + n));
2793
            __m128i xmm1 = _mm_loadu_si128(
2794
                reinterpret_cast<const __m128i *>(pSrcData + n + 4));
2795
            xmm0 = _mm_max_epi32(xmm0, xmm_zero);
2796
            xmm1 = _mm_max_epi32(xmm1, xmm_zero);
2797
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2798
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 4),
2799
                             xmm1);
2800
        }
2801
        for (; n < nWordCount; n++)
2802
        {
2803
            pDstData[n] =
2804
                pSrcData[n] < 0 ? 0 : static_cast<uint32_t>(pSrcData[n]);
2805
        }
2806
    }
2807
    else
2808
    {
2809
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2810
                              nDstPixelStride, nWordCount);
2811
    }
2812
}
2813
2814
#endif  // defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2815
2816
template <>
2817
CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData,
2818
                                 int nSrcPixelStride,
2819
                                 float *const CPL_RESTRICT pDstData,
2820
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2821
0
{
2822
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2823
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2824
0
    {
2825
0
        decltype(nWordCount) n = 0;
2826
0
        const __m128i xmm_zero = _mm_setzero_si128();
2827
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2828
0
            reinterpret_cast<GByte *>(pDstData);
2829
0
        for (; n < nWordCount - 7; n += 8)
2830
0
        {
2831
0
            __m128i xmm = _mm_loadu_si128(
2832
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2833
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2834
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2835
0
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2836
0
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2837
0
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2838
0
                          xmm0_f);
2839
0
            _mm_storeu_ps(
2840
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2841
0
        }
2842
0
        for (; n < nWordCount; n++)
2843
0
        {
2844
0
            pDstData[n] = pSrcData[n];
2845
0
        }
2846
0
    }
2847
0
    else
2848
0
    {
2849
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2850
0
                              nDstPixelStride, nWordCount);
2851
0
    }
2852
0
}
2853
2854
template <>
2855
CPL_NOINLINE void GDALCopyWordsT(const int16_t *const CPL_RESTRICT pSrcData,
2856
                                 int nSrcPixelStride,
2857
                                 float *const CPL_RESTRICT pDstData,
2858
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2859
0
{
2860
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2861
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2862
0
    {
2863
0
        decltype(nWordCount) n = 0;
2864
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2865
0
            reinterpret_cast<GByte *>(pDstData);
2866
0
        for (; n < nWordCount - 7; n += 8)
2867
0
        {
2868
0
            __m128i xmm = _mm_loadu_si128(
2869
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2870
0
            const auto sign = _mm_srai_epi16(xmm, 15);
2871
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, sign);
2872
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, sign);
2873
0
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2874
0
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2875
0
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2876
0
                          xmm0_f);
2877
0
            _mm_storeu_ps(
2878
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2879
0
        }
2880
0
        for (; n < nWordCount; n++)
2881
0
        {
2882
0
            pDstData[n] = pSrcData[n];
2883
0
        }
2884
0
    }
2885
0
    else
2886
0
    {
2887
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2888
0
                              nDstPixelStride, nWordCount);
2889
0
    }
2890
0
}
2891
2892
template <>
2893
CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData,
2894
                                 int nSrcPixelStride,
2895
                                 double *const CPL_RESTRICT pDstData,
2896
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2897
0
{
2898
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2899
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2900
0
    {
2901
0
        decltype(nWordCount) n = 0;
2902
0
        const __m128i xmm_zero = _mm_setzero_si128();
2903
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2904
0
            reinterpret_cast<GByte *>(pDstData);
2905
0
        for (; n < nWordCount - 7; n += 8)
2906
0
        {
2907
0
            __m128i xmm = _mm_loadu_si128(
2908
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2909
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2910
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2911
2912
0
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2913
0
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2914
0
            xmm0 = _mm_srli_si128(xmm0, 8);
2915
0
            xmm1 = _mm_srli_si128(xmm1, 8);
2916
0
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2917
0
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2918
2919
0
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2920
0
                          xmm0_low_d);
2921
0
            _mm_storeu_pd(
2922
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2923
0
                xmm0_high_d);
2924
0
            _mm_storeu_pd(
2925
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2926
0
                xmm1_low_d);
2927
0
            _mm_storeu_pd(
2928
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2929
0
                xmm1_high_d);
2930
0
        }
2931
0
        for (; n < nWordCount; n++)
2932
0
        {
2933
0
            pDstData[n] = pSrcData[n];
2934
0
        }
2935
0
    }
2936
0
    else
2937
0
    {
2938
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2939
0
                              nDstPixelStride, nWordCount);
2940
0
    }
2941
0
}
2942
2943
template <>
2944
CPL_NOINLINE void GDALCopyWordsT(const int16_t *const CPL_RESTRICT pSrcData,
2945
                                 int nSrcPixelStride,
2946
                                 double *const CPL_RESTRICT pDstData,
2947
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2948
0
{
2949
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2950
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2951
0
    {
2952
0
        decltype(nWordCount) n = 0;
2953
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2954
0
            reinterpret_cast<GByte *>(pDstData);
2955
0
        for (; n < nWordCount - 7; n += 8)
2956
0
        {
2957
0
            __m128i xmm = _mm_loadu_si128(
2958
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2959
0
            const auto sign = _mm_srai_epi16(xmm, 15);
2960
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, sign);
2961
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, sign);
2962
2963
0
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2964
0
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2965
0
            xmm0 = _mm_srli_si128(xmm0, 8);
2966
0
            xmm1 = _mm_srli_si128(xmm1, 8);
2967
0
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2968
0
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2969
2970
0
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2971
0
                          xmm0_low_d);
2972
0
            _mm_storeu_pd(
2973
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2974
0
                xmm0_high_d);
2975
0
            _mm_storeu_pd(
2976
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2977
0
                xmm1_low_d);
2978
0
            _mm_storeu_pd(
2979
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2980
0
                xmm1_high_d);
2981
0
        }
2982
0
        for (; n < nWordCount; n++)
2983
0
        {
2984
0
            pDstData[n] = pSrcData[n];
2985
0
        }
2986
0
    }
2987
0
    else
2988
0
    {
2989
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2990
0
                              nDstPixelStride, nWordCount);
2991
0
    }
2992
0
}
2993
2994
// ---- AVX2 helpers for int32 narrowing (runtime dispatch) ----
2995
2996
#if defined(HAVE_AVX2_DISPATCH)
2997
#if !defined(_MSC_VER)
2998
__attribute__((target("avx2")))
2999
#endif
3000
static void GDALCopyWordsInt32ToUInt8_AVX2(const int32_t *CPL_RESTRICT pSrc,
3001
                                           uint8_t *CPL_RESTRICT pDst,
3002
                                           GPtrDiff_t nWordCount)
3003
0
{
3004
0
    const __m256i permuteIdx = _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7);
3005
0
    GPtrDiff_t n = 0;
3006
0
    for (; n < nWordCount - 31; n += 32)
3007
0
    {
3008
0
        __m256i v0 =
3009
0
            _mm256_loadu_si256(reinterpret_cast<const __m256i *>(pSrc + n));
3010
0
        __m256i v1 =
3011
0
            _mm256_loadu_si256(reinterpret_cast<const __m256i *>(pSrc + n + 8));
3012
0
        __m256i v2 = _mm256_loadu_si256(
3013
0
            reinterpret_cast<const __m256i *>(pSrc + n + 16));
3014
0
        __m256i v3 = _mm256_loadu_si256(
3015
0
            reinterpret_cast<const __m256i *>(pSrc + n + 24));
3016
        // Clamp to [0, 255]
3017
        // Pack int32 -> int16 -> uint8, then fix cross-lane ordering
3018
0
        __m256i ab16 = _mm256_packs_epi32(v0, v1);
3019
0
        __m256i cd16 = _mm256_packs_epi32(v2, v3);
3020
0
        __m256i bytes = _mm256_packus_epi16(ab16, cd16);
3021
0
        bytes = _mm256_permutevar8x32_epi32(bytes, permuteIdx);
3022
0
        _mm256_storeu_si256(reinterpret_cast<__m256i *>(pDst + n), bytes);
3023
0
    }
3024
0
    for (; n < nWordCount; n++)
3025
0
    {
3026
0
        pDst[n] = pSrc[n] <= 0     ? 0
3027
0
                  : pSrc[n] >= 255 ? 255
3028
0
                                   : static_cast<uint8_t>(pSrc[n]);
3029
0
    }
3030
0
}
3031
3032
#if !defined(_MSC_VER)
3033
__attribute__((target("avx2")))
3034
#endif
3035
static void GDALCopyWordsInt32ToUInt16_AVX2(const int32_t *CPL_RESTRICT pSrc,
3036
                                            uint16_t *CPL_RESTRICT pDst,
3037
                                            GPtrDiff_t nWordCount)
3038
0
{
3039
    // _mm256_packus_epi32(v0, v1) produces per-lane interleaved result:
3040
    //   [v0_lo4, v1_lo4, v0_hi4, v1_hi4] (in uint16 pairs per 32-bit lane)
3041
    // Permute to deinterleave: all v0 values first, then all v1 values
3042
0
    const __m256i permuteIdx = _mm256_setr_epi32(0, 1, 4, 5, 2, 3, 6, 7);
3043
0
    GPtrDiff_t n = 0;
3044
0
    for (; n < nWordCount - 15; n += 16)
3045
0
    {
3046
0
        __m256i v0 =
3047
0
            _mm256_loadu_si256(reinterpret_cast<const __m256i *>(pSrc + n));
3048
0
        __m256i v1 =
3049
0
            _mm256_loadu_si256(reinterpret_cast<const __m256i *>(pSrc + n + 8));
3050
        // Clamp to [0, 65535]: _mm256_packus_epi32 saturates uint
3051
0
        __m256i packed = _mm256_packus_epi32(v0, v1);
3052
        // Fix cross-lane interleave from packus
3053
0
        packed = _mm256_permutevar8x32_epi32(packed, permuteIdx);
3054
0
        _mm256_storeu_si256(reinterpret_cast<__m256i *>(pDst + n), packed);
3055
0
    }
3056
0
    for (; n < nWordCount; n++)
3057
0
    {
3058
0
        pDst[n] = pSrc[n] <= 0       ? 0
3059
0
                  : pSrc[n] >= 65535 ? 65535
3060
0
                                     : static_cast<uint16_t>(pSrc[n]);
3061
0
    }
3062
0
}
3063
#endif  // HAVE_AVX2_DISPATCH
3064
3065
// ---- int32 -> uint8 with clamping to [0, 255] ----
3066
template <>
3067
CPL_NOINLINE void GDALCopyWordsT(const int32_t *const CPL_RESTRICT pSrcData,
3068
                                 int nSrcPixelStride,
3069
                                 uint8_t *const CPL_RESTRICT pDstData,
3070
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3071
0
{
3072
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
3073
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
3074
0
    {
3075
0
#if defined(HAVE_AVX2_DISPATCH)
3076
0
        if (CPLHaveRuntimeAVX2())
3077
0
        {
3078
0
            GDALCopyWordsInt32ToUInt8_AVX2(pSrcData, pDstData, nWordCount);
3079
0
            return;
3080
0
        }
3081
0
#endif
3082
0
#ifdef HAVE_SSE2
3083
        // SSE2 path: 16 pixels per iteration
3084
0
        decltype(nWordCount) n = 0;
3085
0
        for (; n < nWordCount - 15; n += 16)
3086
0
        {
3087
0
            __m128i v0 = _mm_loadu_si128(
3088
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
3089
0
            __m128i v1 = _mm_loadu_si128(
3090
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 4));
3091
0
            __m128i v2 = _mm_loadu_si128(
3092
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 8));
3093
0
            __m128i v3 = _mm_loadu_si128(
3094
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 12));
3095
            // Values in [0, 255]: pack int32->int16->uint8
3096
0
            __m128i lo16 = _mm_packs_epi32(v0, v1);
3097
0
            __m128i hi16 = _mm_packs_epi32(v2, v3);
3098
0
            __m128i bytes = _mm_packus_epi16(lo16, hi16);
3099
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), bytes);
3100
0
        }
3101
0
        for (; n < nWordCount; n++)
3102
#else
3103
        for (decltype(nWordCount) n = 0; n < nWordCount; n++)
3104
#endif
3105
0
        {
3106
0
            pDstData[n] = pSrcData[n] <= 0 ? 0
3107
0
                          : pSrcData[n] >= 255
3108
0
                              ? 255
3109
0
                              : static_cast<uint8_t>(pSrcData[n]);
3110
0
        }
3111
0
    }
3112
0
    else
3113
0
    {
3114
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
3115
0
                              nDstPixelStride, nWordCount);
3116
0
    }
3117
0
}
3118
3119
// ---- int32 -> uint16 with clamping to [0, 65535] ----
3120
template <>
3121
CPL_NOINLINE void GDALCopyWordsT(const int32_t *const CPL_RESTRICT pSrcData,
3122
                                 int nSrcPixelStride,
3123
                                 uint16_t *const CPL_RESTRICT pDstData,
3124
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3125
0
{
3126
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
3127
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
3128
0
    {
3129
0
#if defined(HAVE_AVX2_DISPATCH)
3130
0
        if (CPLHaveRuntimeAVX2())
3131
0
        {
3132
0
            GDALCopyWordsInt32ToUInt16_AVX2(pSrcData, pDstData, nWordCount);
3133
0
            return;
3134
0
        }
3135
0
#endif
3136
0
        decltype(nWordCount) n = 0;
3137
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
3138
        // SSE4.1: _mm_packus_epi32 directly handles uint saturation
3139
        for (; n < nWordCount - 7; n += 8)
3140
        {
3141
            __m128i v0 = _mm_loadu_si128(
3142
                reinterpret_cast<const __m128i *>(pSrcData + n));
3143
            __m128i v1 = _mm_loadu_si128(
3144
                reinterpret_cast<const __m128i *>(pSrcData + n + 4));
3145
            __m128i packed = _mm_packus_epi32(v0, v1);
3146
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), packed);
3147
        }
3148
#else
3149
        // SSE2: clamp to [0, 65535], bias to signed range, pack, unbias
3150
0
        const __m128i xmm_65535 = _mm_set1_epi32(65535);
3151
0
        const __m128i xmm_bias32 = _mm_set1_epi32(32768);
3152
0
        const __m128i xmm_bias16 = _mm_set1_epi16(-32768);
3153
0
        for (; n < nWordCount - 7; n += 8)
3154
0
        {
3155
0
            __m128i v0 = _mm_loadu_si128(
3156
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
3157
0
            __m128i v1 = _mm_loadu_si128(
3158
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 4));
3159
            // max(v, 0)
3160
0
            v0 = _mm_andnot_si128(_mm_srai_epi32(v0, 31), v0);
3161
0
            v1 = _mm_andnot_si128(_mm_srai_epi32(v1, 31), v1);
3162
            // min(v, 65535)
3163
0
            __m128i gt0 = _mm_cmpgt_epi32(v0, xmm_65535);
3164
0
            __m128i gt1 = _mm_cmpgt_epi32(v1, xmm_65535);
3165
0
            v0 = _mm_or_si128(_mm_andnot_si128(gt0, v0),
3166
0
                              _mm_and_si128(gt0, xmm_65535));
3167
0
            v1 = _mm_or_si128(_mm_andnot_si128(gt1, v1),
3168
0
                              _mm_and_si128(gt1, xmm_65535));
3169
            // Shift [0, 65535] -> [-32768, 32767] for _mm_packs_epi32
3170
0
            v0 = _mm_sub_epi32(v0, xmm_bias32);
3171
0
            v1 = _mm_sub_epi32(v1, xmm_bias32);
3172
0
            __m128i packed = _mm_packs_epi32(v0, v1);
3173
            // Shift back: sub_epi16(x, -32768) == add 32768 (mod 2^16)
3174
0
            packed = _mm_sub_epi16(packed, xmm_bias16);
3175
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), packed);
3176
0
        }
3177
0
#endif
3178
0
        for (; n < nWordCount; n++)
3179
0
        {
3180
0
            pDstData[n] = pSrcData[n] <= 0 ? 0
3181
0
                          : pSrcData[n] >= 65535
3182
0
                              ? 65535
3183
0
                              : static_cast<uint16_t>(pSrcData[n]);
3184
0
        }
3185
0
    }
3186
0
    else
3187
0
    {
3188
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
3189
0
                              nDstPixelStride, nWordCount);
3190
0
    }
3191
0
}
3192
3193
#endif  // HAVE_SSE2
3194
3195
/** Specialization of GDALCopyWordsT() for double -> GByte.
 *
 * Forwards all arguments unchanged to GDALCopyWordsT_8atatime().
 * NOTE(review): that helper is defined outside this excerpt; judging by its
 * name it presumably converts words in batches of eight - confirm.
 */
template <>
3196
CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
3197
                                 int nSrcPixelStride,
3198
                                 GByte *const CPL_RESTRICT pDstData,
3199
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3200
0
{
3201
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3202
0
                            nDstPixelStride, nWordCount);
3203
0
}
3204
3205
template <>
3206
CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
3207
                                 int nSrcPixelStride,
3208
                                 GUInt16 *const CPL_RESTRICT pDstData,
3209
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3210
0
{
3211
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3212
0
                            nDstPixelStride, nWordCount);
3213
0
}
3214
3215
template <>
3216
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3217
                                 int nSrcPixelStride,
3218
                                 double *const CPL_RESTRICT pDstData,
3219
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3220
0
{
3221
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3222
0
                            nDstPixelStride, nWordCount);
3223
0
}
3224
3225
template <>
3226
CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
3227
                                 int nSrcPixelStride,
3228
                                 float *const CPL_RESTRICT pDstData,
3229
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3230
0
{
3231
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3232
0
                            nDstPixelStride, nWordCount);
3233
0
}
3234
3235
template <>
3236
CPL_NOINLINE void GDALCopyWordsT(const GFloat16 *const CPL_RESTRICT pSrcData,
3237
                                 int nSrcPixelStride,
3238
                                 float *const CPL_RESTRICT pDstData,
3239
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3240
0
{
3241
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3242
0
                            nDstPixelStride, nWordCount);
3243
0
}
3244
3245
template <>
3246
CPL_NOINLINE void GDALCopyWordsT(const GFloat16 *const CPL_RESTRICT pSrcData,
3247
                                 int nSrcPixelStride,
3248
                                 double *const CPL_RESTRICT pDstData,
3249
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3250
0
{
3251
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3252
0
                            nDstPixelStride, nWordCount);
3253
0
}
3254
3255
template <>
3256
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3257
                                 int nSrcPixelStride,
3258
                                 GByte *const CPL_RESTRICT pDstData,
3259
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3260
0
{
3261
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3262
0
                            nDstPixelStride, nWordCount);
3263
0
}
3264
3265
template <>
3266
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3267
                                 int nSrcPixelStride,
3268
                                 GInt8 *const CPL_RESTRICT pDstData,
3269
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3270
0
{
3271
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3272
0
                            nDstPixelStride, nWordCount);
3273
0
}
3274
3275
template <>
3276
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3277
                                 int nSrcPixelStride,
3278
                                 GInt16 *const CPL_RESTRICT pDstData,
3279
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3280
0
{
3281
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3282
0
                            nDstPixelStride, nWordCount);
3283
0
}
3284
3285
template <>
3286
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3287
                                 int nSrcPixelStride,
3288
                                 GUInt16 *const CPL_RESTRICT pDstData,
3289
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3290
0
{
3291
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3292
0
                            nDstPixelStride, nWordCount);
3293
0
}
3294
3295
template <>
3296
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3297
                                 int nSrcPixelStride,
3298
                                 GInt32 *const CPL_RESTRICT pDstData,
3299
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3300
0
{
3301
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3302
0
                            nDstPixelStride, nWordCount);
3303
0
}
3304
3305
template <>
3306
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3307
                                 int nSrcPixelStride,
3308
                                 GFloat16 *const CPL_RESTRICT pDstData,
3309
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3310
0
{
3311
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3312
0
                            nDstPixelStride, nWordCount);
3313
0
}
3314
3315
template <>
3316
CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
3317
                                 int nSrcPixelStride,
3318
                                 GFloat16 *const CPL_RESTRICT pDstData,
3319
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3320
0
{
3321
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3322
0
                            nDstPixelStride, nWordCount);
3323
0
}
3324
3325
/************************************************************************/
3326
/*                       GDALCopyWordsComplexT()                        */
3327
/************************************************************************/
3328
/**
3329
 * Template function, used to copy data from pSrcData into buffer
3330
 * pDstData, with stride nSrcPixelStride in the source data and
3331
 * stride nDstPixelStride in the destination data. Deals with the
3332
 * complex case, where input is complex and output is complex.
3333
 *
3334
 * @param pSrcData the source data buffer
3335
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
3336
 *                      of interest.
3337
 * @param pDstData the destination buffer.
3338
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
3339
 *                      interest.
3340
 * @param nWordCount the total number of pixel words to copy
3341
 *
3342
 */
3343
template <class Tin, class Tout>
3344
inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData,
3345
                                  int nSrcPixelStride,
3346
                                  Tout *const CPL_RESTRICT pDstData,
3347
                                  int nDstPixelStride, GPtrDiff_t nWordCount)
3348
0
{
3349
0
    decltype(nWordCount) nDstOffset = 0;
3350
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
3351
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
3352
3353
0
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
3354
0
    {
3355
0
        const Tin *const pPixelIn =
3356
0
            reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
3357
0
        Tout *const pPixelOut =
3358
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
3359
3360
0
        GDALCopyWord(pPixelIn[0], pPixelOut[0]);
3361
0
        GDALCopyWord(pPixelIn[1], pPixelOut[1]);
3362
3363
0
        nDstOffset += nDstPixelStride;
3364
0
    }
3365
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, short>(unsigned char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, int>(unsigned char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, float>(unsigned char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, double>(unsigned char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, short>(unsigned short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, float>(unsigned short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, double>(unsigned short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, short>(float const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, float>(double const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, double>(double const*, int, double*, int, long long)
3366
3367
/************************************************************************/
3368
/*                      GDALCopyWordsComplexOutT()                      */
3369
/************************************************************************/
3370
/**
3371
 * Template function, used to copy data from pSrcData into buffer
3372
 * pDstData, with stride nSrcPixelStride in the source data and
3373
 * stride nDstPixelStride in the destination data. Deals with the
3374
 * case where the value is real coming in, but complex going out.
3375
 *
3376
 * @param pSrcData the source data buffer
3377
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
3378
 *                      of interest, in bytes.
3379
 * @param pDstData the destination buffer.
3380
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
3381
 *                      interest, in bytes.
3382
 * @param nWordCount the total number of pixel words to copy
3383
 *
3384
 */
3385
template <class Tin, class Tout>
3386
inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData,
3387
                                     int nSrcPixelStride,
3388
                                     Tout *const CPL_RESTRICT pDstData,
3389
                                     int nDstPixelStride, GPtrDiff_t nWordCount)
3390
0
{
3391
0
    decltype(nWordCount) nDstOffset = 0;
3392
3393
0
    const Tout tOutZero = static_cast<Tout>(0);
3394
3395
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
3396
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
3397
3398
0
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
3399
0
    {
3400
0
        const Tin tValue =
3401
0
            *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
3402
0
        Tout *const pPixelOut =
3403
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
3404
0
        GDALCopyWord(tValue, *pPixelOut);
3405
3406
0
        pPixelOut[1] = tOutZero;
3407
3408
0
        nDstOffset += nDstPixelStride;
3409
0
    }
3410
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, short>(unsigned char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, int>(unsigned char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, float>(unsigned char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, double>(unsigned char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, short>(unsigned short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, float>(unsigned short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, double>(unsigned short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, short>(float const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, float>(double const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, double>(double const*, int, double*, int, long long)
3411
3412
/************************************************************************/
3413
/*                         GDALCopyWordsFromT()                         */
3414
/************************************************************************/
3415
/**
3416
 * Template driver function. Given the input type T, call the appropriate
3417
 * GDALCopyWordsT function template for the desired output type. You should
3418
 * never call this function directly (call GDALCopyWords instead).
3419
 *
3420
 * @param pSrcData source data buffer
3421
 * @param nSrcPixelStride pixel stride in input buffer, in bytes
3422
 * @param bInComplex input is complex
3423
 * @param pDstData destination data buffer
3424
 * @param eDstType destination data type
3425
 * @param nDstPixelStride pixel stride in output buffer, in bytes
3426
 * @param nWordCount number of pixel words to be copied
3427
 */
3428
template <class T>
3429
inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
3430
                               int nSrcPixelStride, bool bInComplex,
3431
                               void *CPL_RESTRICT pDstData,
3432
                               GDALDataType eDstType, int nDstPixelStride,
3433
                               GPtrDiff_t nWordCount)
3434
0
{
3435
0
    switch (eDstType)
3436
0
    {
3437
0
        case GDT_UInt8:
3438
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3439
0
                           static_cast<unsigned char *>(pDstData),
3440
0
                           nDstPixelStride, nWordCount);
3441
0
            break;
3442
0
        case GDT_Int8:
3443
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3444
0
                           static_cast<signed char *>(pDstData),
3445
0
                           nDstPixelStride, nWordCount);
3446
0
            break;
3447
0
        case GDT_UInt16:
3448
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3449
0
                           static_cast<unsigned short *>(pDstData),
3450
0
                           nDstPixelStride, nWordCount);
3451
0
            break;
3452
0
        case GDT_Int16:
3453
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3454
0
                           static_cast<short *>(pDstData), nDstPixelStride,
3455
0
                           nWordCount);
3456
0
            break;
3457
0
        case GDT_UInt32:
3458
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3459
0
                           static_cast<unsigned int *>(pDstData),
3460
0
                           nDstPixelStride, nWordCount);
3461
0
            break;
3462
0
        case GDT_Int32:
3463
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3464
0
                           static_cast<int *>(pDstData), nDstPixelStride,
3465
0
                           nWordCount);
3466
0
            break;
3467
0
        case GDT_UInt64:
3468
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3469
0
                           static_cast<std::uint64_t *>(pDstData),
3470
0
                           nDstPixelStride, nWordCount);
3471
0
            break;
3472
0
        case GDT_Int64:
3473
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3474
0
                           static_cast<std::int64_t *>(pDstData),
3475
0
                           nDstPixelStride, nWordCount);
3476
0
            break;
3477
0
        case GDT_Float16:
3478
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3479
0
                           static_cast<GFloat16 *>(pDstData), nDstPixelStride,
3480
0
                           nWordCount);
3481
0
            break;
3482
0
        case GDT_Float32:
3483
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3484
0
                           static_cast<float *>(pDstData), nDstPixelStride,
3485
0
                           nWordCount);
3486
0
            break;
3487
0
        case GDT_Float64:
3488
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3489
0
                           static_cast<double *>(pDstData), nDstPixelStride,
3490
0
                           nWordCount);
3491
0
            break;
3492
0
        case GDT_CInt16:
3493
0
            if (bInComplex)
3494
0
            {
3495
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3496
0
                                      static_cast<short *>(pDstData),
3497
0
                                      nDstPixelStride, nWordCount);
3498
0
            }
3499
0
            else  // input is not complex, so we need to promote to a complex
3500
                  // buffer
3501
0
            {
3502
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3503
0
                                         static_cast<short *>(pDstData),
3504
0
                                         nDstPixelStride, nWordCount);
3505
0
            }
3506
0
            break;
3507
0
        case GDT_CInt32:
3508
0
            if (bInComplex)
3509
0
            {
3510
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3511
0
                                      static_cast<int *>(pDstData),
3512
0
                                      nDstPixelStride, nWordCount);
3513
0
            }
3514
0
            else  // input is not complex, so we need to promote to a complex
3515
                  // buffer
3516
0
            {
3517
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3518
0
                                         static_cast<int *>(pDstData),
3519
0
                                         nDstPixelStride, nWordCount);
3520
0
            }
3521
0
            break;
3522
0
        case GDT_CFloat16:
3523
0
            if (bInComplex)
3524
0
            {
3525
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3526
0
                                      static_cast<GFloat16 *>(pDstData),
3527
0
                                      nDstPixelStride, nWordCount);
3528
0
            }
3529
0
            else  // input is not complex, so we need to promote to a complex
3530
                  // buffer
3531
0
            {
3532
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3533
0
                                         static_cast<GFloat16 *>(pDstData),
3534
0
                                         nDstPixelStride, nWordCount);
3535
0
            }
3536
0
            break;
3537
0
        case GDT_CFloat32:
3538
0
            if (bInComplex)
3539
0
            {
3540
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3541
0
                                      static_cast<float *>(pDstData),
3542
0
                                      nDstPixelStride, nWordCount);
3543
0
            }
3544
0
            else  // input is not complex, so we need to promote to a complex
3545
                  // buffer
3546
0
            {
3547
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3548
0
                                         static_cast<float *>(pDstData),
3549
0
                                         nDstPixelStride, nWordCount);
3550
0
            }
3551
0
            break;
3552
0
        case GDT_CFloat64:
3553
0
            if (bInComplex)
3554
0
            {
3555
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3556
0
                                      static_cast<double *>(pDstData),
3557
0
                                      nDstPixelStride, nWordCount);
3558
0
            }
3559
0
            else  // input is not complex, so we need to promote to a complex
3560
                  // buffer
3561
0
            {
3562
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3563
0
                                         static_cast<double *>(pDstData),
3564
0
                                         nDstPixelStride, nWordCount);
3565
0
            }
3566
0
            break;
3567
0
        case GDT_Unknown:
3568
0
        case GDT_TypeCount:
3569
0
            CPLAssert(false);
3570
0
    }
3571
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned char>(unsigned char const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<signed char>(signed char const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned short>(unsigned short const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<short>(short const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned int>(unsigned int const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<int>(int const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned long>(unsigned long const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<long>(long const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<cpl::Float16>(cpl::Float16 const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<float>(float const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<double>(double const*, int, bool, void*, GDALDataType, int, long long)
3572
3573
}  // end anonymous namespace
3574
3575
/************************************************************************/
3576
/*                         GDALReplicateWord()                          */
3577
/************************************************************************/
3578
3579
template <class T>
3580
inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
3581
                               GPtrDiff_t nWordCount)
3582
0
{
3583
0
    const T valSet = *static_cast<const T *>(pDstData);
3584
0
    if (nDstPixelStride == static_cast<int>(sizeof(T)))
3585
0
    {
3586
0
        T *pDstPtr = static_cast<T *>(pDstData) + 1;
3587
0
        while (nWordCount >= 4)
3588
0
        {
3589
0
            nWordCount -= 4;
3590
0
            pDstPtr[0] = valSet;
3591
0
            pDstPtr[1] = valSet;
3592
0
            pDstPtr[2] = valSet;
3593
0
            pDstPtr[3] = valSet;
3594
0
            pDstPtr += 4;
3595
0
        }
3596
0
        while (nWordCount > 0)
3597
0
        {
3598
0
            --nWordCount;
3599
0
            *pDstPtr = valSet;
3600
0
            pDstPtr++;
3601
0
        }
3602
0
    }
3603
0
    else
3604
0
    {
3605
0
        GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
3606
0
        while (nWordCount > 0)
3607
0
        {
3608
0
            --nWordCount;
3609
0
            *reinterpret_cast<T *>(pabyDstPtr) = valSet;
3610
0
            pabyDstPtr += nDstPixelStride;
3611
0
        }
3612
0
    }
3613
0
}
Unexecuted instantiation: void GDALReplicateWordT<unsigned short>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<short>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<unsigned int>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<int>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<unsigned long>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<long>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<cpl::Float16>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<float>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<double>(void*, int, long long)
3614
3615
static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData,
3616
                              GDALDataType eSrcType,
3617
                              void *CPL_RESTRICT pDstData,
3618
                              GDALDataType eDstType, int nDstPixelStride,
3619
                              GPtrDiff_t nWordCount)
3620
0
{
3621
    /* -----------------------------------------------------------------------
3622
     */
3623
    /* Special case when the source data is always the same value */
3624
    /* (for VRTSourcedRasterBand::IRasterIO and
3625
     * VRTDerivedRasterBand::IRasterIO*/
3626
    /*  for example) */
3627
    /* -----------------------------------------------------------------------
3628
     */
3629
    // Let the general translation case do the necessary conversions
3630
    // on the first destination element.
3631
0
    GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
3632
3633
    // Now copy the first element to the nWordCount - 1 following destination
3634
    // elements.
3635
0
    nWordCount--;
3636
0
    GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
3637
3638
0
    switch (eDstType)
3639
0
    {
3640
0
        case GDT_UInt8:
3641
0
        case GDT_Int8:
3642
0
        {
3643
0
            if (nDstPixelStride == 1)
3644
0
            {
3645
0
                if (nWordCount > 0)
3646
0
                    memset(pabyDstWord,
3647
0
                           *reinterpret_cast<const GByte *>(pDstData),
3648
0
                           nWordCount);
3649
0
            }
3650
0
            else
3651
0
            {
3652
0
                GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
3653
0
                while (nWordCount > 0)
3654
0
                {
3655
0
                    --nWordCount;
3656
0
                    *pabyDstWord = valSet;
3657
0
                    pabyDstWord += nDstPixelStride;
3658
0
                }
3659
0
            }
3660
0
            break;
3661
0
        }
3662
3663
0
#define CASE_DUPLICATE_SIMPLE(enum_type, c_type)                               \
3664
0
    case enum_type:                                                            \
3665
0
    {                                                                          \
3666
0
        GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount);     \
3667
0
        break;                                                                 \
3668
0
    }
3669
3670
0
            CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
3671
0
            CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
3672
0
            CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3673
0
            CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
3674
0
            CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
3675
0
            CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
3676
0
            CASE_DUPLICATE_SIMPLE(GDT_Float16, GFloat16)
3677
0
            CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3678
0
            CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3679
3680
0
#define CASE_DUPLICATE_COMPLEX(enum_type, c_type)                              \
3681
0
    case enum_type:                                                            \
3682
0
    {                                                                          \
3683
0
        c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0];        \
3684
0
        c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1];        \
3685
0
        while (nWordCount > 0)                                                 \
3686
0
        {                                                                      \
3687
0
            --nWordCount;                                                      \
3688
0
            reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1;              \
3689
0
            reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2;              \
3690
0
            pabyDstWord += nDstPixelStride;                                    \
3691
0
        }                                                                      \
3692
0
        break;                                                                 \
3693
0
    }
3694
3695
0
            CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
3696
0
            CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3697
0
            CASE_DUPLICATE_COMPLEX(GDT_CFloat16, GFloat16)
3698
0
            CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3699
0
            CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3700
3701
0
        case GDT_Unknown:
3702
0
        case GDT_TypeCount:
3703
0
            CPLAssert(false);
3704
0
    }
3705
0
}
3706
3707
/************************************************************************/
3708
/*                          GDALUnrolledCopy()                          */
3709
/************************************************************************/
3710
3711
template <class T, int srcStride, int dstStride>
3712
#if defined(__GNUC__) && defined(__AVX2__)
3713
__attribute__((optimize("tree-vectorize")))
3714
#endif
3715
static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
3716
                                           const T *CPL_RESTRICT pSrc,
3717
                                           GPtrDiff_t nIters)
3718
0
{
3719
0
#if !(defined(__GNUC__) && defined(__AVX2__))
3720
0
    if (nIters >= 16)
3721
0
    {
3722
0
        for (GPtrDiff_t i = nIters / 16; i != 0; i--)
3723
0
        {
3724
0
            pDest[0 * dstStride] = pSrc[0 * srcStride];
3725
0
            pDest[1 * dstStride] = pSrc[1 * srcStride];
3726
0
            pDest[2 * dstStride] = pSrc[2 * srcStride];
3727
0
            pDest[3 * dstStride] = pSrc[3 * srcStride];
3728
0
            pDest[4 * dstStride] = pSrc[4 * srcStride];
3729
0
            pDest[5 * dstStride] = pSrc[5 * srcStride];
3730
0
            pDest[6 * dstStride] = pSrc[6 * srcStride];
3731
0
            pDest[7 * dstStride] = pSrc[7 * srcStride];
3732
0
            pDest[8 * dstStride] = pSrc[8 * srcStride];
3733
0
            pDest[9 * dstStride] = pSrc[9 * srcStride];
3734
0
            pDest[10 * dstStride] = pSrc[10 * srcStride];
3735
0
            pDest[11 * dstStride] = pSrc[11 * srcStride];
3736
0
            pDest[12 * dstStride] = pSrc[12 * srcStride];
3737
0
            pDest[13 * dstStride] = pSrc[13 * srcStride];
3738
0
            pDest[14 * dstStride] = pSrc[14 * srcStride];
3739
0
            pDest[15 * dstStride] = pSrc[15 * srcStride];
3740
0
            pDest += 16 * dstStride;
3741
0
            pSrc += 16 * srcStride;
3742
0
        }
3743
0
        nIters = nIters % 16;
3744
0
    }
3745
#else
3746
#pragma GCC unroll 4
3747
#endif
3748
0
    for (GPtrDiff_t i = 0; i < nIters; i++)
3749
0
    {
3750
0
        pDest[i * dstStride] = *pSrc;
3751
0
        pSrc += srcStride;
3752
0
    }
3753
0
}
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 2, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 3, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 4, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 2>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 3>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 4>(short*, short const*, long long)
3754
3755
template <class T, int srcStride, int dstStride>
3756
static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
3757
                                    const T *CPL_RESTRICT pSrc,
3758
                                    GPtrDiff_t nIters)
3759
0
{
3760
0
    GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters);
3761
0
}
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 2, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 3, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 4, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 2>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 3>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 4>(short*, short const*, long long)
3762
3763
#if defined(__AVX2__) && defined(HAVE_SSSE3_AT_COMPILE_TIME) &&                \
3764
    (defined(__x86_64) || defined(_M_X64) || defined(USE_NEON_OPTIMIZATIONS))
3765
3766
template <>
3767
void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3768
                                   const GByte *CPL_RESTRICT pSrc,
3769
                                   GPtrDiff_t nIters)
3770
{
3771
    if (nIters > 16)
3772
    {
3773
        // The SSSE3 variant is slightly faster than what the gcc autovectorizer
3774
        // generates
3775
        GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3776
    }
3777
    else
3778
    {
3779
        for (GPtrDiff_t i = 0; i < nIters; i++)
3780
        {
3781
            pDest[i] = *pSrc;
3782
            pSrc += 3;
3783
        }
3784
    }
3785
}
3786
3787
#elif defined(HAVE_SSE2) && !(defined(__GNUC__) && defined(__AVX2__))
3788
3789
template <>
3790
void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
3791
                                   const GByte *CPL_RESTRICT pSrc,
3792
                                   GPtrDiff_t nIters)
3793
0
{
3794
0
    decltype(nIters) i = 0;
3795
0
    if (nIters > 16)
3796
0
    {
3797
0
        const __m128i xmm_mask = _mm_set1_epi16(0xff);
3798
        // If we were sure that there would always be 1 trailing byte, we could
3799
        // check against nIters - 15
3800
0
        for (; i < nIters - 16; i += 16)
3801
0
        {
3802
0
            __m128i xmm0 =
3803
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3804
0
            __m128i xmm1 =
3805
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3806
            // Set higher 8bit of each int16 packed word to 0
3807
0
            xmm0 = _mm_and_si128(xmm0, xmm_mask);
3808
0
            xmm1 = _mm_and_si128(xmm1, xmm_mask);
3809
            // Pack int16 to uint8 and merge back both vector
3810
0
            xmm0 = _mm_packus_epi16(xmm0, xmm1);
3811
3812
            // Store result
3813
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3814
3815
0
            pSrc += 2 * 16;
3816
0
        }
3817
0
    }
3818
0
    for (; i < nIters; i++)
3819
0
    {
3820
0
        pDest[i] = *pSrc;
3821
0
        pSrc += 2;
3822
0
    }
3823
0
}
3824
3825
static void GDALUnrolledCopy_GByte_3_1_SSE2(GByte *CPL_RESTRICT pDest,
3826
                                            const GByte *CPL_RESTRICT pSrc,
3827
                                            GPtrDiff_t nIters)
3828
0
{
3829
0
    decltype(nIters) i = 0;
3830
0
    const __m128i xmm_mask_ori = _mm_set_epi32(0, 0, 0, 255);
3831
    // If we were sure that there would always be 2 trailing bytes, we could
3832
    // check against nIters - 15
3833
0
    for (; i < nIters - 16; i += 16)
3834
0
    {
3835
0
        __m128i xmm0 =
3836
0
            _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3837
0
        __m128i xmm1 =
3838
0
            _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3839
0
        __m128i xmm2 =
3840
0
            _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3841
3842
0
        auto xmm_mask0 = xmm_mask_ori;
3843
0
        auto xmm_mask1 = _mm_slli_si128(xmm_mask_ori, 6);
3844
0
        auto xmm_mask2 = _mm_slli_si128(xmm_mask_ori, 11);
3845
3846
0
        auto xmm = _mm_and_si128(xmm0, xmm_mask0);
3847
0
        auto xmm_res1 = _mm_and_si128(_mm_slli_si128(xmm1, 4), xmm_mask1);
3848
3849
0
        xmm_mask0 = _mm_slli_si128(xmm_mask0, 1);
3850
0
        xmm_mask1 = _mm_slli_si128(xmm_mask1, 1);
3851
0
        xmm0 = _mm_srli_si128(xmm0, 2);
3852
0
        xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0));
3853
0
        xmm_res1 = _mm_or_si128(
3854
0
            xmm_res1, _mm_and_si128(_mm_slli_si128(xmm1, 2), xmm_mask1));
3855
3856
0
        xmm_mask0 = _mm_slli_si128(xmm_mask0, 1);
3857
0
        xmm_mask1 = _mm_slli_si128(xmm_mask1, 1);
3858
0
        xmm0 = _mm_srli_si128(xmm0, 2);
3859
0
        xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0));
3860
0
        xmm_res1 = _mm_or_si128(xmm_res1, _mm_and_si128(xmm1, xmm_mask1));
3861
3862
0
        xmm_mask0 = _mm_slli_si128(xmm_mask0, 1);
3863
0
        xmm_mask1 = _mm_slli_si128(xmm_mask1, 1);
3864
0
        xmm0 = _mm_srli_si128(xmm0, 2);
3865
0
        xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0));
3866
0
        xmm_res1 = _mm_or_si128(
3867
0
            xmm_res1, _mm_and_si128(_mm_srli_si128(xmm1, 2), xmm_mask1));
3868
3869
0
        xmm_mask0 = _mm_slli_si128(xmm_mask0, 1);
3870
0
        xmm_mask1 = _mm_slli_si128(xmm_mask1, 1);
3871
0
        xmm0 = _mm_srli_si128(xmm0, 2);
3872
0
        xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0));
3873
0
        xmm_res1 = _mm_or_si128(
3874
0
            xmm_res1, _mm_and_si128(_mm_srli_si128(xmm1, 4), xmm_mask1));
3875
0
        xmm = _mm_or_si128(xmm, xmm_res1);
3876
3877
0
        xmm_mask0 = _mm_slli_si128(xmm_mask0, 1);
3878
0
        xmm0 = _mm_srli_si128(xmm0, 2);
3879
0
        xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0));
3880
3881
0
        xmm = _mm_or_si128(xmm,
3882
0
                           _mm_and_si128(_mm_slli_si128(xmm2, 10), xmm_mask2));
3883
3884
0
        xmm_mask2 = _mm_slli_si128(xmm_mask2, 1);
3885
0
        xmm = _mm_or_si128(xmm,
3886
0
                           _mm_and_si128(_mm_slli_si128(xmm2, 8), xmm_mask2));
3887
3888
0
        xmm_mask2 = _mm_slli_si128(xmm_mask2, 1);
3889
0
        xmm = _mm_or_si128(xmm,
3890
0
                           _mm_and_si128(_mm_slli_si128(xmm2, 6), xmm_mask2));
3891
3892
0
        xmm_mask2 = _mm_slli_si128(xmm_mask2, 1);
3893
0
        xmm = _mm_or_si128(xmm,
3894
0
                           _mm_and_si128(_mm_slli_si128(xmm2, 4), xmm_mask2));
3895
3896
0
        xmm_mask2 = _mm_slli_si128(xmm_mask2, 1);
3897
0
        xmm = _mm_or_si128(xmm,
3898
0
                           _mm_and_si128(_mm_slli_si128(xmm2, 2), xmm_mask2));
3899
3900
0
        _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm);
3901
3902
0
        pSrc += 3 * 16;
3903
0
    }
3904
0
    for (; i < nIters; i++)
3905
0
    {
3906
0
        pDest[i] = *pSrc;
3907
0
        pSrc += 3;
3908
0
    }
3909
0
}
3910
3911
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
3912
3913
template <>
3914
void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3915
                                   const GByte *CPL_RESTRICT pSrc,
3916
                                   GPtrDiff_t nIters)
3917
0
{
3918
0
    if (nIters > 16)
3919
0
    {
3920
0
        if (CPLHaveRuntimeSSSE3())
3921
0
        {
3922
0
            GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3923
0
        }
3924
0
        else
3925
0
        {
3926
0
            GDALUnrolledCopy_GByte_3_1_SSE2(pDest, pSrc, nIters);
3927
0
        }
3928
0
    }
3929
0
    else
3930
0
    {
3931
0
        for (GPtrDiff_t i = 0; i < nIters; i++)
3932
0
        {
3933
0
            pDest[i] = *pSrc;
3934
0
            pSrc += 3;
3935
0
        }
3936
0
    }
3937
0
}
3938
3939
#else
3940
3941
template <>
3942
void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3943
                                   const GByte *CPL_RESTRICT pSrc,
3944
                                   GPtrDiff_t nIters)
3945
{
3946
    GDALUnrolledCopy_GByte_3_1_SSE2(pDest, pSrc, nIters);
3947
}
3948
#endif
3949
3950
template <>
3951
void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
3952
                                   const GByte *CPL_RESTRICT pSrc,
3953
                                   GPtrDiff_t nIters)
3954
0
{
3955
0
    decltype(nIters) i = 0;
3956
0
    if (nIters > 16)
3957
0
    {
3958
0
        const __m128i xmm_mask = _mm_set1_epi32(0xff);
3959
        // If we were sure that there would always be 3 trailing bytes, we could
3960
        // check against nIters - 15
3961
0
        for (; i < nIters - 16; i += 16)
3962
0
        {
3963
0
            __m128i xmm0 =
3964
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3965
0
            __m128i xmm1 =
3966
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3967
0
            __m128i xmm2 =
3968
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3969
0
            __m128i xmm3 =
3970
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
3971
            // Set higher 24bit of each int32 packed word to 0
3972
0
            xmm0 = _mm_and_si128(xmm0, xmm_mask);
3973
0
            xmm1 = _mm_and_si128(xmm1, xmm_mask);
3974
0
            xmm2 = _mm_and_si128(xmm2, xmm_mask);
3975
0
            xmm3 = _mm_and_si128(xmm3, xmm_mask);
3976
            // Pack int32 to int16
3977
0
            xmm0 = _mm_packs_epi32(xmm0, xmm1);
3978
0
            xmm2 = _mm_packs_epi32(xmm2, xmm3);
3979
            // Pack int16 to uint8
3980
0
            xmm0 = _mm_packus_epi16(xmm0, xmm2);
3981
3982
            // Store result
3983
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3984
3985
0
            pSrc += 4 * 16;
3986
0
        }
3987
0
    }
3988
0
    for (; i < nIters; i++)
3989
0
    {
3990
0
        pDest[i] = *pSrc;
3991
0
        pSrc += 4;
3992
0
    }
3993
0
}
3994
#endif  // HAVE_SSE2
3995
3996
/************************************************************************/
3997
/*                            GDALFastCopy()                            */
3998
/************************************************************************/
3999
4000
template <class T>
4001
static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
4002
                                const T *CPL_RESTRICT pSrc, int nSrcStride,
4003
                                GPtrDiff_t nIters)
4004
0
{
4005
0
    constexpr int sizeofT = static_cast<int>(sizeof(T));
4006
0
    if (nIters == 1)
4007
0
    {
4008
0
        *pDest = *pSrc;
4009
0
    }
4010
0
    else if (nDestStride == sizeofT)
4011
0
    {
4012
0
        if (nSrcStride == sizeofT)
4013
0
        {
4014
0
            memcpy(pDest, pSrc, nIters * sizeof(T));
4015
0
        }
4016
0
        else if (nSrcStride == 2 * sizeofT)
4017
0
        {
4018
0
            GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
4019
0
        }
4020
0
        else if (nSrcStride == 3 * sizeofT)
4021
0
        {
4022
0
            GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
4023
0
        }
4024
0
        else if (nSrcStride == 4 * sizeofT)
4025
0
        {
4026
0
            GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
4027
0
        }
4028
0
        else
4029
0
        {
4030
0
            while (nIters-- > 0)
4031
0
            {
4032
0
                *pDest = *pSrc;
4033
0
                pSrc += nSrcStride / sizeofT;
4034
0
                pDest++;
4035
0
            }
4036
0
        }
4037
0
    }
4038
0
    else if (nSrcStride == sizeofT)
4039
0
    {
4040
0
        if (nDestStride == 2 * sizeofT)
4041
0
        {
4042
0
            GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
4043
0
        }
4044
0
        else if (nDestStride == 3 * sizeofT)
4045
0
        {
4046
0
            GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
4047
0
        }
4048
0
        else if (nDestStride == 4 * sizeofT)
4049
0
        {
4050
0
            GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
4051
0
        }
4052
0
        else
4053
0
        {
4054
0
            while (nIters-- > 0)
4055
0
            {
4056
0
                *pDest = *pSrc;
4057
0
                pSrc++;
4058
0
                pDest += nDestStride / sizeofT;
4059
0
            }
4060
0
        }
4061
0
    }
4062
0
    else
4063
0
    {
4064
0
        while (nIters-- > 0)
4065
0
        {
4066
0
            *pDest = *pSrc;
4067
0
            pSrc += nSrcStride / sizeofT;
4068
0
            pDest += nDestStride / sizeofT;
4069
0
        }
4070
0
    }
4071
0
}
Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<unsigned char>(unsigned char*, int, unsigned char const*, int, long long)
Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<short>(short*, int, short const*, int, long long)
4072
4073
/************************************************************************/
4074
/*                          GDALFastCopyByte()                          */
4075
/************************************************************************/
4076
4077
static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
4078
                             int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
4079
                             int nDstPixelStride, GPtrDiff_t nWordCount)
4080
0
{
4081
0
    GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride,
4082
0
                 nWordCount);
4083
0
}
4084
4085
/************************************************************************/
4086
/*                           GDALCopyWords()                            */
4087
/************************************************************************/
4088
4089
/**
4090
 * Copy pixel words from buffer to buffer.
4091
 *
4092
 * @see GDALCopyWords64()
4093
 */
4094
void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
4095
                               GDALDataType eSrcType, int nSrcPixelStride,
4096
                               void *CPL_RESTRICT pDstData,
4097
                               GDALDataType eDstType, int nDstPixelStride,
4098
                               int nWordCount)
4099
0
{
4100
0
    GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
4101
0
                    nDstPixelStride, nWordCount);
4102
0
}
4103
4104
/************************************************************************/
4105
/*                          GDALCopyWords64()                           */
4106
/************************************************************************/
4107
4108
/**
4109
 * Copy pixel words from buffer to buffer.
4110
 *
4111
 * This function is used to copy pixel word values from one memory buffer
4112
 * to another, with support for conversion between data types, and differing
4113
 * step factors. The data type conversion is done using the following
4114
 * rules:
4115
 * <ul>
4116
 * <li>Values assigned to a lower range integer type are clipped. For
4117
 * instance assigning GDT_Int16 values to a GDT_UInt8 buffer will cause values
4118
 * less than 0 to be set to 0, and values larger than 255 to be set to 255.
4119
 * </li>
4120
 * <li>
4121
 * Assignment from floating point to integer rounds to closest integer.
4122
 * +Infinity is mapped to the largest integer. -Infinity is mapped to the
4123
 * smallest integer. NaN is mapped to 0.
4124
 * </li>
4125
 * <li>
4126
 * Assignment from non-complex to complex will result in the imaginary part
4127
 * being set to zero on output.
4128
 * </li>
4129
 * <li> Assignment from complex to
4130
 * non-complex will result in the complex portion being lost and the real
4131
 * component being preserved (<i>not magnitude!</i>).
4132
 * </li>
4133
 * </ul>
4134
 *
4135
 * No assumptions are made about the source or destination words occurring
4136
 * on word boundaries.  It is assumed that all values are in native machine
4137
 * byte order.
4138
 *
4139
 * @param pSrcData Pointer to source data to be converted.
4140
 * @param eSrcType the source data type (see GDALDataType enum)
4141
 * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
4142
 * in bytes
4143
 * @param pDstData Pointer to buffer where destination data should go
4144
 * @param eDstType the destination data type (see GDALDataType enum)
4145
 * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
4146
 * words), in bytes
4147
 * @param nWordCount number of words to be copied
4148
 *
4149
 * @note
4150
 * When adding a new data type to GDAL, you must do the following to
4151
 * support it properly within the GDALCopyWords function:
4152
 * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
4153
 *    This should invoke the appropriate GDALCopyWordsFromT wrapper.
4154
 * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
4155
 *    This should call the appropriate GDALCopyWordsT template.
4156
 * 3. If appropriate, overload the appropriate CopyWord template in the
4157
 *    above namespace. This will ensure that any conversion issues are
4158
 *    handled (cases like the float -> int32 case, where the min/max)
4159
 *    values are subject to roundoff error.
4160
 */
4161
4162
void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
                                 GDALDataType eSrcType, int nSrcPixelStride,
                                 void *CPL_RESTRICT pDstData,
                                 GDALDataType eDstType, int nDstPixelStride,
                                 GPtrDiff_t nWordCount)

{
    const int nSrcWordSize = GDALGetDataTypeSizeBytes(eSrcType);
    const int nDstWordSize = GDALGetDataTypeSizeBytes(eDstType);
    if (CPL_UNLIKELY(nSrcWordSize == 0 || nDstWordSize == 0))
    {
        CPLError(CE_Failure, CPLE_NotSupported,
                 "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount "
                 "argument");
        return;
    }

    // Byte-addressed views of both buffers, used by the raw copy paths.
    const GByte *const pabySrc = static_cast<const GByte *>(pSrcData);
    GByte *const pabyDst = static_cast<GByte *>(pDstData);

    /* -------------------------------------------------------------------- */
    /*      On platforms where alignment matters, be careful: unless the    */
    /*      type and stride are identical on both sides, any pointer or     */
    /*      stride that is not a multiple of its word size sends us to a    */
    /*      word-by-word path that only accesses the buffers via memcpy().  */
    /* -------------------------------------------------------------------- */
    const bool bIdenticalLayout =
        eSrcType == eDstType && nSrcPixelStride == nDstPixelStride;
    if (!bIdenticalLayout)
    {
        const bool bMisaligned =
            (reinterpret_cast<uintptr_t>(pSrcData) % nSrcWordSize) != 0 ||
            (reinterpret_cast<uintptr_t>(pDstData) % nDstWordSize) != 0 ||
            (nSrcPixelStride % nSrcWordSize) != 0 ||
            (nDstPixelStride % nDstWordSize) != 0;
        if (bMisaligned)
        {
            if (eSrcType == eDstType)
            {
                // Same type: no conversion needed, move the raw bytes.
                for (GPtrDiff_t iWord = 0; iWord < nWordCount; ++iWord)
                {
                    memcpy(pabyDst + nDstPixelStride * iWord,
                           pabySrc + nSrcPixelStride * iWord, nDstWordSize);
                }
            }
            else
            {
                // The largest we need is for CFloat64 (16 bytes), so 32
                // bytes to be sure to get correctly aligned pointer.
                constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double);
                GByte abySrcScratch[2 * SIZEOF_CFLOAT64];
                GByte abyDstScratch[2 * SIZEOF_CFLOAT64];

                // Advance a pointer to the next address that is a multiple
                // of nAlign (no-op when it is already one).
                const auto alignUp = [](GByte *pabyPtr, int nAlign)
                {
                    const auto nMisalign =
                        reinterpret_cast<uintptr_t>(pabyPtr) % nAlign;
                    return pabyPtr + ((nAlign - nMisalign) % nAlign);
                };
                GByte *const pabyAlignedSrc =
                    alignUp(abySrcScratch, nSrcWordSize);
                GByte *const pabyAlignedDst =
                    alignUp(abyDstScratch, nDstWordSize);

                // Stage each word in the aligned scratch buffers and let a
                // single-word recursive call perform the conversion.
                for (GPtrDiff_t iWord = 0; iWord < nWordCount; ++iWord)
                {
                    memcpy(pabyAlignedSrc, pabySrc + nSrcPixelStride * iWord,
                           nSrcWordSize);
                    GDALCopyWords64(pabyAlignedSrc, eSrcType, 0,
                                    pabyAlignedDst, eDstType, 0, 1);
                    memcpy(pabyDst + nDstPixelStride * iWord, pabyAlignedDst,
                           nDstWordSize);
                }
            }
            return;
        }
    }

    /* -------------------------------------------------------------------- */
    /*      A zero source stride with several words means one source word   */
    /*      is replicated over the whole destination buffer.                */
    /* -------------------------------------------------------------------- */
    if (nSrcPixelStride == 0 && nWordCount > 1)
    {
        GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType,
                          nDstPixelStride, nWordCount);
        return;
    }

    /* -------------------------------------------------------------------- */
    /*      Shortcuts when no type conversion is involved.                  */
    /* -------------------------------------------------------------------- */
    if (eSrcType == eDstType)
    {
        // One-byte words.
        if (eSrcType == GDT_UInt8 || eSrcType == GDT_Int8)
        {
            GDALFastCopy(pabyDst, nDstPixelStride, pabySrc, nSrcPixelStride,
                         nWordCount);
            return;
        }

        // Two-byte words with even strides.
        const bool bEvenStrides =
            (nSrcPixelStride % 2) == 0 && (nDstPixelStride % 2) == 0;
        if (nSrcWordSize == 2 && bEvenStrides)
        {
            GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride,
                         static_cast<const short *>(pSrcData), nSrcPixelStride,
                         nWordCount);
            return;
        }

        // A single word: copy with a compile-time constant size.
        if (nWordCount == 1)
        {
#if defined(CSA_BUILD) || defined(__COVERITY__)
            // Avoid false positives...
            memcpy(pDstData, pSrcData, nSrcWordSize);
#else
            switch (nSrcWordSize)
            {
                case 2:
                    memcpy(pDstData, pSrcData, 2);
                    break;
                case 4:
                    memcpy(pDstData, pSrcData, 4);
                    break;
                case 8:
                    memcpy(pDstData, pSrcData, 8);
                    break;
                default: /* eSrcType == GDT_CFloat64 */
                    memcpy(pDstData, pSrcData, 16);
                    break;
            }
#endif
            return;
        }

        // Packed buffers on both sides: one memcpy() moves everything.
        if (nSrcPixelStride == nDstPixelStride &&
            nSrcPixelStride == nSrcWordSize)
        {
            memcpy(pDstData, pSrcData, nWordCount * nSrcWordSize);
            return;
        }
    }

    /* -------------------------------------------------------------------- */
    /*      General case: dispatch on the source type. The boolean passed   */
    /*      to GDALCopyWordsFromT is true for the complex source types.     */
    /* -------------------------------------------------------------------- */
    switch (eSrcType)
    {
        case GDT_UInt8:
            GDALCopyWordsFromT<unsigned char>(
                static_cast<const unsigned char *>(pSrcData), nSrcPixelStride,
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_Int8:
            GDALCopyWordsFromT<signed char>(
                static_cast<const signed char *>(pSrcData), nSrcPixelStride,
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_UInt16:
            GDALCopyWordsFromT<unsigned short>(
                static_cast<const unsigned short *>(pSrcData), nSrcPixelStride,
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_Int16:
            GDALCopyWordsFromT<short>(
                static_cast<const short *>(pSrcData), nSrcPixelStride, false,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_UInt32:
            GDALCopyWordsFromT<unsigned int>(
                static_cast<const unsigned int *>(pSrcData), nSrcPixelStride,
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_Int32:
            GDALCopyWordsFromT<int>(
                static_cast<const int *>(pSrcData), nSrcPixelStride, false,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_UInt64:
            GDALCopyWordsFromT<std::uint64_t>(
                static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride,
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_Int64:
            GDALCopyWordsFromT<std::int64_t>(
                static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride,
                false, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_Float16:
            GDALCopyWordsFromT<GFloat16>(
                static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, false,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_Float32:
            GDALCopyWordsFromT<float>(
                static_cast<const float *>(pSrcData), nSrcPixelStride, false,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_Float64:
            GDALCopyWordsFromT<double>(
                static_cast<const double *>(pSrcData), nSrcPixelStride, false,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_CInt16:
            GDALCopyWordsFromT<short>(
                static_cast<const short *>(pSrcData), nSrcPixelStride, true,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_CInt32:
            GDALCopyWordsFromT<int>(
                static_cast<const int *>(pSrcData), nSrcPixelStride, true,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_CFloat16:
            GDALCopyWordsFromT<GFloat16>(
                static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, true,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_CFloat32:
            GDALCopyWordsFromT<float>(
                static_cast<const float *>(pSrcData), nSrcPixelStride, true,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        case GDT_CFloat64:
            GDALCopyWordsFromT<double>(
                static_cast<const double *>(pSrcData), nSrcPixelStride, true,
                pDstData, eDstType, nDstPixelStride, nWordCount);
            break;

        // Expected to have been rejected by the zero-size check on entry.
        case GDT_Unknown:
        case GDT_TypeCount:
            CPLAssert(false);
    }
}
4375
4376
/************************************************************************/
4377
/*                            GDALCopyBits()                            */
4378
/************************************************************************/
4379
4380
/**
4381
 * Bitwise word copying.
4382
 *
4383
 * A function for moving sets of partial bytes around.  Loosely
4384
 * speaking this is a bitwise analog to GDALCopyWords().
4385
 *
4386
 * It copies nStepCount "words" where each word is nBitCount bits long.
4387
 * The nSrcStep and nDstStep are the number of bits from the start of one
4388
 * word to the next (same as nBitCount if they are packed).  The nSrcOffset
4389
 * and nDstOffset are the offset into the source and destination buffers
4390
 * to start at, also measured in bits.
4391
 *
4392
 * All bit offsets are assumed to start from the high order bit in a byte
4393
 * (i.e. most significant bit first).  Currently this function is not very
4394
 * optimized, but it may be improved for some common cases in the future
4395
 * as needed.
4396
 *
4397
 * @param pabySrcData the source data buffer.
4398
 * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
4399
 * first word to copy.
4400
 * @param nSrcStep the offset in bits from the start one source word to the
4401
 * start of the next.
4402
 * @param pabyDstData the destination data buffer.
4403
 * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
4404
 * first word to copy over.
4405
 * @param nDstStep the offset in bits from the start one word to the
4406
 * start of the next.
4407
 * @param nBitCount the number of bits in a word to be copied.
4408
 * @param nStepCount the number of words to copy.
4409
 */
4410
4411
void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
4412
                  GByte *pabyDstData, int nDstOffset, int nDstStep,
4413
                  int nBitCount, int nStepCount)
4414
4415
0
{
4416
0
    VALIDATE_POINTER0(pabySrcData, "GDALCopyBits");
4417
4418
0
    for (int iStep = 0; iStep < nStepCount; iStep++)
4419
0
    {
4420
0
        for (int iBit = 0; iBit < nBitCount; iBit++)
4421
0
        {
4422
0
            if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7)))
4423
0
                pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7));
4424
0
            else
4425
0
                pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7));
4426
4427
0
            nSrcOffset++;
4428
0
            nDstOffset++;
4429
0
        }
4430
4431
0
        nSrcOffset += (nSrcStep - nBitCount);
4432
0
        nDstOffset += (nDstStep - nBitCount);
4433
0
    }
4434
0
}
4435
4436
/************************************************************************/
4437
/*                    GDALGetBestOverviewLevel()                        */
4438
/*                                                                      */
4439
/* Returns the best overview level to satisfy the query or -1 if none   */
4440
/* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when        */
4441
/* returning a valid overview level                                     */
4442
/************************************************************************/
4443
4444
int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff,
4445
                                 int &nXSize, int &nYSize, int nBufXSize,
4446
                                 int nBufYSize)
4447
0
{
4448
0
    return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize,
4449
0
                                         nBufXSize, nBufYSize, nullptr);
4450
0
}
4451
4452
int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff,
4453
                                  int &nYOff, int &nXSize, int &nYSize,
4454
                                  int nBufXSize, int nBufYSize,
4455
                                  GDALRasterIOExtraArg *psExtraArg)
4456
0
{
4457
0
    if (psExtraArg != nullptr && psExtraArg->nVersion > 1 &&
4458
0
        psExtraArg->bUseOnlyThisScale)
4459
0
        return -1;
4460
    /* -------------------------------------------------------------------- */
4461
    /*      Compute the desired downsampling factor.  It is                 */
4462
    /*      based on the least reduced axis, and represents the number      */
4463
    /*      of source pixels to one destination pixel.                      */
4464
    /* -------------------------------------------------------------------- */
4465
0
    const double dfDesiredDownsamplingFactor =
4466
0
        ((nXSize / static_cast<double>(nBufXSize)) <
4467
0
             (nYSize / static_cast<double>(nBufYSize)) ||
4468
0
         nBufYSize == 1)
4469
0
            ? nXSize / static_cast<double>(nBufXSize)
4470
0
            : nYSize / static_cast<double>(nBufYSize);
4471
4472
    /* -------------------------------------------------------------------- */
4473
    /*      Find the overview level that largest downsampling factor (most  */
4474
    /*      downsampled) that is still less than (or only a little more)    */
4475
    /*      downsampled than the request.                                   */
4476
    /* -------------------------------------------------------------------- */
4477
0
    const int nOverviewCount = poBand->GetOverviewCount();
4478
0
    GDALRasterBand *poBestOverview = nullptr;
4479
0
    double dfBestDownsamplingFactor = 0;
4480
0
    int nBestOverviewLevel = -1;
4481
4482
0
    const char *pszOversampligThreshold =
4483
0
        CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr);
4484
4485
    // Note: keep this logic for overview selection in sync between
4486
    // gdalwarp_lib.cpp and rasterio.cpp
4487
    // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693
4488
0
    const double dfOversamplingThreshold =
4489
0
        pszOversampligThreshold ? CPLAtof(pszOversampligThreshold)
4490
0
        : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour
4491
0
            ? 1.0
4492
0
            : 1.2;
4493
0
    for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
4494
0
    {
4495
0
        GDALRasterBand *poOverview = poBand->GetOverview(iOverview);
4496
0
        if (poOverview == nullptr ||
4497
0
            poOverview->GetXSize() > poBand->GetXSize() ||
4498
0
            poOverview->GetYSize() > poBand->GetYSize())
4499
0
        {
4500
0
            continue;
4501
0
        }
4502
4503
        // Compute downsampling factor of this overview
4504
0
        const double dfDownsamplingFactor = std::min(
4505
0
            poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()),
4506
0
            poBand->GetYSize() / static_cast<double>(poOverview->GetYSize()));
4507
4508
        // Is it nearly the requested factor and better (lower) than
4509
        // the current best factor?
4510
        // Use an epsilon because of numerical instability.
4511
0
        constexpr double EPSILON = 1e-1;
4512
0
        if (dfDownsamplingFactor >=
4513
0
                dfDesiredDownsamplingFactor * dfOversamplingThreshold +
4514
0
                    EPSILON ||
4515
0
            dfDownsamplingFactor <= dfBestDownsamplingFactor)
4516
0
        {
4517
0
            continue;
4518
0
        }
4519
4520
        // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
4521
0
        const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING");
4522
4523
0
        if (pszResampling != nullptr &&
4524
0
            STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2"))
4525
0
            continue;
4526
4527
        // OK, this is our new best overview.
4528
0
        poBestOverview = poOverview;
4529
0
        nBestOverviewLevel = iOverview;
4530
0
        dfBestDownsamplingFactor = dfDownsamplingFactor;
4531
4532
0
        if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) <
4533
0
            EPSILON)
4534
0
        {
4535
0
            break;
4536
0
        }
4537
0
    }
4538
4539
    /* -------------------------------------------------------------------- */
4540
    /*      If we didn't find an overview that helps us, just return        */
4541
    /*      indicating failure and the full resolution image will be used.  */
4542
    /* -------------------------------------------------------------------- */
4543
0
    if (nBestOverviewLevel < 0)
4544
0
        return -1;
4545
4546
    /* -------------------------------------------------------------------- */
4547
    /*      Recompute the source window in terms of the selected            */
4548
    /*      overview.                                                       */
4549
    /* -------------------------------------------------------------------- */
4550
0
    const double dfXFactor =
4551
0
        poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize());
4552
0
    const double dfYFactor =
4553
0
        poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize());
4554
0
    CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(),
4555
0
             poBestOverview->GetYSize());
4556
4557
0
    const int nOXOff = std::min(poBestOverview->GetXSize() - 1,
4558
0
                                static_cast<int>(nXOff / dfXFactor + 0.5));
4559
0
    const int nOYOff = std::min(poBestOverview->GetYSize() - 1,
4560
0
                                static_cast<int>(nYOff / dfYFactor + 0.5));
4561
0
    int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5));
4562
0
    int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5));
4563
0
    if (nOXOff + nOXSize > poBestOverview->GetXSize())
4564
0
        nOXSize = poBestOverview->GetXSize() - nOXOff;
4565
0
    if (nOYOff + nOYSize > poBestOverview->GetYSize())
4566
0
        nOYSize = poBestOverview->GetYSize() - nOYOff;
4567
4568
0
    if (psExtraArg)
4569
0
    {
4570
0
        if (psExtraArg->bFloatingPointWindowValidity)
4571
0
        {
4572
0
            psExtraArg->dfXOff /= dfXFactor;
4573
0
            psExtraArg->dfXSize /= dfXFactor;
4574
0
            psExtraArg->dfYOff /= dfYFactor;
4575
0
            psExtraArg->dfYSize /= dfYFactor;
4576
0
        }
4577
0
        else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
4578
0
        {
4579
0
            psExtraArg->bFloatingPointWindowValidity = true;
4580
0
            psExtraArg->dfXOff = nXOff / dfXFactor;
4581
0
            psExtraArg->dfXSize = nXSize / dfXFactor;
4582
0
            psExtraArg->dfYOff = nYOff / dfYFactor;
4583
0
            psExtraArg->dfYSize = nYSize / dfYFactor;
4584
0
        }
4585
0
    }
4586
4587
0
    nXOff = nOXOff;
4588
0
    nYOff = nOYOff;
4589
0
    nXSize = nOXSize;
4590
0
    nYSize = nOYSize;
4591
4592
0
    return nBestOverviewLevel;
4593
0
}
4594
4595
/************************************************************************/
4596
/*                          OverviewRasterIO()                          */
4597
/*                                                                      */
4598
/*      Special work function to utilize available overviews to         */
4599
/*      more efficiently satisfy downsampled requests.  It will         */
4600
/*      return CE_Failure if there are no appropriate overviews         */
4601
/*      available but it doesn't emit any error messages.               */
4602
/************************************************************************/
4603
4604
//! @cond Doxygen_Suppress
4605
CPLErr GDALRasterBand::OverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)

{
    // Work on a private copy of the extra arguments: the overview selection
    // below may modify them.
    GDALRasterIOExtraArg sOvrExtraArg;
    GDALCopyRasterIOExtraArg(&sOvrExtraArg, psExtraArg);

    // On success, nXOff/nYOff/nXSize/nYSize are updated in place to the
    // window expressed in the overview.
    const int iLevel = GDALBandGetBestOverviewLevel2(
        this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize,
        &sOvrExtraArg);

    // No suitable level, or the level cannot be fetched: fail without
    // emitting an error message.
    GDALRasterBand *const poOvrBand =
        (iLevel >= 0) ? GetOverview(iLevel) : nullptr;
    if (poOvrBand == nullptr)
        return CE_Failure;

    /* -------------------------------------------------------------------- */
    /*      Recast the call in terms of the overview band.                  */
    /* -------------------------------------------------------------------- */
    return poOvrBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
                               nBufXSize, nBufYSize, eBufType, nPixelSpace,
                               nLineSpace, &sOvrExtraArg);
}
4630
4631
/************************************************************************/
4632
/*                        TryOverviewRasterIO()                         */
4633
/************************************************************************/
4634
4635
CPLErr GDALRasterBand::TryOverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg,
    int *pbTried)
{
    // Private copies of the extra arguments and of the window: the overview
    // selection may rewrite both.
    GDALRasterIOExtraArg sOvrExtraArg;
    GDALCopyRasterIOExtraArg(&sOvrExtraArg, psExtraArg);

    int nOvrXOff = nXOff;
    int nOvrYOff = nYOff;
    int nOvrXSize = nXSize;
    int nOvrYSize = nYSize;

    const int iLevel = GDALBandGetBestOverviewLevel2(
        this, nOvrXOff, nOvrYOff, nOvrXSize, nOvrYSize, nBufXSize, nBufYSize,
        &sOvrExtraArg);

    GDALRasterBand *const poOvrBand =
        (iLevel >= 0) ? GetOverview(iLevel) : nullptr;
    if (poOvrBand == nullptr)
    {
        // Nothing was attempted: not an error, *pbTried tells the caller.
        *pbTried = FALSE;
        return CE_None;
    }

    // Delegate the request to the overview band, using the rescaled window.
    *pbTried = TRUE;
    return poOvrBand->RasterIO(eRWFlag, nOvrXOff, nOvrYOff, nOvrXSize,
                               nOvrYSize, pData, nBufXSize, nBufYSize,
                               eBufType, nPixelSpace, nLineSpace,
                               &sOvrExtraArg);
}
4669
4670
/************************************************************************/
4671
/*                        TryOverviewRasterIO()                         */
4672
/************************************************************************/
4673
4674
CPLErr GDALDataset::TryOverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg,
    int *pbTried)
{
    // Private copies of the window and of the extra arguments: the overview
    // selection may rewrite both.
    int nXOffMod = nXOff;
    int nYOffMod = nYOff;
    int nXSizeMod = nXSize;
    int nYSizeMod = nYSize;
    GDALRasterIOExtraArg sExtraArg;
    GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);

    // The overview level is chosen from the first band of the dataset only.
    GDALRasterBand *const poFirstBand = papoBands[0];
    const int iOvrLevel = GDALBandGetBestOverviewLevel2(
        poFirstBand, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize,
        nBufYSize, &sExtraArg);

    // Look up the overview band and its owning dataset once, rather than
    // repeating the GetOverview()/GetDataset() calls for each use.
    GDALRasterBand *const poOvrBand =
        (iOvrLevel >= 0) ? poFirstBand->GetOverview(iOvrLevel) : nullptr;
    GDALDataset *const poOvrDS =
        (poOvrBand != nullptr) ? poOvrBand->GetDataset() : nullptr;
    if (poOvrDS == nullptr)
    {
        // Nothing was attempted: not an error, *pbTried tells the caller.
        *pbTried = FALSE;
        return CE_None;
    }

    // Delegate the whole multi-band request to the overview dataset.
    *pbTried = TRUE;
    return poOvrDS->RasterIO(eRWFlag, nXOffMod, nYOffMod, nXSizeMod,
                             nYSizeMod, pData, nBufXSize, nBufYSize, eBufType,
                             nBandCount, panBandMap, nPixelSpace, nLineSpace,
                             nBandSpace, &sExtraArg);
}
4707
4708
/************************************************************************/
4709
/*                        GetBestOverviewLevel()                        */
4710
/*                                                                      */
4711
/* Returns the best overview level to satisfy the query or -1 if none   */
4712
/* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid     */
4713
/* overview level                                                       */
4714
/************************************************************************/
4715
4716
static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff,
4717
                                           int &nYOff, int &nXSize, int &nYSize,
4718
                                           int nBufXSize, int nBufYSize,
4719
                                           int nBandCount,
4720
                                           const int *panBandMap,
4721
                                           GDALRasterIOExtraArg *psExtraArg)
4722
0
{
4723
0
    int nOverviewCount = 0;
4724
0
    GDALRasterBand *poFirstBand = nullptr;
4725
4726
    /* -------------------------------------------------------------------- */
4727
    /* Check that all bands have the same number of overviews and           */
4728
    /* that they have all the same size and block dimensions                */
4729
    /* -------------------------------------------------------------------- */
4730
0
    for (int iBand = 0; iBand < nBandCount; iBand++)
4731
0
    {
4732
0
        GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]);
4733
0
        if (poBand == nullptr)
4734
0
            return -1;
4735
0
        if (iBand == 0)
4736
0
        {
4737
0
            poFirstBand = poBand;
4738
0
            nOverviewCount = poBand->GetOverviewCount();
4739
0
        }
4740
0
        else if (nOverviewCount != poBand->GetOverviewCount())
4741
0
        {
4742
0
            CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
4743
0
                             "mismatched overview count, use std method.");
4744
0
            return -1;
4745
0
        }
4746
0
        else
4747
0
        {
4748
0
            for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
4749
0
            {
4750
0
                GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview);
4751
0
                GDALRasterBand *poOvrFirstBand =
4752
0
                    poFirstBand->GetOverview(iOverview);
4753
0
                if (poOvrBand == nullptr || poOvrFirstBand == nullptr)
4754
0
                    continue;
4755
4756
0
                if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
4757
0
                    poOvrFirstBand->GetYSize() != poOvrBand->GetYSize())
4758
0
                {
4759
0
                    CPLDebug("GDAL",
4760
0
                             "GDALDataset::GetBestOverviewLevel() ... "
4761
0
                             "mismatched overview sizes, use std method.");
4762
0
                    return -1;
4763
0
                }
4764
0
                int nBlockXSizeFirst = 0;
4765
0
                int nBlockYSizeFirst = 0;
4766
0
                poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst,
4767
0
                                             &nBlockYSizeFirst);
4768
4769
0
                int nBlockXSizeCurrent = 0;
4770
0
                int nBlockYSizeCurrent = 0;
4771
0
                poOvrBand->GetBlockSize(&nBlockXSizeCurrent,
4772
0
                                        &nBlockYSizeCurrent);
4773
4774
0
                if (nBlockXSizeFirst != nBlockXSizeCurrent ||
4775
0
                    nBlockYSizeFirst != nBlockYSizeCurrent)
4776
0
                {
4777
0
                    CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
4778
0
                                     "mismatched block sizes, use std method.");
4779
0
                    return -1;
4780
0
                }
4781
0
            }
4782
0
        }
4783
0
    }
4784
0
    if (poFirstBand == nullptr)
4785
0
        return -1;
4786
4787
0
    return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize,
4788
0
                                         nYSize, nBufXSize, nBufYSize,
4789
0
                                         psExtraArg);
4790
0
}
4791
4792
/************************************************************************/
4793
/*                         BlockBasedRasterIO()                         */
4794
/*                                                                      */
4795
/*      This convenience function implements a dataset level            */
4796
/*      RasterIO() interface based on calling down to fetch blocks,     */
4797
/*      much like the GDALRasterBand::IRasterIO(), but it handles       */
4798
/*      all bands at once, so that a format driver that handles a       */
4799
/*      request for different bands of the same block efficiently       */
4800
/*      (i.e. without re-reading interleaved data) will perform well.   */
4801
/*                                                                      */
4802
/*      This method is intended to be called by an overridden           */
4803
/*      IRasterIO() method in the driver specific GDALDataset           */
4804
/*      derived class.                                                  */
4805
/*                                                                      */
4806
/*      Default internal implementation of RasterIO() ... utilizes      */
4807
/*      the Block access methods to satisfy the request.  This would    */
4808
/*      normally only be overridden by formats with overviews.          */
4809
/*                                                                      */
4810
/*      To keep things relatively simple, this method does not          */
4811
/*      currently take advantage of some special cases addressed in     */
4812
/*      GDALRasterBand::IRasterIO(), so it is likely best to only       */
4813
/*      call it when you know it will help.  That is in cases where     */
4814
/*      data is at 1:1 to the buffer, and you know the driver is        */
4815
/*      implementing interleaved IO efficiently on a block by block     */
4816
/*      basis. Overviews will be used when possible.                    */
4817
/************************************************************************/
4818
4819
CPLErr GDALDataset::BlockBasedRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)

{
    // Overall flow:
    //   1. Validate that the requested bands share a block size (and, when
    //      the generic pixel loop may be used, a data type); otherwise
    //      delegate to BandBasedRasterIO().
    //   2. 1:1 integer-aligned requests: walk the window block by block and
    //      call each band's IRasterIO() for that chunk.
    //   3. Otherwise (nearest-neighbour resampling only): pick an overview
    //      level if possible and copy pixel by pixel from locked blocks.
    // Returns CE_None on success, CE_Failure on error or when the progress
    // callback requests cancellation, or whatever BandBasedRasterIO() /
    // IRasterIO() returned when the request was delegated.

    CPLAssert(nullptr != pData);

    int nBlockXSize = 1;
    int nBlockYSize = 1;
    GDALDataType eDataType = GDT_UInt8;

    const bool bUseIntegerRequestCoords =
        (!psExtraArg->bFloatingPointWindowValidity ||
         (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
          nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));

    // True when the request can be served by the per-block IRasterIO() path
    // below, which converts data types band by band.
    const bool bIsOneToOne = nXSize == nBufXSize && nYSize == nBufYSize &&
                             bUseIntegerRequestCoords;

    /* -------------------------------------------------------------------- */
    /*      Ensure that all bands share a common block size and data type.  */
    /* -------------------------------------------------------------------- */
    for (int iBand = 0; iBand < nBandCount; iBand++)
    {
        GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
        // Invalid band number: fail instead of dereferencing a null band.
        if (poBand == nullptr)
            return CE_Failure;

        if (iBand == 0)
        {
            poBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
            eDataType = poBand->GetRasterDataType();
            continue;
        }

        int nThisBlockXSize = 0;
        int nThisBlockYSize = 0;
        poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize);
        if (nThisBlockXSize != nBlockXSize || nThisBlockYSize != nBlockYSize)
        {
            CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
                             "mismatched block sizes, use std method.");
            return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
                                     pData, nBufXSize, nBufYSize, eBufType,
                                     nBandCount, panBandMap, nPixelSpace,
                                     nLineSpace, nBandSpace, psExtraArg);
        }

        // The generic pixel loop interprets every band's block with the
        // first band's data type, so mixed types are only acceptable when
        // the 1:1 path is going to be taken. This includes same-size
        // requests whose floating-point window is not integer aligned:
        // they bypass the 1:1 path too.
        if (!bIsOneToOne && eDataType != poBand->GetRasterDataType())
        {
            CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
                             "mismatched band data types, use std method.");
            return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
                                     pData, nBufXSize, nBufYSize, eBufType,
                                     nBandCount, panBandMap, nPixelSpace,
                                     nLineSpace, nBandSpace, psExtraArg);
        }
    }

    /* ==================================================================== */
    /*      In this special case at full resolution we step through in      */
    /*      blocks, turning the request over to the per-band                */
    /*      IRasterIO(), but ensuring that all bands of one block are       */
    /*      called before proceeding to the next.                           */
    /* ==================================================================== */

    if (bIsOneToOne)
    {
        GDALRasterIOExtraArg sDummyExtraArg;
        INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);

        int nChunkYSize = 0;
        for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize)
        {
            // Chunk = part of the window falling into one row of blocks.
            const int nChunkYOff = iBufYOff + nYOff;
            nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
            if (nChunkYOff + nChunkYSize > nYOff + nYSize)
                nChunkYSize = (nYOff + nYSize) - nChunkYOff;

            int nChunkXSize = 0;
            for (int iBufXOff = 0; iBufXOff < nBufXSize;
                 iBufXOff += nChunkXSize)
            {
                const int nChunkXOff = iBufXOff + nXOff;
                nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
                if (nChunkXOff + nChunkXSize > nXOff + nXSize)
                    nChunkXSize = (nXOff + nXSize) - nChunkXOff;

                GByte *pabyChunkData =
                    static_cast<GByte *>(pData) + iBufXOff * nPixelSpace +
                    static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;

                for (int iBand = 0; iBand < nBandCount; iBand++)
                {
                    GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);

                    const CPLErr eChunkErr = poBand->IRasterIO(
                        eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize,
                        nChunkYSize,
                        pabyChunkData +
                            static_cast<GPtrDiff_t>(iBand) * nBandSpace,
                        nChunkXSize, nChunkYSize, eBufType, nPixelSpace,
                        nLineSpace, &sDummyExtraArg);
                    if (eChunkErr != CE_None)
                        return eChunkErr;
                }
            }

            // Report progress once per row of blocks; a FALSE return from
            // the callback aborts the operation.
            if (psExtraArg->pfnProgress != nullptr &&
                !psExtraArg->pfnProgress(
                    1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) /
                        nBufYSize,
                    "", psExtraArg->pProgressData))
            {
                return CE_Failure;
            }
        }

        return CE_None;
    }

    /* Below code is not compatible with that case. It would need a complete */
    /* separate code like done in GDALRasterBand::IRasterIO. */
    if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
    {
        return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
                                 nBufXSize, nBufYSize, eBufType, nBandCount,
                                 panBandMap, nPixelSpace, nLineSpace,
                                 nBandSpace, psExtraArg);
    }

    /* We could have a smarter implementation, but that will do for now */
    if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
        (nBufXSize != nXSize || nBufYSize != nYSize))
    {
        return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
                                 nBufXSize, nBufYSize, eBufType, nBandCount,
                                 panBandMap, nPixelSpace, nLineSpace,
                                 nBandSpace, psExtraArg);
    }

    /* ==================================================================== */
    /*      Loop reading required source blocks to satisfy output           */
    /*      request.  This is the most general implementation.              */
    /* ==================================================================== */

    const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);

    // Per-band currently locked block and pointer to its data.
    GByte **papabySrcBlock =
        static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount));
    GDALRasterBlock **papoBlocks =
        static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount));

    /* -------------------------------------------------------------------- */
    /*      Select an overview level if appropriate.                        */
    /* -------------------------------------------------------------------- */

    GDALRasterIOExtraArg sExtraArg;
    GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
    const int nOverviewLevel = GDALDatasetGetBestOverviewLevel(
        this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount,
        panBandMap, &sExtraArg);

    // Dimensions of the raster the source coordinates refer to. When an
    // overview is selected the window has been rewritten in overview
    // coordinates, so source pixels must be clamped against the overview
    // size rather than the full-resolution size.
    int nSrcRasterXSize = nRasterXSize;
    int nSrcRasterYSize = nRasterYSize;
    if (nOverviewLevel >= 0)
    {
        GDALRasterBand *poOvrBand =
            GetRasterBand(panBandMap[0])->GetOverview(nOverviewLevel);
        poOvrBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
        nSrcRasterXSize = poOvrBand->GetXSize();
        nSrcRasterYSize = poOvrBand->GetYSize();
    }

    double dfXOff = nXOff;
    double dfYOff = nYOff;
    double dfXSize = nXSize;
    double dfYSize = nYSize;
    if (sExtraArg.bFloatingPointWindowValidity)
    {
        dfXOff = sExtraArg.dfXOff;
        dfYOff = sExtraArg.dfYOff;
        dfXSize = sExtraArg.dfXSize;
        dfYSize = sExtraArg.dfYSize;
    }

    /* -------------------------------------------------------------------- */
    /*      Compute stepping increment.                                     */
    /* -------------------------------------------------------------------- */
    const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
    const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);

    constexpr double EPS = 1e-10;

    // Block currently held in papoBlocks[]; -1 forces a load on first use.
    int nLBlockX = -1;
    int nLBlockY = -1;
    CPLErr eErr = CE_None;

    /* -------------------------------------------------------------------- */
    /*      Loop over buffer computing source locations.                    */
    /* -------------------------------------------------------------------- */
    for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
    {
        // Add small epsilon to avoid some numeric precision issues.
        const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
        const int iSrcY = static_cast<int>(std::min(
            std::max(0.0, dfSrcY), static_cast<double>(nSrcRasterYSize - 1)));

        GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
                                static_cast<GPtrDiff_t>(nLineSpace);

        for (int iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++)
        {
            const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
            const int iSrcX = static_cast<int>(
                std::min(std::max(0.0, dfSrcX),
                         static_cast<double>(nSrcRasterXSize - 1)));

            // FIXME: this code likely doesn't work if the dirty block gets
            // flushed to disk before being completely written. In the meantime,
            // bJustInitialize should probably be set to FALSE even if it is not
            // ideal performance wise, and for lossy compression

            /* ------------------------------------------------------------ */
            /*      Ensure we have the appropriate block loaded.            */
            /* ------------------------------------------------------------ */
            if (iSrcX < nLBlockX * nBlockXSize ||
                iSrcX - nBlockXSize >= nLBlockX * nBlockXSize ||
                iSrcY < nLBlockY * nBlockYSize ||
                iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
            {
                nLBlockX = iSrcX / nBlockXSize;
                nLBlockY = iSrcY / nBlockYSize;

                // When writing a block fully covered by the window there is
                // no need to read its previous content.
                const bool bJustInitialize =
                    eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
                    nXOff <= nLBlockX * nBlockXSize &&
                    nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;

                for (int iBand = 0; iBand < nBandCount; iBand++)
                {
                    GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
                    if (nOverviewLevel >= 0)
                    {
                        poBand = poBand->GetOverview(nOverviewLevel);
                        // The overview consistency check skips levels that
                        // are missing on a band, so guard against that here.
                        if (poBand == nullptr)
                        {
                            eErr = CE_Failure;
                            goto CleanupAndReturn;
                        }
                    }

                    GDALRasterBlock *poBlock = poBand->GetLockedBlockRef(
                        nLBlockX, nLBlockY, bJustInitialize);
                    if (poBlock == nullptr)
                    {
                        eErr = CE_Failure;
                        goto CleanupAndReturn;
                    }

                    if (eRWFlag == GF_Write)
                        poBlock->MarkDirty();

                    // Release the previously held block of this band.
                    if (papoBlocks[iBand] != nullptr)
                        papoBlocks[iBand]->DropLock();

                    papoBlocks[iBand] = poBlock;

                    papabySrcBlock[iBand] =
                        static_cast<GByte *>(poBlock->GetDataRef());
                }
            }

            /* ------------------------------------------------------------ */
            /*      Copy over this pixel of data.                           */
            /* ------------------------------------------------------------ */
            const GPtrDiff_t iSrcOffset =
                (static_cast<GPtrDiff_t>(iSrcX) -
                 static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
                 (static_cast<GPtrDiff_t>(iSrcY) -
                  static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
                     nBlockXSize) *
                nBandDataSize;

            for (int iBand = 0; iBand < nBandCount; iBand++)
            {
                GByte *pabySrcBlock = papabySrcBlock[iBand];
                const GPtrDiff_t iBandBufOffset =
                    iBufOffset + static_cast<GPtrDiff_t>(iBand) *
                                     static_cast<GPtrDiff_t>(nBandSpace);

                if (eDataType == eBufType)
                {
                    if (eRWFlag == GF_Read)
                        memcpy(static_cast<GByte *>(pData) + iBandBufOffset,
                               pabySrcBlock + iSrcOffset, nBandDataSize);
                    else
                        memcpy(pabySrcBlock + iSrcOffset,
                               static_cast<const GByte *>(pData) +
                                   iBandBufOffset,
                               nBandDataSize);
                }
                else
                {
                    /* type to type conversion ... ouch, this is expensive way
                       of handling single words */

                    if (eRWFlag == GF_Read)
                        GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
                                        static_cast<GByte *>(pData) +
                                            iBandBufOffset,
                                        eBufType, 0, 1);
                    else
                        GDALCopyWords64(static_cast<const GByte *>(pData) +
                                            iBandBufOffset,
                                        eBufType, 0, pabySrcBlock + iSrcOffset,
                                        eDataType, 0, 1);
                }
            }

            // Keep the full 64-bit pixel spacing: narrowing it to int would
            // corrupt the offset for very large spacings.
            iBufOffset += static_cast<GPtrDiff_t>(nPixelSpace);
        }
    }

    /* -------------------------------------------------------------------- */
    /*      CleanupAndReturn.                                               */
    /* -------------------------------------------------------------------- */
CleanupAndReturn:
    CPLFree(papabySrcBlock);
    if (papoBlocks != nullptr)
    {
        for (int iBand = 0; iBand < nBandCount; iBand++)
        {
            if (papoBlocks[iBand] != nullptr)
                papoBlocks[iBand]->DropLock();
        }
        CPLFree(papoBlocks);
    }

    return eErr;
}
5174
5175
//! @endcond
5176
5177
/************************************************************************/
5178
/*                  GDALCopyWholeRasterGetSwathSize()                   */
5179
/************************************************************************/
5180
5181
static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand,
5182
                                            GDALRasterBand *poDstPrototypeBand,
5183
                                            int nBandCount,
5184
                                            int bDstIsCompressed,
5185
                                            int bInterleave, int *pnSwathCols,
5186
                                            int *pnSwathLines)
5187
0
{
5188
0
    GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
5189
0
    int nSrcBlockXSize = 0;
5190
0
    int nSrcBlockYSize = 0;
5191
0
    int nBlockXSize = 0;
5192
0
    int nBlockYSize = 0;
5193
5194
0
    int nXSize = poSrcPrototypeBand->GetXSize();
5195
0
    int nYSize = poSrcPrototypeBand->GetYSize();
5196
5197
0
    poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize);
5198
0
    poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
5199
5200
0
    const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
5201
0
    const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
5202
5203
0
    int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5204
0
    if (bInterleave)
5205
0
        nPixelSize *= nBandCount;
5206
5207
    // aim for one row of blocks.  Do not settle for less.
5208
0
    int nSwathCols = nXSize;
5209
0
    int nSwathLines = nMaxBlockYSize;
5210
5211
0
    const char *pszSrcCompression =
5212
0
        poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
5213
0
    if (pszSrcCompression == nullptr)
5214
0
    {
5215
0
        auto poSrcDS = poSrcPrototypeBand->GetDataset();
5216
0
        if (poSrcDS)
5217
0
            pszSrcCompression =
5218
0
                poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
5219
0
    }
5220
5221
    /* -------------------------------------------------------------------- */
5222
    /*      What will our swath size be?                                    */
5223
    /* -------------------------------------------------------------------- */
5224
    // When writing interleaved data in a compressed format, we want to be sure
5225
    // that each block will only be written once, so the swath size must not be
5226
    // greater than the block cache.
5227
0
    const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
5228
0
    int nTargetSwathSize;
5229
0
    if (pszSwathSize != nullptr)
5230
0
        nTargetSwathSize = static_cast<int>(
5231
0
            std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
5232
0
    else
5233
0
    {
5234
        // As a default, take one 1/4 of the cache size.
5235
0
        nTargetSwathSize = static_cast<int>(
5236
0
            std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
5237
5238
        // but if the minimum ideal swath buf size is less, then go for it to
5239
        // avoid unnecessarily abusing RAM usage.
5240
        // but try to use 10 MB at least.
5241
0
        GIntBig nIdealSwathBufSize =
5242
0
            static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
5243
0
        int nMinTargetSwathSize = 10 * 1000 * 1000;
5244
5245
0
        if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() &
5246
0
             GSBAP_LARGEST_CHUNK_POSSIBLE) != 0)
5247
0
        {
5248
0
            nMinTargetSwathSize = nTargetSwathSize;
5249
0
        }
5250
5251
0
        if (nIdealSwathBufSize < nTargetSwathSize &&
5252
0
            nIdealSwathBufSize < nMinTargetSwathSize)
5253
0
        {
5254
0
            nIdealSwathBufSize = nMinTargetSwathSize;
5255
0
        }
5256
5257
0
        if (pszSrcCompression != nullptr &&
5258
0
            EQUAL(pszSrcCompression, "JPEG2000") &&
5259
0
            (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
5260
0
                                   (nSrcBlockYSize % nBlockYSize) == 0)))
5261
0
        {
5262
0
            nIdealSwathBufSize =
5263
0
                std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) *
5264
0
                                                 nSrcBlockYSize * nPixelSize);
5265
0
        }
5266
0
        if (nTargetSwathSize > nIdealSwathBufSize)
5267
0
            nTargetSwathSize = static_cast<int>(
5268
0
                std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
5269
0
    }
5270
5271
0
    if (nTargetSwathSize < 1000000)
5272
0
        nTargetSwathSize = 1000000;
5273
5274
    /* But let's check that  */
5275
0
    if (bDstIsCompressed && bInterleave &&
5276
0
        nTargetSwathSize > GDALGetCacheMax64())
5277
0
    {
5278
0
        CPLError(CE_Warning, CPLE_AppDefined,
5279
0
                 "When translating into a compressed interleave format, "
5280
0
                 "the block cache size (" CPL_FRMT_GIB ") "
5281
0
                 "should be at least the size of the swath (%d) "
5282
0
                 "(GDAL_SWATH_SIZE config. option)",
5283
0
                 GDALGetCacheMax64(), nTargetSwathSize);
5284
0
    }
5285
5286
0
#define IS_DIVIDER_OF(x, y) ((y) % (x) == 0)
5287
0
#define ROUND_TO(x, y) (((x) / (y)) * (y))
5288
5289
    // If both input and output datasets are tiled, and the tile dimensions
5290
    // are "compatible", try to stick  to a swath dimension that is a multiple
5291
    // of input and output block dimensions.
5292
0
    if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
5293
0
        IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
5294
0
        IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
5295
0
        IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
5296
0
        IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
5297
0
    {
5298
0
        if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize *
5299
0
                nPixelSize <=
5300
0
            static_cast<GIntBig>(nTargetSwathSize))
5301
0
        {
5302
0
            nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
5303
0
            nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
5304
0
            if (nSwathCols == 0)
5305
0
                nSwathCols = nMaxBlockXSize;
5306
0
            if (nSwathCols > nXSize)
5307
0
                nSwathCols = nXSize;
5308
0
            nSwathLines = nMaxBlockYSize;
5309
5310
0
            if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
5311
0
                static_cast<GIntBig>(nTargetSwathSize))
5312
0
            {
5313
0
                nSwathCols = nXSize;
5314
0
                nSwathLines = nBlockYSize;
5315
0
            }
5316
0
        }
5317
0
    }
5318
5319
0
    const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
5320
0
    const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
5321
0
    if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize))
5322
0
    {
5323
0
        nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
5324
0
        if (nSwathLines == 0)
5325
0
            nSwathLines = 1;
5326
5327
0
        CPLDebug(
5328
0
            "GDAL",
5329
0
            "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
5330
0
            "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
5331
0
            "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
5332
0
            nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize);
5333
0
    }
5334
    // If we are processing single scans, try to handle several at once.
5335
    // If we are handling swaths already, only grow the swath if a row
5336
    // of blocks is substantially less than our target buffer size.
5337
0
    else if (nSwathLines == 1 ||
5338
0
             nMemoryPerCol * nSwathLines <
5339
0
                 static_cast<GIntBig>(nTargetSwathSize) / 10)
5340
0
    {
5341
0
        nSwathLines = std::min(
5342
0
            nYSize,
5343
0
            std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
5344
5345
        /* If possible try to align to source and target block height */
5346
0
        if ((nSwathLines % nMaxBlockYSize) != 0 &&
5347
0
            nSwathLines > nMaxBlockYSize &&
5348
0
            IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
5349
0
            IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
5350
0
            nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
5351
0
    }
5352
5353
0
    if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
5354
0
        (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
5355
0
                               IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))))
5356
0
    {
5357
        // Typical use case: converting from Pleiades that is 2048x2048 tiled.
5358
0
        if (nSwathLines < nSrcBlockYSize)
5359
0
        {
5360
0
            nSwathLines = nSrcBlockYSize;
5361
5362
            // Number of pixels that can be read/write simultaneously.
5363
0
            nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
5364
0
            nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
5365
0
            if (nSwathCols == 0)
5366
0
                nSwathCols = nSrcBlockXSize;
5367
0
            if (nSwathCols > nXSize)
5368
0
                nSwathCols = nXSize;
5369
5370
0
            CPLDebug(
5371
0
                "GDAL",
5372
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression and "
5373
0
                "too high block, "
5374
0
                "use partial width at one time");
5375
0
        }
5376
0
        else if ((nSwathLines % nSrcBlockYSize) != 0)
5377
0
        {
5378
            /* Round on a multiple of nSrcBlockYSize */
5379
0
            nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
5380
0
            CPLDebug(
5381
0
                "GDAL",
5382
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression, "
5383
0
                "round nSwathLines to block height : %d",
5384
0
                nSwathLines);
5385
0
        }
5386
0
    }
5387
0
    else if (bDstIsCompressed)
5388
0
    {
5389
0
        if (nSwathLines < nBlockYSize)
5390
0
        {
5391
0
            nSwathLines = nBlockYSize;
5392
5393
            // Number of pixels that can be read/write simultaneously.
5394
0
            nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
5395
0
            nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
5396
0
            if (nSwathCols == 0)
5397
0
                nSwathCols = nBlockXSize;
5398
0
            if (nSwathCols > nXSize)
5399
0
                nSwathCols = nXSize;
5400
5401
0
            CPLDebug(
5402
0
                "GDAL",
5403
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression and "
5404
0
                "too high block, "
5405
0
                "use partial width at one time");
5406
0
        }
5407
0
        else if ((nSwathLines % nBlockYSize) != 0)
5408
0
        {
5409
            // Round on a multiple of nBlockYSize.
5410
0
            nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
5411
0
            CPLDebug(
5412
0
                "GDAL",
5413
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression, "
5414
0
                "round nSwathLines to block height : %d",
5415
0
                nSwathLines);
5416
0
        }
5417
0
    }
5418
5419
0
    *pnSwathCols = nSwathCols;
5420
0
    *pnSwathLines = nSwathLines;
5421
0
}
5422
5423
/************************************************************************/
5424
/*                     GDALDatasetCopyWholeRaster()                     */
5425
/************************************************************************/
5426
5427
/**
5428
 * \brief Copy all dataset raster data.
5429
 *
5430
 * This function copies the complete raster contents of one dataset to
5431
 * another similarly configured dataset.  The source and destination
5432
 * dataset must have the same number of bands, and the same width
5433
 * and height.  The bands do not have to have the same data type.
5434
 *
5435
 * This function is primarily intended to support implementation of
5436
 * driver specific CreateCopy() functions.  It implements efficient copying,
5437
 * in particular "chunking" the copy in substantial blocks and, if appropriate,
5438
 * performing the transfer in a pixel interleaved fashion.
5439
 *
5440
 * Currently the only papszOptions values supported are:
5441
 * <ul>
5442
 * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and
5443
 * write access pattern (this does not modify the layout of the destination
5444
 * data)</li>
5445
 * <li>"COMPRESSED=YES" to force alignment on target dataset block
5446
 * sizes to achieve best compression.</li>
5447
 * <li>"SKIP_HOLES=YES" to skip chunks
5448
 * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY
5449
 * (GDAL &gt;= 2.2)</li>
5450
 * </ul>
5451
 * More options may be supported in the future.
5452
 *
5453
 * @param hSrcDS the source dataset
5454
 * @param hDstDS the destination dataset
5455
 * @param papszOptions transfer hints in "StringList" Name=Value format.
5456
 * @param pfnProgress progress reporting function.
5457
 * @param pProgressData callback data for progress function.
5458
 *
5459
 * @return CE_None on success, or CE_Failure on failure.
5460
 */
5461
5462
CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,
5463
                                              GDALDatasetH hDstDS,
5464
                                              CSLConstList papszOptions,
5465
                                              GDALProgressFunc pfnProgress,
5466
                                              void *pProgressData)
5467
5468
0
{
5469
0
    VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure);
5470
0
    VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure);
5471
5472
0
    GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
5473
0
    GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
5474
5475
0
    // Substitute a dummy callback so pfnProgress can be called unconditionally.
    if (pfnProgress == nullptr)
5476
0
        pfnProgress = GDALDummyProgress;
5477
5478
    /* -------------------------------------------------------------------- */
5479
    /*      Confirm the datasets match in size and band counts.             */
5480
    /* -------------------------------------------------------------------- */
5481
0
    // The destination dataset provides the reference dimensions and band
    // count; the source is compared against them.
    const int nXSize = poDstDS->GetRasterXSize();
5482
0
    const int nYSize = poDstDS->GetRasterYSize();
5483
0
    const int nBandCount = poDstDS->GetRasterCount();
5484
5485
0
    if (poSrcDS->GetRasterXSize() != nXSize ||
5486
0
        poSrcDS->GetRasterYSize() != nYSize ||
5487
0
        poSrcDS->GetRasterCount() != nBandCount)
5488
0
    {
5489
0
        CPLError(CE_Failure, CPLE_AppDefined,
5490
0
                 "Input and output dataset sizes or band counts do not\n"
5491
0
                 "match in GDALDatasetCopyWholeRaster()");
5492
0
        return CE_Failure;
5493
0
    }
5494
5495
    /* -------------------------------------------------------------------- */
5496
    /*      Report preliminary (0) progress.                                */
5497
    /* -------------------------------------------------------------------- */
5498
0
    if (!pfnProgress(0.0, nullptr, pProgressData))
5499
0
    {
5500
0
        CPLError(CE_Failure, CPLE_UserInterrupt,
5501
0
                 "User terminated CreateCopy()");
5502
0
        return CE_Failure;
5503
0
    }
5504
5505
    /* -------------------------------------------------------------------- */
5506
    /*      Get our prototype band, and assume the others are similarly     */
5507
    /*      configured.                                                     */
5508
    /* -------------------------------------------------------------------- */
5509
0
    // Nothing to copy for a dataset without bands: report success.
    if (nBandCount == 0)
5510
0
        return CE_None;
5511
5512
0
    GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
5513
0
    GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
5514
0
    // All I/O below is done in the data type of the first destination band.
    GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
5515
5516
    /* -------------------------------------------------------------------- */
5517
    /*      Do we want to try and do the operation in a pixel               */
5518
    /*      interleaved fashion?                                            */
5519
    /* -------------------------------------------------------------------- */
5520
0
    // Interleaved mode is enabled when either dataset reports PIXEL or LINE
    // as its INTERLEAVE item in the IMAGE_STRUCTURE metadata domain.
    bool bInterleave = false;
5521
0
    const char *pszInterleave =
5522
0
        poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
5523
0
    if (pszInterleave != nullptr &&
5524
0
        (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
5525
0
        bInterleave = true;
5526
5527
0
    pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
5528
0
    if (pszInterleave != nullptr &&
5529
0
        (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
5530
0
        bInterleave = true;
5531
5532
0
    // An explicit INTERLEAVE option overrides what was derived from the
    // metadata; an unrecognized value only emits a warning and leaves the
    // metadata-derived choice unchanged.
    pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE");
5533
0
    if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL"))
5534
0
        bInterleave = true;
5535
0
    else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND"))
5536
0
        bInterleave = false;
5537
    // INTERLEAVE=ATTRIBUTES is specific to the TileDB driver
5538
0
    else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES"))
5539
0
        bInterleave = true;
5540
0
    else if (pszInterleave != nullptr)
5541
0
    {
5542
0
        CPLError(CE_Warning, CPLE_NotSupported,
5543
0
                 "Unsupported value for option INTERLEAVE");
5544
0
    }
5545
5546
    // If the destination is compressed, we must try to write blocks just once,
5547
    // to save disk space (GTiff case for example), and to avoid data loss
5548
    // (JPEG compression for example).
5549
0
    bool bDstIsCompressed = false;
5550
0
    const char *pszDstCompressed =
5551
0
        CSLFetchNameValue(papszOptions, "COMPRESSED");
5552
0
    if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5553
0
        bDstIsCompressed = true;
5554
5555
    /* -------------------------------------------------------------------- */
5556
    /*      What will our swath size be?                                    */
5557
    /* -------------------------------------------------------------------- */
5558
5559
0
    int nSwathCols = 0;
5560
0
    int nSwathLines = 0;
5561
0
    GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand,
5562
0
                                    nBandCount, bDstIsCompressed, bInterleave,
5563
0
                                    &nSwathCols, &nSwathLines);
5564
5565
0
    // In interleaved mode one buffer pixel holds a value for every band.
    int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5566
0
    if (bInterleave)
5567
0
        nPixelSize *= nBandCount;
5568
5569
0
    // Single swath buffer, reused for every chunk and released at the end.
    void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
5570
0
    if (pSwathBuf == nullptr)
5571
0
    {
5572
0
        return CE_Failure;
5573
0
    }
5574
5575
0
    CPLDebug("GDAL",
5576
0
             "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
5577
0
             nSwathCols, nSwathLines, static_cast<int>(bInterleave));
5578
5579
    // Advise the source raster that we are going to read it completely
5580
    // Note: this might already have been done by GDALCreateCopy() in the
5581
    // likely case this function is indirectly called by it
5582
0
    poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount,
5583
0
                        nullptr, nullptr);
5584
5585
    /* ==================================================================== */
5586
    /*      Band oriented (uninterleaved) case.                             */
5587
    /* ==================================================================== */
5588
0
    CPLErr eErr = CE_None;
5589
0
    // SKIP_HOLES defaults to NO, i.e. every chunk is copied.
    const bool bCheckHoles =
5590
0
        CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
5591
5592
0
    if (!bInterleave)
5593
0
    {
5594
0
        GDALRasterIOExtraArg sExtraArg;
5595
0
        INIT_RASTERIO_EXTRA_ARG(sExtraArg);
5596
0
        CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress);  // to make cppcheck happy
5597
5598
0
        // Number of swath chunks over all bands; denominator for progress.
        const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) *
5599
0
                                     DIV_ROUND_UP(nYSize, nSwathLines) *
5600
0
                                     DIV_ROUND_UP(nXSize, nSwathCols);
5601
0
        GIntBig nBlocksDone = 0;
5602
5603
0
        for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++)
5604
0
        {
5605
0
            // 1-based band number, also passed by address as the band map.
            int nBand = iBand + 1;
5606
5607
0
            for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5608
0
            {
5609
0
                // Clamp the last swath to the raster height.
                int nThisLines = nSwathLines;
5610
5611
0
                if (iY + nThisLines > nYSize)
5612
0
                    nThisLines = nYSize - iY;
5613
5614
0
                for (int iX = 0; iX < nXSize && eErr == CE_None;
5615
0
                     iX += nSwathCols)
5616
0
                {
5617
0
                    // Clamp the last swath to the raster width.
                    int nThisCols = nSwathCols;
5618
5619
0
                    if (iX + nThisCols > nXSize)
5620
0
                        nThisCols = nXSize - iX;
5621
5622
0
                    // Without SKIP_HOLES the chunk is assumed to hold data.
                    int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5623
0
                    if (bCheckHoles)
5624
0
                    {
5625
0
                        nStatus = poSrcDS->GetRasterBand(nBand)
5626
0
                                      ->GetDataCoverageStatus(
5627
0
                                          iX, iY, nThisCols, nThisLines,
5628
0
                                          GDAL_DATA_COVERAGE_STATUS_DATA);
5629
0
                    }
5630
0
                    if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5631
0
                    {
5632
0
                        // The read is given the first half of this chunk's
                        // progress interval; the full chunk is reported
                        // through pfnProgress once the write is done.
                        sExtraArg.pfnProgress = GDALScaledProgress;
5633
0
                        sExtraArg.pProgressData = GDALCreateScaledProgress(
5634
0
                            nBlocksDone / static_cast<double>(nTotalBlocks),
5635
0
                            (nBlocksDone + 0.5) /
5636
0
                                static_cast<double>(nTotalBlocks),
5637
0
                            pfnProgress, pProgressData);
5638
0
                        // No scaled progress data: read without progress.
                        if (sExtraArg.pProgressData == nullptr)
5639
0
                            sExtraArg.pfnProgress = nullptr;
5640
5641
0
                        eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
5642
0
                                                 nThisLines, pSwathBuf,
5643
0
                                                 nThisCols, nThisLines, eDT, 1,
5644
0
                                                 &nBand, 0, 0, 0, &sExtraArg);
5645
5646
0
                        GDALDestroyScaledProgress(sExtraArg.pProgressData);
5647
5648
0
                        if (eErr == CE_None)
5649
0
                            eErr = poDstDS->RasterIO(
5650
0
                                GF_Write, iX, iY, nThisCols, nThisLines,
5651
0
                                pSwathBuf, nThisCols, nThisLines, eDT, 1,
5652
0
                                &nBand, 0, 0, 0, nullptr);
5653
0
                    }
5654
5655
0
                    // Skipped (empty) chunks still count towards progress.
                    nBlocksDone++;
5656
0
                    if (eErr == CE_None &&
5657
0
                        !pfnProgress(nBlocksDone /
5658
0
                                         static_cast<double>(nTotalBlocks),
5659
0
                                     nullptr, pProgressData))
5660
0
                    {
5661
0
                        eErr = CE_Failure;
5662
0
                        CPLError(CE_Failure, CPLE_UserInterrupt,
5663
0
                                 "User terminated CreateCopy()");
5664
0
                    }
5665
0
                }
5666
0
            }
5667
0
        }
5668
0
    }
5669
5670
    /* ==================================================================== */
5671
    /*      Pixel interleaved case.                                         */
5672
    /* ==================================================================== */
5673
0
    else /* if( bInterleave ) */
5674
0
    {
5675
0
        GDALRasterIOExtraArg sExtraArg;
5676
0
        INIT_RASTERIO_EXTRA_ARG(sExtraArg);
5677
0
        CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress);  // to make cppcheck happy
5678
5679
0
        // All bands are transferred together, so the chunk count does not
        // depend on the number of bands.
        const GIntBig nTotalBlocks =
5680
0
            static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
5681
0
            DIV_ROUND_UP(nXSize, nSwathCols);
5682
0
        GIntBig nBlocksDone = 0;
5683
5684
0
        for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5685
0
        {
5686
0
            // Clamp the last swath to the raster height.
            int nThisLines = nSwathLines;
5687
5688
0
            if (iY + nThisLines > nYSize)
5689
0
                nThisLines = nYSize - iY;
5690
5691
0
            for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
5692
0
            {
5693
0
                // Clamp the last swath to the raster width.
                int nThisCols = nSwathCols;
5694
5695
0
                if (iX + nThisCols > nXSize)
5696
0
                    nThisCols = nXSize - iX;
5697
5698
0
                int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5699
0
                if (bCheckHoles)
5700
0
                {
5701
0
                    // The chunk is skipped only if no band reports data;
                    // stop querying at the first band that does.
                    nStatus = 0;
5702
0
                    for (int iBand = 0; iBand < nBandCount; iBand++)
5703
0
                    {
5704
0
                        nStatus |= poSrcDS->GetRasterBand(iBand + 1)
5705
0
                                       ->GetDataCoverageStatus(
5706
0
                                           iX, iY, nThisCols, nThisLines,
5707
0
                                           GDAL_DATA_COVERAGE_STATUS_DATA);
5708
0
                        if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5709
0
                            break;
5710
0
                    }
5711
0
                }
5712
0
                if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5713
0
                {
5714
0
                    // The read is given the first half of this chunk's
                    // progress interval; the full chunk is reported through
                    // pfnProgress once the write is done.
                    sExtraArg.pfnProgress = GDALScaledProgress;
5715
0
                    sExtraArg.pProgressData = GDALCreateScaledProgress(
5716
0
                        nBlocksDone / static_cast<double>(nTotalBlocks),
5717
0
                        (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
5718
0
                        pfnProgress, pProgressData);
5719
0
                    // No scaled progress data: read without progress.
                    if (sExtraArg.pProgressData == nullptr)
5720
0
                        sExtraArg.pfnProgress = nullptr;
5721
5722
0
                    eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
5723
0
                                             nThisLines, pSwathBuf, nThisCols,
5724
0
                                             nThisLines, eDT, nBandCount,
5725
0
                                             nullptr, 0, 0, 0, &sExtraArg);
5726
5727
0
                    GDALDestroyScaledProgress(sExtraArg.pProgressData);
5728
5729
0
                    if (eErr == CE_None)
5730
0
                        eErr = poDstDS->RasterIO(
5731
0
                            GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf,
5732
0
                            nThisCols, nThisLines, eDT, nBandCount, nullptr, 0,
5733
0
                            0, 0, nullptr);
5734
0
                }
5735
5736
0
                // Skipped (empty) chunks still count towards progress.
                nBlocksDone++;
5737
0
                if (eErr == CE_None &&
5738
0
                    !pfnProgress(nBlocksDone /
5739
0
                                     static_cast<double>(nTotalBlocks),
5740
0
                                 nullptr, pProgressData))
5741
0
                {
5742
0
                    eErr = CE_Failure;
5743
0
                    CPLError(CE_Failure, CPLE_UserInterrupt,
5744
0
                             "User terminated CreateCopy()");
5745
0
                }
5746
0
            }
5747
0
        }
5748
0
    }
5749
5750
    /* -------------------------------------------------------------------- */
5751
    /*      Cleanup                                                         */
5752
    /* -------------------------------------------------------------------- */
5753
0
    CPLFree(pSwathBuf);
5754
5755
0
    return eErr;
5756
0
}
5757
5758
/************************************************************************/
5759
/*                   GDALRasterBandCopyWholeRaster()                    */
5760
/************************************************************************/
5761
5762
/**
5763
 * \brief Copy a whole raster band
5764
 *
5765
 * This function copies the complete raster contents of one band to
5766
 * another similarly configured band.  The source and destination
5767
 * bands must have the same width and height.  The bands do not have
5768
 * to have the same data type.
5769
 *
5770
 * It implements efficient copying, in particular "chunking" the copy in
5771
 * substantial blocks.
5772
 *
5773
 * Currently the only papszOptions values supported are:
5774
 * <ul>
5775
 * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
5776
 * achieve best compression.</li>
5777
 * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
5778
 * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL &gt;= 2.2)</li>
5779
 * </ul>
5780
 *
5781
 * @param hSrcBand the source band
5782
 * @param hDstBand the destination band
5783
 * @param papszOptions transfer hints in "StringList" Name=Value format.
5784
 * @param pfnProgress progress reporting function.
5785
 * @param pProgressData callback data for progress function.
5786
 *
5787
 * @return CE_None on success, or CE_Failure on failure.
5788
 */
5789
5790
CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
5791
    GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
5792
    const char *const *const papszOptions, GDALProgressFunc pfnProgress,
5793
    void *pProgressData)
5794
5795
0
{
5796
0
    VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5797
0
    VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5798
5799
0
    GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
5800
0
    GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand);
5801
0
    CPLErr eErr = CE_None;
5802
5803
0
    // Substitute a dummy callback so pfnProgress can be called unconditionally.
    if (pfnProgress == nullptr)
5804
0
        pfnProgress = GDALDummyProgress;
5805
5806
    /* -------------------------------------------------------------------- */
5807
    /*      Confirm the source and destination bands match in size.         */
5808
    /* -------------------------------------------------------------------- */
5809
0
    // The source band provides the reference dimensions.
    int nXSize = poSrcBand->GetXSize();
5810
0
    int nYSize = poSrcBand->GetYSize();
5811
5812
0
    if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize)
5813
0
    {
5814
0
        CPLError(CE_Failure, CPLE_AppDefined,
5815
0
                 "Input and output band sizes do not\n"
5816
0
                 "match in GDALRasterBandCopyWholeRaster()");
5817
0
        return CE_Failure;
5818
0
    }
5819
5820
    /* -------------------------------------------------------------------- */
5821
    /*      Report preliminary (0) progress.                                */
5822
    /* -------------------------------------------------------------------- */
5823
0
    if (!pfnProgress(0.0, nullptr, pProgressData))
5824
0
    {
5825
0
        CPLError(CE_Failure, CPLE_UserInterrupt,
5826
0
                 "User terminated CreateCopy()");
5827
0
        return CE_Failure;
5828
0
    }
5829
5830
0
    // All I/O below is done in the data type of the destination band.
    GDALDataType eDT = poDstBand->GetRasterDataType();
5831
5832
    // If the destination is compressed, we must try to write blocks just once,
5833
    // to save disk space (GTiff case for example), and to avoid data loss
5834
    // (JPEG compression for example).
5835
0
    bool bDstIsCompressed = false;
5836
0
    const char *pszDstCompressed =
5837
0
        CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED");
5838
0
    if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5839
0
        bDstIsCompressed = true;
5840
5841
    /* -------------------------------------------------------------------- */
5842
    /*      What will our swath size be?                                    */
5843
    /* -------------------------------------------------------------------- */
5844
5845
0
    int nSwathCols = 0;
5846
0
    int nSwathLines = 0;
5847
0
    // Swath size is computed for a single band, without interleaving.
    GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed,
5848
0
                                    FALSE, &nSwathCols, &nSwathLines);
5849
5850
0
    const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5851
5852
0
    // Single swath buffer, reused for every chunk and released at the end.
    void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
5853
0
    if (pSwathBuf == nullptr)
5854
0
    {
5855
0
        return CE_Failure;
5856
0
    }
5857
5858
0
    CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
5859
0
             nSwathCols, nSwathLines);
5860
5861
0
    // SKIP_HOLES defaults to NO, i.e. every chunk is copied.
    const bool bCheckHoles =
5862
0
        CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
5863
5864
    // Advise the source raster that we are going to read it completely
5865
0
    poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr);
5866
5867
    /* ==================================================================== */
5868
    /*      Band oriented (uninterleaved) case.                             */
5869
    /* ==================================================================== */
5870
5871
0
    for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5872
0
    {
5873
0
        // Clamp the last swath to the band height.
        int nThisLines = nSwathLines;
5874
5875
0
        if (iY + nThisLines > nYSize)
5876
0
            nThisLines = nYSize - iY;
5877
5878
0
        for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
5879
0
        {
5880
0
            // Clamp the last swath to the band width.
            int nThisCols = nSwathCols;
5881
5882
0
            if (iX + nThisCols > nXSize)
5883
0
                nThisCols = nXSize - iX;
5884
5885
0
            // Without SKIP_HOLES the chunk is assumed to hold data.
            int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5886
0
            if (bCheckHoles)
5887
0
            {
5888
0
                nStatus = poSrcBand->GetDataCoverageStatus(
5889
0
                    iX, iY, nThisCols, nThisLines,
5890
0
                    GDAL_DATA_COVERAGE_STATUS_DATA);
5891
0
            }
5892
0
            if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5893
0
            {
5894
0
                eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols,
5895
0
                                           nThisLines, pSwathBuf, nThisCols,
5896
0
                                           nThisLines, eDT, 0, 0, nullptr);
5897
5898
0
                if (eErr == CE_None)
5899
0
                    eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols,
5900
0
                                               nThisLines, pSwathBuf, nThisCols,
5901
0
                                               nThisLines, eDT, 0, 0, nullptr);
5902
0
            }
5903
5904
0
            // Progress is the fraction of lines reached by the current swath
            // row; it is reported after every chunk, including skipped ones.
            if (eErr == CE_None && !pfnProgress(double(iY + nThisLines) /
5905
0
                                                    static_cast<double>(nYSize),
5906
0
                                                nullptr, pProgressData))
5907
0
            {
5908
0
                eErr = CE_Failure;
5909
0
                CPLError(CE_Failure, CPLE_UserInterrupt,
5910
0
                         "User terminated CreateCopy()");
5911
0
            }
5912
0
        }
5913
0
    }
5914
5915
    /* -------------------------------------------------------------------- */
5916
    /*      Cleanup                                                         */
5917
    /* -------------------------------------------------------------------- */
5918
0
    CPLFree(pSwathBuf);
5919
5920
0
    return eErr;
5921
0
}
5922
5923
/************************************************************************/
5924
/*                     GDALCopyRasterIOExtraArg ()                      */
5925
/************************************************************************/
5926
5927
// Resets *psDestArg with INIT_RASTERIO_EXTRA_ARG and then, when psSrcArg is
// non-null, copies the source settings into it. A null psSrcArg leaves the
// destination at its freshly initialized values.
void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg,
5928
                              const GDALRasterIOExtraArg *psSrcArg)
5929
0
{
5930
0
    INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5931
0
    if (psSrcArg)
5932
0
    {
5933
0
        psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5934
0
        psDestArg->pfnProgress = psSrcArg->pfnProgress;
5935
0
        psDestArg->pProgressData = psSrcArg->pProgressData;
5936
0
        psDestArg->bFloatingPointWindowValidity =
5937
0
            psSrcArg->bFloatingPointWindowValidity;
5938
0
        // The floating-point window is copied only when flagged as valid.
        if (psSrcArg->bFloatingPointWindowValidity)
5939
0
        {
5940
0
            psDestArg->dfXOff = psSrcArg->dfXOff;
5941
0
            psDestArg->dfYOff = psSrcArg->dfYOff;
5942
0
            psDestArg->dfXSize = psSrcArg->dfXSize;
5943
0
            psDestArg->dfYSize = psSrcArg->dfYSize;
5944
0
        }
5945
0
        // The fields below are read only when the source structure declares
        // a version of at least 2 (resp. 3).
        if (psSrcArg->nVersion >= 2)
5946
0
        {
5947
0
            psDestArg->bUseOnlyThisScale = psSrcArg->bUseOnlyThisScale;
5948
0
        }
5949
0
        if (psSrcArg->nVersion >= 3)
5950
0
        {
5951
0
            psDestArg->bOperateInBufType = psSrcArg->bOperateInBufType;
5952
0
        }
5953
0
    }
5954
0
}
5955
5956
/************************************************************************/
5957
/*                           HasOnlyNoData()                            */
5958
/************************************************************************/
5959
5960
/** Generic nodata test: plain equality between the sample and the nodata
 *  value. Floating-point types use dedicated specializations. */
template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
{
    const bool bMatchesNoData = (value == noDataValue);
    return bMatchesNoData;
}
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned char>(unsigned char, unsigned char)
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned short>(unsigned short, unsigned short)
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned int>(unsigned int, unsigned int)
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned long>(unsigned long, unsigned long)
5964
5965
/** Half-float nodata test: a NaN nodata value matches any NaN sample,
 *  otherwise plain equality is used. */
template <> bool IsEqualToNoData<GFloat16>(GFloat16 value, GFloat16 noDataValue)
{
    using std::isnan;
    if (isnan(noDataValue))
    {
        return isnan(value);
    }
    return value == noDataValue;
}
5970
5971
/** Single-precision nodata test: a NaN nodata value matches any NaN sample,
 *  otherwise plain equality is used. */
template <> bool IsEqualToNoData<float>(float value, float noDataValue)
{
    if (std::isnan(noDataValue))
    {
        return std::isnan(value);
    }
    return value == noDataValue;
}
5975
5976
/** Double-precision nodata test: a NaN nodata value matches any NaN sample,
 *  otherwise plain equality is used. */
template <> bool IsEqualToNoData<double>(double value, double noDataValue)
{
    if (std::isnan(noDataValue))
    {
        return std::isnan(value);
    }
    return value == noDataValue;
}
5980
5981
/** Return whether every sample of a pixel-interleaved window equals the
 *  nodata value (as decided by IsEqualToNoData()).
 *
 * @param pBuffer      first sample of the window.
 * @param noDataValue  nodata value, already converted to T.
 * @param nWidth       window width, in pixels.
 * @param nHeight      window height, in lines.
 * @param nLineStride  distance between two consecutive lines, in pixels
 *                     (it is multiplied by nComponents to index samples).
 * @param nComponents  number of interleaved samples per pixel.
 * @return true if all samples match the nodata value. An empty window
 *         (zero width, height or component count) yields true.
 */
template <class T>
static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
                           size_t nHeight, size_t nLineStride,
                           size_t nComponents)
{
    // An empty window has no sample that could differ from nodata, which is
    // also what the exhaustive scan below concludes. Returning early keeps
    // the probe stage from evaluating nWidth - 1 / nHeight - 1 on zero
    // (size_t wrap-around) and reading outside of the buffer.
    if (nWidth == 0 || nHeight == 0 || nComponents == 0)
    {
        return true;
    }

    // Fast test: check the 4 corners and the middle pixel.
    const size_t nLastX = nWidth - 1;
    const size_t nLastY = nHeight - 1;
    const size_t anProbePixels[] = {
        0,                                      // top-left
        nLastX,                                 // top-right
        nLastY / 2 * nLineStride + nLastX / 2,  // middle
        nLastY * nLineStride,                   // bottom-left
        nLastY * nLineStride + nLastX           // bottom-right
    };
    for (size_t iBand = 0; iBand < nComponents; iBand++)
    {
        for (const size_t nPixel : anProbePixels)
        {
            if (!IsEqualToNoData(pBuffer[nPixel * nComponents + iBand],
                                 noDataValue))
            {
                return false;
            }
        }
    }

    // Test all pixels.
    const size_t nSamplesPerLine = nWidth * nComponents;
    for (size_t iY = 0; iY < nHeight; iY++)
    {
        const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
        for (size_t iX = 0; iX < nSamplesPerLine; iX++)
        {
            if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
            {
                return false;
            }
        }
    }
    return true;
}
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned char>(unsigned char const*, unsigned char, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned short>(unsigned short const*, unsigned short, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned int>(unsigned int const*, unsigned int, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned long>(unsigned long const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<cpl::Float16>(cpl::Float16 const*, cpl::Float16, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<float>(float const*, float, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<double>(double const*, double, unsigned long, unsigned long, unsigned long, unsigned long)
6024
6025
/************************************************************************/
6026
/*                      GDALBufferHasOnlyNoData()                       */
6027
/************************************************************************/
6028
6029
bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue,
6030
                             size_t nWidth, size_t nHeight, size_t nLineStride,
6031
                             size_t nComponents, int nBitsPerSample,
6032
                             GDALBufferSampleFormat nSampleFormat)
6033
0
{
6034
    // In the case where the nodata is 0, we can compare several bytes at
6035
    // once. Select the largest natural integer type for the architecture.
6036
0
    if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
6037
        // Do not use this optimized code path for floating point numbers,
6038
        // as it can't detect negative zero.
6039
0
        nSampleFormat != GSF_FLOATING_POINT)
6040
0
    {
6041
0
        const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
6042
0
        const size_t nSize =
6043
0
            static_cast<size_t>((static_cast<uint64_t>(nWidth) * nHeight *
6044
0
                                     nComponents * nBitsPerSample +
6045
0
                                 7) /
6046
0
                                8);
6047
0
#ifdef HAVE_SSE2
6048
0
        size_t n = nSize;
6049
        // Align to 16 bytes
6050
0
        while ((reinterpret_cast<uintptr_t>(pabyBuffer) & 15) != 0 && n > 0)
6051
0
        {
6052
0
            --n;
6053
0
            if (*pabyBuffer)
6054
0
                return false;
6055
0
            pabyBuffer++;
6056
0
        }
6057
6058
0
        const auto zero = _mm_setzero_si128();
6059
0
        constexpr int UNROLLING = 4;
6060
0
        while (n >= UNROLLING * sizeof(zero))
6061
0
        {
6062
0
            const auto v0 = _mm_load_si128(reinterpret_cast<const __m128i *>(
6063
0
                pabyBuffer + 0 * sizeof(zero)));
6064
0
            const auto v1 = _mm_load_si128(reinterpret_cast<const __m128i *>(
6065
0
                pabyBuffer + 1 * sizeof(zero)));
6066
0
            const auto v2 = _mm_load_si128(reinterpret_cast<const __m128i *>(
6067
0
                pabyBuffer + 2 * sizeof(zero)));
6068
0
            const auto v3 = _mm_load_si128(reinterpret_cast<const __m128i *>(
6069
0
                pabyBuffer + 3 * sizeof(zero)));
6070
0
            const auto v =
6071
0
                _mm_or_si128(_mm_or_si128(v0, v1), _mm_or_si128(v2, v3));
6072
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
6073
            if (!_mm_test_all_zeros(v, v))
6074
#else
6075
0
            if (_mm_movemask_epi8(_mm_cmpeq_epi8(v, zero)) != 0xFFFF)
6076
0
#endif
6077
0
            {
6078
0
                return false;
6079
0
            }
6080
0
            pabyBuffer += UNROLLING * sizeof(zero);
6081
0
            n -= UNROLLING * sizeof(zero);
6082
0
        }
6083
6084
0
        while (n > 0)
6085
0
        {
6086
0
            --n;
6087
0
            if (*pabyBuffer)
6088
0
                return false;
6089
0
            pabyBuffer++;
6090
0
        }
6091
#else
6092
#if SIZEOF_VOIDP >= 8 || defined(__x86_64__)
6093
        // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4
6094
        typedef std::uint64_t WordType;
6095
#else
6096
        typedef std::uint32_t WordType;
6097
#endif
6098
6099
        const size_t nInitialIters =
6100
            std::min(sizeof(WordType) -
6101
                         static_cast<size_t>(
6102
                             reinterpret_cast<std::uintptr_t>(pabyBuffer) %
6103
                             sizeof(WordType)),
6104
                     nSize);
6105
        size_t i = 0;
6106
        for (; i < nInitialIters; i++)
6107
        {
6108
            if (pabyBuffer[i])
6109
                return false;
6110
        }
6111
        for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType))
6112
        {
6113
            if (*(reinterpret_cast<const WordType *>(pabyBuffer + i)))
6114
                return false;
6115
        }
6116
        for (; i < nSize; i++)
6117
        {
6118
            if (pabyBuffer[i])
6119
                return false;
6120
        }
6121
#endif
6122
0
        return true;
6123
0
    }
6124
6125
0
#ifdef HAVE_SSE2
6126
0
    else if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
6127
0
             nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
6128
0
    {
6129
0
        const auto signMask = _mm_set1_epi32(0x7FFFFFFF);
6130
0
        const auto zero = _mm_setzero_si128();
6131
0
        const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
6132
0
        const size_t n = nWidth * nHeight * nComponents;
6133
6134
0
        size_t i = 0;
6135
0
        constexpr int UNROLLING = 4;
6136
0
        constexpr size_t VALUES_PER_ITER =
6137
0
            UNROLLING * sizeof(zero) / sizeof(float);
6138
0
        for (; i + VALUES_PER_ITER <= n; i += VALUES_PER_ITER)
6139
0
        {
6140
0
            const auto v0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(
6141
0
                pabyBuffer + 0 * sizeof(zero)));
6142
0
            const auto v1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(
6143
0
                pabyBuffer + 1 * sizeof(zero)));
6144
0
            const auto v2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(
6145
0
                pabyBuffer + 2 * sizeof(zero)));
6146
0
            const auto v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(
6147
0
                pabyBuffer + 3 * sizeof(zero)));
6148
0
            auto v = _mm_or_si128(_mm_or_si128(v0, v1), _mm_or_si128(v2, v3));
6149
            // Clear the sign bit (makes -0.0 become +0.0)
6150
0
            v = _mm_and_si128(v, signMask);
6151
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
6152
            if (!_mm_test_all_zeros(v, v))
6153
#else
6154
0
            if (_mm_movemask_epi8(_mm_cmpeq_epi8(v, zero)) != 0xFFFF)
6155
0
#endif
6156
0
            {
6157
0
                return false;
6158
0
            }
6159
0
            pabyBuffer += UNROLLING * sizeof(zero);
6160
0
        }
6161
6162
0
        for (; i < n; i++)
6163
0
        {
6164
0
            uint32_t bits;
6165
0
            memcpy(&bits, pabyBuffer, sizeof(bits));
6166
0
            pabyBuffer += sizeof(bits);
6167
0
            if ((bits & 0x7FFFFFFF) != 0)
6168
0
                return false;
6169
0
        }
6170
6171
0
        return true;
6172
0
    }
6173
6174
0
    else if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
6175
0
             nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
6176
0
    {
6177
0
        const auto signMask = _mm_set1_epi64x(0x7FFFFFFFFFFFFFFFLL);
6178
0
        const auto zero = _mm_setzero_si128();
6179
0
        const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
6180
0
        const size_t n = nWidth * nHeight * nComponents;
6181
6182
0
        size_t i = 0;
6183
0
        constexpr int UNROLLING = 4;
6184
0
        constexpr size_t VALUES_PER_ITER =
6185
0
            UNROLLING * sizeof(zero) / sizeof(double);
6186
0
        for (; i + VALUES_PER_ITER <= n; i += VALUES_PER_ITER)
6187
0
        {
6188
0
            const auto v0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(
6189
0
                pabyBuffer + 0 * sizeof(zero)));
6190
0
            const auto v1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(
6191
0
                pabyBuffer + 1 * sizeof(zero)));
6192
0
            const auto v2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(
6193
0
                pabyBuffer + 2 * sizeof(zero)));
6194
0
            const auto v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(
6195
0
                pabyBuffer + 3 * sizeof(zero)));
6196
0
            auto v = _mm_or_si128(_mm_or_si128(v0, v1), _mm_or_si128(v2, v3));
6197
            // Clear the sign bit (makes -0.0 become +0.0)
6198
0
            v = _mm_and_si128(v, signMask);
6199
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
6200
            if (!_mm_test_all_zeros(v, v))
6201
#else
6202
0
            if (_mm_movemask_epi8(_mm_cmpeq_epi8(v, zero)) != 0xFFFF)
6203
0
#endif
6204
0
            {
6205
0
                return false;
6206
0
            }
6207
0
            pabyBuffer += UNROLLING * sizeof(zero);
6208
0
        }
6209
6210
0
        for (; i < n; i++)
6211
0
        {
6212
0
            uint64_t bits;
6213
0
            memcpy(&bits, pabyBuffer, sizeof(bits));
6214
0
            pabyBuffer += sizeof(bits);
6215
0
            if ((bits & 0x7FFFFFFFFFFFFFFFULL) != 0)
6216
0
                return false;
6217
0
        }
6218
6219
0
        return true;
6220
0
    }
6221
0
#endif
6222
6223
0
    if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT)
6224
0
    {
6225
0
        return GDALIsValueInRange<uint8_t>(dfNoDataValue) &&
6226
0
               HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer),
6227
0
                              static_cast<uint8_t>(dfNoDataValue), nWidth,
6228
0
                              nHeight, nLineStride, nComponents);
6229
0
    }
6230
0
    if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT)
6231
0
    {
6232
        // Use unsigned implementation by converting the nodatavalue to
6233
        // unsigned
6234
0
        return GDALIsValueInRange<int8_t>(dfNoDataValue) &&
6235
0
               HasOnlyNoDataT(
6236
0
                   static_cast<const uint8_t *>(pBuffer),
6237
0
                   static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)),
6238
0
                   nWidth, nHeight, nLineStride, nComponents);
6239
0
    }
6240
0
    if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT)
6241
0
    {
6242
0
        return GDALIsValueInRange<uint16_t>(dfNoDataValue) &&
6243
0
               HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer),
6244
0
                              static_cast<uint16_t>(dfNoDataValue), nWidth,
6245
0
                              nHeight, nLineStride, nComponents);
6246
0
    }
6247
0
    if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT)
6248
0
    {
6249
        // Use unsigned implementation by converting the nodatavalue to
6250
        // unsigned
6251
0
        return GDALIsValueInRange<int16_t>(dfNoDataValue) &&
6252
0
               HasOnlyNoDataT(
6253
0
                   static_cast<const uint16_t *>(pBuffer),
6254
0
                   static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)),
6255
0
                   nWidth, nHeight, nLineStride, nComponents);
6256
0
    }
6257
0
    if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT)
6258
0
    {
6259
0
        return GDALIsValueInRange<uint32_t>(dfNoDataValue) &&
6260
0
               HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer),
6261
0
                              static_cast<uint32_t>(dfNoDataValue), nWidth,
6262
0
                              nHeight, nLineStride, nComponents);
6263
0
    }
6264
0
    if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT)
6265
0
    {
6266
        // Use unsigned implementation by converting the nodatavalue to
6267
        // unsigned
6268
0
        return GDALIsValueInRange<int32_t>(dfNoDataValue) &&
6269
0
               HasOnlyNoDataT(
6270
0
                   static_cast<const uint32_t *>(pBuffer),
6271
0
                   static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)),
6272
0
                   nWidth, nHeight, nLineStride, nComponents);
6273
0
    }
6274
0
    if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT)
6275
0
    {
6276
0
        return GDALIsValueInRange<uint64_t>(dfNoDataValue) &&
6277
0
               HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer),
6278
0
                              static_cast<uint64_t>(dfNoDataValue), nWidth,
6279
0
                              nHeight, nLineStride, nComponents);
6280
0
    }
6281
0
    if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT)
6282
0
    {
6283
        // Use unsigned implementation by converting the nodatavalue to
6284
        // unsigned
6285
0
        return GDALIsValueInRange<int64_t>(dfNoDataValue) &&
6286
0
               HasOnlyNoDataT(
6287
0
                   static_cast<const uint64_t *>(pBuffer),
6288
0
                   static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)),
6289
0
                   nWidth, nHeight, nLineStride, nComponents);
6290
0
    }
6291
0
    if (nBitsPerSample == 16 && nSampleFormat == GSF_FLOATING_POINT)
6292
0
    {
6293
0
        return (std::isnan(dfNoDataValue) ||
6294
0
                GDALIsValueInRange<GFloat16>(dfNoDataValue)) &&
6295
0
               HasOnlyNoDataT(static_cast<const GFloat16 *>(pBuffer),
6296
0
                              static_cast<GFloat16>(dfNoDataValue), nWidth,
6297
0
                              nHeight, nLineStride, nComponents);
6298
0
    }
6299
0
    if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
6300
0
    {
6301
0
        return (std::isnan(dfNoDataValue) ||
6302
0
                GDALIsValueInRange<float>(dfNoDataValue)) &&
6303
0
               HasOnlyNoDataT(static_cast<const float *>(pBuffer),
6304
0
                              static_cast<float>(dfNoDataValue), nWidth,
6305
0
                              nHeight, nLineStride, nComponents);
6306
0
    }
6307
0
    if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
6308
0
    {
6309
0
        return HasOnlyNoDataT(static_cast<const double *>(pBuffer),
6310
0
                              dfNoDataValue, nWidth, nHeight, nLineStride,
6311
0
                              nComponents);
6312
0
    }
6313
0
    return false;
6314
0
}
6315
6316
#ifdef HAVE_SSE2
6317
6318
/************************************************************************/
6319
/*                       GDALDeinterleave3Byte()                        */
6320
/************************************************************************/
6321
6322
#if defined(__GNUC__) && !defined(__clang__)
6323
__attribute__((optimize("no-tree-vectorize")))
6324
#endif
6325
static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
6326
                                  GByte *CPL_RESTRICT pabyDest0,
6327
                                  GByte *CPL_RESTRICT pabyDest1,
6328
                                  GByte *CPL_RESTRICT pabyDest2, size_t nIters)
6329
#ifdef USE_NEON_OPTIMIZATIONS
6330
{
6331
    return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
6332
                                       nIters);
6333
}
6334
#else
6335
0
{
6336
0
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
6337
0
    if (CPLHaveRuntimeSSSE3())
6338
0
    {
6339
0
        return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
6340
0
                                           pabyDest2, nIters);
6341
0
    }
6342
0
#endif
6343
6344
0
    size_t i = 0;
6345
0
    if (((reinterpret_cast<uintptr_t>(pabySrc) |
6346
0
          reinterpret_cast<uintptr_t>(pabyDest0) |
6347
0
          reinterpret_cast<uintptr_t>(pabyDest1) |
6348
0
          reinterpret_cast<uintptr_t>(pabyDest2)) %
6349
0
         sizeof(unsigned int)) == 0)
6350
0
    {
6351
        // Slightly better than GCC autovectorizer
6352
0
        for (size_t j = 0; i + 3 < nIters; i += 4, ++j)
6353
0
        {
6354
0
            unsigned int word0 =
6355
0
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i);
6356
0
            unsigned int word1 =
6357
0
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4);
6358
0
            unsigned int word2 =
6359
0
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8);
6360
0
            reinterpret_cast<unsigned int *>(pabyDest0)[j] =
6361
0
                (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) |
6362
0
                ((word2 >> 8) << 24);
6363
0
            reinterpret_cast<unsigned int *>(pabyDest1)[j] =
6364
0
                ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) |
6365
0
                (((word1 >> 24)) << 16) | ((word2 >> 16) << 24);
6366
0
            pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16);
6367
0
            pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8);
6368
0
            pabyDest2[j * 4 + 2] = static_cast<GByte>(word2);
6369
0
            pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24);
6370
0
        }
6371
0
    }
6372
0
#if defined(__clang__)
6373
0
#pragma clang loop vectorize(disable)
6374
0
#endif
6375
0
    for (; i < nIters; ++i)
6376
0
    {
6377
0
        pabyDest0[i] = pabySrc[3 * i + 0];
6378
0
        pabyDest1[i] = pabySrc[3 * i + 1];
6379
0
        pabyDest2[i] = pabySrc[3 * i + 2];
6380
0
    }
6381
0
}
6382
#endif
6383
6384
/************************************************************************/
6385
/*                       GDALDeinterleave4Byte()                        */
6386
/************************************************************************/
6387
6388
#if !defined(__GNUC__) || defined(__clang__)
6389
6390
/************************************************************************/
6391
/*                            deinterleave()                            */
6392
/************************************************************************/
6393
6394
/** Gather the lowest byte of each of the 16 int32 lanes held by the four
 *  input registers into one register of 16 bytes (lanes of xmm0_ori first,
 *  lanes of xmm3_ori last).
 *
 * When SHIFT is true, the four input registers are first shifted right by
 * 8 bits in place, so that successive calls walk through the bytes of each
 * lane. When MASK is false, the caller guarantees that the upper 24 bits of
 * each lane are already zero.
 */
template <bool SHIFT, bool MASK>
inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
                            __m128i &xmm2_ori, __m128i &xmm3_ori)
{
    // Bring the next byte of each int32 lane in the lowest position. The
    // inputs are updated, so the shift accumulates across calls.
    if (SHIFT)
    {
        xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
        xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
        xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
        xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
    }

    __m128i lanes0 = xmm0_ori;
    __m128i lanes1 = xmm1_ori;
    __m128i lanes2 = xmm2_ori;
    __m128i lanes3 = xmm3_ori;
    if (MASK)
    {
        // Set higher 24bit of each int32 packed word to 0
        const __m128i lowByteMask = _mm_set1_epi32(0xff);
        lanes0 = _mm_and_si128(lanes0, lowByteMask);
        lanes1 = _mm_and_si128(lanes1, lowByteMask);
        lanes2 = _mm_and_si128(lanes2, lowByteMask);
        lanes3 = _mm_and_si128(lanes3, lowByteMask);
    }

    // Pack int32 to int16
    const __m128i packed01 = _mm_packs_epi32(lanes0, lanes1);
    const __m128i packed23 = _mm_packs_epi32(lanes2, lanes3);
    // Pack int16 to uint8
    return _mm_packus_epi16(packed01, packed23);
}
Unexecuted instantiation: long long __vector(2) deinterleave<false, true>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&)
Unexecuted instantiation: long long __vector(2) deinterleave<true, true>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&)
Unexecuted instantiation: long long __vector(2) deinterleave<true, false>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&)
6432
6433
static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
6434
                                  GByte *CPL_RESTRICT pabyDest0,
6435
                                  GByte *CPL_RESTRICT pabyDest1,
6436
                                  GByte *CPL_RESTRICT pabyDest2,
6437
                                  GByte *CPL_RESTRICT pabyDest3, size_t nIters)
6438
#ifdef USE_NEON_OPTIMIZATIONS
6439
{
6440
    return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
6441
                                       pabyDest3, nIters);
6442
}
6443
#else
6444
0
{
6445
0
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
6446
0
    if (CPLHaveRuntimeSSSE3())
6447
0
    {
6448
0
        return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
6449
0
                                           pabyDest2, pabyDest3, nIters);
6450
0
    }
6451
0
#endif
6452
6453
    // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to
6454
    // do something slightly better.
6455
0
    size_t i = 0;
6456
0
    for (; i + 15 < nIters; i += 16)
6457
0
    {
6458
0
        __m128i xmm0_ori = _mm_loadu_si128(
6459
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0));
6460
0
        __m128i xmm1_ori = _mm_loadu_si128(
6461
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16));
6462
0
        __m128i xmm2_ori = _mm_loadu_si128(
6463
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32));
6464
0
        __m128i xmm3_ori = _mm_loadu_si128(
6465
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48));
6466
6467
0
        _mm_storeu_si128(
6468
0
            reinterpret_cast<__m128i *>(pabyDest0 + i),
6469
0
            deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
6470
0
        _mm_storeu_si128(
6471
0
            reinterpret_cast<__m128i *>(pabyDest1 + i),
6472
0
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
6473
0
        _mm_storeu_si128(
6474
0
            reinterpret_cast<__m128i *>(pabyDest2 + i),
6475
0
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
6476
0
        _mm_storeu_si128(
6477
0
            reinterpret_cast<__m128i *>(pabyDest3 + i),
6478
0
            deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
6479
0
    }
6480
6481
0
#if defined(__clang__)
6482
0
#pragma clang loop vectorize(disable)
6483
0
#endif
6484
0
    for (; i < nIters; ++i)
6485
0
    {
6486
0
        pabyDest0[i] = pabySrc[4 * i + 0];
6487
0
        pabyDest1[i] = pabySrc[4 * i + 1];
6488
0
        pabyDest2[i] = pabySrc[4 * i + 2];
6489
0
        pabyDest3[i] = pabySrc[4 * i + 3];
6490
0
    }
6491
0
}
6492
#endif
6493
#else
6494
// GCC autovectorizer does an excellent job
6495
__attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte(
6496
    const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0,
6497
    GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2,
6498
    GByte *CPL_RESTRICT pabyDest3, size_t nIters)
6499
{
6500
    for (size_t i = 0; i < nIters; ++i)
6501
    {
6502
        pabyDest0[i] = pabySrc[4 * i + 0];
6503
        pabyDest1[i] = pabySrc[4 * i + 1];
6504
        pabyDest2[i] = pabySrc[4 * i + 2];
6505
        pabyDest3[i] = pabySrc[4 * i + 3];
6506
    }
6507
}
6508
#endif
6509
6510
#else
6511
6512
/************************************************************************/
6513
/*                       GDALDeinterleave3Byte()                        */
6514
/************************************************************************/
6515
6516
// TODO: Enabling below could help on non-Intel architectures where GCC knows
6517
// how to auto-vectorize
6518
// #if defined(__GNUC__)
6519
//__attribute__((optimize("tree-vectorize")))
6520
// #endif
6521
static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
6522
                                  GByte *CPL_RESTRICT pabyDest0,
6523
                                  GByte *CPL_RESTRICT pabyDest1,
6524
                                  GByte *CPL_RESTRICT pabyDest2, size_t nIters)
6525
{
6526
    for (size_t i = 0; i < nIters; ++i)
6527
    {
6528
        pabyDest0[i] = pabySrc[3 * i + 0];
6529
        pabyDest1[i] = pabySrc[3 * i + 1];
6530
        pabyDest2[i] = pabySrc[3 * i + 2];
6531
    }
6532
}
6533
6534
/************************************************************************/
6535
/*                       GDALDeinterleave4Byte()                        */
6536
/************************************************************************/
6537
6538
// TODO: Enabling below could help on non-Intel architectures where gcc knows
6539
// how to auto-vectorize
6540
// #if defined(__GNUC__)
6541
//__attribute__((optimize("tree-vectorize")))
6542
// #endif
6543
static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
6544
                                  GByte *CPL_RESTRICT pabyDest0,
6545
                                  GByte *CPL_RESTRICT pabyDest1,
6546
                                  GByte *CPL_RESTRICT pabyDest2,
6547
                                  GByte *CPL_RESTRICT pabyDest3, size_t nIters)
6548
{
6549
    for (size_t i = 0; i < nIters; ++i)
6550
    {
6551
        pabyDest0[i] = pabySrc[4 * i + 0];
6552
        pabyDest1[i] = pabySrc[4 * i + 1];
6553
        pabyDest2[i] = pabySrc[4 * i + 2];
6554
        pabyDest3[i] = pabySrc[4 * i + 3];
6555
    }
6556
}
6557
6558
#endif
6559
6560
/************************************************************************/
6561
/*                          GDALDeinterleave()                          */
6562
/************************************************************************/
6563
6564
/*! Copy values from a pixel-interleave buffer to multiple per-component
6565
    buffers.
6566
6567
    In pseudo-code
6568
    \verbatim
6569
    for(size_t i = 0; i < nIters; ++i)
6570
        for(int iComp = 0; iComp < nComponents; iComp++ )
6571
            ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp]
6572
    \endverbatim
6573
6574
    The implementation is optimized for a few cases, like de-interleaving
6575
    of 3 or 4-components Byte buffers.
6576
6577
    \since GDAL 3.6
6578
 */
6579
void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT,
6580
                      int nComponents, void **ppDestBuffer,
6581
                      GDALDataType eDestDT, size_t nIters)
6582
0
{
6583
0
    if (eSourceDT == eDestDT)
6584
0
    {
6585
0
        if (eSourceDT == GDT_UInt8 || eSourceDT == GDT_Int8)
6586
0
        {
6587
0
            if (nComponents == 3)
6588
0
            {
6589
0
                const GByte *CPL_RESTRICT pabySrc =
6590
0
                    static_cast<const GByte *>(pSourceBuffer);
6591
0
                GByte *CPL_RESTRICT pabyDest0 =
6592
0
                    static_cast<GByte *>(ppDestBuffer[0]);
6593
0
                GByte *CPL_RESTRICT pabyDest1 =
6594
0
                    static_cast<GByte *>(ppDestBuffer[1]);
6595
0
                GByte *CPL_RESTRICT pabyDest2 =
6596
0
                    static_cast<GByte *>(ppDestBuffer[2]);
6597
0
                GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
6598
0
                                      nIters);
6599
0
                return;
6600
0
            }
6601
0
            else if (nComponents == 4)
6602
0
            {
6603
0
                const GByte *CPL_RESTRICT pabySrc =
6604
0
                    static_cast<const GByte *>(pSourceBuffer);
6605
0
                GByte *CPL_RESTRICT pabyDest0 =
6606
0
                    static_cast<GByte *>(ppDestBuffer[0]);
6607
0
                GByte *CPL_RESTRICT pabyDest1 =
6608
0
                    static_cast<GByte *>(ppDestBuffer[1]);
6609
0
                GByte *CPL_RESTRICT pabyDest2 =
6610
0
                    static_cast<GByte *>(ppDestBuffer[2]);
6611
0
                GByte *CPL_RESTRICT pabyDest3 =
6612
0
                    static_cast<GByte *>(ppDestBuffer[3]);
6613
0
                GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
6614
0
                                      pabyDest3, nIters);
6615
0
                return;
6616
0
            }
6617
0
        }
6618
#if ((defined(__GNUC__) && !defined(__clang__)) ||                             \
6619
     defined(__INTEL_CLANG_COMPILER)) &&                                       \
6620
    defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME)
6621
        else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) &&
6622
                 CPLHaveRuntimeSSSE3())
6623
        {
6624
            if (nComponents == 3)
6625
            {
6626
                const GUInt16 *CPL_RESTRICT panSrc =
6627
                    static_cast<const GUInt16 *>(pSourceBuffer);
6628
                GUInt16 *CPL_RESTRICT panDest0 =
6629
                    static_cast<GUInt16 *>(ppDestBuffer[0]);
6630
                GUInt16 *CPL_RESTRICT panDest1 =
6631
                    static_cast<GUInt16 *>(ppDestBuffer[1]);
6632
                GUInt16 *CPL_RESTRICT panDest2 =
6633
                    static_cast<GUInt16 *>(ppDestBuffer[2]);
6634
                GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1,
6635
                                              panDest2, nIters);
6636
                return;
6637
            }
6638
#if !defined(__INTEL_CLANG_COMPILER)
6639
            // ICC autovectorizer doesn't do a good job, at least with icx
6640
            // 2022.1.0.20220316
6641
            else if (nComponents == 4)
6642
            {
6643
                const GUInt16 *CPL_RESTRICT panSrc =
6644
                    static_cast<const GUInt16 *>(pSourceBuffer);
6645
                GUInt16 *CPL_RESTRICT panDest0 =
6646
                    static_cast<GUInt16 *>(ppDestBuffer[0]);
6647
                GUInt16 *CPL_RESTRICT panDest1 =
6648
                    static_cast<GUInt16 *>(ppDestBuffer[1]);
6649
                GUInt16 *CPL_RESTRICT panDest2 =
6650
                    static_cast<GUInt16 *>(ppDestBuffer[2]);
6651
                GUInt16 *CPL_RESTRICT panDest3 =
6652
                    static_cast<GUInt16 *>(ppDestBuffer[3]);
6653
                GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1,
6654
                                              panDest2, panDest3, nIters);
6655
                return;
6656
            }
6657
#endif
6658
        }
6659
#endif
6660
0
    }
6661
6662
0
    const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT);
6663
0
    const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT);
6664
0
    for (int iComp = 0; iComp < nComponents; iComp++)
6665
0
    {
6666
0
        GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) +
6667
0
                            iComp * nSourceDTSize,
6668
0
                        eSourceDT, nComponents * nSourceDTSize,
6669
0
                        ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters);
6670
0
    }
6671
0
}
6672
6673
/************************************************************************/
6674
/*                   GDALTranspose2DSingleToSingle()                    */
6675
/************************************************************************/
6676
/**
6677
 * Transpose a 2D array of non-complex values, in an efficient (cache-oblivious) way.
6678
 *
6679
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
6680
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
6681
 * @param nSrcWidth Width of pSrc array.
6682
 * @param nSrcHeight Height of pSrc array.
6683
 */
6684
6685
template <class DST, class SRC>
6686
void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc,
6687
                                   DST *CPL_RESTRICT pDst, size_t nSrcWidth,
6688
                                   size_t nSrcHeight)
6689
0
{
6690
0
    constexpr size_t blocksize = 32;
6691
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
6692
0
    {
6693
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
6694
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
6695
0
        {
6696
            // transpose the block beginning at [i,j]
6697
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
6698
0
            for (size_t k = i; k < max_k; ++k)
6699
0
            {
6700
0
                for (size_t l = j; l < max_l; ++l)
6701
0
                {
6702
0
                    GDALCopyWord(pSrc[l + k * nSrcWidth],
6703
0
                                 pDst[k + l * nSrcHeight]);
6704
0
                }
6705
0
            }
6706
0
        }
6707
0
    }
6708
0
}
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned char>(unsigned char const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, signed char>(signed char const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned short>(unsigned short const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned int>(unsigned int const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned long>(unsigned long const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, long>(long const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned char>(unsigned char const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, signed char>(signed char const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned short>(unsigned short const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned int>(unsigned int const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned long>(unsigned long const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, long>(long const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned char>(unsigned char const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, signed char>(signed char const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned short>(unsigned short const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned int>(unsigned int const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned long>(unsigned long const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, long>(long const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, float>(float const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, signed char>(signed char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, long>(long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned char>(unsigned char const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, signed char>(signed char const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned short>(unsigned short const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned int>(unsigned int const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned long>(unsigned long const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, long>(long const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, signed char>(signed char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, long>(long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned char>(unsigned char const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, signed char>(signed char const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned short>(unsigned short const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned int>(unsigned int const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned long>(unsigned long const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, long>(long const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned char>(unsigned char const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, signed char>(signed char const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned short>(unsigned short const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, short>(short const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned int>(unsigned int const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, int>(int const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned long>(unsigned long const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, long>(long const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, float>(float const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, double>(double const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned long>(unsigned long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, signed char>(signed char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, long>(long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, signed char>(signed char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, long>(long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, double>(double const*, double*, unsigned long, unsigned long)
6709
6710
/************************************************************************/
6711
/*                  GDALTranspose2DComplexToComplex()                   */
6712
/************************************************************************/
6713
/**
6714
 * Transpose a 2D array of complex values into an array of complex values,
6715
 * in an efficient (cache-oblivious) way.
6716
 *
6717
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
6718
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
6719
 * @param nSrcWidth Width of pSrc array.
6720
 * @param nSrcHeight Height of pSrc array.
6721
 */
6722
template <class DST, class SRC>
6723
void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc,
6724
                                     DST *CPL_RESTRICT pDst, size_t nSrcWidth,
6725
                                     size_t nSrcHeight)
6726
0
{
6727
0
    constexpr size_t blocksize = 32;
6728
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
6729
0
    {
6730
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
6731
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
6732
0
        {
6733
            // transpose the block beginning at [i,j]
6734
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
6735
0
            for (size_t k = i; k < max_k; ++k)
6736
0
            {
6737
0
                for (size_t l = j; l < max_l; ++l)
6738
0
                {
6739
0
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
6740
0
                                 pDst[2 * (k + l * nSrcHeight) + 0]);
6741
0
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1],
6742
0
                                 pDst[2 * (k + l * nSrcHeight) + 1]);
6743
0
                }
6744
0
            }
6745
0
        }
6746
0
    }
6747
0
}
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, double>(double const*, double*, unsigned long, unsigned long)
6748
6749
/************************************************************************/
6750
/*                   GDALTranspose2DComplexToSingle()                   */
6751
/************************************************************************/
6752
/**
6753
 * Transpose a 2D array of complex values into an array of non-complex values,
6754
 * in an efficient (cache-oblivious) way.
6755
 *
6756
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
6757
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
6758
 * @param nSrcWidth Width of pSrc array.
6759
 * @param nSrcHeight Height of pSrc array.
6760
 */
6761
template <class DST, class SRC>
6762
void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc,
6763
                                    DST *CPL_RESTRICT pDst, size_t nSrcWidth,
6764
                                    size_t nSrcHeight)
6765
0
{
6766
0
    constexpr size_t blocksize = 32;
6767
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
6768
0
    {
6769
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
6770
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
6771
0
        {
6772
            // transpose the block beginning at [i,j]
6773
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
6774
0
            for (size_t k = i; k < max_k; ++k)
6775
0
            {
6776
0
                for (size_t l = j; l < max_l; ++l)
6777
0
                {
6778
0
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
6779
0
                                 pDst[k + l * nSrcHeight]);
6780
0
                }
6781
0
            }
6782
0
        }
6783
0
    }
6784
0
}
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, float>(float const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, short>(short const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, int>(int const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, float>(float const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, double>(double const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, double>(double const*, double*, unsigned long, unsigned long)
6785
6786
/************************************************************************/
6787
/*                   GDALTranspose2DSingleToComplex()                   */
6788
/************************************************************************/
6789
/**
6790
 * Transpose a 2D array of non-complex values into an array of complex values,
6791
 * in a efficient (cache-oblivious) way.
6792
 *
6793
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
6794
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
6795
 * @param nSrcWidth Width of pSrc array.
6796
 * @param nSrcHeight Height of pSrc array.
6797
 */
6798
template <class DST, class SRC>
6799
void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc,
6800
                                    DST *CPL_RESTRICT pDst, size_t nSrcWidth,
6801
                                    size_t nSrcHeight)
6802
0
{
6803
0
    constexpr size_t blocksize = 32;
6804
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
6805
0
    {
6806
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
6807
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
6808
0
        {
6809
            // transpose the block beginning at [i,j]
6810
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
6811
0
            for (size_t k = i; k < max_k; ++k)
6812
0
            {
6813
0
                for (size_t l = j; l < max_l; ++l)
6814
0
                {
6815
0
                    GDALCopyWord(pSrc[l + k * nSrcWidth],
6816
0
                                 pDst[2 * (k + l * nSrcHeight) + 0]);
6817
0
                    pDst[2 * (k + l * nSrcHeight) + 1] = 0;
6818
0
                }
6819
0
            }
6820
0
        }
6821
0
    }
6822
0
}
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, signed char>(signed char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, long>(long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, signed char>(signed char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, long>(long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned long>(unsigned long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, signed char>(signed char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, long>(long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, signed char>(signed char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, long>(long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, double>(double const*, double*, unsigned long, unsigned long)
6823
6824
/************************************************************************/
6825
/*                          GDALTranspose2D()                           */
6826
/************************************************************************/
6827
6828
template <class DST, bool DST_IS_COMPLEX>
6829
static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst,
6830
                            size_t nSrcWidth, size_t nSrcHeight)
6831
0
{
6832
0
#define CALL_GDALTranspose2D_internal(SRC_TYPE)                                \
6833
0
    do                                                                         \
6834
0
    {                                                                          \
6835
0
        if constexpr (DST_IS_COMPLEX)                                          \
6836
0
        {                                                                      \
6837
0
            GDALTranspose2DSingleToComplex(                                    \
6838
0
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
6839
0
                nSrcHeight);                                                   \
6840
0
        }                                                                      \
6841
0
        else                                                                   \
6842
0
        {                                                                      \
6843
0
            GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \
6844
0
                                          pDst, nSrcWidth, nSrcHeight);        \
6845
0
        }                                                                      \
6846
0
    } while (0)
6847
6848
0
#define CALL_GDALTranspose2DComplex_internal(SRC_TYPE)                         \
6849
0
    do                                                                         \
6850
0
    {                                                                          \
6851
0
        if constexpr (DST_IS_COMPLEX)                                          \
6852
0
        {                                                                      \
6853
0
            GDALTranspose2DComplexToComplex(                                   \
6854
0
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
6855
0
                nSrcHeight);                                                   \
6856
0
        }                                                                      \
6857
0
        else                                                                   \
6858
0
        {                                                                      \
6859
0
            GDALTranspose2DComplexToSingle(                                    \
6860
0
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
6861
0
                nSrcHeight);                                                   \
6862
0
        }                                                                      \
6863
0
    } while (0)
6864
6865
    // clang-format off
6866
0
    switch (eSrcType)
6867
0
    {
6868
0
        case GDT_UInt8:     CALL_GDALTranspose2D_internal(uint8_t); break;
6869
0
        case GDT_Int8:     CALL_GDALTranspose2D_internal(int8_t); break;
6870
0
        case GDT_UInt16:   CALL_GDALTranspose2D_internal(uint16_t); break;
6871
0
        case GDT_Int16:    CALL_GDALTranspose2D_internal(int16_t); break;
6872
0
        case GDT_UInt32:   CALL_GDALTranspose2D_internal(uint32_t); break;
6873
0
        case GDT_Int32:    CALL_GDALTranspose2D_internal(int32_t); break;
6874
0
        case GDT_UInt64:   CALL_GDALTranspose2D_internal(uint64_t); break;
6875
0
        case GDT_Int64:    CALL_GDALTranspose2D_internal(int64_t); break;
6876
0
        case GDT_Float16:  CALL_GDALTranspose2D_internal(GFloat16); break;
6877
0
        case GDT_Float32:  CALL_GDALTranspose2D_internal(float); break;
6878
0
        case GDT_Float64:  CALL_GDALTranspose2D_internal(double); break;
6879
0
        case GDT_CInt16:   CALL_GDALTranspose2DComplex_internal(int16_t); break;
6880
0
        case GDT_CInt32:   CALL_GDALTranspose2DComplex_internal(int32_t); break;
6881
0
        case GDT_CFloat16: CALL_GDALTranspose2DComplex_internal(GFloat16); break;
6882
0
        case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break;
6883
0
        case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break;
6884
0
        case GDT_Unknown:
6885
0
        case GDT_TypeCount:
6886
0
            break;
6887
0
    }
6888
        // clang-format on
6889
6890
0
#undef CALL_GDALTranspose2D_internal
6891
0
#undef CALL_GDALTranspose2DComplex_internal
6892
0
}
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned char, false>(void const*, GDALDataType, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<signed char, false>(void const*, GDALDataType, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned short, false>(void const*, GDALDataType, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, false>(void const*, GDALDataType, short*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned int, false>(void const*, GDALDataType, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, false>(void const*, GDALDataType, int*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned long, false>(void const*, GDALDataType, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<long, false>(void const*, GDALDataType, long*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, false>(void const*, GDALDataType, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, false>(void const*, GDALDataType, float*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, false>(void const*, GDALDataType, double*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, true>(void const*, GDALDataType, short*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, true>(void const*, GDALDataType, int*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, true>(void const*, GDALDataType, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, true>(void const*, GDALDataType, float*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, true>(void const*, GDALDataType, double*, unsigned long, unsigned long)
6893
6894
/************************************************************************/
6895
/*                        GDALInterleave2Byte()                         */
6896
/************************************************************************/
6897
6898
#if defined(HAVE_SSE2) &&                                                      \
6899
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
6900
6901
// ICC autovectorizer doesn't do a good job at generating good SSE code,
6902
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
6903
#if defined(__GNUC__)
6904
__attribute__((noinline))
6905
#endif
6906
static void GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
6907
                                uint8_t *CPL_RESTRICT pDst, size_t nIters)
6908
{
6909
    size_t i = 0;
6910
    constexpr size_t VALS_PER_ITER = 16;
6911
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
6912
    {
6913
        __m128i xmm0 =
6914
            _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i));
6915
        __m128i xmm1 = _mm_loadu_si128(
6916
            reinterpret_cast<__m128i const *>(pSrc + i + nIters));
6917
        _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i),
6918
                         _mm_unpacklo_epi8(xmm0, xmm1));
6919
        _mm_storeu_si128(
6920
            reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER),
6921
            _mm_unpackhi_epi8(xmm0, xmm1));
6922
    }
6923
#if defined(__clang__)
6924
#pragma clang loop vectorize(disable)
6925
#endif
6926
    for (; i < nIters; ++i)
6927
    {
6928
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
6929
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
6930
    }
6931
}
6932
6933
#else
6934
6935
#if defined(__GNUC__) && !defined(__clang__)
6936
__attribute__((optimize("tree-vectorize")))
6937
#endif
6938
#if defined(__GNUC__)
6939
__attribute__((noinline))
6940
#endif
6941
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6942
// clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
6943
#pragma clang diagnostic push
6944
#pragma clang diagnostic ignored "-Wpass-failed"
6945
#endif
6946
static void GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
6947
                                uint8_t *CPL_RESTRICT pDst, size_t nIters)
6948
0
{
6949
0
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6950
0
#pragma clang loop vectorize(enable)
6951
0
#endif
6952
0
    for (size_t i = 0; i < nIters; ++i)
6953
0
    {
6954
0
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
6955
0
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
6956
0
    }
6957
0
}
6958
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6959
#pragma clang diagnostic pop
6960
#endif
6961
6962
#endif
6963
6964
/************************************************************************/
6965
/*                        GDALInterleave4Byte()                         */
6966
/************************************************************************/
6967
6968
#if defined(HAVE_SSE2) &&                                                      \
6969
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
6970
6971
// ICC autovectorizer doesn't do a good job at generating good SSE code,
6972
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
6973
#if defined(__GNUC__)
6974
__attribute__((noinline))
6975
#endif
6976
static void GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6977
                                uint8_t *CPL_RESTRICT pDst, size_t nIters)
6978
{
6979
    size_t i = 0;
6980
    constexpr size_t VALS_PER_ITER = 16;
6981
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
6982
    {
6983
        __m128i xmm0 = _mm_loadu_si128(
6984
            reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters));
6985
        __m128i xmm1 = _mm_loadu_si128(
6986
            reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters));
6987
        __m128i xmm2 = _mm_loadu_si128(
6988
            reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters));
6989
        __m128i xmm3 = _mm_loadu_si128(
6990
            reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters));
6991
        auto tmp0 = _mm_unpacklo_epi8(
6992
            xmm0,
6993
            xmm1);  // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...)
6994
        auto tmp1 = _mm_unpackhi_epi8(
6995
            xmm0,
6996
            xmm1);  // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...)
6997
        auto tmp2 = _mm_unpacklo_epi8(
6998
            xmm2,
6999
            xmm3);  // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...)
7000
        auto tmp3 = _mm_unpackhi_epi8(
7001
            xmm2,
7002
            xmm3);  // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...)
7003
        auto tmp2_0 = _mm_unpacklo_epi16(
7004
            tmp0,
7005
            tmp2);  // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...)
7006
        auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2);
7007
        auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3);
7008
        auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3);
7009
        _mm_storeu_si128(
7010
            reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER),
7011
            tmp2_0);
7012
        _mm_storeu_si128(
7013
            reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER),
7014
            tmp2_1);
7015
        _mm_storeu_si128(
7016
            reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER),
7017
            tmp2_2);
7018
        _mm_storeu_si128(
7019
            reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER),
7020
            tmp2_3);
7021
    }
7022
#if defined(__clang__)
7023
#pragma clang loop vectorize(disable)
7024
#endif
7025
    for (; i < nIters; ++i)
7026
    {
7027
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
7028
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
7029
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
7030
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
7031
    }
7032
}
7033
7034
#else
7035
7036
#if defined(__GNUC__) && !defined(__clang__)
7037
__attribute__((optimize("tree-vectorize")))
7038
#endif
7039
#if defined(__GNUC__)
7040
__attribute__((noinline))
7041
#endif
7042
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
7043
// clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
7044
#pragma clang diagnostic push
7045
#pragma clang diagnostic ignored "-Wpass-failed"
7046
#endif
7047
static void GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
7048
                                uint8_t *CPL_RESTRICT pDst, size_t nIters)
7049
0
{
7050
0
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
7051
0
#pragma clang loop vectorize(enable)
7052
0
#endif
7053
0
    for (size_t i = 0; i < nIters; ++i)
7054
0
    {
7055
0
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
7056
0
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
7057
0
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
7058
0
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
7059
0
    }
7060
0
}
7061
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
7062
#pragma clang diagnostic pop
7063
#endif
7064
7065
#endif
7066
7067
/************************************************************************/
7068
/*                          GDALTranspose2D()                           */
7069
/************************************************************************/
7070
7071
/**
7072
 * Transpose a 2D array in a efficient (cache-oblivious) way.
7073
 *
7074
 * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight.
7075
 * @param eSrcType Data type of pSrc.
7076
 * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth.
7077
 * @param eDstType Data type of pDst.
7078
 * @param nSrcWidth Width of pSrc array.
7079
 * @param nSrcHeight Height of pSrc array.
7080
 * @since GDAL 3.11
7081
 */
7082
7083
void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst,
7084
                     GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight)
7085
0
{
7086
0
    if (eSrcType == eDstType && (eSrcType == GDT_UInt8 || eSrcType == GDT_Int8))
7087
0
    {
7088
0
        if (nSrcHeight == 2)
7089
0
        {
7090
0
            GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc),
7091
0
                                static_cast<uint8_t *>(pDst), nSrcWidth);
7092
0
            return;
7093
0
        }
7094
0
        if (nSrcHeight == 4)
7095
0
        {
7096
0
            GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc),
7097
0
                                static_cast<uint8_t *>(pDst), nSrcWidth);
7098
0
            return;
7099
0
        }
7100
0
#if (defined(HAVE_SSSE3_AT_COMPILE_TIME) &&                                    \
7101
0
     (defined(__x86_64) || defined(_M_X64)))
7102
0
        if (CPLHaveRuntimeSSSE3())
7103
0
        {
7104
0
            GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
7105
0
                                       static_cast<uint8_t *>(pDst), nSrcWidth,
7106
0
                                       nSrcHeight);
7107
0
            return;
7108
0
        }
7109
#elif defined(USE_NEON_OPTIMIZATIONS)
7110
        {
7111
            GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
7112
                                       static_cast<uint8_t *>(pDst), nSrcWidth,
7113
                                       nSrcHeight);
7114
            return;
7115
        }
7116
#endif
7117
0
    }
7118
7119
0
#define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX)                \
7120
0
    GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>(                                 \
7121
0
        pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight)
7122
7123
    // clang-format off
7124
0
    switch (eDstType)
7125
0
    {
7126
0
        case GDT_UInt8:     CALL_GDALTranspose2D_internal(uint8_t, false); break;
7127
0
        case GDT_Int8:     CALL_GDALTranspose2D_internal(int8_t, false); break;
7128
0
        case GDT_UInt16:   CALL_GDALTranspose2D_internal(uint16_t, false); break;
7129
0
        case GDT_Int16:    CALL_GDALTranspose2D_internal(int16_t, false); break;
7130
0
        case GDT_UInt32:   CALL_GDALTranspose2D_internal(uint32_t, false); break;
7131
0
        case GDT_Int32:    CALL_GDALTranspose2D_internal(int32_t, false); break;
7132
0
        case GDT_UInt64:   CALL_GDALTranspose2D_internal(uint64_t, false); break;
7133
0
        case GDT_Int64:    CALL_GDALTranspose2D_internal(int64_t, false); break;
7134
0
        case GDT_Float16:  CALL_GDALTranspose2D_internal(GFloat16, false); break;
7135
0
        case GDT_Float32:  CALL_GDALTranspose2D_internal(float, false); break;
7136
0
        case GDT_Float64:  CALL_GDALTranspose2D_internal(double, false); break;
7137
0
        case GDT_CInt16:   CALL_GDALTranspose2D_internal(int16_t, true); break;
7138
0
        case GDT_CInt32:   CALL_GDALTranspose2D_internal(int32_t, true); break;
7139
0
        case GDT_CFloat16: CALL_GDALTranspose2D_internal(GFloat16, true); break;
7140
0
        case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break;
7141
0
        case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break;
7142
0
        case GDT_Unknown:
7143
0
        case GDT_TypeCount:
7144
0
            break;
7145
0
    }
7146
        // clang-format on
7147
7148
0
#undef CALL_GDALTranspose2D_internal
7149
0
}
7150
7151
/************************************************************************/
7152
/*                     ExtractBitAndConvertTo255()                      */
7153
/************************************************************************/
7154
7155
#if defined(__GNUC__) || defined(_MSC_VER)
7156
// Signedness of char implementation dependent, so be explicit.
7157
// Assumes 2-complement integer types and sign extension of right shifting
7158
// GCC guarantees such:
7159
// https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
7160
static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
7161
0
{
7162
0
    return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >>
7163
0
                              7);
7164
0
}
7165
#else
7166
// Portable way
7167
static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
7168
{
7169
    return (byVal & (1 << nBit)) ? 255 : 0;
7170
}
7171
#endif
7172
7173
/************************************************************************/
7174
/*                  ExpandEightPackedBitsToByteAt255()                  */
7175
/************************************************************************/
7176
7177
// Expands the 8 bits of byVal, most significant bit first, into the 8 bytes
// of abyOutput: a set bit yields 255, a cleared bit yields 0.
static inline void ExpandEightPackedBitsToByteAt255(GByte byVal,
                                                    GByte abyOutput[8])
{
    // abyOutput[0] receives bit 7, ..., abyOutput[7] receives bit 0.
    for (int iOut = 0; iOut < 8; ++iOut)
    {
        abyOutput[iOut] = ExtractBitAndConvertTo255(byVal, 7 - iOut);
    }
}
7189
7190
/************************************************************************/
7191
/*                 GDALExpandPackedBitsToByteAt0Or255()                 */
7192
/************************************************************************/
7193
7194
/** Expand packed-bits (ordered from most-significant bit to least one)
7195
  into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
7196
  at 1 to a byte at 255.
7197
7198
 The function does (in a possibly more optimized way) the following:
7199
 \code{.cpp}
7200
 for (size_t i = 0; i < nInputBits; ++i )
7201
 {
7202
     pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0;
7203
 }
7204
 \endcode
7205
7206
 @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
7207
 @param pabyOutput Output array of nInputBits bytes.
7208
 @param nInputBits Number of valid bits in pabyInput.
7209
7210
 @since 3.11
7211
*/
7212
7213
void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput,
7214
                                        GByte *CPL_RESTRICT pabyOutput,
7215
                                        size_t nInputBits)
7216
0
{
7217
0
    const size_t nInputWholeBytes = nInputBits / 8;
7218
0
    size_t iByte = 0;
7219
7220
0
#ifdef HAVE_SSE2
7221
    // Mask to isolate each bit
7222
0
    const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4,
7223
0
                                          8, 16, 32, 64, -128);
7224
0
    const __m128i zero = _mm_setzero_si128();
7225
0
    const __m128i all_ones = _mm_set1_epi8(-1);
7226
#ifdef __SSSE3__
7227
    const __m128i dispatch_two_bytes =
7228
        _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
7229
#endif
7230
0
    constexpr size_t SSE_REG_SIZE = sizeof(bit_mask);
7231
0
    for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE)
7232
0
    {
7233
0
        __m128i reg_ori = _mm_loadu_si128(
7234
0
            reinterpret_cast<const __m128i *>(pabyInput + iByte));
7235
7236
0
        constexpr int NUM_PROCESSED_BYTES_PER_REG = 2;
7237
0
        for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k)
7238
0
        {
7239
            // Given reg_ori = (A, B, ... 14 other bytes ...),
7240
            // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B)
7241
#ifdef __SSSE3__
7242
            __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes);
7243
#else
7244
0
            __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori);
7245
0
            reg = _mm_unpacklo_epi16(reg, reg);
7246
0
            reg = _mm_unpacklo_epi32(reg, reg);
7247
0
#endif
7248
7249
            // Test if bits of interest are set
7250
0
            reg = _mm_and_si128(reg, bit_mask);
7251
7252
            // Now test if those bits are set, by comparing to zero. So the
7253
            // result will be that bytes where bits are set will be at 0, and
7254
            // ones where they are cleared will be at 0xFF. So the inverse of
7255
            // the end result we want!
7256
0
            reg = _mm_cmpeq_epi8(reg, zero);
7257
7258
            // Invert the result
7259
0
            reg = _mm_andnot_si128(reg, all_ones);
7260
7261
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg);
7262
7263
0
            pabyOutput += SSE_REG_SIZE;
7264
7265
            // Right-shift of 2 bytes
7266
0
            reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG);
7267
0
        }
7268
0
    }
7269
7270
0
#endif  // HAVE_SSE2
7271
7272
0
    for (; iByte < nInputWholeBytes; ++iByte)
7273
0
    {
7274
0
        ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput);
7275
0
        pabyOutput += 8;
7276
0
    }
7277
0
    for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
7278
0
    {
7279
0
        *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit);
7280
0
        ++pabyOutput;
7281
0
    }
7282
0
}
7283
7284
/************************************************************************/
7285
/*                   ExpandEightPackedBitsToByteAt1()                   */
7286
/************************************************************************/
7287
7288
// Expands the 8 bits of byVal, most significant bit first, into the 8 bytes
// of abyOutput: a set bit yields 1, a cleared bit yields 0.
static inline void ExpandEightPackedBitsToByteAt1(GByte byVal,
                                                  GByte abyOutput[8])
{
    // abyOutput[0] receives bit 7, ..., abyOutput[7] receives bit 0.
    for (int iOut = 0; iOut < 8; ++iOut)
    {
        abyOutput[iOut] = (byVal >> (7 - iOut)) & 0x1;
    }
}
7300
7301
/************************************************************************/
7302
/*                  GDALExpandPackedBitsToByteAt0Or1()                  */
7303
/************************************************************************/
7304
7305
/** Expand packed-bits (ordered from most-significant bit to least one)
7306
  into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
7307
  at 1 to a byte at 1.
7308
7309
 The function does (in a possibly more optimized way) the following:
7310
 \code{.cpp}
7311
 for (size_t i = 0; i < nInputBits; ++i )
7312
 {
7313
     pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
7314
 }
7315
 \endcode
7316
7317
 @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
7318
 @param pabyOutput Output array of nInputBits bytes.
7319
 @param nInputBits Number of valid bits in pabyInput.
7320
7321
 @since 3.11
7322
*/
7323
7324
void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput,
7325
                                      GByte *CPL_RESTRICT pabyOutput,
7326
                                      size_t nInputBits)
7327
0
{
7328
0
    const size_t nInputWholeBytes = nInputBits / 8;
7329
0
    size_t iByte = 0;
7330
0
    for (; iByte < nInputWholeBytes; ++iByte)
7331
0
    {
7332
0
        ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput);
7333
0
        pabyOutput += 8;
7334
0
    }
7335
0
    for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
7336
0
    {
7337
0
        *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1;
7338
0
        ++pabyOutput;
7339
0
    }
7340
0
}