Coverage Report

Created: 2025-08-28 06:57

/src/gdal/gcore/rasterio.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  GDAL Core
4
 * Purpose:  Contains default implementation of GDALRasterBand::IRasterIO()
5
 *           and supporting functions of broader utility.
6
 * Author:   Frank Warmerdam, warmerdam@pobox.com
7
 *
8
 ******************************************************************************
9
 * Copyright (c) 1998, Frank Warmerdam
10
 * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11
 *
12
 * SPDX-License-Identifier: MIT
13
 ****************************************************************************/
14
15
#include "cpl_port.h"
16
#include "gdal.h"
17
#include "gdal_priv.h"
18
19
#include <cassert>
20
#include <climits>
21
#include <cmath>
22
#include <cstddef>
23
#include <cstdio>
24
#include <cstdlib>
25
#include <cstring>
26
27
#include <algorithm>
28
#include <limits>
29
#include <stdexcept>
30
#include <type_traits>
31
32
#include "cpl_conv.h"
33
#include "cpl_cpu_features.h"
34
#include "cpl_error.h"
35
#include "cpl_float.h"
36
#include "cpl_progress.h"
37
#include "cpl_string.h"
38
#include "cpl_vsi.h"
39
#include "gdal_priv_templates.hpp"
40
#include "gdal_vrt.h"
41
#include "gdalwarper.h"
42
#include "memdataset.h"
43
#include "vrtdataset.h"
44
45
#if defined(__x86_64) || defined(_M_X64)
46
#include <emmintrin.h>
47
#define HAVE_SSE2
48
#elif defined(USE_NEON_OPTIMIZATIONS)
49
#include "include_sse2neon.h"
50
#define HAVE_SSE2
51
#endif
52
53
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
54
#include "rasterio_ssse3.h"
55
#ifdef __SSSE3__
56
#include <tmmintrin.h>
57
#endif
58
#endif
59
60
#ifdef __SSE4_1__
61
#include <smmintrin.h>
62
#endif
63
64
#ifdef __GNUC__
65
#define CPL_NOINLINE __attribute__((noinline))
66
#else
67
#define CPL_NOINLINE
68
#endif
69
70
static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
71
                             int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
72
                             int nDstPixelStride, GPtrDiff_t nWordCount);
73
74
/************************************************************************/
75
/*                    DownsamplingIntegerXFactor()                      */
76
/************************************************************************/
77
78
template <bool bSameDataType, int DATA_TYPE_SIZE>
79
static bool DownsamplingIntegerXFactor(
80
    GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst,
81
    GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize,
82
    GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX,
83
    int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY)
84
0
{
85
0
    const int nBandDataSize =
86
0
        bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType);
87
0
    int nOuterLoopIters = nBufXSize - 1;
88
0
    const int nIncSrcOffset = nSrcXInc * nBandDataSize;
89
0
    const GByte *CPL_RESTRICT pabySrcData;
90
0
    int nEndBlockX = nBlockXSize + nStartBlockX;
91
92
0
    if (iSrcX < nEndBlockX)
93
0
    {
94
0
        CPLAssert(poBlock);
95
0
        goto no_reload_block;
96
0
    }
97
0
    goto reload_block;
98
99
    // Don't do the last iteration in the loop, as iSrcX might go beyond
100
    // nRasterXSize - 1
101
0
    while (--nOuterLoopIters >= 1)
102
0
    {
103
0
        iSrcX += nSrcXInc;
104
0
        pabySrcData += nIncSrcOffset;
105
0
        pabyDstData += nPixelSpace;
106
107
        /* --------------------------------------------------------------------
108
         */
109
        /*      Ensure we have the appropriate block loaded. */
110
        /* --------------------------------------------------------------------
111
         */
112
0
        if (iSrcX >= nEndBlockX)
113
0
        {
114
0
        reload_block:
115
0
        {
116
0
            const int nLBlockX = iSrcX / nBlockXSize;
117
0
            nStartBlockX = nLBlockX * nBlockXSize;
118
0
            nEndBlockX = nStartBlockX + nBlockXSize;
119
120
0
            if (poBlock != nullptr)
121
0
                poBlock->DropLock();
122
123
0
            poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
124
0
            if (poBlock == nullptr)
125
0
            {
126
0
                return false;
127
0
            }
128
0
        }
129
130
0
        no_reload_block:
131
0
            const GByte *pabySrcBlock =
132
0
                static_cast<const GByte *>(poBlock->GetDataRef());
133
0
            GPtrDiff_t iSrcOffset =
134
0
                (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
135
0
            pabySrcData = pabySrcBlock + iSrcOffset;
136
0
        }
137
138
        /* --------------------------------------------------------------------
139
         */
140
        /*      Copy the maximum run of pixels. */
141
        /* --------------------------------------------------------------------
142
         */
143
144
0
        const int nIters = std::min(
145
0
            (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
146
0
        if (bSameDataType)
147
0
        {
148
0
            memcpy(pabyDstData, pabySrcData, nBandDataSize);
149
0
            if (nIters > 1)
150
0
            {
151
0
                if (DATA_TYPE_SIZE == 1)
152
0
                {
153
0
                    pabySrcData += nIncSrcOffset;
154
0
                    pabyDstData += nPixelSpace;
155
0
                    GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData,
156
0
                                     nPixelSpace, nIters - 1);
157
0
                    pabySrcData +=
158
0
                        static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
159
0
                    pabyDstData +=
160
0
                        static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
161
0
                }
162
0
                else
163
0
                {
164
0
                    for (int i = 0; i < nIters - 1; i++)
165
0
                    {
166
0
                        pabySrcData += nIncSrcOffset;
167
0
                        pabyDstData += nPixelSpace;
168
0
                        memcpy(pabyDstData, pabySrcData, nBandDataSize);
169
0
                    }
170
0
                }
171
0
                iSrcX += nSrcXInc * (nIters - 1);
172
0
                nOuterLoopIters -= nIters - 1;
173
0
            }
174
0
        }
175
0
        else
176
0
        {
177
            // Type to type conversion ...
178
0
            GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData,
179
0
                            eBufType, nPixelSpace, std::max(1, nIters));
180
0
            if (nIters > 1)
181
0
            {
182
0
                pabySrcData +=
183
0
                    static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
184
0
                pabyDstData +=
185
0
                    static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
186
0
                iSrcX += nSrcXInc * (nIters - 1);
187
0
                nOuterLoopIters -= nIters - 1;
188
0
            }
189
0
        }
190
0
    }
191
192
    // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1
193
0
    if (nOuterLoopIters == 0)
194
0
    {
195
0
        const int nRasterXSize = poBand->GetXSize();
196
0
        iSrcX =
197
0
            static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
198
0
                                      static_cast<GInt64>(nRasterXSize - 1)));
199
0
        pabyDstData += nPixelSpace;
200
0
        if (iSrcX < nEndBlockX)
201
0
        {
202
0
            goto no_reload_block;
203
0
        }
204
0
        goto reload_block;
205
0
    }
206
0
    return true;
207
0
}
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 1>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 2>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 4>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 8>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 16>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<false, 0>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
208
209
template <class A, class B>
210
CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW inline auto CPLUnsanitizedMul(A a, B b)
211
0
{
212
0
    return a * b;
213
0
}
214
215
/************************************************************************/
216
/*                             IRasterIO()                              */
217
/*                                                                      */
218
/*      Default internal implementation of RasterIO() ... utilizes      */
219
/*      the Block access methods to satisfy the request.  This would    */
220
/*      normally only be overridden by formats with overviews.          */
221
/************************************************************************/
222
223
CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
224
                                 int nXSize, int nYSize, void *pData,
225
                                 int nBufXSize, int nBufYSize,
226
                                 GDALDataType eBufType, GSpacing nPixelSpace,
227
                                 GSpacing nLineSpace,
228
                                 GDALRasterIOExtraArg *psExtraArg)
229
230
0
{
231
0
    if (eRWFlag == GF_Write && eFlushBlockErr != CE_None)
232
0
    {
233
0
        CPLError(eFlushBlockErr, CPLE_AppDefined,
234
0
                 "An error occurred while writing a dirty block "
235
0
                 "from GDALRasterBand::IRasterIO");
236
0
        CPLErr eErr = eFlushBlockErr;
237
0
        eFlushBlockErr = CE_None;
238
0
        return eErr;
239
0
    }
240
0
    if (nBlockXSize <= 0 || nBlockYSize <= 0)
241
0
    {
242
0
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size");
243
0
        return CE_Failure;
244
0
    }
245
246
0
    const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
247
0
    const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType);
248
0
    GByte dummyBlock[2] = {0, 0};
249
0
    GByte *pabySrcBlock =
250
0
        dummyBlock; /* to avoid Coverity warning about nullptr dereference */
251
0
    GDALRasterBlock *poBlock = nullptr;
252
0
    const bool bUseIntegerRequestCoords =
253
0
        (!psExtraArg->bFloatingPointWindowValidity ||
254
0
         (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
255
0
          nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
256
257
    /* ==================================================================== */
258
    /*      A common case is the data requested with the destination        */
259
    /*      is packed, and the block width is the raster width.             */
260
    /* ==================================================================== */
261
0
    if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize &&
262
0
        nBlockXSize == GetXSize() && nBufXSize == nXSize &&
263
0
        nBufYSize == nYSize && bUseIntegerRequestCoords)
264
0
    {
265
0
        CPLErr eErr = CE_None;
266
0
        int nLBlockY = -1;
267
268
0
        for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
269
0
        {
270
0
            const int iSrcY = iBufYOff + nYOff;
271
272
0
            if (iSrcY < nLBlockY * nBlockYSize ||
273
0
                iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
274
0
            {
275
0
                nLBlockY = iSrcY / nBlockYSize;
276
0
                bool bJustInitialize =
277
0
                    eRWFlag == GF_Write && nXOff == 0 &&
278
0
                    nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
279
0
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
280
281
                // Is this a partial tile at right and/or bottom edges of
282
                // the raster, and that is going to be completely written?
283
                // If so, do not load it from storage, but zero it so that
284
                // the content outsize of the validity area is initialized.
285
0
                bool bMemZeroBuffer = false;
286
0
                if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 &&
287
0
                    nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
288
0
                    nYOff + nYSize == GetYSize() &&
289
0
                    nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)
290
0
                {
291
0
                    bJustInitialize = true;
292
0
                    bMemZeroBuffer = true;
293
0
                }
294
295
0
                if (poBlock)
296
0
                    poBlock->DropLock();
297
298
0
                const GUInt32 nErrorCounter = CPLGetErrorCounter();
299
0
                poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize);
300
0
                if (poBlock == nullptr)
301
0
                {
302
0
                    if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
303
0
                        nullptr)
304
0
                    {
305
0
                        CPLError(CE_Failure, CPLE_AppDefined,
306
0
                                 "GetBlockRef failed at X block offset %d, "
307
0
                                 "Y block offset %d%s",
308
0
                                 0, nLBlockY,
309
0
                                 (nErrorCounter != CPLGetErrorCounter())
310
0
                                     ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
311
0
                                     : "");
312
0
                    }
313
0
                    eErr = CE_Failure;
314
0
                    break;
315
0
                }
316
317
0
                if (eRWFlag == GF_Write)
318
0
                    poBlock->MarkDirty();
319
320
0
                pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
321
0
                if (bMemZeroBuffer)
322
0
                {
323
0
                    memset(pabySrcBlock, 0,
324
0
                           static_cast<GPtrDiff_t>(nBandDataSize) *
325
0
                               nBlockXSize * nBlockYSize);
326
0
                }
327
0
            }
328
329
0
            const auto nSrcByteOffset =
330
0
                (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) *
331
0
                     nBlockXSize +
332
0
                 nXOff) *
333
0
                nBandDataSize;
334
335
0
            if (eDataType == eBufType)
336
0
            {
337
0
                if (eRWFlag == GF_Read)
338
0
                    memcpy(static_cast<GByte *>(pData) +
339
0
                               static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
340
0
                           pabySrcBlock + nSrcByteOffset,
341
0
                           static_cast<size_t>(nLineSpace));
342
0
                else
343
0
                    memcpy(pabySrcBlock + nSrcByteOffset,
344
0
                           static_cast<GByte *>(pData) +
345
0
                               static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
346
0
                           static_cast<size_t>(nLineSpace));
347
0
            }
348
0
            else
349
0
            {
350
                // Type to type conversion.
351
0
                if (eRWFlag == GF_Read)
352
0
                    GDALCopyWords64(
353
0
                        pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize,
354
0
                        static_cast<GByte *>(pData) +
355
0
                            static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
356
0
                        eBufType, static_cast<int>(nPixelSpace), nBufXSize);
357
0
                else
358
0
                    GDALCopyWords64(static_cast<GByte *>(pData) +
359
0
                                        static_cast<GPtrDiff_t>(iBufYOff) *
360
0
                                            nLineSpace,
361
0
                                    eBufType, static_cast<int>(nPixelSpace),
362
0
                                    pabySrcBlock + nSrcByteOffset, eDataType,
363
0
                                    nBandDataSize, nBufXSize);
364
0
            }
365
366
0
            if (psExtraArg->pfnProgress != nullptr &&
367
0
                !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
368
0
                                         psExtraArg->pProgressData))
369
0
            {
370
0
                eErr = CE_Failure;
371
0
                break;
372
0
            }
373
0
        }
374
375
0
        if (poBlock)
376
0
            poBlock->DropLock();
377
378
0
        return eErr;
379
0
    }
380
381
    /* ==================================================================== */
382
    /*      Do we have overviews that would be appropriate to satisfy       */
383
    /*      this request?                                                   */
384
    /* ==================================================================== */
385
0
    if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 &&
386
0
        eRWFlag == GF_Read)
387
0
    {
388
0
        GDALRasterIOExtraArg sExtraArg;
389
0
        GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
390
391
0
        const int nOverview =
392
0
            GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
393
0
                                          nBufXSize, nBufYSize, &sExtraArg);
394
0
        if (nOverview >= 0)
395
0
        {
396
0
            GDALRasterBand *poOverviewBand = GetOverview(nOverview);
397
0
            if (poOverviewBand == nullptr)
398
0
                return CE_Failure;
399
400
0
            return poOverviewBand->RasterIO(
401
0
                eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize,
402
0
                nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg);
403
0
        }
404
0
    }
405
406
0
    if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 &&
407
0
        nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize &&
408
0
        nLineSpace == nPixelSpace * nBufXSize &&
409
0
        CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")))
410
0
    {
411
0
        memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize));
412
0
        return CE_None;
413
0
    }
414
415
    /* ==================================================================== */
416
    /*      The second case when we don't need subsample data but likely    */
417
    /*      need data type conversion.                                      */
418
    /* ==================================================================== */
419
0
    if (  // nPixelSpace == nBufDataSize &&
420
0
        nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
421
0
    {
422
#if DEBUG_VERBOSE
423
        printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/
424
               nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag));
425
#endif
426
427
        /* --------------------------------------------------------------------
428
         */
429
        /*      Loop over buffer computing source locations. */
430
        /* --------------------------------------------------------------------
431
         */
432
        // Calculate starting values out of loop
433
0
        const int nLBlockXStart = nXOff / nBlockXSize;
434
0
        const int nXSpanEnd = nBufXSize + nXOff;
435
436
0
        int nYInc = 0;
437
0
        for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize;
438
0
             iBufYOff += nYInc, iSrcY += nYInc)
439
0
        {
440
0
            GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
441
0
                                    static_cast<GPtrDiff_t>(nLineSpace);
442
0
            int nLBlockY = iSrcY / nBlockYSize;
443
0
            int nLBlockX = nLBlockXStart;
444
0
            int iSrcX = nXOff;
445
0
            while (iSrcX < nXSpanEnd)
446
0
            {
447
0
                int nXSpan = nLBlockX * nBlockXSize;
448
0
                if (nXSpan < INT_MAX - nBlockXSize)
449
0
                    nXSpan += nBlockXSize;
450
0
                else
451
0
                    nXSpan = INT_MAX;
452
0
                const int nXRight = nXSpan;
453
0
                nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX;
454
455
0
                const size_t nXSpanSize =
456
0
                    CPLUnsanitizedMul(nXSpan, static_cast<size_t>(nPixelSpace));
457
458
0
                bool bJustInitialize =
459
0
                    eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
460
0
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
461
0
                    nXOff <= nLBlockX * nBlockXSize &&
462
0
                    nXOff + nXSize >= nXRight;
463
464
                // Is this a partial tile at right and/or bottom edges of
465
                // the raster, and that is going to be completely written?
466
                // If so, do not load it from storage, but zero it so that
467
                // the content outsize of the validity area is initialized.
468
0
                bool bMemZeroBuffer = false;
469
0
                if (eRWFlag == GF_Write && !bJustInitialize &&
470
0
                    nXOff <= nLBlockX * nBlockXSize &&
471
0
                    nYOff <= nLBlockY * nBlockYSize &&
472
0
                    (nXOff + nXSize >= nXRight ||
473
                     // cppcheck-suppress knownConditionTrueFalse
474
0
                     (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) &&
475
0
                    (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
476
0
                     (nYOff + nYSize == GetYSize() &&
477
0
                      nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)))
478
0
                {
479
0
                    bJustInitialize = true;
480
0
                    bMemZeroBuffer = true;
481
0
                }
482
483
                /* --------------------------------------------------------------------
484
                 */
485
                /*      Ensure we have the appropriate block loaded. */
486
                /* --------------------------------------------------------------------
487
                 */
488
0
                const GUInt32 nErrorCounter = CPLGetErrorCounter();
489
0
                poBlock =
490
0
                    GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
491
0
                if (!poBlock)
492
0
                {
493
0
                    if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
494
0
                        nullptr)
495
0
                    {
496
0
                        CPLError(CE_Failure, CPLE_AppDefined,
497
0
                                 "GetBlockRef failed at X block offset %d, "
498
0
                                 "Y block offset %d%s",
499
0
                                 nLBlockX, nLBlockY,
500
0
                                 (nErrorCounter != CPLGetErrorCounter())
501
0
                                     ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
502
0
                                     : "");
503
0
                    }
504
0
                    return (CE_Failure);
505
0
                }
506
507
0
                if (eRWFlag == GF_Write)
508
0
                    poBlock->MarkDirty();
509
510
0
                pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
511
0
                if (bMemZeroBuffer)
512
0
                {
513
0
                    memset(pabySrcBlock, 0,
514
0
                           static_cast<GPtrDiff_t>(nBandDataSize) *
515
0
                               nBlockXSize * nBlockYSize);
516
0
                }
517
                /* --------------------------------------------------------------------
518
                 */
519
                /*      Copy over this chunk of data. */
520
                /* --------------------------------------------------------------------
521
                 */
522
0
                GPtrDiff_t iSrcOffset =
523
0
                    (static_cast<GPtrDiff_t>(iSrcX) -
524
0
                     static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) +
525
0
                     (static_cast<GPtrDiff_t>(iSrcY) -
526
0
                      static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
527
0
                         nBlockXSize) *
528
0
                    nBandDataSize;
529
                // Fill up as many rows as possible for the loaded block.
530
0
                const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize),
531
0
                                          nBufYSize - iBufYOff);
532
0
                for (int k = 0; k < kmax; k++)
533
0
                {
534
0
                    if (eDataType == eBufType && nPixelSpace == nBufDataSize)
535
0
                    {
536
0
                        if (eRWFlag == GF_Read)
537
0
                            memcpy(static_cast<GByte *>(pData) + iBufOffset +
538
0
                                       static_cast<GPtrDiff_t>(k) * nLineSpace,
539
0
                                   pabySrcBlock + iSrcOffset, nXSpanSize);
540
0
                        else
541
0
                            memcpy(pabySrcBlock + iSrcOffset,
542
0
                                   static_cast<GByte *>(pData) + iBufOffset +
543
0
                                       static_cast<GPtrDiff_t>(k) * nLineSpace,
544
0
                                   nXSpanSize);
545
0
                    }
546
0
                    else
547
0
                    {
548
                        /* type to type conversion */
549
0
                        if (eRWFlag == GF_Read)
550
0
                            GDALCopyWords64(
551
0
                                pabySrcBlock + iSrcOffset, eDataType,
552
0
                                nBandDataSize,
553
0
                                static_cast<GByte *>(pData) + iBufOffset +
554
0
                                    static_cast<GPtrDiff_t>(k) * nLineSpace,
555
0
                                eBufType, static_cast<int>(nPixelSpace),
556
0
                                nXSpan);
557
0
                        else
558
0
                            GDALCopyWords64(
559
0
                                static_cast<GByte *>(pData) + iBufOffset +
560
0
                                    static_cast<GPtrDiff_t>(k) * nLineSpace,
561
0
                                eBufType, static_cast<int>(nPixelSpace),
562
0
                                pabySrcBlock + iSrcOffset, eDataType,
563
0
                                nBandDataSize, nXSpan);
564
0
                    }
565
566
0
                    iSrcOffset +=
567
0
                        static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize;
568
0
                }
569
570
0
                iBufOffset =
571
0
                    CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
572
0
                nLBlockX++;
573
0
                iSrcX += nXSpan;
574
575
0
                poBlock->DropLock();
576
0
                poBlock = nullptr;
577
0
            }
578
579
            /* Compute the increment to go on a block boundary */
580
0
            nYInc = nBlockYSize - (iSrcY % nBlockYSize);
581
582
0
            if (psExtraArg->pfnProgress != nullptr &&
583
0
                !psExtraArg->pfnProgress(
584
0
                    1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
585
0
                    psExtraArg->pProgressData))
586
0
            {
587
0
                return CE_Failure;
588
0
            }
589
0
        }
590
591
0
        return CE_None;
592
0
    }
593
594
    /* ==================================================================== */
595
    /*      Loop reading required source blocks to satisfy output           */
596
    /*      request.  This is the most general implementation.              */
597
    /* ==================================================================== */
598
599
0
    double dfXOff = nXOff;
600
0
    double dfYOff = nYOff;
601
0
    double dfXSize = nXSize;
602
0
    double dfYSize = nYSize;
603
0
    if (psExtraArg->bFloatingPointWindowValidity)
604
0
    {
605
0
        dfXOff = psExtraArg->dfXOff;
606
0
        dfYOff = psExtraArg->dfYOff;
607
0
        dfXSize = psExtraArg->dfXSize;
608
0
        dfYSize = psExtraArg->dfYSize;
609
0
    }
610
611
    /* -------------------------------------------------------------------- */
612
    /*      Compute stepping increment.                                     */
613
    /* -------------------------------------------------------------------- */
614
0
    const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
615
0
    const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
616
0
    CPLErr eErr = CE_None;
617
618
0
    if (eRWFlag == GF_Write)
619
0
    {
620
        /* --------------------------------------------------------------------
621
         */
622
        /*    Write case */
623
        /*    Loop over raster window computing source locations in the buffer.
624
         */
625
        /* --------------------------------------------------------------------
626
         */
627
0
        GByte *pabyDstBlock = nullptr;
628
0
        int nLBlockX = -1;
629
0
        int nLBlockY = -1;
630
631
0
        for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++)
632
0
        {
633
0
            const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
634
635
0
            for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++)
636
0
            {
637
0
                const int iBufXOff =
638
0
                    static_cast<int>((iDstX - nXOff) / dfSrcXInc);
639
0
                GPtrDiff_t iBufOffset =
640
0
                    static_cast<GPtrDiff_t>(iBufYOff) *
641
0
                        static_cast<GPtrDiff_t>(nLineSpace) +
642
0
                    iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
643
644
                // FIXME: this code likely doesn't work if the dirty block gets
645
                // flushed to disk before being completely written.
646
                // In the meantime, bJustInitialize should probably be set to
647
                // FALSE even if it is not ideal performance wise, and for
648
                // lossy compression.
649
650
                /* --------------------------------------------------------------------
651
                 */
652
                /*      Ensure we have the appropriate block loaded. */
653
                /* --------------------------------------------------------------------
654
                 */
655
0
                if (iDstX < nLBlockX * nBlockXSize ||
656
0
                    iDstX - nBlockXSize >= nLBlockX * nBlockXSize ||
657
0
                    iDstY < nLBlockY * nBlockYSize ||
658
0
                    iDstY - nBlockYSize >= nLBlockY * nBlockYSize)
659
0
                {
660
0
                    nLBlockX = iDstX / nBlockXSize;
661
0
                    nLBlockY = iDstY / nBlockYSize;
662
663
0
                    const bool bJustInitialize =
664
0
                        nYOff <= nLBlockY * nBlockYSize &&
665
0
                        nYOff + nYSize - nBlockYSize >=
666
0
                            nLBlockY * nBlockYSize &&
667
0
                        nXOff <= nLBlockX * nBlockXSize &&
668
0
                        nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
669
                    /*bool bMemZeroBuffer = FALSE;
670
                    if( !bJustInitialize &&
671
                        nXOff <= nLBlockX * nBlockXSize &&
672
                        nYOff <= nLBlockY * nBlockYSize &&
673
                        (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
674
                         (nXOff + nXSize == GetXSize() &&
675
                         (nLBlockX+1) * nBlockXSize > GetXSize())) &&
676
                        (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
677
                         (nYOff + nYSize == GetYSize() &&
678
                         (nLBlockY+1) * nBlockYSize > GetYSize())) )
679
                    {
680
                        bJustInitialize = TRUE;
681
                        bMemZeroBuffer = TRUE;
682
                    }*/
683
0
                    if (poBlock != nullptr)
684
0
                        poBlock->DropLock();
685
686
0
                    poBlock =
687
0
                        GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
688
0
                    if (poBlock == nullptr)
689
0
                    {
690
0
                        return (CE_Failure);
691
0
                    }
692
693
0
                    poBlock->MarkDirty();
694
695
0
                    pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
696
                    /*if( bMemZeroBuffer )
697
                    {
698
                        memset(pabyDstBlock, 0,
699
                            static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
700
                    * nBlockYSize);
701
                    }*/
702
0
                }
703
704
                // To make Coverity happy. Should not happen by design.
705
0
                if (pabyDstBlock == nullptr)
706
0
                {
707
0
                    CPLAssert(false);
708
0
                    eErr = CE_Failure;
709
0
                    break;
710
0
                }
711
712
                /* --------------------------------------------------------------------
713
                 */
714
                /*      Copy over this pixel of data. */
715
                /* --------------------------------------------------------------------
716
                 */
717
0
                GPtrDiff_t iDstOffset =
718
0
                    (static_cast<GPtrDiff_t>(iDstX) -
719
0
                     static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
720
0
                     (static_cast<GPtrDiff_t>(iDstY) -
721
0
                      static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
722
0
                         nBlockXSize) *
723
0
                    nBandDataSize;
724
725
0
                if (eDataType == eBufType)
726
0
                {
727
0
                    memcpy(pabyDstBlock + iDstOffset,
728
0
                           static_cast<GByte *>(pData) + iBufOffset,
729
0
                           nBandDataSize);
730
0
                }
731
0
                else
732
0
                {
733
                    /* type to type conversion ... ouch, this is expensive way
734
                    of handling single words */
735
0
                    GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset,
736
0
                                    eBufType, 0, pabyDstBlock + iDstOffset,
737
0
                                    eDataType, 0, 1);
738
0
                }
739
0
            }
740
741
0
            if (psExtraArg->pfnProgress != nullptr &&
742
0
                !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
743
0
                                         psExtraArg->pProgressData))
744
0
            {
745
0
                eErr = CE_Failure;
746
0
                break;
747
0
            }
748
0
        }
749
0
    }
750
0
    else
751
0
    {
752
0
        if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
753
0
        {
754
0
            if ((psExtraArg->eResampleAlg == GRIORA_Cubic ||
755
0
                 psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
756
0
                 psExtraArg->eResampleAlg == GRIORA_Bilinear ||
757
0
                 psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
758
0
                GetColorTable() != nullptr)
759
0
            {
760
0
                CPLError(CE_Warning, CPLE_NotSupported,
761
0
                         "Resampling method not supported on paletted band. "
762
0
                         "Falling back to nearest neighbour");
763
0
            }
764
0
            else if (psExtraArg->eResampleAlg == GRIORA_Gauss &&
765
0
                     GDALDataTypeIsComplex(eDataType))
766
0
            {
767
0
                CPLError(CE_Warning, CPLE_NotSupported,
768
0
                         "Resampling method not supported on complex data type "
769
0
                         "band. Falling back to nearest neighbour");
770
0
            }
771
0
            else
772
0
            {
773
0
                return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize,
774
0
                                         pData, nBufXSize, nBufYSize, eBufType,
775
0
                                         nPixelSpace, nLineSpace, psExtraArg);
776
0
            }
777
0
        }
778
779
0
        int nLimitBlockY = 0;
780
0
        const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
781
0
        int nStartBlockX = -nBlockXSize;
782
0
        const double EPS = 1e-10;
783
0
        int nLBlockY = -1;
784
0
        const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
785
0
        const bool bIntegerXFactor =
786
0
            bUseIntegerRequestCoords &&
787
0
            static_cast<int>(dfSrcXInc) == dfSrcXInc &&
788
0
            static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
789
790
        /* --------------------------------------------------------------------
791
         */
792
        /*      Read case */
793
        /*      Loop over buffer computing source locations. */
794
        /* --------------------------------------------------------------------
795
         */
796
0
        for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
797
0
        {
798
            // Add small epsilon to avoid some numeric precision issues.
799
0
            const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
800
0
            const int iSrcY = static_cast<int>(std::min(
801
0
                std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
802
803
0
            GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
804
0
                                    static_cast<GPtrDiff_t>(nLineSpace);
805
806
0
            if (iSrcY >= nLimitBlockY)
807
0
            {
808
0
                nLBlockY = iSrcY / nBlockYSize;
809
0
                nLimitBlockY = nLBlockY * nBlockYSize;
810
0
                if (nLimitBlockY < INT_MAX - nBlockYSize)
811
0
                    nLimitBlockY += nBlockYSize;
812
0
                else
813
0
                    nLimitBlockY = INT_MAX;
814
                // Make sure a new block is loaded.
815
0
                nStartBlockX = -nBlockXSize;
816
0
            }
817
0
            else if (static_cast<int>(dfSrcXStart) < nStartBlockX)
818
0
            {
819
                // Make sure a new block is loaded.
820
0
                nStartBlockX = -nBlockXSize;
821
0
            }
822
823
0
            GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) *
824
0
                                       static_cast<GPtrDiff_t>(nBlockXSize);
825
826
0
            if (bIntegerXFactor)
827
0
            {
828
0
                int iSrcX = static_cast<int>(dfSrcXStart);
829
0
                const int nSrcXInc = static_cast<int>(dfSrcXInc);
830
0
                GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset;
831
0
                bool bRet = false;
832
0
                if (bByteCopy)
833
0
                {
834
0
                    bRet = DownsamplingIntegerXFactor<true, 1>(
835
0
                        this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
836
0
                        static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte,
837
0
                        GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
838
0
                }
839
0
                else if (eDataType == eBufType)
840
0
                {
841
0
                    switch (nBandDataSize)
842
0
                    {
843
0
                        case 2:
844
0
                            bRet = DownsamplingIntegerXFactor<true, 2>(
845
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
846
0
                                pabyDstData, static_cast<int>(nPixelSpace),
847
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
848
0
                                nBlockXSize, poBlock, nLBlockY);
849
0
                            break;
850
0
                        case 4:
851
0
                            bRet = DownsamplingIntegerXFactor<true, 4>(
852
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
853
0
                                pabyDstData, static_cast<int>(nPixelSpace),
854
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
855
0
                                nBlockXSize, poBlock, nLBlockY);
856
0
                            break;
857
0
                        case 8:
858
0
                            bRet = DownsamplingIntegerXFactor<true, 8>(
859
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
860
0
                                pabyDstData, static_cast<int>(nPixelSpace),
861
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
862
0
                                nBlockXSize, poBlock, nLBlockY);
863
0
                            break;
864
0
                        case 16:
865
0
                            bRet = DownsamplingIntegerXFactor<true, 16>(
866
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
867
0
                                pabyDstData, static_cast<int>(nPixelSpace),
868
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
869
0
                                nBlockXSize, poBlock, nLBlockY);
870
0
                            break;
871
0
                        default:
872
0
                            CPLAssert(false);
873
0
                            break;
874
0
                    }
875
0
                }
876
0
                else
877
0
                {
878
0
                    bRet = DownsamplingIntegerXFactor<false, 0>(
879
0
                        this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
880
0
                        static_cast<int>(nPixelSpace), nBufXSize, eDataType,
881
0
                        eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
882
0
                }
883
0
                if (!bRet)
884
0
                    eErr = CE_Failure;
885
0
            }
886
0
            else
887
0
            {
888
0
                double dfSrcX = dfSrcXStart;
889
0
                for (int iBufXOff = 0; iBufXOff < nBufXSize;
890
0
                     iBufXOff++, dfSrcX += dfSrcXInc)
891
0
                {
892
                    // TODO?: try to avoid the clamping for most iterations
893
0
                    const int iSrcX = static_cast<int>(
894
0
                        std::min(std::max(0.0, dfSrcX),
895
0
                                 static_cast<double>(nRasterXSize - 1)));
896
897
                    /* --------------------------------------------------------------------
898
                     */
899
                    /*      Ensure we have the appropriate block loaded. */
900
                    /* --------------------------------------------------------------------
901
                     */
902
0
                    if (iSrcX >= nBlockXSize + nStartBlockX)
903
0
                    {
904
0
                        const int nLBlockX = iSrcX / nBlockXSize;
905
0
                        nStartBlockX = nLBlockX * nBlockXSize;
906
907
0
                        if (poBlock != nullptr)
908
0
                            poBlock->DropLock();
909
910
0
                        poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
911
0
                        if (poBlock == nullptr)
912
0
                        {
913
0
                            eErr = CE_Failure;
914
0
                            break;
915
0
                        }
916
917
0
                        pabySrcBlock =
918
0
                            static_cast<GByte *>(poBlock->GetDataRef());
919
0
                    }
920
0
                    const GPtrDiff_t nDiffX =
921
0
                        static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
922
923
                    /* --------------------------------------------------------------------
924
                     */
925
                    /*      Copy over this pixel of data. */
926
                    /* --------------------------------------------------------------------
927
                     */
928
929
0
                    if (bByteCopy)
930
0
                    {
931
0
                        GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
932
0
                        static_cast<GByte *>(pData)[iBufOffset] =
933
0
                            pabySrcBlock[iSrcOffset];
934
0
                    }
935
0
                    else if (eDataType == eBufType)
936
0
                    {
937
0
                        GPtrDiff_t iSrcOffset =
938
0
                            (nDiffX + iSrcOffsetCst) * nBandDataSize;
939
0
                        memcpy(static_cast<GByte *>(pData) + iBufOffset,
940
0
                               pabySrcBlock + iSrcOffset, nBandDataSize);
941
0
                    }
942
0
                    else
943
0
                    {
944
                        // Type to type conversion ...
945
0
                        GPtrDiff_t iSrcOffset =
946
0
                            (nDiffX + iSrcOffsetCst) * nBandDataSize;
947
0
                        GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
948
0
                                        static_cast<GByte *>(pData) +
949
0
                                            iBufOffset,
950
0
                                        eBufType, 0, 1);
951
0
                    }
952
953
0
                    iBufOffset += static_cast<int>(nPixelSpace);
954
0
                }
955
0
            }
956
0
            if (eErr == CE_Failure)
957
0
                break;
958
959
0
            if (psExtraArg->pfnProgress != nullptr &&
960
0
                !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
961
0
                                         psExtraArg->pProgressData))
962
0
            {
963
0
                eErr = CE_Failure;
964
0
                break;
965
0
            }
966
0
        }
967
0
    }
968
969
0
    if (poBlock != nullptr)
970
0
        poBlock->DropLock();
971
972
0
    return eErr;
973
0
}
974
975
/************************************************************************/
976
/*                         GDALRasterIOTransformer()                    */
977
/************************************************************************/
978
979
// Parameters of the linear pixel-coordinate mapping applied by
// GDALRasterIOTransformer():
//   src = dst * ratio + offset   (destination to source)
//   dst = (src - offset) / ratio (source to destination)
// Members default to the identity mapping so that a default-constructed
// instance never holds indeterminate values and never causes a division by
// an uninitialized ratio.
struct GDALRasterIOTransformerStruct
{
    // Offset added to the scaled destination X coordinate.
    double dfXOff = 0.0;
    // Offset added to the scaled destination Y coordinate.
    double dfYOff = 0.0;
    // Scale factor from destination X to source X; used as a divisor in the
    // source-to-destination direction.
    double dfXRatioDstToSrc = 1.0;
    // Scale factor from destination Y to source Y; used as a divisor in the
    // source-to-destination direction.
    double dfYRatioDstToSrc = 1.0;
};
986
987
static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
988
                                   int nPointCount, double *x, double *y,
989
                                   double * /* z */, int *panSuccess)
990
0
{
991
0
    GDALRasterIOTransformerStruct *psParams =
992
0
        static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
993
0
    if (bDstToSrc)
994
0
    {
995
0
        for (int i = 0; i < nPointCount; i++)
996
0
        {
997
0
            x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
998
0
            y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
999
0
            panSuccess[i] = TRUE;
1000
0
        }
1001
0
    }
1002
0
    else
1003
0
    {
1004
0
        for (int i = 0; i < nPointCount; i++)
1005
0
        {
1006
0
            x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
1007
0
            y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
1008
0
            panSuccess[i] = TRUE;
1009
0
        }
1010
0
    }
1011
0
    return TRUE;
1012
0
}
1013
1014
/************************************************************************/
1015
/*                          RasterIOResampled()                         */
1016
/************************************************************************/
1017
1018
//! @cond Doxygen_Suppress
1019
CPLErr GDALRasterBand::RasterIOResampled(
1020
    GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1021
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1022
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
1023
0
{
1024
    // Determine if we use warping resampling or overview resampling
1025
0
    const bool bUseWarp =
1026
0
        (GDALDataTypeIsComplex(eDataType) &&
1027
0
         psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
1028
0
         psExtraArg->eResampleAlg != GRIORA_Mode);
1029
1030
0
    double dfXOff = nXOff;
1031
0
    double dfYOff = nYOff;
1032
0
    double dfXSize = nXSize;
1033
0
    double dfYSize = nYSize;
1034
0
    if (psExtraArg->bFloatingPointWindowValidity)
1035
0
    {
1036
0
        dfXOff = psExtraArg->dfXOff;
1037
0
        dfYOff = psExtraArg->dfYOff;
1038
0
        dfXSize = psExtraArg->dfXSize;
1039
0
        dfYSize = psExtraArg->dfYSize;
1040
0
    }
1041
1042
0
    const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1043
0
    const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1044
1045
    // Determine the coordinates in the "virtual" output raster to see
1046
    // if they are integers, in which case we will use them as a shift
1047
    // so that subwindow extracts give the exact same results as entire raster
1048
    // scaling.
1049
0
    double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1050
0
    bool bHasXOffVirtual = false;
1051
0
    int nDestXOffVirtual = 0;
1052
0
    if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1053
0
    {
1054
0
        bHasXOffVirtual = true;
1055
0
        dfXOff = nXOff;
1056
0
        nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1057
0
    }
1058
1059
0
    double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1060
0
    bool bHasYOffVirtual = false;
1061
0
    int nDestYOffVirtual = 0;
1062
0
    if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1063
0
    {
1064
0
        bHasYOffVirtual = true;
1065
0
        dfYOff = nYOff;
1066
0
        nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1067
0
    }
1068
1069
    // Create a MEM dataset that wraps the output buffer.
1070
0
    GDALDataset *poMEMDS;
1071
0
    void *pTempBuffer = nullptr;
1072
0
    GSpacing nPSMem = nPixelSpace;
1073
0
    GSpacing nLSMem = nLineSpace;
1074
0
    void *pDataMem = pData;
1075
0
    GDALDataType eDTMem = eBufType;
1076
0
    if (eBufType != eDataType)
1077
0
    {
1078
0
        nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1079
0
        nLSMem = nPSMem * nBufXSize;
1080
0
        pTempBuffer =
1081
0
            VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem));
1082
0
        if (pTempBuffer == nullptr)
1083
0
            return CE_Failure;
1084
0
        pDataMem = pTempBuffer;
1085
0
        eDTMem = eDataType;
1086
0
    }
1087
1088
0
    poMEMDS =
1089
0
        MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1090
0
                           nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr);
1091
0
    GByte *pabyData = static_cast<GByte *>(pDataMem) -
1092
0
                      nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual;
1093
0
    GDALRasterBandH hMEMBand = MEMCreateRasterBandEx(
1094
0
        poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false);
1095
0
    poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand));
1096
1097
0
    const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1098
0
    const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0;
1099
0
    if (pszNBITS)
1100
0
        GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem(
1101
0
            "NBITS", pszNBITS, "IMAGE_STRUCTURE");
1102
1103
0
    CPLErr eErr = CE_None;
1104
1105
    // Do the resampling.
1106
0
    if (bUseWarp)
1107
0
    {
1108
0
        int bHasNoData = FALSE;
1109
0
        double dfNoDataValue = GetNoDataValue(&bHasNoData);
1110
1111
0
        VRTDatasetH hVRTDS = nullptr;
1112
0
        GDALRasterBandH hVRTBand = nullptr;
1113
0
        if (GetDataset() == nullptr)
1114
0
        {
1115
            /* Create VRT dataset that wraps the whole dataset */
1116
0
            hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1117
0
            VRTAddBand(hVRTDS, eDataType, nullptr);
1118
0
            hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1119
0
            VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize,
1120
0
                               0, 0, nRasterXSize, nRasterYSize, nullptr,
1121
0
                               VRT_NODATA_UNSET);
1122
1123
            /* Add a mask band if needed */
1124
0
            if (GetMaskFlags() != GMF_ALL_VALID)
1125
0
            {
1126
0
                GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0);
1127
0
                VRTSourcedRasterBand *poVRTMaskBand =
1128
0
                    reinterpret_cast<VRTSourcedRasterBand *>(
1129
0
                        reinterpret_cast<GDALRasterBand *>(hVRTBand)
1130
0
                            ->GetMaskBand());
1131
0
                poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize,
1132
0
                                                 nRasterYSize, 0, 0,
1133
0
                                                 nRasterXSize, nRasterYSize);
1134
0
            }
1135
0
        }
1136
1137
0
        GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions();
1138
0
        switch (psExtraArg->eResampleAlg)
1139
0
        {
1140
0
            case GRIORA_NearestNeighbour:
1141
0
                psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1142
0
                break;
1143
0
            case GRIORA_Bilinear:
1144
0
                psWarpOptions->eResampleAlg = GRA_Bilinear;
1145
0
                break;
1146
0
            case GRIORA_Cubic:
1147
0
                psWarpOptions->eResampleAlg = GRA_Cubic;
1148
0
                break;
1149
0
            case GRIORA_CubicSpline:
1150
0
                psWarpOptions->eResampleAlg = GRA_CubicSpline;
1151
0
                break;
1152
0
            case GRIORA_Lanczos:
1153
0
                psWarpOptions->eResampleAlg = GRA_Lanczos;
1154
0
                break;
1155
0
            case GRIORA_Average:
1156
0
                psWarpOptions->eResampleAlg = GRA_Average;
1157
0
                break;
1158
0
            case GRIORA_RMS:
1159
0
                psWarpOptions->eResampleAlg = GRA_RMS;
1160
0
                break;
1161
0
            case GRIORA_Mode:
1162
0
                psWarpOptions->eResampleAlg = GRA_Mode;
1163
0
                break;
1164
0
            default:
1165
0
                CPLAssert(false);
1166
0
                psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1167
0
                break;
1168
0
        }
1169
0
        psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1170
0
        psWarpOptions->hDstDS = poMEMDS;
1171
0
        psWarpOptions->nBandCount = 1;
1172
0
        int nSrcBandNumber = hVRTDS ? 1 : nBand;
1173
0
        int nDstBandNumber = 1;
1174
0
        psWarpOptions->panSrcBands = &nSrcBandNumber;
1175
0
        psWarpOptions->panDstBands = &nDstBandNumber;
1176
0
        psWarpOptions->pfnProgress = psExtraArg->pfnProgress
1177
0
                                         ? psExtraArg->pfnProgress
1178
0
                                         : GDALDummyProgress;
1179
0
        psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1180
0
        psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1181
0
        if (bHasNoData)
1182
0
        {
1183
0
            psWarpOptions->papszWarpOptions = CSLSetNameValue(
1184
0
                psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA");
1185
0
            if (psWarpOptions->padfSrcNoDataReal == nullptr)
1186
0
            {
1187
0
                psWarpOptions->padfSrcNoDataReal =
1188
0
                    static_cast<double *>(CPLMalloc(sizeof(double)));
1189
0
                psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1190
0
            }
1191
1192
0
            if (psWarpOptions->padfDstNoDataReal == nullptr)
1193
0
            {
1194
0
                psWarpOptions->padfDstNoDataReal =
1195
0
                    static_cast<double *>(CPLMalloc(sizeof(double)));
1196
0
                psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1197
0
            }
1198
0
        }
1199
1200
0
        GDALRasterIOTransformerStruct sTransformer;
1201
0
        sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1202
0
        sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1203
0
        sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1204
0
        sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1205
0
        psWarpOptions->pTransformerArg = &sTransformer;
1206
1207
0
        GDALWarpOperationH hWarpOperation =
1208
0
            GDALCreateWarpOperation(psWarpOptions);
1209
0
        eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual,
1210
0
                                     nDestYOffVirtual, nBufXSize, nBufYSize);
1211
0
        GDALDestroyWarpOperation(hWarpOperation);
1212
1213
0
        psWarpOptions->panSrcBands = nullptr;
1214
0
        psWarpOptions->panDstBands = nullptr;
1215
0
        GDALDestroyWarpOptions(psWarpOptions);
1216
1217
0
        if (hVRTDS)
1218
0
            GDALClose(hVRTDS);
1219
0
    }
1220
0
    else
1221
0
    {
1222
0
        const char *pszResampling =
1223
0
            (psExtraArg->eResampleAlg == GRIORA_Bilinear)      ? "BILINEAR"
1224
0
            : (psExtraArg->eResampleAlg == GRIORA_Cubic)       ? "CUBIC"
1225
0
            : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1226
0
            : (psExtraArg->eResampleAlg == GRIORA_Lanczos)     ? "LANCZOS"
1227
0
            : (psExtraArg->eResampleAlg == GRIORA_Average)     ? "AVERAGE"
1228
0
            : (psExtraArg->eResampleAlg == GRIORA_RMS)         ? "RMS"
1229
0
            : (psExtraArg->eResampleAlg == GRIORA_Mode)        ? "MODE"
1230
0
            : (psExtraArg->eResampleAlg == GRIORA_Gauss)       ? "GAUSS"
1231
0
                                                               : "UNKNOWN";
1232
1233
0
        int nKernelRadius = 0;
1234
0
        GDALResampleFunction pfnResampleFunc =
1235
0
            GDALGetResampleFunction(pszResampling, &nKernelRadius);
1236
0
        CPLAssert(pfnResampleFunc);
1237
0
        GDALDataType eWrkDataType =
1238
0
            GDALGetOvrWorkDataType(pszResampling, eDataType);
1239
0
        int nHasNoData = 0;
1240
0
        double dfNoDataValue = GetNoDataValue(&nHasNoData);
1241
0
        const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
1242
0
        if (!bHasNoData)
1243
0
            dfNoDataValue = 0.0;
1244
1245
0
        int nDstBlockXSize = nBufXSize;
1246
0
        int nDstBlockYSize = nBufYSize;
1247
0
        int nFullResXChunk = 0;
1248
0
        int nFullResYChunk = 0;
1249
0
        while (true)
1250
0
        {
1251
0
            nFullResXChunk =
1252
0
                3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1253
0
            nFullResYChunk =
1254
0
                3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1255
0
            if (nFullResXChunk > nRasterXSize)
1256
0
                nFullResXChunk = nRasterXSize;
1257
0
            if (nFullResYChunk > nRasterYSize)
1258
0
                nFullResYChunk = nRasterYSize;
1259
0
            if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1260
0
                (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1261
0
                 1024 * 1024))
1262
0
                break;
1263
            // When operating on the full width of a raster whose block width is
1264
            // the raster width, prefer doing chunks in height.
1265
0
            if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1266
0
                nDstBlockYSize > 1)
1267
0
                nDstBlockYSize /= 2;
1268
            /* Otherwise cut the maximal dimension */
1269
0
            else if (nDstBlockXSize > 1 &&
1270
0
                     (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1271
0
                nDstBlockXSize /= 2;
1272
0
            else
1273
0
                nDstBlockYSize /= 2;
1274
0
        }
1275
1276
0
        int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1277
0
        int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1278
0
        if (nOvrXFactor == 0)
1279
0
            nOvrXFactor = 1;
1280
0
        if (nOvrYFactor == 0)
1281
0
            nOvrYFactor = 1;
1282
0
        int nFullResXSizeQueried =
1283
0
            nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1284
0
        int nFullResYSizeQueried =
1285
0
            nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1286
1287
0
        if (nFullResXSizeQueried > nRasterXSize)
1288
0
            nFullResXSizeQueried = nRasterXSize;
1289
0
        if (nFullResYSizeQueried > nRasterYSize)
1290
0
            nFullResYSizeQueried = nRasterYSize;
1291
1292
0
        void *pChunk =
1293
0
            VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
1294
0
                                nFullResXSizeQueried, nFullResYSizeQueried);
1295
0
        GByte *pabyChunkNoDataMask = nullptr;
1296
1297
0
        GDALRasterBand *poMaskBand = GetMaskBand();
1298
0
        int l_nMaskFlags = GetMaskFlags();
1299
1300
0
        bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1301
0
        if (bUseNoDataMask)
1302
0
        {
1303
0
            pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1304
0
                nFullResXSizeQueried, nFullResYSizeQueried));
1305
0
        }
1306
0
        if (pChunk == nullptr ||
1307
0
            (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1308
0
        {
1309
0
            GDALClose(poMEMDS);
1310
0
            CPLFree(pChunk);
1311
0
            CPLFree(pabyChunkNoDataMask);
1312
0
            VSIFree(pTempBuffer);
1313
0
            return CE_Failure;
1314
0
        }
1315
1316
0
        const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) *
1317
0
                                 DIV_ROUND_UP(nBufYSize, nDstBlockYSize);
1318
0
        int nBlocksDone = 0;
1319
1320
0
        int nDstYOff;
1321
0
        for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1322
0
             nDstYOff += nDstBlockYSize)
1323
0
        {
1324
0
            int nDstYCount;
1325
0
            if (nDstYOff + nDstBlockYSize <= nBufYSize)
1326
0
                nDstYCount = nDstBlockYSize;
1327
0
            else
1328
0
                nDstYCount = nBufYSize - nDstYOff;
1329
1330
0
            int nChunkYOff =
1331
0
                nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1332
0
            int nChunkYOff2 = nYOff + 1 +
1333
0
                              static_cast<int>(ceil((nDstYOff + nDstYCount) *
1334
0
                                                    dfYRatioDstToSrc));
1335
0
            if (nChunkYOff2 > nRasterYSize)
1336
0
                nChunkYOff2 = nRasterYSize;
1337
0
            int nYCount = nChunkYOff2 - nChunkYOff;
1338
0
            CPLAssert(nYCount <= nFullResYChunk);
1339
1340
0
            int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1341
0
            int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1342
0
            if (nChunkYOffQueried < 0)
1343
0
            {
1344
0
                nChunkYSizeQueried += nChunkYOffQueried;
1345
0
                nChunkYOffQueried = 0;
1346
0
            }
1347
0
            if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1348
0
                nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1349
0
            CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1350
1351
0
            int nDstXOff = 0;
1352
0
            for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1353
0
                 nDstXOff += nDstBlockXSize)
1354
0
            {
1355
0
                int nDstXCount = 0;
1356
0
                if (nDstXOff + nDstBlockXSize <= nBufXSize)
1357
0
                    nDstXCount = nDstBlockXSize;
1358
0
                else
1359
0
                    nDstXCount = nBufXSize - nDstXOff;
1360
1361
0
                int nChunkXOff =
1362
0
                    nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1363
0
                int nChunkXOff2 =
1364
0
                    nXOff + 1 +
1365
0
                    static_cast<int>(
1366
0
                        ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1367
0
                if (nChunkXOff2 > nRasterXSize)
1368
0
                    nChunkXOff2 = nRasterXSize;
1369
0
                int nXCount = nChunkXOff2 - nChunkXOff;
1370
0
                CPLAssert(nXCount <= nFullResXChunk);
1371
1372
0
                int nChunkXOffQueried =
1373
0
                    nChunkXOff - nKernelRadius * nOvrXFactor;
1374
0
                int nChunkXSizeQueried =
1375
0
                    nXCount + 2 * nKernelRadius * nOvrXFactor;
1376
0
                if (nChunkXOffQueried < 0)
1377
0
                {
1378
0
                    nChunkXSizeQueried += nChunkXOffQueried;
1379
0
                    nChunkXOffQueried = 0;
1380
0
                }
1381
0
                if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1382
0
                    nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1383
0
                CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1384
1385
                // Read the source buffers.
1386
0
                eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1387
0
                                nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1388
0
                                nChunkXSizeQueried, nChunkYSizeQueried,
1389
0
                                eWrkDataType, 0, 0, nullptr);
1390
1391
0
                bool bSkipResample = false;
1392
0
                bool bNoDataMaskFullyOpaque = false;
1393
0
                if (eErr == CE_None && bUseNoDataMask)
1394
0
                {
1395
0
                    eErr = poMaskBand->RasterIO(
1396
0
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1397
0
                        nChunkXSizeQueried, nChunkYSizeQueried,
1398
0
                        pabyChunkNoDataMask, nChunkXSizeQueried,
1399
0
                        nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1400
1401
                    /* Optimizations if mask is fully opaque or transparent */
1402
0
                    int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1403
0
                    GByte bVal = pabyChunkNoDataMask[0];
1404
0
                    int i = 1;
1405
0
                    for (; i < nPixels; i++)
1406
0
                    {
1407
0
                        if (pabyChunkNoDataMask[i] != bVal)
1408
0
                            break;
1409
0
                    }
1410
0
                    if (i == nPixels)
1411
0
                    {
1412
0
                        if (bVal == 0)
1413
0
                        {
1414
0
                            for (int j = 0; j < nDstYCount; j++)
1415
0
                            {
1416
0
                                GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0,
1417
0
                                                static_cast<GByte *>(pDataMem) +
1418
0
                                                    nLSMem * (j + nDstYOff) +
1419
0
                                                    nDstXOff * nPSMem,
1420
0
                                                eDTMem,
1421
0
                                                static_cast<int>(nPSMem),
1422
0
                                                nDstXCount);
1423
0
                            }
1424
0
                            bSkipResample = true;
1425
0
                        }
1426
0
                        else
1427
0
                        {
1428
0
                            bNoDataMaskFullyOpaque = true;
1429
0
                        }
1430
0
                    }
1431
0
                }
1432
1433
0
                if (!bSkipResample && eErr == CE_None)
1434
0
                {
1435
0
                    const bool bPropagateNoData = false;
1436
0
                    void *pDstBuffer = nullptr;
1437
0
                    GDALDataType eDstBufferDataType = GDT_Unknown;
1438
0
                    GDALRasterBand *poMEMBand =
1439
0
                        GDALRasterBand::FromHandle(hMEMBand);
1440
0
                    GDALOverviewResampleArgs args;
1441
0
                    args.eSrcDataType = eDataType;
1442
0
                    args.eOvrDataType = poMEMBand->GetRasterDataType();
1443
0
                    args.nOvrXSize = poMEMBand->GetXSize();
1444
0
                    args.nOvrYSize = poMEMBand->GetYSize();
1445
0
                    args.nOvrNBITS = nNBITS;
1446
0
                    args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1447
0
                    args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1448
0
                    args.dfSrcXDelta =
1449
0
                        dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1450
0
                    args.dfSrcYDelta =
1451
0
                        dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1452
0
                    args.eWrkDataType = eWrkDataType;
1453
0
                    args.pabyChunkNodataMask =
1454
0
                        bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask;
1455
0
                    args.nChunkXOff =
1456
0
                        nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1457
0
                    args.nChunkXSize = nChunkXSizeQueried;
1458
0
                    args.nChunkYOff =
1459
0
                        nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1460
0
                    args.nChunkYSize = nChunkYSizeQueried;
1461
0
                    args.nDstXOff = nDstXOff + nDestXOffVirtual;
1462
0
                    args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount;
1463
0
                    args.nDstYOff = nDstYOff + nDestYOffVirtual;
1464
0
                    args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount;
1465
0
                    args.pszResampling = pszResampling;
1466
0
                    args.bHasNoData = bHasNoData;
1467
0
                    args.dfNoDataValue = dfNoDataValue;
1468
0
                    args.poColorTable = GetColorTable();
1469
0
                    args.bPropagateNoData = bPropagateNoData;
1470
0
                    eErr = pfnResampleFunc(args, pChunk, &pDstBuffer,
1471
0
                                           &eDstBufferDataType);
1472
0
                    if (eErr == CE_None)
1473
0
                    {
1474
0
                        eErr = poMEMBand->RasterIO(
1475
0
                            GF_Write, nDstXOff + nDestXOffVirtual,
1476
0
                            nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount,
1477
0
                            pDstBuffer, nDstXCount, nDstYCount,
1478
0
                            eDstBufferDataType, 0, 0, nullptr);
1479
0
                    }
1480
0
                    CPLFree(pDstBuffer);
1481
0
                }
1482
1483
0
                nBlocksDone++;
1484
0
                if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1485
0
                    !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1486
0
                                             "", psExtraArg->pProgressData))
1487
0
                {
1488
0
                    eErr = CE_Failure;
1489
0
                }
1490
0
            }
1491
0
        }
1492
1493
0
        CPLFree(pChunk);
1494
0
        CPLFree(pabyChunkNoDataMask);
1495
0
    }
1496
1497
0
    if (eBufType != eDataType)
1498
0
    {
1499
0
        CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(
1500
0
            GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
1501
0
            pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
1502
0
            nullptr));
1503
0
    }
1504
0
    GDALClose(poMEMDS);
1505
0
    VSIFree(pTempBuffer);
1506
1507
0
    return eErr;
1508
0
}
1509
1510
/************************************************************************/
1511
/*                          RasterIOResampled()                         */
1512
/************************************************************************/
1513
1514
CPLErr GDALDataset::RasterIOResampled(
1515
    GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1516
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1517
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
1518
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
1519
1520
0
{
1521
#if 0
1522
    // Determine if we use warping resampling or overview resampling
1523
    bool bUseWarp = false;
1524
    if( GDALDataTypeIsComplex( eDataType ) )
1525
        bUseWarp = true;
1526
#endif
1527
1528
0
    double dfXOff = nXOff;
1529
0
    double dfYOff = nYOff;
1530
0
    double dfXSize = nXSize;
1531
0
    double dfYSize = nYSize;
1532
0
    if (psExtraArg->bFloatingPointWindowValidity)
1533
0
    {
1534
0
        dfXOff = psExtraArg->dfXOff;
1535
0
        dfYOff = psExtraArg->dfYOff;
1536
0
        dfXSize = psExtraArg->dfXSize;
1537
0
        dfYSize = psExtraArg->dfYSize;
1538
0
    }
1539
1540
0
    const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1541
0
    const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1542
1543
    // Determine the coordinates in the "virtual" output raster to see
1544
    // if they are integers, in which case we will use them as a shift
1545
    // so that subwindow extracts give the exact same results as entire raster
1546
    // scaling.
1547
0
    double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1548
0
    bool bHasXOffVirtual = false;
1549
0
    int nDestXOffVirtual = 0;
1550
0
    if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1551
0
    {
1552
0
        bHasXOffVirtual = true;
1553
0
        dfXOff = nXOff;
1554
0
        nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1555
0
    }
1556
1557
0
    double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1558
0
    bool bHasYOffVirtual = false;
1559
0
    int nDestYOffVirtual = 0;
1560
0
    if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1561
0
    {
1562
0
        bHasYOffVirtual = true;
1563
0
        dfYOff = nYOff;
1564
0
        nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1565
0
    }
1566
1567
    // Create a MEM dataset that wraps the output buffer.
1568
0
    GDALDataset *poMEMDS =
1569
0
        MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1570
0
                           nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr);
1571
0
    GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>(
1572
0
        CPLMalloc(nBandCount * sizeof(GDALRasterBand *)));
1573
0
    int nNBITS = 0;
1574
0
    for (int i = 0; i < nBandCount; i++)
1575
0
    {
1576
0
        char szBuffer[32] = {'\0'};
1577
0
        int nRet = CPLPrintPointer(
1578
0
            szBuffer,
1579
0
            static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual -
1580
0
                nLineSpace * nDestYOffVirtual + nBandSpace * i,
1581
0
            sizeof(szBuffer));
1582
0
        szBuffer[nRet] = 0;
1583
1584
0
        char szBuffer0[64] = {'\0'};
1585
0
        snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
1586
1587
0
        char szBuffer1[64] = {'\0'};
1588
0
        snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB,
1589
0
                 static_cast<GIntBig>(nPixelSpace));
1590
1591
0
        char szBuffer2[64] = {'\0'};
1592
0
        snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB,
1593
0
                 static_cast<GIntBig>(nLineSpace));
1594
1595
0
        char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr};
1596
1597
0
        poMEMDS->AddBand(eBufType, apszOptions);
1598
1599
0
        GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]);
1600
0
        papoDstBands[i] = poMEMDS->GetRasterBand(i + 1);
1601
0
        const char *pszNBITS =
1602
0
            poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1603
0
        if (pszNBITS)
1604
0
        {
1605
0
            nNBITS = atoi(pszNBITS);
1606
0
            poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS,
1607
0
                                                           "IMAGE_STRUCTURE");
1608
0
        }
1609
0
    }
1610
1611
0
    CPLErr eErr = CE_None;
1612
1613
    // TODO(schwehr): Why disabled?  Why not just delete?
1614
    // Looks like this code was initially added as disabled by copying
1615
    // from RasterIO here:
1616
    // https://trac.osgeo.org/gdal/changeset/29572
1617
#if 0
1618
    // Do the resampling.
1619
    if( bUseWarp )
1620
    {
1621
        VRTDatasetH hVRTDS = nullptr;
1622
        GDALRasterBandH hVRTBand = nullptr;
1623
        if( GetDataset() == nullptr )
1624
        {
1625
            /* Create VRT dataset that wraps the whole dataset */
1626
            hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1627
            VRTAddBand( hVRTDS, eDataType, nullptr );
1628
            hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1629
            VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1630
                                (GDALRasterBandH)this,
1631
                                0, 0,
1632
                                nRasterXSize, nRasterYSize,
1633
                                0, 0,
1634
                                nRasterXSize, nRasterYSize,
1635
                                nullptr, VRT_NODATA_UNSET );
1636
1637
            /* Add a mask band if needed */
1638
            if( GetMaskFlags() != GMF_ALL_VALID )
1639
            {
1640
                ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1641
                VRTSourcedRasterBand* poVRTMaskBand =
1642
                    (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1643
                poVRTMaskBand->
1644
                    AddMaskBandSource( this,
1645
                                    0, 0,
1646
                                    nRasterXSize, nRasterYSize,
1647
                                    0, 0,
1648
                                    nRasterXSize, nRasterYSize);
1649
            }
1650
        }
1651
1652
        GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1653
        psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1654
        psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1655
        psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1656
        psWarpOptions->nBandCount = 1;
1657
        int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1658
        int nDstBandNumber = 1;
1659
        psWarpOptions->panSrcBands = &nSrcBandNumber;
1660
        psWarpOptions->panDstBands = &nDstBandNumber;
1661
        psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1662
                    psExtraArg->pfnProgress : GDALDummyProgress;
1663
        psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1664
        psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1665
        GDALRasterIOTransformerStruct sTransformer;
1666
        sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1667
        sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1668
        sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1669
        sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1670
        psWarpOptions->pTransformerArg = &sTransformer;
1671
1672
        GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1673
        eErr = GDALChunkAndWarpImage( hWarpOperation,
1674
                                      nDestXOffVirtual, nDestYOffVirtual,
1675
                                      nBufXSize, nBufYSize );
1676
        GDALDestroyWarpOperation( hWarpOperation );
1677
1678
        psWarpOptions->panSrcBands = nullptr;
1679
        psWarpOptions->panDstBands = nullptr;
1680
        GDALDestroyWarpOptions( psWarpOptions );
1681
1682
        if( hVRTDS )
1683
            GDALClose(hVRTDS);
1684
    }
1685
    else
1686
#endif
1687
0
    {
1688
0
        const char *pszResampling =
1689
0
            (psExtraArg->eResampleAlg == GRIORA_Bilinear)      ? "BILINEAR"
1690
0
            : (psExtraArg->eResampleAlg == GRIORA_Cubic)       ? "CUBIC"
1691
0
            : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1692
0
            : (psExtraArg->eResampleAlg == GRIORA_Lanczos)     ? "LANCZOS"
1693
0
            : (psExtraArg->eResampleAlg == GRIORA_Average)     ? "AVERAGE"
1694
0
            : (psExtraArg->eResampleAlg == GRIORA_RMS)         ? "RMS"
1695
0
            : (psExtraArg->eResampleAlg == GRIORA_Mode)        ? "MODE"
1696
0
            : (psExtraArg->eResampleAlg == GRIORA_Gauss)       ? "GAUSS"
1697
0
                                                               : "UNKNOWN";
1698
1699
0
        GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]);
1700
0
        GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1701
0
        int nBlockXSize, nBlockYSize;
1702
0
        poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1703
1704
0
        int nKernelRadius;
1705
0
        GDALResampleFunction pfnResampleFunc =
1706
0
            GDALGetResampleFunction(pszResampling, &nKernelRadius);
1707
0
        CPLAssert(pfnResampleFunc);
1708
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1709
        GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1710
            GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1711
#endif
1712
0
        GDALDataType eWrkDataType =
1713
0
            GDALGetOvrWorkDataType(pszResampling, eDataType);
1714
1715
0
        int nDstBlockXSize = nBufXSize;
1716
0
        int nDstBlockYSize = nBufYSize;
1717
0
        int nFullResXChunk, nFullResYChunk;
1718
0
        while (true)
1719
0
        {
1720
0
            nFullResXChunk =
1721
0
                3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1722
0
            nFullResYChunk =
1723
0
                3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1724
0
            if (nFullResXChunk > nRasterXSize)
1725
0
                nFullResXChunk = nRasterXSize;
1726
0
            if (nFullResYChunk > nRasterYSize)
1727
0
                nFullResYChunk = nRasterYSize;
1728
0
            if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1729
0
                (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1730
0
                 1024 * 1024))
1731
0
                break;
1732
            // When operating on the full width of a raster whose block width is
1733
            // the raster width, prefer doing chunks in height.
1734
0
            if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1735
0
                nDstBlockYSize > 1)
1736
0
                nDstBlockYSize /= 2;
1737
            /* Otherwise cut the maximal dimension */
1738
0
            else if (nDstBlockXSize > 1 &&
1739
0
                     (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1740
0
                nDstBlockXSize /= 2;
1741
0
            else
1742
0
                nDstBlockYSize /= 2;
1743
0
        }
1744
1745
0
        int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc),
1746
0
                                  static_cast<int>(0.5 + dfYRatioDstToSrc));
1747
0
        if (nOvrFactor == 0)
1748
0
            nOvrFactor = 1;
1749
0
        int nFullResXSizeQueried =
1750
0
            nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1751
0
        int nFullResYSizeQueried =
1752
0
            nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1753
1754
0
        if (nFullResXSizeQueried > nRasterXSize)
1755
0
            nFullResXSizeQueried = nRasterXSize;
1756
0
        if (nFullResYSizeQueried > nRasterYSize)
1757
0
            nFullResYSizeQueried = nRasterYSize;
1758
1759
0
        void *pChunk = VSI_MALLOC3_VERBOSE(
1760
0
            cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) *
1761
0
                              nBandCount),
1762
0
            nFullResXSizeQueried, nFullResYSizeQueried);
1763
0
        GByte *pabyChunkNoDataMask = nullptr;
1764
1765
0
        GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand();
1766
0
        int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1767
1768
0
        bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1769
0
        if (bUseNoDataMask)
1770
0
        {
1771
0
            pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1772
0
                nFullResXSizeQueried, nFullResYSizeQueried));
1773
0
        }
1774
0
        if (pChunk == nullptr ||
1775
0
            (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1776
0
        {
1777
0
            GDALClose(poMEMDS);
1778
0
            CPLFree(pChunk);
1779
0
            CPLFree(pabyChunkNoDataMask);
1780
0
            CPLFree(papoDstBands);
1781
0
            return CE_Failure;
1782
0
        }
1783
1784
0
        const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) *
1785
0
                                 DIV_ROUND_UP(nBufYSize, nDstBlockYSize);
1786
0
        int nBlocksDone = 0;
1787
1788
0
        int nDstYOff;
1789
0
        for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1790
0
             nDstYOff += nDstBlockYSize)
1791
0
        {
1792
0
            int nDstYCount;
1793
0
            if (nDstYOff + nDstBlockYSize <= nBufYSize)
1794
0
                nDstYCount = nDstBlockYSize;
1795
0
            else
1796
0
                nDstYCount = nBufYSize - nDstYOff;
1797
1798
0
            int nChunkYOff =
1799
0
                nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1800
0
            int nChunkYOff2 = nYOff + 1 +
1801
0
                              static_cast<int>(ceil((nDstYOff + nDstYCount) *
1802
0
                                                    dfYRatioDstToSrc));
1803
0
            if (nChunkYOff2 > nRasterYSize)
1804
0
                nChunkYOff2 = nRasterYSize;
1805
0
            int nYCount = nChunkYOff2 - nChunkYOff;
1806
0
            CPLAssert(nYCount <= nFullResYChunk);
1807
1808
0
            int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1809
0
            int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1810
0
            if (nChunkYOffQueried < 0)
1811
0
            {
1812
0
                nChunkYSizeQueried += nChunkYOffQueried;
1813
0
                nChunkYOffQueried = 0;
1814
0
            }
1815
0
            if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1816
0
                nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1817
0
            CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1818
1819
0
            int nDstXOff;
1820
0
            for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1821
0
                 nDstXOff += nDstBlockXSize)
1822
0
            {
1823
0
                int nDstXCount;
1824
0
                if (nDstXOff + nDstBlockXSize <= nBufXSize)
1825
0
                    nDstXCount = nDstBlockXSize;
1826
0
                else
1827
0
                    nDstXCount = nBufXSize - nDstXOff;
1828
1829
0
                int nChunkXOff =
1830
0
                    nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1831
0
                int nChunkXOff2 =
1832
0
                    nXOff + 1 +
1833
0
                    static_cast<int>(
1834
0
                        ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1835
0
                if (nChunkXOff2 > nRasterXSize)
1836
0
                    nChunkXOff2 = nRasterXSize;
1837
0
                int nXCount = nChunkXOff2 - nChunkXOff;
1838
0
                CPLAssert(nXCount <= nFullResXChunk);
1839
1840
0
                int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1841
0
                int nChunkXSizeQueried =
1842
0
                    nXCount + 2 * nKernelRadius * nOvrFactor;
1843
0
                if (nChunkXOffQueried < 0)
1844
0
                {
1845
0
                    nChunkXSizeQueried += nChunkXOffQueried;
1846
0
                    nChunkXOffQueried = 0;
1847
0
                }
1848
0
                if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1849
0
                    nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1850
0
                CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1851
1852
0
                bool bSkipResample = false;
1853
0
                bool bNoDataMaskFullyOpaque = false;
1854
0
                if (eErr == CE_None && bUseNoDataMask)
1855
0
                {
1856
0
                    eErr = poMaskBand->RasterIO(
1857
0
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1858
0
                        nChunkXSizeQueried, nChunkYSizeQueried,
1859
0
                        pabyChunkNoDataMask, nChunkXSizeQueried,
1860
0
                        nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1861
1862
                    /* Optimizations if mask is fully opaque or transparent */
1863
0
                    const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1864
0
                    const GByte bVal = pabyChunkNoDataMask[0];
1865
0
                    int i = 1;  // Used after for.
1866
0
                    for (; i < nPixels; i++)
1867
0
                    {
1868
0
                        if (pabyChunkNoDataMask[i] != bVal)
1869
0
                            break;
1870
0
                    }
1871
0
                    if (i == nPixels)
1872
0
                    {
1873
0
                        if (bVal == 0)
1874
0
                        {
1875
0
                            GByte abyZero[16] = {0};
1876
0
                            for (int iBand = 0; iBand < nBandCount; iBand++)
1877
0
                            {
1878
0
                                for (int j = 0; j < nDstYCount; j++)
1879
0
                                {
1880
0
                                    GDALCopyWords64(
1881
0
                                        abyZero, GDT_Byte, 0,
1882
0
                                        static_cast<GByte *>(pData) +
1883
0
                                            iBand * nBandSpace +
1884
0
                                            nLineSpace * (j + nDstYOff) +
1885
0
                                            nDstXOff * nPixelSpace,
1886
0
                                        eBufType, static_cast<int>(nPixelSpace),
1887
0
                                        nDstXCount);
1888
0
                                }
1889
0
                            }
1890
0
                            bSkipResample = true;
1891
0
                        }
1892
0
                        else
1893
0
                        {
1894
0
                            bNoDataMaskFullyOpaque = true;
1895
0
                        }
1896
0
                    }
1897
0
                }
1898
1899
0
                if (!bSkipResample && eErr == CE_None)
1900
0
                {
1901
                    /* Read the source buffers */
1902
0
                    eErr = RasterIO(
1903
0
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1904
0
                        nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1905
0
                        nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType,
1906
0
                        nBandCount, panBandMap, 0, 0, 0, nullptr);
1907
0
                }
1908
1909
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1910
                if (pfnResampleFuncMultiBands && !bSkipResample &&
1911
                    eErr == CE_None)
1912
                {
1913
                    eErr = pfnResampleFuncMultiBands(
1914
                        dfXRatioDstToSrc, dfYRatioDstToSrc,
1915
                        dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1916
                        dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1917
                        eWrkDataType, (GByte *)pChunk, nBandCount,
1918
                        bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1919
                        nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1920
                        nChunkXSizeQueried,
1921
                        nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1922
                        nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1923
                        nDstXOff + nDestXOffVirtual + nDstXCount,
1924
                        nDstYOff + nDestYOffVirtual,
1925
                        nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands,
1926
                        pszResampling, FALSE /*bHasNoData*/,
1927
                        0.0 /* dfNoDataValue */, nullptr /* color table*/,
1928
                        eDataType);
1929
                }
1930
                else
1931
#endif
1932
0
                {
1933
0
                    size_t nChunkBandOffset =
1934
0
                        static_cast<size_t>(nChunkXSizeQueried) *
1935
0
                        nChunkYSizeQueried *
1936
0
                        GDALGetDataTypeSizeBytes(eWrkDataType);
1937
0
                    for (int i = 0;
1938
0
                         i < nBandCount && !bSkipResample && eErr == CE_None;
1939
0
                         i++)
1940
0
                    {
1941
0
                        const bool bPropagateNoData = false;
1942
0
                        void *pDstBuffer = nullptr;
1943
0
                        GDALDataType eDstBufferDataType = GDT_Unknown;
1944
0
                        GDALRasterBand *poMEMBand =
1945
0
                            poMEMDS->GetRasterBand(i + 1);
1946
0
                        GDALOverviewResampleArgs args;
1947
0
                        args.eSrcDataType = eDataType;
1948
0
                        args.eOvrDataType = poMEMBand->GetRasterDataType();
1949
0
                        args.nOvrXSize = poMEMBand->GetXSize();
1950
0
                        args.nOvrYSize = poMEMBand->GetYSize();
1951
0
                        args.nOvrNBITS = nNBITS;
1952
0
                        args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1953
0
                        args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1954
0
                        args.dfSrcXDelta =
1955
0
                            dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1956
0
                        args.dfSrcYDelta =
1957
0
                            dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1958
0
                        args.eWrkDataType = eWrkDataType;
1959
0
                        args.pabyChunkNodataMask = bNoDataMaskFullyOpaque
1960
0
                                                       ? nullptr
1961
0
                                                       : pabyChunkNoDataMask;
1962
0
                        args.nChunkXOff =
1963
0
                            nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1964
0
                        args.nChunkXSize = nChunkXSizeQueried;
1965
0
                        args.nChunkYOff =
1966
0
                            nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1967
0
                        args.nChunkYSize = nChunkYSizeQueried;
1968
0
                        args.nDstXOff = nDstXOff + nDestXOffVirtual;
1969
0
                        args.nDstXOff2 =
1970
0
                            nDstXOff + nDestXOffVirtual + nDstXCount;
1971
0
                        args.nDstYOff = nDstYOff + nDestYOffVirtual;
1972
0
                        args.nDstYOff2 =
1973
0
                            nDstYOff + nDestYOffVirtual + nDstYCount;
1974
0
                        args.pszResampling = pszResampling;
1975
0
                        args.bHasNoData = false;
1976
0
                        args.dfNoDataValue = 0.0;
1977
0
                        args.poColorTable = nullptr;
1978
0
                        args.bPropagateNoData = bPropagateNoData;
1979
1980
0
                        eErr =
1981
0
                            pfnResampleFunc(args,
1982
0
                                            reinterpret_cast<GByte *>(pChunk) +
1983
0
                                                i * nChunkBandOffset,
1984
0
                                            &pDstBuffer, &eDstBufferDataType);
1985
0
                        if (eErr == CE_None)
1986
0
                        {
1987
0
                            eErr = poMEMBand->RasterIO(
1988
0
                                GF_Write, nDstXOff + nDestXOffVirtual,
1989
0
                                nDstYOff + nDestYOffVirtual, nDstXCount,
1990
0
                                nDstYCount, pDstBuffer, nDstXCount, nDstYCount,
1991
0
                                eDstBufferDataType, 0, 0, nullptr);
1992
0
                        }
1993
0
                        CPLFree(pDstBuffer);
1994
0
                    }
1995
0
                }
1996
1997
0
                nBlocksDone++;
1998
0
                if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1999
0
                    !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
2000
0
                                             "", psExtraArg->pProgressData))
2001
0
                {
2002
0
                    eErr = CE_Failure;
2003
0
                }
2004
0
            }
2005
0
        }
2006
2007
0
        CPLFree(pChunk);
2008
0
        CPLFree(pabyChunkNoDataMask);
2009
0
    }
2010
2011
0
    CPLFree(papoDstBands);
2012
0
    GDALClose(poMEMDS);
2013
2014
0
    return eErr;
2015
0
}
2016
2017
//! @endcond
2018
2019
/************************************************************************/
2020
/*                           GDALSwapWords()                            */
2021
/************************************************************************/
2022
2023
/**
2024
 * Byte swap words in-place.
2025
 *
2026
 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2027
 * a memory array.  No assumption is made that the words being swapped are
2028
 * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2029
 * to determine if the current platform is big endian or little endian.  Use
2030
 * the macros like CPL_SWAP32() to byte swap single values without the overhead
2031
 * of a function call.
2032
 *
2033
 * @param pData pointer to start of data buffer.
2034
 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2035
 * @param nWordCount the number of words to be swapped in this call.
2036
 * @param nWordSkip the byte offset from the start of one word to the start of
2037
 * the next. For packed buffers this is the same as nWordSize.
2038
 */
2039
2040
void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount,
                               int nWordSkip)

{
    // A null buffer is only reported when there is at least one word to swap.
    if (nWordCount > 0)
        VALIDATE_POINTER0(pData, "GDALSwapWords");

    // Cursor on the first byte of the word currently being swapped. It is
    // advanced by nWordSkip bytes after each word.
    GByte *pabyWord = static_cast<GByte *>(pData);

    if (nWordSize == 1)
    {
        // Single-byte words: nothing to swap.
    }
    else if (nWordSize == 2)
    {
        CPLAssert(nWordSkip >= 2 || nWordCount == 1);
        for (int iWord = 0; iWord < nWordCount;
             ++iWord, pabyWord += nWordSkip)
        {
            CPL_SWAP16PTR(pabyWord);
        }
    }
    else if (nWordSize == 4)
    {
        CPLAssert(nWordSkip >= 4 || nWordCount == 1);
        // Whole-word loads/stores are only used when the first word and
        // the stride are both multiples of 4, i.e. every word is aligned.
        const bool bAllWordsAligned =
            CPL_IS_ALIGNED(pabyWord, 4) && (nWordSkip % 4) == 0;
        if (bAllWordsAligned)
        {
            for (int iWord = 0; iWord < nWordCount;
                 ++iWord, pabyWord += nWordSkip)
            {
                GUInt32 *const pnWord = reinterpret_cast<GUInt32 *>(pabyWord);
                *pnWord = CPL_SWAP32(*pnWord);
            }
        }
        else
        {
            // Otherwise swap through the pointer-based macro.
            for (int iWord = 0; iWord < nWordCount;
                 ++iWord, pabyWord += nWordSkip)
            {
                CPL_SWAP32PTR(pabyWord);
            }
        }
    }
    else if (nWordSize == 8)
    {
        CPLAssert(nWordSkip >= 8 || nWordCount == 1);
        // Same strategy as the 4-byte case, with an 8-byte alignment test.
        const bool bAllWordsAligned =
            CPL_IS_ALIGNED(pabyWord, 8) && (nWordSkip % 8) == 0;
        if (bAllWordsAligned)
        {
            for (int iWord = 0; iWord < nWordCount;
                 ++iWord, pabyWord += nWordSkip)
            {
                GUInt64 *const pnWord = reinterpret_cast<GUInt64 *>(pabyWord);
                *pnWord = CPL_SWAP64(*pnWord);
            }
        }
        else
        {
            for (int iWord = 0; iWord < nWordCount;
                 ++iWord, pabyWord += nWordSkip)
            {
                CPL_SWAP64PTR(pabyWord);
            }
        }
    }
    else
    {
        // Only word sizes 1, 2, 4 and 8 are handled.
        CPLAssert(false);
    }
}
2109
2110
/************************************************************************/
2111
/*                           GDALSwapWordsEx()                          */
2112
/************************************************************************/
2113
2114
/**
2115
 * Byte swap words in-place.
2116
 *
2117
 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2118
 * a memory array.  No assumption is made that the words being swapped are
2119
 * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2120
 * to determine if the current platform is big endian or little endian.  Use
2121
 * the macros like CPL_SWAP32() to byte swap single values without the overhead
2122
 * of a function call.
2123
 *
2124
 * @param pData pointer to start of data buffer.
2125
 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2126
 * @param nWordCount the number of words to be swapped in this call.
2127
 * @param nWordSkip the byte offset from the start of one word to the start of
2128
 * the next. For packed buffers this is the same as nWordSize.
2129
 * @since GDAL 2.1
2130
 */
2131
void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount,
                                 int nWordSkip)
{
    // GDALSwapWords() takes an int word count, so the request is processed
    // in slices of at most 2^30 words (a multiple of 8).
    const int nMaxWordsPerCall = 1 << 30;

    GByte *pabyCursor = static_cast<GByte *>(pData);
    for (size_t nRemaining = nWordCount; nRemaining != 0;)
    {
        int nWordsThisCall = nMaxWordsPerCall;
        if (nRemaining <= static_cast<size_t>(nMaxWordsPerCall))
            nWordsThisCall = static_cast<int>(nRemaining);

        GDALSwapWords(pabyCursor, nWordSize, nWordsThisCall, nWordSkip);

        // Step over the words just swapped; the product is computed in
        // size_t so it cannot overflow int.
        pabyCursor += static_cast<size_t>(nWordSkip) * nWordsThisCall;
        nRemaining -= nWordsThisCall;
    }
}
2145
2146
// Place the new GDALCopyWords helpers in an anonymous namespace
2147
namespace
2148
{
2149
2150
/************************************************************************/
2151
/*                           GDALCopyWordsT()                           */
2152
/************************************************************************/
2153
/**
2154
 * Template function, used to copy data from pSrcData into buffer
2155
 * pDstData, with stride nSrcPixelStride in the source data and
2156
 * stride nDstPixelStride in the destination data. This template can
2157
 * deal with the case where the input data type is real or complex and
2158
 * the output is real.
2159
 *
2160
 * @param pSrcData the source data buffer
2161
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2162
 *                      of interest.
2163
 * @param pDstData the destination buffer.
2164
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2165
 *                      interest.
2166
 * @param nWordCount the total number of pixel words to copy
2167
 *
2168
 * @code
2169
 * // Assume an input buffer of type GUInt16 named pBufferIn
2170
 * GByte *pBufferOut = new GByte[numBytesOut];
2171
 * GDALCopyWordsT<GUInt16, GByte>(pBufferIn, 2, pBufferOut, 1, numBytesOut);
2172
 * @endcode
2173
 * @note
2174
 * This is a private function, and should not be exposed outside of
2175
 * rasterio.cpp. External users should call the GDALCopyWords driver function.
2176
 */
2177
2178
template <class Tin, class Tout>
2179
static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData,
2180
                                         int nSrcPixelStride,
2181
                                         Tout *const CPL_RESTRICT pDstData,
2182
                                         int nDstPixelStride,
2183
                                         GPtrDiff_t nWordCount)
2184
0
{
2185
0
    decltype(nWordCount) nDstOffset = 0;
2186
2187
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2188
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2189
0
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2190
0
    {
2191
0
        const Tin tValue =
2192
0
            *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2193
0
        Tout *const pOutPixel =
2194
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2195
2196
0
        GDALCopyWord(tValue, *pOutPixel);
2197
2198
0
        nDstOffset += nDstPixelStride;
2199
0
    }
2200
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, signed char>(unsigned char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned short>(unsigned char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, short>(unsigned char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned int>(unsigned char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, int>(unsigned char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, long>(unsigned char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, float>(unsigned char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, double>(unsigned char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned char>(signed char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, signed char>(signed char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, long>(signed char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned char>(unsigned short const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, signed char>(unsigned short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, short>(unsigned short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, long>(unsigned short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, float>(unsigned short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, double>(unsigned short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned char>(short const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, signed char>(short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned short>(short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned int>(short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned long>(short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, long>(short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, long>(unsigned int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned char>(int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, signed char>(int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned short>(int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned int>(int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned long>(int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, long>(int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned long>(unsigned long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, long>(unsigned long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned char>(long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, signed char>(long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned short>(long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned int>(long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned long>(long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, long>(long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, signed char>(float const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned int>(float const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned long>(float const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, long>(float const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, signed char>(double const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned int>(double const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned long>(double const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, long>(double const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, double>(double const*, int, double*, int, long long)
2201
2202
// Primary (generic) GDALCopyWordsT() template: forwards every argument
// unchanged to GDALCopyWordsGenericT(). Explicit specializations for
// specific (Tin, Tout) pairs appear later in this file.
//
// pSrcData / pDstData: source and destination buffers.
// nSrcPixelStride / nDstPixelStride: distance between consecutive words
//     (presumably in bytes, as in the sibling helpers - confirm against
//     GDALCopyWordsGenericT()).
// nWordCount: number of words to copy.
template <class Tin, class Tout>
2203
static void CPL_NOINLINE GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
2204
                                        int nSrcPixelStride,
2205
                                        Tout *const CPL_RESTRICT pDstData,
2206
                                        int nDstPixelStride,
2207
                                        GPtrDiff_t nWordCount)
2208
0
{
2209
0
    GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
2210
0
                          nWordCount);
2211
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, long>(unsigned char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, signed char>(signed char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, long>(signed char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, signed char>(unsigned short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, long>(unsigned short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned char>(short const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, signed char>(short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned int>(short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned long>(short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, long>(short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, long>(unsigned int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned char>(int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, signed char>(int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned short>(int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned int>(int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned long>(int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, long>(int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned long>(unsigned long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, long>(unsigned long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned char>(long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, signed char>(long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned short>(long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned int>(long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned long>(long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, long>(long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, signed char>(float const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned int>(float const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned long>(float const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, long>(float const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, signed char>(double const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned int>(double const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned long>(double const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, long>(double const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, double>(double const*, int, double*, int, long long)
2212
2213
// Copies nWordCount words from pSrcData to pDstData, converting each one
// from Tin to Tout. Strides are byte offsets: they are applied to char
// pointers below.
//
// When both strides equal the element sizes (densely packed buffers), words
// are handed to GDALCopy8Words() in groups of 8. Any remaining words, and
// the whole input in the strided case, go through GDALCopyWord() one by one.
template <class Tin, class Tout>
2214
static void inline GDALCopyWordsT_8atatime(
2215
    const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2216
    Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2217
    GPtrDiff_t nWordCount)
2218
0
{
2219
0
    // Running byte offset into the destination buffer.
    decltype(nWordCount) nDstOffset = 0;
2220
2221
0
    // Byte-addressable views so the strides can be applied as byte offsets.
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2222
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2223
0
    // Index of the next word to copy; shared by both loops below.
    decltype(nWordCount) n = 0;
2224
0
    if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2225
0
        nDstPixelStride == static_cast<int>(sizeof(Tout)))
2226
0
    {
2227
0
        // Packed case: process full groups of 8 words. With fewer than 8
        // words left the condition is false and the loop below takes over.
        for (; n < nWordCount - 7; n += 8)
2228
0
        {
2229
0
            const Tin *pInValues = reinterpret_cast<const Tin *>(
2230
0
                pSrcDataPtr + (n * nSrcPixelStride));
2231
0
            Tout *const pOutPixels =
2232
0
                reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2233
2234
0
            GDALCopy8Words(pInValues, pOutPixels);
2235
2236
0
            nDstOffset += 8 * nDstPixelStride;
2237
0
        }
2238
0
    }
2239
0
    // Tail of the packed case, or every word when the buffers are strided.
    for (; n < nWordCount; n++)
2240
0
    {
2241
0
        const Tin tValue =
2242
0
            *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2243
0
        Tout *const pOutPixel =
2244
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2245
2246
0
        GDALCopyWord(tValue, *pOutPixel);
2247
2248
0
        nDstOffset += nDstPixelStride;
2249
0
    }
2250
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned char>(float const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned short>(float const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, short>(float const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, unsigned char>(double const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, unsigned short>(double const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, float>(double const*, int, float*, int, long long)
2251
2252
#ifdef HAVE_SSE2
2253
2254
// Copies nWordCount GByte values into a buffer of 16-bit integers (Tout must
// be an integral type with the size of uint16_t, enforced by static_assert).
//
// If both strides equal the element sizes (packed buffers), 16 source bytes
// per iteration are widened with SSE2 unpack instructions and the remainder
// is copied with a scalar loop. Otherwise the work is delegated to
// GDALCopyWordsGenericT().
template <class Tout>
2255
void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
2256
                              int nSrcPixelStride,
2257
                              Tout *const CPL_RESTRICT pDstData,
2258
                              int nDstPixelStride, GPtrDiff_t nWordCount)
2259
0
{
2260
0
    static_assert(std::is_integral<Tout>::value &&
2261
0
                      sizeof(Tout) == sizeof(uint16_t),
2262
0
                  "Bad Tout");
2263
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2264
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2265
0
    {
2266
0
        decltype(nWordCount) n = 0;
2267
0
        const __m128i xmm_zero = _mm_setzero_si128();
2268
0
        // Byte view of the destination so store addresses can be computed
        // as byte offsets (n * 2).
        GByte *CPL_RESTRICT pabyDstDataPtr =
2269
0
            reinterpret_cast<GByte *>(pDstData);
2270
0
        // 16 input bytes -> 16 output words (2 x 16 output bytes) per pass.
        for (; n < nWordCount - 15; n += 16)
2271
0
        {
2272
0
            __m128i xmm = _mm_loadu_si128(
2273
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2274
0
            // Interleaving with zero bytes zero-extends each byte to 16 bits:
            // xmm0 holds source bytes 0..7, xmm1 holds source bytes 8..15.
            __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2275
0
            __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2276
0
            _mm_storeu_si128(
2277
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
2278
0
            _mm_storeu_si128(
2279
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
2280
0
        }
2281
0
        // Scalar tail for the remaining (fewer than 16) words.
        for (; n < nWordCount; n++)
2282
0
        {
2283
0
            pDstData[n] = pSrcData[n];
2284
0
        }
2285
0
    }
2286
0
    else
2287
0
    {
2288
0
        // Non-packed strides: fall back to the generic implementation.
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2289
0
                              nDstPixelStride, nWordCount);
2290
0
    }
2291
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<unsigned short>(unsigned char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<short>(unsigned char const*, int, short*, int, long long)
2292
2293
// Explicit specialization of GDALCopyWordsT() for GByte -> GUInt16:
// delegates to GDALCopyWordsByteTo16Bit().
template <>
2294
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2295
                                 int nSrcPixelStride,
2296
                                 GUInt16 *const CPL_RESTRICT pDstData,
2297
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2298
0
{
2299
0
    GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2300
0
                             nDstPixelStride, nWordCount);
2301
0
}
2302
2303
// Explicit specialization of GDALCopyWordsT() for GByte -> GInt16:
// delegates to GDALCopyWordsByteTo16Bit().
template <>
2304
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2305
                                 int nSrcPixelStride,
2306
                                 GInt16 *const CPL_RESTRICT pDstData,
2307
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2308
0
{
2309
0
    GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2310
0
                             nDstPixelStride, nWordCount);
2311
0
}
2312
2313
// Copies nWordCount GByte values into a buffer of 32-bit integers (Tout must
// be an integral type with the size of uint32_t, enforced by static_assert).
//
// If both strides equal the element sizes (packed buffers), 16 source bytes
// per iteration are widened in two SSE2 unpack stages (8 -> 16 -> 32 bits)
// and the remainder is copied with a scalar loop. Otherwise the work is
// delegated to GDALCopyWordsGenericT().
template <class Tout>
2314
void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
2315
                              int nSrcPixelStride,
2316
                              Tout *const CPL_RESTRICT pDstData,
2317
                              int nDstPixelStride, GPtrDiff_t nWordCount)
2318
0
{
2319
0
    static_assert(std::is_integral<Tout>::value &&
2320
0
                      sizeof(Tout) == sizeof(uint32_t),
2321
0
                  "Bad Tout");
2322
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2323
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2324
0
    {
2325
0
        decltype(nWordCount) n = 0;
2326
0
        const __m128i xmm_zero = _mm_setzero_si128();
2327
0
        // Byte view of the destination so store addresses can be computed
        // as byte offsets (n * 4).
        GByte *CPL_RESTRICT pabyDstDataPtr =
2328
0
            reinterpret_cast<GByte *>(pDstData);
2329
0
        // 16 input bytes -> 16 output words (4 x 16 output bytes) per pass.
        for (; n < nWordCount - 15; n += 16)
2330
0
        {
2331
0
            __m128i xmm = _mm_loadu_si128(
2332
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2333
0
            // Stage 1: zero-extend bytes to 16 bits (low and high halves).
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2334
0
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2335
0
            // Stage 2: zero-extend 16-bit values to 32 bits. xmm0..xmm3 hold
            // source bytes 0..3, 4..7, 8..11 and 12..15 respectively.
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2336
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2337
0
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2338
0
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2339
0
            _mm_storeu_si128(
2340
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
2341
0
            _mm_storeu_si128(
2342
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
2343
0
            _mm_storeu_si128(
2344
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
2345
0
            _mm_storeu_si128(
2346
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
2347
0
        }
2348
0
        // Scalar tail for the remaining (fewer than 16) words.
        for (; n < nWordCount; n++)
2349
0
        {
2350
0
            pDstData[n] = pSrcData[n];
2351
0
        }
2352
0
    }
2353
0
    else
2354
0
    {
2355
0
        // Non-packed strides: fall back to the generic implementation.
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2356
0
                              nDstPixelStride, nWordCount);
2357
0
    }
2358
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<unsigned int>(unsigned char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<int>(unsigned char const*, int, int*, int, long long)
2359
2360
// Explicit specialization of GDALCopyWordsT() for GByte -> GUInt32:
// delegates to GDALCopyWordsByteTo32Bit().
template <>
2361
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2362
                                 int nSrcPixelStride,
2363
                                 GUInt32 *const CPL_RESTRICT pDstData,
2364
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2365
0
{
2366
0
    GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2367
0
                             nDstPixelStride, nWordCount);
2368
0
}
2369
2370
// Explicit specialization of GDALCopyWordsT() for GByte -> GInt32:
// delegates to GDALCopyWordsByteTo32Bit().
template <>
2371
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2372
                                 int nSrcPixelStride,
2373
                                 GInt32 *const CPL_RESTRICT pDstData,
2374
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2375
0
{
2376
0
    GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2377
0
                             nDstPixelStride, nWordCount);
2378
0
}
2379
2380
// Explicit specialization of GDALCopyWordsT() for GByte -> float.
//
// If both strides equal the element sizes (packed buffers), 16 source bytes
// per iteration are zero-extended to 32-bit integers with SSE2 unpacks,
// converted to float and stored; the remainder is copied with a scalar loop.
// Otherwise the work is delegated to GDALCopyWordsGenericT().
template <>
2381
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2382
                                 int nSrcPixelStride,
2383
                                 float *const CPL_RESTRICT pDstData,
2384
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2385
0
{
2386
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2387
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2388
0
    {
2389
0
        decltype(nWordCount) n = 0;
2390
0
        const __m128i xmm_zero = _mm_setzero_si128();
2391
0
        // Byte view of the destination so store addresses can be computed
        // as byte offsets (n * 4).
        GByte *CPL_RESTRICT pabyDstDataPtr =
2392
0
            reinterpret_cast<GByte *>(pDstData);
2393
0
        // 16 input bytes -> 16 floats (4 x 16 output bytes) per pass.
        for (; n < nWordCount - 15; n += 16)
2394
0
        {
2395
0
            __m128i xmm = _mm_loadu_si128(
2396
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2397
0
            // Zero-extend bytes to 16 bits, then to 32 bits.
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2398
0
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2399
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2400
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2401
0
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2402
0
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2403
0
            // Convert each group of four 32-bit integers to four floats.
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2404
0
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2405
0
            __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2406
0
            __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2407
0
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2408
0
                          xmm0_f);
2409
0
            _mm_storeu_ps(
2410
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2411
0
            _mm_storeu_ps(
2412
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f);
2413
0
            _mm_storeu_ps(
2414
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f);
2415
0
        }
2416
0
        // Scalar tail for the remaining (fewer than 16) words.
        for (; n < nWordCount; n++)
2417
0
        {
2418
0
            pDstData[n] = pSrcData[n];
2419
0
        }
2420
0
    }
2421
0
    else
2422
0
    {
2423
0
        // Non-packed strides: fall back to the generic implementation.
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2424
0
                              nDstPixelStride, nWordCount);
2425
0
    }
2426
0
}
2427
2428
// Explicit specialization of GDALCopyWordsT() for GByte -> double.
//
// If both strides equal the element sizes (packed buffers), 16 source bytes
// per iteration are zero-extended to 32-bit integers with SSE2 unpacks,
// converted to double and stored (16 doubles = 128 output bytes); the
// remainder is copied with a scalar loop. Otherwise the work is delegated
// to GDALCopyWordsGenericT().
template <>
2429
CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2430
                                 int nSrcPixelStride,
2431
                                 double *const CPL_RESTRICT pDstData,
2432
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2433
0
{
2434
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2435
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2436
0
    {
2437
0
        decltype(nWordCount) n = 0;
2438
0
        const __m128i xmm_zero = _mm_setzero_si128();
2439
0
        // Byte view of the destination so store addresses can be computed
        // as byte offsets (n * 8).
        GByte *CPL_RESTRICT pabyDstDataPtr =
2440
0
            reinterpret_cast<GByte *>(pDstData);
2441
0
        for (; n < nWordCount - 15; n += 16)
2442
0
        {
2443
0
            __m128i xmm = _mm_loadu_si128(
2444
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2445
0
            // Zero-extend bytes to 16 bits, then to 32 bits. xmm0..xmm3 hold
            // source bytes 0..3, 4..7, 8..11 and 12..15 respectively.
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2446
0
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2447
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2448
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2449
0
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2450
0
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2451
2452
// AVX2 variant: converts four integers per instruction. It is compiled only
// when both __AVX2__ and slightly_slower_than_SSE2 are defined.
// NOTE(review): the macro name suggests this path is intentionally kept
// disabled for performance reasons - presumably never defined; confirm.
#if defined(__AVX2__) && defined(slightly_slower_than_SSE2)
2453
            _mm256_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2454
                             _mm256_cvtepi32_pd(xmm0));
2455
            _mm256_storeu_pd(
2456
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2457
                _mm256_cvtepi32_pd(xmm1));
2458
            _mm256_storeu_pd(
2459
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2460
                _mm256_cvtepi32_pd(xmm2));
2461
            _mm256_storeu_pd(
2462
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2463
                _mm256_cvtepi32_pd(xmm3));
2464
#else
2465
0
            // _mm_cvtepi32_pd converts only the two lower 32-bit integers of
            // its operand, so each register is converted in two halves.
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2466
0
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2467
0
            __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2468
0
            __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2469
0
            // Shift right by 8 bytes to bring the two upper integers into
            // the lower half before converting them.
            xmm0 = _mm_srli_si128(xmm0, 8);
2470
0
            xmm1 = _mm_srli_si128(xmm1, 8);
2471
0
            xmm2 = _mm_srli_si128(xmm2, 8);
2472
0
            xmm3 = _mm_srli_si128(xmm3, 8);
2473
0
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2474
0
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2475
0
            __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2476
0
            __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2477
2478
0
            // Eight stores of two doubles each, in source order.
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2479
0
                          xmm0_low_d);
2480
0
            _mm_storeu_pd(
2481
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2482
0
                xmm0_high_d);
2483
0
            _mm_storeu_pd(
2484
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2485
0
                xmm1_low_d);
2486
0
            _mm_storeu_pd(
2487
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2488
0
                xmm1_high_d);
2489
0
            _mm_storeu_pd(
2490
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2491
0
                xmm2_low_d);
2492
0
            _mm_storeu_pd(
2493
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80),
2494
0
                xmm2_high_d);
2495
0
            _mm_storeu_pd(
2496
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2497
0
                xmm3_low_d);
2498
0
            _mm_storeu_pd(
2499
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112),
2500
0
                xmm3_high_d);
2501
0
#endif
2502
0
        }
2503
0
        // Scalar tail for the remaining (fewer than 16) words.
        for (; n < nWordCount; n++)
2504
0
        {
2505
0
            pDstData[n] = pSrcData[n];
2506
0
        }
2507
0
    }
2508
0
    else
2509
0
    {
2510
0
        // Non-packed strides: fall back to the generic implementation.
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2511
0
                              nDstPixelStride, nWordCount);
2512
0
    }
2513
0
}
2514
2515
template <>
2516
CPL_NOINLINE void GDALCopyWordsT(const uint8_t *const CPL_RESTRICT pSrcData,
2517
                                 int nSrcPixelStride,
2518
                                 int8_t *const CPL_RESTRICT pDstData,
2519
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2520
0
{
2521
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2522
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2523
0
    {
2524
0
        decltype(nWordCount) n = 0;
2525
0
        const __m128i xmm_127 = _mm_set1_epi8(127);
2526
0
        for (; n < nWordCount - 31; n += 32)
2527
0
        {
2528
0
            __m128i xmm0 = _mm_loadu_si128(
2529
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2530
0
            __m128i xmm1 = _mm_loadu_si128(
2531
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 16));
2532
0
            xmm0 = _mm_min_epu8(xmm0, xmm_127);
2533
0
            xmm1 = _mm_min_epu8(xmm1, xmm_127);
2534
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2535
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 16),
2536
0
                             xmm1);
2537
0
        }
2538
0
        for (; n < nWordCount; n++)
2539
0
        {
2540
0
            pDstData[n] =
2541
0
                pSrcData[n] >= 127 ? 127 : static_cast<int8_t>(pSrcData[n]);
2542
0
        }
2543
0
    }
2544
0
    else
2545
0
    {
2546
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2547
0
                              nDstPixelStride, nWordCount);
2548
0
    }
2549
0
}
2550
2551
template <>
2552
CPL_NOINLINE void GDALCopyWordsT(const int8_t *const CPL_RESTRICT pSrcData,
2553
                                 int nSrcPixelStride,
2554
                                 uint8_t *const CPL_RESTRICT pDstData,
2555
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2556
0
{
2557
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2558
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2559
0
    {
2560
0
        decltype(nWordCount) n = 0;
2561
0
#if !(defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS))
2562
0
        const __m128i xmm_INT8_to_UINT8 = _mm_set1_epi8(-128);
2563
0
#endif
2564
0
        for (; n < nWordCount - 31; n += 32)
2565
0
        {
2566
0
            __m128i xmm0 = _mm_loadu_si128(
2567
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2568
0
            __m128i xmm1 = _mm_loadu_si128(
2569
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 16));
2570
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2571
            xmm0 = _mm_max_epi8(xmm0, _mm_setzero_si128());
2572
            xmm1 = _mm_max_epi8(xmm1, _mm_setzero_si128());
2573
#else
2574
0
            xmm0 = _mm_add_epi8(xmm0, xmm_INT8_to_UINT8);
2575
0
            xmm1 = _mm_add_epi8(xmm1, xmm_INT8_to_UINT8);
2576
0
            xmm0 = _mm_max_epu8(xmm0, xmm_INT8_to_UINT8);
2577
0
            xmm1 = _mm_max_epu8(xmm1, xmm_INT8_to_UINT8);
2578
0
            xmm0 = _mm_sub_epi8(xmm0, xmm_INT8_to_UINT8);
2579
0
            xmm1 = _mm_sub_epi8(xmm1, xmm_INT8_to_UINT8);
2580
0
#endif
2581
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2582
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 16),
2583
0
                             xmm1);
2584
0
        }
2585
0
        for (; n < nWordCount; n++)
2586
0
        {
2587
0
            pDstData[n] =
2588
0
                pSrcData[n] < 0 ? 0 : static_cast<uint8_t>(pSrcData[n]);
2589
0
        }
2590
0
    }
2591
0
    else
2592
0
    {
2593
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2594
0
                              nDstPixelStride, nWordCount);
2595
0
    }
2596
0
}
2597
2598
template <>
2599
CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData,
2600
                                 int nSrcPixelStride,
2601
                                 uint8_t *const CPL_RESTRICT pDstData,
2602
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2603
0
{
2604
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2605
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2606
0
    {
2607
0
        decltype(nWordCount) n = 0;
2608
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2609
        const auto xmm_MAX_INT16 = _mm_set1_epi16(32767);
2610
#else
2611
        // In SSE2, min_epu16 does not exist, so shift from
2612
        // UInt16 to SInt16 to be able to use min_epi16
2613
0
        const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2614
0
        const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768);
2615
0
#endif
2616
0
        for (; n < nWordCount - 15; n += 16)
2617
0
        {
2618
0
            __m128i xmm0 = _mm_loadu_si128(
2619
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2620
0
            __m128i xmm1 = _mm_loadu_si128(
2621
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 8));
2622
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2623
            xmm0 = _mm_min_epu16(xmm0, xmm_MAX_INT16);
2624
            xmm1 = _mm_min_epu16(xmm1, xmm_MAX_INT16);
2625
#else
2626
0
            xmm0 = _mm_add_epi16(xmm0, xmm_UINT16_to_INT16);
2627
0
            xmm1 = _mm_add_epi16(xmm1, xmm_UINT16_to_INT16);
2628
0
            xmm0 = _mm_min_epi16(xmm0, xmm_m255_shifted);
2629
0
            xmm1 = _mm_min_epi16(xmm1, xmm_m255_shifted);
2630
0
            xmm0 = _mm_sub_epi16(xmm0, xmm_UINT16_to_INT16);
2631
0
            xmm1 = _mm_sub_epi16(xmm1, xmm_UINT16_to_INT16);
2632
0
#endif
2633
0
            xmm0 = _mm_packus_epi16(xmm0, xmm1);
2634
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2635
0
        }
2636
0
        for (; n < nWordCount; n++)
2637
0
        {
2638
0
            pDstData[n] =
2639
0
                pSrcData[n] >= 255 ? 255 : static_cast<uint8_t>(pSrcData[n]);
2640
0
        }
2641
0
    }
2642
0
    else
2643
0
    {
2644
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2645
0
                              nDstPixelStride, nWordCount);
2646
0
    }
2647
0
}
2648
2649
template <>
2650
CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData,
2651
                                 int nSrcPixelStride,
2652
                                 int16_t *const CPL_RESTRICT pDstData,
2653
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2654
0
{
2655
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2656
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2657
0
    {
2658
0
        decltype(nWordCount) n = 0;
2659
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2660
        const __m128i xmm_MAX_INT16 = _mm_set1_epi16(32767);
2661
#else
2662
        // In SSE2, min_epu16 does not exist, so shift from
2663
        // UInt16 to SInt16 to be able to use min_epi16
2664
0
        const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2665
0
        const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768);
2666
0
#endif
2667
0
        for (; n < nWordCount - 15; n += 16)
2668
0
        {
2669
0
            __m128i xmm0 = _mm_loadu_si128(
2670
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2671
0
            __m128i xmm1 = _mm_loadu_si128(
2672
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 8));
2673
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2674
            xmm0 = _mm_min_epu16(xmm0, xmm_MAX_INT16);
2675
            xmm1 = _mm_min_epu16(xmm1, xmm_MAX_INT16);
2676
#else
2677
0
            xmm0 = _mm_add_epi16(xmm0, xmm_UINT16_to_INT16);
2678
0
            xmm1 = _mm_add_epi16(xmm1, xmm_UINT16_to_INT16);
2679
0
            xmm0 = _mm_min_epi16(xmm0, xmm_32767_shifted);
2680
0
            xmm1 = _mm_min_epi16(xmm1, xmm_32767_shifted);
2681
0
            xmm0 = _mm_sub_epi16(xmm0, xmm_UINT16_to_INT16);
2682
0
            xmm1 = _mm_sub_epi16(xmm1, xmm_UINT16_to_INT16);
2683
0
#endif
2684
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2685
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 8),
2686
0
                             xmm1);
2687
0
        }
2688
0
        for (; n < nWordCount; n++)
2689
0
        {
2690
0
            pDstData[n] = pSrcData[n] >= 32767
2691
0
                              ? 32767
2692
0
                              : static_cast<int16_t>(pSrcData[n]);
2693
0
        }
2694
0
    }
2695
0
    else
2696
0
    {
2697
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2698
0
                              nDstPixelStride, nWordCount);
2699
0
    }
2700
0
}
2701
2702
template <>
2703
CPL_NOINLINE void GDALCopyWordsT(const int16_t *const CPL_RESTRICT pSrcData,
2704
                                 int nSrcPixelStride,
2705
                                 uint16_t *const CPL_RESTRICT pDstData,
2706
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2707
0
{
2708
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2709
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2710
0
    {
2711
0
        decltype(nWordCount) n = 0;
2712
0
        const __m128i xmm_zero = _mm_setzero_si128();
2713
0
        for (; n < nWordCount - 15; n += 16)
2714
0
        {
2715
0
            __m128i xmm0 = _mm_loadu_si128(
2716
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2717
0
            __m128i xmm1 = _mm_loadu_si128(
2718
0
                reinterpret_cast<const __m128i *>(pSrcData + n + 8));
2719
0
            xmm0 = _mm_max_epi16(xmm0, xmm_zero);
2720
0
            xmm1 = _mm_max_epi16(xmm1, xmm_zero);
2721
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2722
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 8),
2723
0
                             xmm1);
2724
0
        }
2725
0
        for (; n < nWordCount; n++)
2726
0
        {
2727
0
            pDstData[n] =
2728
0
                pSrcData[n] < 0 ? 0 : static_cast<uint16_t>(pSrcData[n]);
2729
0
        }
2730
0
    }
2731
0
    else
2732
0
    {
2733
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2734
0
                              nDstPixelStride, nWordCount);
2735
0
    }
2736
0
}
2737
2738
#if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2739
2740
template <>
2741
CPL_NOINLINE void GDALCopyWordsT(const uint32_t *const CPL_RESTRICT pSrcData,
2742
                                 int nSrcPixelStride,
2743
                                 int32_t *const CPL_RESTRICT pDstData,
2744
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2745
{
2746
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2747
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2748
    {
2749
        decltype(nWordCount) n = 0;
2750
        const __m128i xmm_MAX_INT = _mm_set1_epi32(INT_MAX);
2751
        for (; n < nWordCount - 8; n += 7)
2752
        {
2753
            __m128i xmm0 = _mm_loadu_si128(
2754
                reinterpret_cast<const __m128i *>(pSrcData + n));
2755
            __m128i xmm1 = _mm_loadu_si128(
2756
                reinterpret_cast<const __m128i *>(pSrcData + n + 4));
2757
            xmm0 = _mm_min_epu32(xmm0, xmm_MAX_INT);
2758
            xmm1 = _mm_min_epu32(xmm1, xmm_MAX_INT);
2759
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2760
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 4),
2761
                             xmm1);
2762
        }
2763
        for (; n < nWordCount; n++)
2764
        {
2765
            pDstData[n] = pSrcData[n] >= INT_MAX
2766
                              ? INT_MAX
2767
                              : static_cast<int32_t>(pSrcData[n]);
2768
        }
2769
    }
2770
    else
2771
    {
2772
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2773
                              nDstPixelStride, nWordCount);
2774
    }
2775
}
2776
2777
template <>
2778
CPL_NOINLINE void GDALCopyWordsT(const int32_t *const CPL_RESTRICT pSrcData,
2779
                                 int nSrcPixelStride,
2780
                                 uint32_t *const CPL_RESTRICT pDstData,
2781
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2782
{
2783
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2784
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2785
    {
2786
        decltype(nWordCount) n = 0;
2787
        const __m128i xmm_zero = _mm_setzero_si128();
2788
        for (; n < nWordCount - 7; n += 8)
2789
        {
2790
            __m128i xmm0 = _mm_loadu_si128(
2791
                reinterpret_cast<const __m128i *>(pSrcData + n));
2792
            __m128i xmm1 = _mm_loadu_si128(
2793
                reinterpret_cast<const __m128i *>(pSrcData + n + 4));
2794
            xmm0 = _mm_max_epi32(xmm0, xmm_zero);
2795
            xmm1 = _mm_max_epi32(xmm1, xmm_zero);
2796
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0);
2797
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 4),
2798
                             xmm1);
2799
        }
2800
        for (; n < nWordCount; n++)
2801
        {
2802
            pDstData[n] =
2803
                pSrcData[n] < 0 ? 0 : static_cast<uint32_t>(pSrcData[n]);
2804
        }
2805
    }
2806
    else
2807
    {
2808
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2809
                              nDstPixelStride, nWordCount);
2810
    }
2811
}
2812
2813
#endif  // defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)
2814
2815
template <>
2816
CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData,
2817
                                 int nSrcPixelStride,
2818
                                 float *const CPL_RESTRICT pDstData,
2819
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2820
0
{
2821
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2822
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2823
0
    {
2824
0
        decltype(nWordCount) n = 0;
2825
0
        const __m128i xmm_zero = _mm_setzero_si128();
2826
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2827
0
            reinterpret_cast<GByte *>(pDstData);
2828
0
        for (; n < nWordCount - 7; n += 8)
2829
0
        {
2830
0
            __m128i xmm = _mm_loadu_si128(
2831
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2832
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2833
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2834
0
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2835
0
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2836
0
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2837
0
                          xmm0_f);
2838
0
            _mm_storeu_ps(
2839
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2840
0
        }
2841
0
        for (; n < nWordCount; n++)
2842
0
        {
2843
0
            pDstData[n] = pSrcData[n];
2844
0
        }
2845
0
    }
2846
0
    else
2847
0
    {
2848
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2849
0
                              nDstPixelStride, nWordCount);
2850
0
    }
2851
0
}
2852
2853
template <>
2854
CPL_NOINLINE void GDALCopyWordsT(const int16_t *const CPL_RESTRICT pSrcData,
2855
                                 int nSrcPixelStride,
2856
                                 float *const CPL_RESTRICT pDstData,
2857
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2858
0
{
2859
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2860
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2861
0
    {
2862
0
        decltype(nWordCount) n = 0;
2863
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2864
0
            reinterpret_cast<GByte *>(pDstData);
2865
0
        for (; n < nWordCount - 7; n += 8)
2866
0
        {
2867
0
            __m128i xmm = _mm_loadu_si128(
2868
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2869
0
            const auto sign = _mm_srai_epi16(xmm, 15);
2870
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, sign);
2871
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, sign);
2872
0
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2873
0
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2874
0
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2875
0
                          xmm0_f);
2876
0
            _mm_storeu_ps(
2877
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2878
0
        }
2879
0
        for (; n < nWordCount; n++)
2880
0
        {
2881
0
            pDstData[n] = pSrcData[n];
2882
0
        }
2883
0
    }
2884
0
    else
2885
0
    {
2886
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2887
0
                              nDstPixelStride, nWordCount);
2888
0
    }
2889
0
}
2890
2891
template <>
2892
CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData,
2893
                                 int nSrcPixelStride,
2894
                                 double *const CPL_RESTRICT pDstData,
2895
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2896
0
{
2897
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2898
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2899
0
    {
2900
0
        decltype(nWordCount) n = 0;
2901
0
        const __m128i xmm_zero = _mm_setzero_si128();
2902
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2903
0
            reinterpret_cast<GByte *>(pDstData);
2904
0
        for (; n < nWordCount - 7; n += 8)
2905
0
        {
2906
0
            __m128i xmm = _mm_loadu_si128(
2907
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2908
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2909
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2910
2911
0
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2912
0
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2913
0
            xmm0 = _mm_srli_si128(xmm0, 8);
2914
0
            xmm1 = _mm_srli_si128(xmm1, 8);
2915
0
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2916
0
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2917
2918
0
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2919
0
                          xmm0_low_d);
2920
0
            _mm_storeu_pd(
2921
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2922
0
                xmm0_high_d);
2923
0
            _mm_storeu_pd(
2924
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2925
0
                xmm1_low_d);
2926
0
            _mm_storeu_pd(
2927
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2928
0
                xmm1_high_d);
2929
0
        }
2930
0
        for (; n < nWordCount; n++)
2931
0
        {
2932
0
            pDstData[n] = pSrcData[n];
2933
0
        }
2934
0
    }
2935
0
    else
2936
0
    {
2937
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2938
0
                              nDstPixelStride, nWordCount);
2939
0
    }
2940
0
}
2941
2942
template <>
2943
CPL_NOINLINE void GDALCopyWordsT(const int16_t *const CPL_RESTRICT pSrcData,
2944
                                 int nSrcPixelStride,
2945
                                 double *const CPL_RESTRICT pDstData,
2946
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2947
0
{
2948
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2949
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2950
0
    {
2951
0
        decltype(nWordCount) n = 0;
2952
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2953
0
            reinterpret_cast<GByte *>(pDstData);
2954
0
        for (; n < nWordCount - 7; n += 8)
2955
0
        {
2956
0
            __m128i xmm = _mm_loadu_si128(
2957
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2958
0
            const auto sign = _mm_srai_epi16(xmm, 15);
2959
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, sign);
2960
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, sign);
2961
2962
0
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2963
0
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2964
0
            xmm0 = _mm_srli_si128(xmm0, 8);
2965
0
            xmm1 = _mm_srli_si128(xmm1, 8);
2966
0
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2967
0
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2968
2969
0
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2970
0
                          xmm0_low_d);
2971
0
            _mm_storeu_pd(
2972
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2973
0
                xmm0_high_d);
2974
0
            _mm_storeu_pd(
2975
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2976
0
                xmm1_low_d);
2977
0
            _mm_storeu_pd(
2978
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2979
0
                xmm1_high_d);
2980
0
        }
2981
0
        for (; n < nWordCount; n++)
2982
0
        {
2983
0
            pDstData[n] = pSrcData[n];
2984
0
        }
2985
0
    }
2986
0
    else
2987
0
    {
2988
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2989
0
                              nDstPixelStride, nWordCount);
2990
0
    }
2991
0
}
2992
2993
template <>
2994
CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
2995
                                 int nSrcPixelStride,
2996
                                 GByte *const CPL_RESTRICT pDstData,
2997
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
2998
0
{
2999
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3000
0
                            nDstPixelStride, nWordCount);
3001
0
}
3002
3003
template <>
3004
CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
3005
                                 int nSrcPixelStride,
3006
                                 GUInt16 *const CPL_RESTRICT pDstData,
3007
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3008
0
{
3009
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3010
0
                            nDstPixelStride, nWordCount);
3011
0
}
3012
3013
template <>
3014
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3015
                                 int nSrcPixelStride,
3016
                                 double *const CPL_RESTRICT pDstData,
3017
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3018
0
{
3019
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3020
0
                            nDstPixelStride, nWordCount);
3021
0
}
3022
3023
template <>
3024
CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
3025
                                 int nSrcPixelStride,
3026
                                 float *const CPL_RESTRICT pDstData,
3027
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3028
0
{
3029
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3030
0
                            nDstPixelStride, nWordCount);
3031
0
}
3032
3033
template <>
3034
CPL_NOINLINE void GDALCopyWordsT(const GFloat16 *const CPL_RESTRICT pSrcData,
3035
                                 int nSrcPixelStride,
3036
                                 float *const CPL_RESTRICT pDstData,
3037
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3038
0
{
3039
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3040
0
                            nDstPixelStride, nWordCount);
3041
0
}
3042
3043
template <>
3044
CPL_NOINLINE void GDALCopyWordsT(const GFloat16 *const CPL_RESTRICT pSrcData,
3045
                                 int nSrcPixelStride,
3046
                                 double *const CPL_RESTRICT pDstData,
3047
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3048
0
{
3049
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3050
0
                            nDstPixelStride, nWordCount);
3051
0
}
3052
3053
#ifdef __F16C__
3054
3055
template <>
3056
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3057
                                 int nSrcPixelStride,
3058
                                 GFloat16 *const CPL_RESTRICT pDstData,
3059
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3060
{
3061
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3062
                            nDstPixelStride, nWordCount);
3063
}
3064
3065
template <>
3066
CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
3067
                                 int nSrcPixelStride,
3068
                                 GFloat16 *const CPL_RESTRICT pDstData,
3069
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3070
{
3071
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3072
                            nDstPixelStride, nWordCount);
3073
}
3074
3075
#endif  // __F16C__
3076
3077
#endif  // HAVE_SSE2
3078
3079
template <>
3080
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3081
                                 int nSrcPixelStride,
3082
                                 GByte *const CPL_RESTRICT pDstData,
3083
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3084
0
{
3085
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3086
0
                            nDstPixelStride, nWordCount);
3087
0
}
3088
3089
template <>
3090
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3091
                                 int nSrcPixelStride,
3092
                                 GInt16 *const CPL_RESTRICT pDstData,
3093
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3094
0
{
3095
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3096
0
                            nDstPixelStride, nWordCount);
3097
0
}
3098
3099
template <>
3100
CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
3101
                                 int nSrcPixelStride,
3102
                                 GUInt16 *const CPL_RESTRICT pDstData,
3103
                                 int nDstPixelStride, GPtrDiff_t nWordCount)
3104
0
{
3105
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
3106
0
                            nDstPixelStride, nWordCount);
3107
0
}
3108
3109
/************************************************************************/
3110
/*                   GDALCopyWordsComplexT()                            */
3111
/************************************************************************/
3112
/**
3113
 * Template function, used to copy data from pSrcData into buffer
3114
 * pDstData, with stride nSrcPixelStride in the source data and
3115
 * stride nDstPixelStride in the destination data. Deals with the
3116
 * complex case, where input is complex and output is complex.
3117
 *
3118
 * @param pSrcData the source data buffer
3119
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
3120
 *                      of interest.
3121
 * @param pDstData the destination buffer.
3122
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
3123
 *                      interest.
3124
 * @param nWordCount the total number of pixel words to copy
3125
 *
3126
 */
3127
template <class Tin, class Tout>
3128
inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData,
3129
                                  int nSrcPixelStride,
3130
                                  Tout *const CPL_RESTRICT pDstData,
3131
                                  int nDstPixelStride, GPtrDiff_t nWordCount)
3132
0
{
3133
0
    decltype(nWordCount) nDstOffset = 0;
3134
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
3135
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
3136
3137
0
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
3138
0
    {
3139
0
        const Tin *const pPixelIn =
3140
0
            reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
3141
0
        Tout *const pPixelOut =
3142
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
3143
3144
0
        GDALCopyWord(pPixelIn[0], pPixelOut[0]);
3145
0
        GDALCopyWord(pPixelIn[1], pPixelOut[1]);
3146
3147
0
        nDstOffset += nDstPixelStride;
3148
0
    }
3149
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, short>(unsigned char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, int>(unsigned char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, float>(unsigned char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, double>(unsigned char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, short>(unsigned short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, float>(unsigned short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, double>(unsigned short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, short>(float const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, float>(double const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, double>(double const*, int, double*, int, long long)
3150
3151
/************************************************************************/
3152
/*                   GDALCopyWordsComplexOutT()                         */
3153
/************************************************************************/
3154
/**
3155
 * Template function, used to copy data from pSrcData into buffer
3156
 * pDstData, with stride nSrcPixelStride in the source data and
3157
 * stride nDstPixelStride in the destination data. Deals with the
3158
 * case where the value is real coming in, but complex going out.
3159
 *
3160
 * @param pSrcData the source data buffer
3161
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
3162
 *                      of interest, in bytes.
3163
 * @param pDstData the destination buffer.
3164
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
3165
 *                      interest, in bytes.
3166
 * @param nWordCount the total number of pixel words to copy
3167
 *
3168
 */
3169
template <class Tin, class Tout>
3170
inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData,
3171
                                     int nSrcPixelStride,
3172
                                     Tout *const CPL_RESTRICT pDstData,
3173
                                     int nDstPixelStride, GPtrDiff_t nWordCount)
3174
0
{
3175
0
    decltype(nWordCount) nDstOffset = 0;
3176
3177
0
    const Tout tOutZero = static_cast<Tout>(0);
3178
3179
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
3180
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
3181
3182
0
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
3183
0
    {
3184
0
        const Tin tValue =
3185
0
            *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
3186
0
        Tout *const pPixelOut =
3187
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
3188
0
        GDALCopyWord(tValue, *pPixelOut);
3189
3190
0
        pPixelOut[1] = tOutZero;
3191
3192
0
        nDstOffset += nDstPixelStride;
3193
0
    }
3194
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, short>(unsigned char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, int>(unsigned char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, float>(unsigned char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, double>(unsigned char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, short>(unsigned short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, float>(unsigned short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, double>(unsigned short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, short>(float const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, float>(double const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, double>(double const*, int, double*, int, long long)
3195
3196
/************************************************************************/
3197
/*                           GDALCopyWordsFromT()                       */
3198
/************************************************************************/
3199
/**
3200
 * Template driver function. Given the input type T, call the appropriate
3201
 * GDALCopyWordsT function template for the desired output type. You should
3202
 * never call this function directly (call GDALCopyWords instead).
3203
 *
3204
 * @param pSrcData source data buffer
3205
 * @param nSrcPixelStride pixel stride in input buffer, in bytes
3206
 * @param bInComplex input is complex
3207
 * @param pDstData destination data buffer
3208
 * @param eDstType destination data type
3209
 * @param nDstPixelStride pixel stride in output buffer, in bytes
3210
 * @param nWordCount number of pixel words to be copied
3211
 */
3212
template <class T>
3213
inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
3214
                               int nSrcPixelStride, bool bInComplex,
3215
                               void *CPL_RESTRICT pDstData,
3216
                               GDALDataType eDstType, int nDstPixelStride,
3217
                               GPtrDiff_t nWordCount)
3218
0
{
3219
0
    switch (eDstType)
3220
0
    {
3221
0
        case GDT_Byte:
3222
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3223
0
                           static_cast<unsigned char *>(pDstData),
3224
0
                           nDstPixelStride, nWordCount);
3225
0
            break;
3226
0
        case GDT_Int8:
3227
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3228
0
                           static_cast<signed char *>(pDstData),
3229
0
                           nDstPixelStride, nWordCount);
3230
0
            break;
3231
0
        case GDT_UInt16:
3232
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3233
0
                           static_cast<unsigned short *>(pDstData),
3234
0
                           nDstPixelStride, nWordCount);
3235
0
            break;
3236
0
        case GDT_Int16:
3237
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3238
0
                           static_cast<short *>(pDstData), nDstPixelStride,
3239
0
                           nWordCount);
3240
0
            break;
3241
0
        case GDT_UInt32:
3242
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3243
0
                           static_cast<unsigned int *>(pDstData),
3244
0
                           nDstPixelStride, nWordCount);
3245
0
            break;
3246
0
        case GDT_Int32:
3247
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3248
0
                           static_cast<int *>(pDstData), nDstPixelStride,
3249
0
                           nWordCount);
3250
0
            break;
3251
0
        case GDT_UInt64:
3252
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3253
0
                           static_cast<std::uint64_t *>(pDstData),
3254
0
                           nDstPixelStride, nWordCount);
3255
0
            break;
3256
0
        case GDT_Int64:
3257
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3258
0
                           static_cast<std::int64_t *>(pDstData),
3259
0
                           nDstPixelStride, nWordCount);
3260
0
            break;
3261
0
        case GDT_Float16:
3262
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3263
0
                           static_cast<GFloat16 *>(pDstData), nDstPixelStride,
3264
0
                           nWordCount);
3265
0
            break;
3266
0
        case GDT_Float32:
3267
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3268
0
                           static_cast<float *>(pDstData), nDstPixelStride,
3269
0
                           nWordCount);
3270
0
            break;
3271
0
        case GDT_Float64:
3272
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
3273
0
                           static_cast<double *>(pDstData), nDstPixelStride,
3274
0
                           nWordCount);
3275
0
            break;
3276
0
        case GDT_CInt16:
3277
0
            if (bInComplex)
3278
0
            {
3279
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3280
0
                                      static_cast<short *>(pDstData),
3281
0
                                      nDstPixelStride, nWordCount);
3282
0
            }
3283
0
            else  // input is not complex, so we need to promote to a complex
3284
                  // buffer
3285
0
            {
3286
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3287
0
                                         static_cast<short *>(pDstData),
3288
0
                                         nDstPixelStride, nWordCount);
3289
0
            }
3290
0
            break;
3291
0
        case GDT_CInt32:
3292
0
            if (bInComplex)
3293
0
            {
3294
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3295
0
                                      static_cast<int *>(pDstData),
3296
0
                                      nDstPixelStride, nWordCount);
3297
0
            }
3298
0
            else  // input is not complex, so we need to promote to a complex
3299
                  // buffer
3300
0
            {
3301
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3302
0
                                         static_cast<int *>(pDstData),
3303
0
                                         nDstPixelStride, nWordCount);
3304
0
            }
3305
0
            break;
3306
0
        case GDT_CFloat16:
3307
0
            if (bInComplex)
3308
0
            {
3309
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3310
0
                                      static_cast<GFloat16 *>(pDstData),
3311
0
                                      nDstPixelStride, nWordCount);
3312
0
            }
3313
0
            else  // input is not complex, so we need to promote to a complex
3314
                  // buffer
3315
0
            {
3316
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3317
0
                                         static_cast<GFloat16 *>(pDstData),
3318
0
                                         nDstPixelStride, nWordCount);
3319
0
            }
3320
0
            break;
3321
0
        case GDT_CFloat32:
3322
0
            if (bInComplex)
3323
0
            {
3324
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3325
0
                                      static_cast<float *>(pDstData),
3326
0
                                      nDstPixelStride, nWordCount);
3327
0
            }
3328
0
            else  // input is not complex, so we need to promote to a complex
3329
                  // buffer
3330
0
            {
3331
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3332
0
                                         static_cast<float *>(pDstData),
3333
0
                                         nDstPixelStride, nWordCount);
3334
0
            }
3335
0
            break;
3336
0
        case GDT_CFloat64:
3337
0
            if (bInComplex)
3338
0
            {
3339
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
3340
0
                                      static_cast<double *>(pDstData),
3341
0
                                      nDstPixelStride, nWordCount);
3342
0
            }
3343
0
            else  // input is not complex, so we need to promote to a complex
3344
                  // buffer
3345
0
            {
3346
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
3347
0
                                         static_cast<double *>(pDstData),
3348
0
                                         nDstPixelStride, nWordCount);
3349
0
            }
3350
0
            break;
3351
0
        case GDT_Unknown:
3352
0
        case GDT_TypeCount:
3353
0
            CPLAssert(false);
3354
0
    }
3355
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned char>(unsigned char const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<signed char>(signed char const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned short>(unsigned short const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<short>(short const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned int>(unsigned int const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<int>(int const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned long>(unsigned long const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<long>(long const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<cpl::Float16>(cpl::Float16 const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<float>(float const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<double>(double const*, int, bool, void*, GDALDataType, int, long long)
3356
3357
}  // end anonymous namespace
3358
3359
/************************************************************************/
3360
/*                          GDALReplicateWord()                         */
3361
/************************************************************************/
3362
3363
template <class T>
3364
inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
3365
                               GPtrDiff_t nWordCount)
3366
0
{
3367
0
    const T valSet = *static_cast<const T *>(pDstData);
3368
0
    if (nDstPixelStride == static_cast<int>(sizeof(T)))
3369
0
    {
3370
0
        T *pDstPtr = static_cast<T *>(pDstData) + 1;
3371
0
        while (nWordCount >= 4)
3372
0
        {
3373
0
            nWordCount -= 4;
3374
0
            pDstPtr[0] = valSet;
3375
0
            pDstPtr[1] = valSet;
3376
0
            pDstPtr[2] = valSet;
3377
0
            pDstPtr[3] = valSet;
3378
0
            pDstPtr += 4;
3379
0
        }
3380
0
        while (nWordCount > 0)
3381
0
        {
3382
0
            --nWordCount;
3383
0
            *pDstPtr = valSet;
3384
0
            pDstPtr++;
3385
0
        }
3386
0
    }
3387
0
    else
3388
0
    {
3389
0
        GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
3390
0
        while (nWordCount > 0)
3391
0
        {
3392
0
            --nWordCount;
3393
0
            *reinterpret_cast<T *>(pabyDstPtr) = valSet;
3394
0
            pabyDstPtr += nDstPixelStride;
3395
0
        }
3396
0
    }
3397
0
}
Unexecuted instantiation: void GDALReplicateWordT<unsigned short>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<short>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<unsigned int>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<int>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<unsigned long>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<long>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<cpl::Float16>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<float>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<double>(void*, int, long long)
3398
3399
static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData,
3400
                              GDALDataType eSrcType,
3401
                              void *CPL_RESTRICT pDstData,
3402
                              GDALDataType eDstType, int nDstPixelStride,
3403
                              GPtrDiff_t nWordCount)
3404
0
{
3405
    /* -----------------------------------------------------------------------
3406
     */
3407
    /* Special case when the source data is always the same value */
3408
    /* (for VRTSourcedRasterBand::IRasterIO and
3409
     * VRTDerivedRasterBand::IRasterIO*/
3410
    /*  for example) */
3411
    /* -----------------------------------------------------------------------
3412
     */
3413
    // Let the general translation case do the necessary conversions
3414
    // on the first destination element.
3415
0
    GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
3416
3417
    // Now copy the first element to the nWordCount - 1 following destination
3418
    // elements.
3419
0
    nWordCount--;
3420
0
    GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
3421
3422
0
    switch (eDstType)
3423
0
    {
3424
0
        case GDT_Byte:
3425
0
        case GDT_Int8:
3426
0
        {
3427
0
            if (nDstPixelStride == 1)
3428
0
            {
3429
0
                if (nWordCount > 0)
3430
0
                    memset(pabyDstWord,
3431
0
                           *reinterpret_cast<const GByte *>(pDstData),
3432
0
                           nWordCount);
3433
0
            }
3434
0
            else
3435
0
            {
3436
0
                GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
3437
0
                while (nWordCount > 0)
3438
0
                {
3439
0
                    --nWordCount;
3440
0
                    *pabyDstWord = valSet;
3441
0
                    pabyDstWord += nDstPixelStride;
3442
0
                }
3443
0
            }
3444
0
            break;
3445
0
        }
3446
3447
0
#define CASE_DUPLICATE_SIMPLE(enum_type, c_type)                               \
3448
0
    case enum_type:                                                            \
3449
0
    {                                                                          \
3450
0
        GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount);     \
3451
0
        break;                                                                 \
3452
0
    }
3453
3454
0
            CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
3455
0
            CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
3456
0
            CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3457
0
            CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
3458
0
            CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
3459
0
            CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
3460
0
            CASE_DUPLICATE_SIMPLE(GDT_Float16, GFloat16)
3461
0
            CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3462
0
            CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3463
3464
0
#define CASE_DUPLICATE_COMPLEX(enum_type, c_type)                              \
3465
0
    case enum_type:                                                            \
3466
0
    {                                                                          \
3467
0
        c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0];        \
3468
0
        c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1];        \
3469
0
        while (nWordCount > 0)                                                 \
3470
0
        {                                                                      \
3471
0
            --nWordCount;                                                      \
3472
0
            reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1;              \
3473
0
            reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2;              \
3474
0
            pabyDstWord += nDstPixelStride;                                    \
3475
0
        }                                                                      \
3476
0
        break;                                                                 \
3477
0
    }
3478
3479
0
            CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
3480
0
            CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3481
0
            CASE_DUPLICATE_COMPLEX(GDT_CFloat16, GFloat16)
3482
0
            CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3483
0
            CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3484
3485
0
        case GDT_Unknown:
3486
0
        case GDT_TypeCount:
3487
0
            CPLAssert(false);
3488
0
    }
3489
0
}
3490
3491
/************************************************************************/
3492
/*                        GDALUnrolledCopy()                            */
3493
/************************************************************************/
3494
3495
template <class T, int srcStride, int dstStride>
3496
static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
3497
                                           const T *CPL_RESTRICT pSrc,
3498
                                           GPtrDiff_t nIters)
3499
0
{
3500
0
    if (nIters >= 16)
3501
0
    {
3502
0
        for (GPtrDiff_t i = nIters / 16; i != 0; i--)
3503
0
        {
3504
0
            pDest[0 * dstStride] = pSrc[0 * srcStride];
3505
0
            pDest[1 * dstStride] = pSrc[1 * srcStride];
3506
0
            pDest[2 * dstStride] = pSrc[2 * srcStride];
3507
0
            pDest[3 * dstStride] = pSrc[3 * srcStride];
3508
0
            pDest[4 * dstStride] = pSrc[4 * srcStride];
3509
0
            pDest[5 * dstStride] = pSrc[5 * srcStride];
3510
0
            pDest[6 * dstStride] = pSrc[6 * srcStride];
3511
0
            pDest[7 * dstStride] = pSrc[7 * srcStride];
3512
0
            pDest[8 * dstStride] = pSrc[8 * srcStride];
3513
0
            pDest[9 * dstStride] = pSrc[9 * srcStride];
3514
0
            pDest[10 * dstStride] = pSrc[10 * srcStride];
3515
0
            pDest[11 * dstStride] = pSrc[11 * srcStride];
3516
0
            pDest[12 * dstStride] = pSrc[12 * srcStride];
3517
0
            pDest[13 * dstStride] = pSrc[13 * srcStride];
3518
0
            pDest[14 * dstStride] = pSrc[14 * srcStride];
3519
0
            pDest[15 * dstStride] = pSrc[15 * srcStride];
3520
0
            pDest += 16 * dstStride;
3521
0
            pSrc += 16 * srcStride;
3522
0
        }
3523
0
        nIters = nIters % 16;
3524
0
    }
3525
0
    for (GPtrDiff_t i = 0; i < nIters; i++)
3526
0
    {
3527
0
        pDest[i * dstStride] = *pSrc;
3528
0
        pSrc += srcStride;
3529
0
    }
3530
0
}
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 3, 1>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 2, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 3, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 4, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 2>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 3>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 4>(short*, short const*, long long)
3531
3532
template <class T, int srcStride, int dstStride>
3533
static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
3534
                                    const T *CPL_RESTRICT pSrc,
3535
                                    GPtrDiff_t nIters)
3536
0
{
3537
0
    GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters);
3538
0
}
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 2, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 3, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 4, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 2>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 3>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 4>(short*, short const*, long long)
3539
3540
#ifdef HAVE_SSE2
3541
3542
template <>
3543
void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
3544
                                   const GByte *CPL_RESTRICT pSrc,
3545
                                   GPtrDiff_t nIters)
3546
0
{
3547
0
    decltype(nIters) i = 0;
3548
0
    if (nIters > 16)
3549
0
    {
3550
0
        const __m128i xmm_mask = _mm_set1_epi16(0xff);
3551
        // If we were sure that there would always be 1 trailing byte, we could
3552
        // check against nIters - 15
3553
0
        for (; i < nIters - 16; i += 16)
3554
0
        {
3555
0
            __m128i xmm0 =
3556
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3557
0
            __m128i xmm1 =
3558
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3559
            // Set higher 8bit of each int16 packed word to 0
3560
0
            xmm0 = _mm_and_si128(xmm0, xmm_mask);
3561
0
            xmm1 = _mm_and_si128(xmm1, xmm_mask);
3562
            // Pack int16 to uint8 and merge back both vector
3563
0
            xmm0 = _mm_packus_epi16(xmm0, xmm1);
3564
3565
            // Store result
3566
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3567
3568
0
            pSrc += 2 * 16;
3569
0
        }
3570
0
    }
3571
0
    for (; i < nIters; i++)
3572
0
    {
3573
0
        pDest[i] = *pSrc;
3574
0
        pSrc += 2;
3575
0
    }
3576
0
}
3577
3578
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
3579
3580
template <>
3581
void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3582
                                   const GByte *CPL_RESTRICT pSrc,
3583
                                   GPtrDiff_t nIters)
3584
0
{
3585
0
    if (nIters > 16 && CPLHaveRuntimeSSSE3())
3586
0
    {
3587
0
        GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3588
0
    }
3589
0
    else
3590
0
    {
3591
0
        GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters);
3592
0
    }
3593
0
}
3594
3595
#endif
3596
3597
template <>
3598
void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
3599
                                   const GByte *CPL_RESTRICT pSrc,
3600
                                   GPtrDiff_t nIters)
3601
0
{
3602
0
    decltype(nIters) i = 0;
3603
0
    if (nIters > 16)
3604
0
    {
3605
0
        const __m128i xmm_mask = _mm_set1_epi32(0xff);
3606
        // If we were sure that there would always be 3 trailing bytes, we could
3607
        // check against nIters - 15
3608
0
        for (; i < nIters - 16; i += 16)
3609
0
        {
3610
0
            __m128i xmm0 =
3611
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3612
0
            __m128i xmm1 =
3613
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3614
0
            __m128i xmm2 =
3615
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3616
0
            __m128i xmm3 =
3617
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
3618
            // Set higher 24bit of each int32 packed word to 0
3619
0
            xmm0 = _mm_and_si128(xmm0, xmm_mask);
3620
0
            xmm1 = _mm_and_si128(xmm1, xmm_mask);
3621
0
            xmm2 = _mm_and_si128(xmm2, xmm_mask);
3622
0
            xmm3 = _mm_and_si128(xmm3, xmm_mask);
3623
            // Pack int32 to int16
3624
0
            xmm0 = _mm_packs_epi32(xmm0, xmm1);
3625
0
            xmm2 = _mm_packs_epi32(xmm2, xmm3);
3626
            // Pack int16 to uint8
3627
0
            xmm0 = _mm_packus_epi16(xmm0, xmm2);
3628
3629
            // Store result
3630
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3631
3632
0
            pSrc += 4 * 16;
3633
0
        }
3634
0
    }
3635
0
    for (; i < nIters; i++)
3636
0
    {
3637
0
        pDest[i] = *pSrc;
3638
0
        pSrc += 4;
3639
0
    }
3640
0
}
3641
#endif  // HAVE_SSE2
3642
3643
/************************************************************************/
3644
/*                         GDALFastCopy()                               */
3645
/************************************************************************/
3646
3647
template <class T>
3648
static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
3649
                                const T *CPL_RESTRICT pSrc, int nSrcStride,
3650
                                GPtrDiff_t nIters)
3651
0
{
3652
0
    constexpr int sizeofT = static_cast<int>(sizeof(T));
3653
0
    if (nIters == 1)
3654
0
    {
3655
0
        *pDest = *pSrc;
3656
0
    }
3657
0
    else if (nDestStride == sizeofT)
3658
0
    {
3659
0
        if (nSrcStride == sizeofT)
3660
0
        {
3661
0
            memcpy(pDest, pSrc, nIters * sizeof(T));
3662
0
        }
3663
0
        else if (nSrcStride == 2 * sizeofT)
3664
0
        {
3665
0
            GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
3666
0
        }
3667
0
        else if (nSrcStride == 3 * sizeofT)
3668
0
        {
3669
0
            GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
3670
0
        }
3671
0
        else if (nSrcStride == 4 * sizeofT)
3672
0
        {
3673
0
            GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
3674
0
        }
3675
0
        else
3676
0
        {
3677
0
            while (nIters-- > 0)
3678
0
            {
3679
0
                *pDest = *pSrc;
3680
0
                pSrc += nSrcStride / sizeofT;
3681
0
                pDest++;
3682
0
            }
3683
0
        }
3684
0
    }
3685
0
    else if (nSrcStride == sizeofT)
3686
0
    {
3687
0
        if (nDestStride == 2 * sizeofT)
3688
0
        {
3689
0
            GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
3690
0
        }
3691
0
        else if (nDestStride == 3 * sizeofT)
3692
0
        {
3693
0
            GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
3694
0
        }
3695
0
        else if (nDestStride == 4 * sizeofT)
3696
0
        {
3697
0
            GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
3698
0
        }
3699
0
        else
3700
0
        {
3701
0
            while (nIters-- > 0)
3702
0
            {
3703
0
                *pDest = *pSrc;
3704
0
                pSrc++;
3705
0
                pDest += nDestStride / sizeofT;
3706
0
            }
3707
0
        }
3708
0
    }
3709
0
    else
3710
0
    {
3711
0
        while (nIters-- > 0)
3712
0
        {
3713
0
            *pDest = *pSrc;
3714
0
            pSrc += nSrcStride / sizeofT;
3715
0
            pDest += nDestStride / sizeofT;
3716
0
        }
3717
0
    }
3718
0
}
Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<unsigned char>(unsigned char*, int, unsigned char const*, int, long long)
Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<short>(short*, int, short const*, int, long long)
3719
3720
/************************************************************************/
3721
/*                         GDALFastCopyByte()                           */
3722
/************************************************************************/
3723
3724
static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
3725
                             int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
3726
                             int nDstPixelStride, GPtrDiff_t nWordCount)
3727
0
{
3728
0
    GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride,
3729
0
                 nWordCount);
3730
0
}
3731
3732
/************************************************************************/
3733
/*                           GDALCopyWords()                            */
3734
/************************************************************************/
3735
3736
/**
3737
 * Copy pixel words from buffer to buffer.
3738
 *
3739
 * @see GDALCopyWords64()
3740
 */
3741
void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
                               GDALDataType eSrcType, int nSrcPixelStride,
                               void *CPL_RESTRICT pDstData,
                               GDALDataType eDstType, int nDstPixelStride,
                               int nWordCount)
{
    // Same operation as GDALCopyWords64(), with the word count given as an
    // int: widen it and forward all other arguments unchanged.
    const GPtrDiff_t nWordCount64 = nWordCount;
    GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
                    nDstPixelStride, nWordCount64);
}
3750
3751
/************************************************************************/
3752
/*                          GDALCopyWords64()                           */
3753
/************************************************************************/
3754
3755
/**
3756
 * Copy pixel words from buffer to buffer.
3757
 *
3758
 * This function is used to copy pixel word values from one memory buffer
3759
 * to another, with support for conversion between data types, and differing
3760
 * step factors. The data type conversion is done using the following
3761
 * rules:
3762
 * <ul>
3763
 * <li>Values assigned to a lower range integer type are clipped. For
3764
 * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3765
 * less than 0 to be set to 0, and values larger than 255 to be set to 255.
3766
 * </li>
3767
 * <li>
3768
 * Assignment from floating point to integer rounds to closest integer.
3769
 * +Infinity is mapped to the largest integer. -Infinity is mapped to the
3770
 * smallest integer. NaN is mapped to 0.
3771
 * </li>
3772
 * <li>
3773
 * Assignment from non-complex to complex will result in the imaginary part
3774
 * being set to zero on output.
3775
 * </li>
3776
 * <li> Assignment from complex to
3777
 * non-complex will result in the complex portion being lost and the real
3778
 * component being preserved (<i>not magnitude!</i>).
3779
 * </li>
3780
 * </ul>
3781
 *
3782
 * No assumptions are made about the source or destination words occurring
3783
 * on word boundaries.  It is assumed that all values are in native machine
3784
 * byte order.
3785
 *
3786
 * @param pSrcData Pointer to source data to be converted.
3787
 * @param eSrcType the source data type (see GDALDataType enum)
3788
 * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3789
 * in bytes
3790
 * @param pDstData Pointer to buffer where destination data should go
3791
 * @param eDstType the destination data type (see GDALDataType enum)
3792
 * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3793
 * words), in bytes
3794
 * @param nWordCount number of words to be copied
3795
 *
3796
 * @note
3797
 * When adding a new data type to GDAL, you must do the following to
3798
 * support it properly within the GDALCopyWords function:
3799
 * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3800
 *    This should invoke the appropriate GDALCopyWordsFromT wrapper.
3801
 * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3802
 *    This should call the appropriate GDALCopyWordsT template.
3803
 * 3. If appropriate, overload the appropriate CopyWord template in the
3804
 *    above namespace. This will ensure that any conversion issues are
3805
 *    handled (cases like the float -> int32 case, where the min/max)
3806
 *    values are subject to roundoff error.
3807
 */
3808
3809
void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
                                 GDALDataType eSrcType, int nSrcPixelStride,
                                 void *CPL_RESTRICT pDstData,
                                 GDALDataType eDstType, int nDstPixelStride,
                                 GPtrDiff_t nWordCount)

{
    const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
    const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
    if (CPL_UNLIKELY(nSrcDataTypeSize == 0 || nDstDataTypeSize == 0))
    {
        CPLError(CE_Failure, CPLE_NotSupported,
                 "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount "
                 "argument");
        return;
    }

    const bool bSameType = (eSrcType == eDstType);
    const bool bSameTypeAndStride =
        bSameType && nSrcPixelStride == nDstPixelStride;

    // On platforms where alignment matters, be careful: a buffer is
    // considered unaligned when its address or its pixel stride is not a
    // multiple of its data type size.
    const auto IsUnaligned =
        [](const void *pData, int nPixelStride, int nDataTypeSize)
    {
        return (reinterpret_cast<uintptr_t>(pData) % nDataTypeSize) != 0 ||
               (nPixelStride % nDataTypeSize) != 0;
    };

    if (!bSameTypeAndStride &&
        (IsUnaligned(pSrcData, nSrcPixelStride, nSrcDataTypeSize) ||
         IsUnaligned(pDstData, nDstPixelStride, nDstDataTypeSize)))
    {
        const GByte *const pabySrc = static_cast<const GByte *>(pSrcData);
        GByte *const pabyDst = static_cast<GByte *>(pDstData);

        if (bSameType)
        {
            // No conversion needed: byte-wise copy of each word.
            for (GPtrDiff_t iWord = 0; iWord < nWordCount; ++iWord)
            {
                memcpy(pabyDst + nDstPixelStride * iWord,
                       pabySrc + nSrcPixelStride * iWord, nDstDataTypeSize);
            }
            return;
        }

        // Conversion needed: go through aligned temporary buffers, one word
        // at a time.
        const auto AlignUp = [](GByte *pabyBuffer, int nAlign)
        {
            const uintptr_t nExcess =
                reinterpret_cast<uintptr_t>(pabyBuffer) % nAlign;
            return nExcess == 0 ? pabyBuffer : pabyBuffer + (nAlign - nExcess);
        };

        // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
        // be sure to get correctly aligned pointer.
        constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double);
        GByte abySrcBuffer[2 * SIZEOF_CFLOAT64];
        GByte abyDstBuffer[2 * SIZEOF_CFLOAT64];
        GByte *const pabyAlignedSrc = AlignUp(abySrcBuffer, nSrcDataTypeSize);
        GByte *const pabyAlignedDst = AlignUp(abyDstBuffer, nDstDataTypeSize);

        for (GPtrDiff_t iWord = 0; iWord < nWordCount; ++iWord)
        {
            memcpy(pabyAlignedSrc, pabySrc + nSrcPixelStride * iWord,
                   nSrcDataTypeSize);
            GDALCopyWords64(pabyAlignedSrc, eSrcType, 0, pabyAlignedDst,
                            eDstType, 0, 1);
            memcpy(pabyDst + nDstPixelStride * iWord, pabyAlignedDst,
                   nDstDataTypeSize);
        }
        return;
    }

    // Deal with the case where we're replicating a single word into the
    // provided buffer
    if (nSrcPixelStride == 0 && nWordCount > 1)
    {
        GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType,
                          nDstPixelStride, nWordCount);
        return;
    }

    if (bSameType)
    {
        // 1-byte types
        if (eSrcType == GDT_Byte || eSrcType == GDT_Int8)
        {
            GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride,
                         static_cast<const GByte *>(pSrcData), nSrcPixelStride,
                         nWordCount);
            return;
        }

        // 2-byte types with even strides
        if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 &&
            (nDstPixelStride % 2) == 0)
        {
            GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride,
                         static_cast<const short *>(pSrcData), nSrcPixelStride,
                         nWordCount);
            return;
        }

        if (nWordCount == 1)
        {
#if defined(CSA_BUILD) || defined(__COVERITY__)
            // Avoid false positives...
            memcpy(pDstData, pSrcData, nSrcDataTypeSize);
#else
            // memcpy() with a compile-time constant size for each possible
            // data type size.
            switch (nSrcDataTypeSize)
            {
                case 2:
                    memcpy(pDstData, pSrcData, 2);
                    break;
                case 4:
                    memcpy(pDstData, pSrcData, 4);
                    break;
                case 8:
                    memcpy(pDstData, pSrcData, 8);
                    break;
                default: /* eSrcType == GDT_CFloat64 */
                    memcpy(pDstData, pSrcData, 16);
                    break;
            }
#endif
            return;
        }

        // Let memcpy() handle the case where we're copying a packed buffer
        // of pixels.
        if (nSrcPixelStride == nDstPixelStride &&
            nSrcPixelStride == nSrcDataTypeSize)
        {
            memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize);
            return;
        }
    }

    // Handle the more general case -- deals with conversion of data types
    // directly. The boolean passed to GDALCopyWordsFromT() is false for the
    // non-complex source types and true for the complex ones.
    constexpr bool bSrcIsReal = false;
    constexpr bool bSrcIsComplex = true;
    switch (eSrcType)
    {
        case GDT_Byte:
            GDALCopyWordsFromT<unsigned char>(
                static_cast<const unsigned char *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Int8:
            GDALCopyWordsFromT<signed char>(
                static_cast<const signed char *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_UInt16:
            GDALCopyWordsFromT<unsigned short>(
                static_cast<const unsigned short *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Int16:
            GDALCopyWordsFromT<short>(
                static_cast<const short *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_UInt32:
            GDALCopyWordsFromT<unsigned int>(
                static_cast<const unsigned int *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Int32:
            GDALCopyWordsFromT<int>(
                static_cast<const int *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_UInt64:
            GDALCopyWordsFromT<std::uint64_t>(
                static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Int64:
            GDALCopyWordsFromT<std::int64_t>(
                static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Float16:
            GDALCopyWordsFromT<GFloat16>(
                static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Float32:
            GDALCopyWordsFromT<float>(
                static_cast<const float *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Float64:
            GDALCopyWordsFromT<double>(
                static_cast<const double *>(pSrcData), nSrcPixelStride,
                bSrcIsReal, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_CInt16:
            GDALCopyWordsFromT<short>(
                static_cast<const short *>(pSrcData), nSrcPixelStride,
                bSrcIsComplex, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_CInt32:
            GDALCopyWordsFromT<int>(
                static_cast<const int *>(pSrcData), nSrcPixelStride,
                bSrcIsComplex, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_CFloat16:
            GDALCopyWordsFromT<GFloat16>(
                static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride,
                bSrcIsComplex, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_CFloat32:
            GDALCopyWordsFromT<float>(
                static_cast<const float *>(pSrcData), nSrcPixelStride,
                bSrcIsComplex, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_CFloat64:
            GDALCopyWordsFromT<double>(
                static_cast<const double *>(pSrcData), nSrcPixelStride,
                bSrcIsComplex, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_Unknown:
        case GDT_TypeCount:
            CPLAssert(false);
    }
}
4022
4023
/************************************************************************/
4024
/*                            GDALCopyBits()                            */
4025
/************************************************************************/
4026
4027
/**
4028
 * Bitwise word copying.
4029
 *
4030
 * A function for moving sets of partial bytes around.  Loosely
4031
 * speaking this is a bitwise analog to GDALCopyWords().
4032
 *
4033
 * It copies nStepCount "words" where each word is nBitCount bits long.
4034
 * The nSrcStep and nDstStep are the number of bits from the start of one
4035
 * word to the next (same as nBitCount if they are packed).  The nSrcOffset
4036
 * and nDstOffset are the offset into the source and destination buffers
4037
 * to start at, also measured in bits.
4038
 *
4039
 * All bit offsets are assumed to start from the high order bit in a byte
4040
 * (i.e. most significant bit first).  Currently this function is not very
4041
 * optimized, but it may be improved for some common cases in the future
4042
 * as needed.
4043
 *
4044
 * @param pabySrcData the source data buffer.
4045
 * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
4046
 * first word to copy.
4047
 * @param nSrcStep the offset in bits from the start of one source word to the
4048
 * start of the next.
4049
 * @param pabyDstData the destination data buffer.
4050
 * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
4051
 * first word to copy over.
4052
 * @param nDstStep the offset in bits from the start of one word to the
4053
 * start of the next.
4054
 * @param nBitCount the number of bits in a word to be copied.
4055
 * @param nStepCount the number of words to copy.
4056
 */
4057
4058
void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
4059
                  GByte *pabyDstData, int nDstOffset, int nDstStep,
4060
                  int nBitCount, int nStepCount)
4061
4062
0
{
4063
0
    VALIDATE_POINTER0(pabySrcData, "GDALCopyBits");
4064
4065
0
    for (int iStep = 0; iStep < nStepCount; iStep++)
4066
0
    {
4067
0
        for (int iBit = 0; iBit < nBitCount; iBit++)
4068
0
        {
4069
0
            if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7)))
4070
0
                pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7));
4071
0
            else
4072
0
                pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7));
4073
4074
0
            nSrcOffset++;
4075
0
            nDstOffset++;
4076
0
        }
4077
4078
0
        nSrcOffset += (nSrcStep - nBitCount);
4079
0
        nDstOffset += (nDstStep - nBitCount);
4080
0
    }
4081
0
}
4082
4083
/************************************************************************/
4084
/*                  GDALBandGetBestOverviewLevel()                      */
4085
/*                                                                      */
4086
/* Returns the best overview level to satisfy the query or -1 if none   */
4087
/* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when        */
4088
/* returning a valid overview level                                     */
4089
/************************************************************************/
4090
4091
// Thin wrapper around GDALBandGetBestOverviewLevel2() for callers that have
// no GDALRasterIOExtraArg: every argument is forwarded unchanged and a null
// extra-argument pointer is supplied.
int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff,
                                 int &nXSize, int &nYSize, int nBufXSize,
                                 int nBufYSize)
{
    GDALRasterIOExtraArg *const psNoExtraArg = nullptr;
    const int nLevel = GDALBandGetBestOverviewLevel2(
        poBand, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize,
        psNoExtraArg);
    return nLevel;
}
4098
4099
int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff,
4100
                                  int &nYOff, int &nXSize, int &nYSize,
4101
                                  int nBufXSize, int nBufYSize,
4102
                                  GDALRasterIOExtraArg *psExtraArg)
4103
0
{
4104
0
    if (psExtraArg != nullptr && psExtraArg->nVersion > 1 &&
4105
0
        psExtraArg->bUseOnlyThisScale)
4106
0
        return -1;
4107
    /* -------------------------------------------------------------------- */
4108
    /*      Compute the desired downsampling factor.  It is                 */
4109
    /*      based on the least reduced axis, and represents the number      */
4110
    /*      of source pixels to one destination pixel.                      */
4111
    /* -------------------------------------------------------------------- */
4112
0
    const double dfDesiredDownsamplingFactor =
4113
0
        ((nXSize / static_cast<double>(nBufXSize)) <
4114
0
             (nYSize / static_cast<double>(nBufYSize)) ||
4115
0
         nBufYSize == 1)
4116
0
            ? nXSize / static_cast<double>(nBufXSize)
4117
0
            : nYSize / static_cast<double>(nBufYSize);
4118
4119
    /* -------------------------------------------------------------------- */
4120
    /*      Find the overview level that largest downsampling factor (most  */
4121
    /*      downsampled) that is still less than (or only a little more)    */
4122
    /*      downsampled than the request.                                   */
4123
    /* -------------------------------------------------------------------- */
4124
0
    const int nOverviewCount = poBand->GetOverviewCount();
4125
0
    GDALRasterBand *poBestOverview = nullptr;
4126
0
    double dfBestDownsamplingFactor = 0;
4127
0
    int nBestOverviewLevel = -1;
4128
4129
0
    const char *pszOversampligThreshold =
4130
0
        CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr);
4131
4132
    // Note: keep this logic for overview selection in sync between
4133
    // gdalwarp_lib.cpp and rasterio.cpp
4134
    // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693
4135
0
    const double dfOversamplingThreshold =
4136
0
        pszOversampligThreshold ? CPLAtof(pszOversampligThreshold)
4137
0
        : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour
4138
0
            ? 1.0
4139
0
            : 1.2;
4140
0
    for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
4141
0
    {
4142
0
        GDALRasterBand *poOverview = poBand->GetOverview(iOverview);
4143
0
        if (poOverview == nullptr ||
4144
0
            poOverview->GetXSize() > poBand->GetXSize() ||
4145
0
            poOverview->GetYSize() > poBand->GetYSize())
4146
0
        {
4147
0
            continue;
4148
0
        }
4149
4150
        // Compute downsampling factor of this overview
4151
0
        const double dfDownsamplingFactor = std::min(
4152
0
            poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()),
4153
0
            poBand->GetYSize() / static_cast<double>(poOverview->GetYSize()));
4154
4155
        // Is it nearly the requested factor and better (lower) than
4156
        // the current best factor?
4157
        // Use an epsilon because of numerical instability.
4158
0
        constexpr double EPSILON = 1e-1;
4159
0
        if (dfDownsamplingFactor >=
4160
0
                dfDesiredDownsamplingFactor * dfOversamplingThreshold +
4161
0
                    EPSILON ||
4162
0
            dfDownsamplingFactor <= dfBestDownsamplingFactor)
4163
0
        {
4164
0
            continue;
4165
0
        }
4166
4167
        // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
4168
0
        const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING");
4169
4170
0
        if (pszResampling != nullptr &&
4171
0
            STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2"))
4172
0
            continue;
4173
4174
        // OK, this is our new best overview.
4175
0
        poBestOverview = poOverview;
4176
0
        nBestOverviewLevel = iOverview;
4177
0
        dfBestDownsamplingFactor = dfDownsamplingFactor;
4178
4179
0
        if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) <
4180
0
            EPSILON)
4181
0
        {
4182
0
            break;
4183
0
        }
4184
0
    }
4185
4186
    /* -------------------------------------------------------------------- */
4187
    /*      If we didn't find an overview that helps us, just return        */
4188
    /*      indicating failure and the full resolution image will be used.  */
4189
    /* -------------------------------------------------------------------- */
4190
0
    if (nBestOverviewLevel < 0)
4191
0
        return -1;
4192
4193
    /* -------------------------------------------------------------------- */
4194
    /*      Recompute the source window in terms of the selected            */
4195
    /*      overview.                                                       */
4196
    /* -------------------------------------------------------------------- */
4197
0
    const double dfXFactor =
4198
0
        poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize());
4199
0
    const double dfYFactor =
4200
0
        poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize());
4201
0
    CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(),
4202
0
             poBestOverview->GetYSize());
4203
4204
0
    const int nOXOff = std::min(poBestOverview->GetXSize() - 1,
4205
0
                                static_cast<int>(nXOff / dfXFactor + 0.5));
4206
0
    const int nOYOff = std::min(poBestOverview->GetYSize() - 1,
4207
0
                                static_cast<int>(nYOff / dfYFactor + 0.5));
4208
0
    int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5));
4209
0
    int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5));
4210
0
    if (nOXOff + nOXSize > poBestOverview->GetXSize())
4211
0
        nOXSize = poBestOverview->GetXSize() - nOXOff;
4212
0
    if (nOYOff + nOYSize > poBestOverview->GetYSize())
4213
0
        nOYSize = poBestOverview->GetYSize() - nOYOff;
4214
4215
0
    if (psExtraArg)
4216
0
    {
4217
0
        if (psExtraArg->bFloatingPointWindowValidity)
4218
0
        {
4219
0
            psExtraArg->dfXOff /= dfXFactor;
4220
0
            psExtraArg->dfXSize /= dfXFactor;
4221
0
            psExtraArg->dfYOff /= dfYFactor;
4222
0
            psExtraArg->dfYSize /= dfYFactor;
4223
0
        }
4224
0
        else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
4225
0
        {
4226
0
            psExtraArg->bFloatingPointWindowValidity = true;
4227
0
            psExtraArg->dfXOff = nXOff / dfXFactor;
4228
0
            psExtraArg->dfXSize = nXSize / dfXFactor;
4229
0
            psExtraArg->dfYOff = nYOff / dfYFactor;
4230
0
            psExtraArg->dfYSize = nYSize / dfYFactor;
4231
0
        }
4232
0
    }
4233
4234
0
    nXOff = nOXOff;
4235
0
    nYOff = nOYOff;
4236
0
    nXSize = nOXSize;
4237
0
    nYSize = nOYSize;
4238
4239
0
    return nBestOverviewLevel;
4240
0
}
4241
4242
/************************************************************************/
4243
/*                          OverviewRasterIO()                          */
4244
/*                                                                      */
4245
/*      Special work function to utilize available overviews to         */
4246
/*      more efficiently satisfy downsampled requests.  It will         */
4247
/*      return CE_Failure if there are no appropriate overviews         */
4248
/*      available but it doesn't emit any error messages.               */
4249
/************************************************************************/
4250
4251
//! @cond Doxygen_Suppress
4252
CPLErr GDALRasterBand::OverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)

{
    // The overview selection may alter the extra arguments, so it is given a
    // private copy rather than the caller's structure.
    GDALRasterIOExtraArg sOvrExtraArg;
    GDALCopyRasterIOExtraArg(&sOvrExtraArg, psExtraArg);

    // On success the window (nXOff, nYOff, nXSize, nYSize) is rewritten in
    // overview pixel coordinates.
    const int iBestLevel = GDALBandGetBestOverviewLevel2(
        this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize,
        &sOvrExtraArg);

    /* -------------------------------------------------------------------- */
    /*      Fail (without emitting an error) when no overview level was     */
    /*      selected or the selected level cannot be fetched.               */
    /* -------------------------------------------------------------------- */
    GDALRasterBand *poOvrBand =
        iBestLevel >= 0 ? GetOverview(iBestLevel) : nullptr;
    if (poOvrBand == nullptr)
        return CE_Failure;

    /* -------------------------------------------------------------------- */
    /*      Re-issue the request against the overview band.                 */
    /* -------------------------------------------------------------------- */
    return poOvrBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
                               nBufXSize, nBufYSize, eBufType, nPixelSpace,
                               nLineSpace, &sOvrExtraArg);
}
4277
4278
/************************************************************************/
4279
/*                      TryOverviewRasterIO()                           */
4280
/************************************************************************/
4281
4282
// Attempts to serve the request from an overview of this band.
// *pbTried is set to TRUE when an overview was used, in which case the
// result of its RasterIO() is returned; otherwise *pbTried is set to FALSE
// and CE_None is returned without any I/O having been done.
CPLErr GDALRasterBand::TryOverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg,
    int *pbTried)
{
    // Private copies: the overview selection rewrites both the window and
    // the extra arguments.
    GDALRasterIOExtraArg sOvrExtraArg;
    GDALCopyRasterIOExtraArg(&sOvrExtraArg, psExtraArg);

    int nOvrXOff = nXOff;
    int nOvrYOff = nYOff;
    int nOvrXSize = nXSize;
    int nOvrYSize = nYSize;

    const int nLevel = GDALBandGetBestOverviewLevel2(
        this, nOvrXOff, nOvrYOff, nOvrXSize, nOvrYSize, nBufXSize, nBufYSize,
        &sOvrExtraArg);

    GDALRasterBand *poOvrBand = nLevel >= 0 ? GetOverview(nLevel) : nullptr;
    if (poOvrBand == nullptr)
    {
        // Nothing suitable: let the caller fall back to its regular path.
        *pbTried = FALSE;
        return CE_None;
    }

    *pbTried = TRUE;
    return poOvrBand->RasterIO(eRWFlag, nOvrXOff, nOvrYOff, nOvrXSize,
                               nOvrYSize, pData, nBufXSize, nBufYSize,
                               eBufType, nPixelSpace, nLineSpace,
                               &sOvrExtraArg);
}
4316
4317
/************************************************************************/
4318
/*                      TryOverviewRasterIO()                           */
4319
/************************************************************************/
4320
4321
// Attempts to serve a multi-band request from an overview dataset.
//
// The overview level is chosen from the overviews of the first band of the
// dataset (papoBands[0]).  When that overview exists and is attached to a
// dataset, the request is forwarded to that dataset's RasterIO(), *pbTried
// is set to TRUE and the forwarded call's result is returned.  Otherwise
// *pbTried is set to FALSE and CE_None is returned without any I/O having
// been done, so that the caller can fall back to its regular path.
CPLErr GDALDataset::TryOverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg,
    int *pbTried)
{
    // Work on copies: the overview selection rewrites both the window and
    // the extra arguments.
    int nXOffMod = nXOff;
    int nYOffMod = nYOff;
    int nXSizeMod = nXSize;
    int nYSizeMod = nYSize;
    GDALRasterIOExtraArg sExtraArg;
    GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);

    const int iOvrLevel = GDALBandGetBestOverviewLevel2(
        papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize,
        nBufYSize, &sExtraArg);

    // Resolve the overview band and its dataset exactly once, so that the
    // pointers that were null-checked are the very ones dereferenced below.
    GDALRasterBand *poOvrBand =
        iOvrLevel >= 0 ? papoBands[0]->GetOverview(iOvrLevel) : nullptr;
    GDALDataset *poOvrDS =
        poOvrBand != nullptr ? poOvrBand->GetDataset() : nullptr;

    if (poOvrDS == nullptr)
    {
        *pbTried = FALSE;
        return CE_None;
    }

    *pbTried = TRUE;
    return poOvrDS->RasterIO(eRWFlag, nXOffMod, nYOffMod, nXSizeMod,
                             nYSizeMod, pData, nBufXSize, nBufYSize, eBufType,
                             nBandCount, panBandMap, nPixelSpace, nLineSpace,
                             nBandSpace, &sExtraArg);
}
4354
4355
/************************************************************************/
4356
/*                        GetBestOverviewLevel()                        */
4357
/*                                                                      */
4358
/* Returns the best overview level to satisfy the query or -1 if none   */
4359
/* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid     */
4360
/* overview level                                                       */
4361
/************************************************************************/
4362
4363
static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff,
4364
                                           int &nYOff, int &nXSize, int &nYSize,
4365
                                           int nBufXSize, int nBufYSize,
4366
                                           int nBandCount,
4367
                                           const int *panBandMap,
4368
                                           GDALRasterIOExtraArg *psExtraArg)
4369
0
{
4370
0
    int nOverviewCount = 0;
4371
0
    GDALRasterBand *poFirstBand = nullptr;
4372
4373
    /* -------------------------------------------------------------------- */
4374
    /* Check that all bands have the same number of overviews and           */
4375
    /* that they have all the same size and block dimensions                */
4376
    /* -------------------------------------------------------------------- */
4377
0
    for (int iBand = 0; iBand < nBandCount; iBand++)
4378
0
    {
4379
0
        GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]);
4380
0
        if (poBand == nullptr)
4381
0
            return -1;
4382
0
        if (iBand == 0)
4383
0
        {
4384
0
            poFirstBand = poBand;
4385
0
            nOverviewCount = poBand->GetOverviewCount();
4386
0
        }
4387
0
        else if (nOverviewCount != poBand->GetOverviewCount())
4388
0
        {
4389
0
            CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
4390
0
                             "mismatched overview count, use std method.");
4391
0
            return -1;
4392
0
        }
4393
0
        else
4394
0
        {
4395
0
            for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
4396
0
            {
4397
0
                GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview);
4398
0
                GDALRasterBand *poOvrFirstBand =
4399
0
                    poFirstBand->GetOverview(iOverview);
4400
0
                if (poOvrBand == nullptr || poOvrFirstBand == nullptr)
4401
0
                    continue;
4402
4403
0
                if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
4404
0
                    poOvrFirstBand->GetYSize() != poOvrBand->GetYSize())
4405
0
                {
4406
0
                    CPLDebug("GDAL",
4407
0
                             "GDALDataset::GetBestOverviewLevel() ... "
4408
0
                             "mismatched overview sizes, use std method.");
4409
0
                    return -1;
4410
0
                }
4411
0
                int nBlockXSizeFirst = 0;
4412
0
                int nBlockYSizeFirst = 0;
4413
0
                poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst,
4414
0
                                             &nBlockYSizeFirst);
4415
4416
0
                int nBlockXSizeCurrent = 0;
4417
0
                int nBlockYSizeCurrent = 0;
4418
0
                poOvrBand->GetBlockSize(&nBlockXSizeCurrent,
4419
0
                                        &nBlockYSizeCurrent);
4420
4421
0
                if (nBlockXSizeFirst != nBlockXSizeCurrent ||
4422
0
                    nBlockYSizeFirst != nBlockYSizeCurrent)
4423
0
                {
4424
0
                    CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
4425
0
                                     "mismatched block sizes, use std method.");
4426
0
                    return -1;
4427
0
                }
4428
0
            }
4429
0
        }
4430
0
    }
4431
0
    if (poFirstBand == nullptr)
4432
0
        return -1;
4433
4434
0
    return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize,
4435
0
                                         nYSize, nBufXSize, nBufYSize,
4436
0
                                         psExtraArg);
4437
0
}
4438
4439
/************************************************************************/
4440
/*                         BlockBasedRasterIO()                         */
4441
/*                                                                      */
4442
/*      This convenience function implements a dataset level            */
4443
/*      RasterIO() interface based on calling down to fetch blocks,     */
4444
/*      much like the GDALRasterBand::IRasterIO(), but it handles       */
4445
/*      all bands at once, so that a format driver that handles a       */
4446
/*      request for different bands of the same block efficiently       */
4447
/*      (i.e. without re-reading interleaved data) will be efficient.   */
4448
/*                                                                      */
4449
/*      This method is intended to be called by an overridden           */
4450
/*      IRasterIO() method in the driver specific GDALDataset           */
4451
/*      derived class.                                                  */
4452
/*                                                                      */
4453
/*      Default internal implementation of RasterIO() ... utilizes      */
4454
/*      the Block access methods to satisfy the request.  This would    */
4455
/*      normally only be overridden by formats with overviews.          */
4456
/*                                                                      */
4457
/*      To keep things relatively simple, this method does not          */
4458
/*      currently take advantage of some special cases addressed in     */
4459
/*      GDALRasterBand::IRasterIO(), so it is likely best to only       */
4460
/*      call it when you know it will help.  That is in cases where     */
4461
/*      data is at 1:1 to the buffer, and you know the driver is        */
4462
/*      implementing interleaved IO efficiently on a block by block     */
4463
/*      basis. Overviews will be used when possible.                    */
4464
/************************************************************************/
4465
4466
CPLErr GDALDataset::BlockBasedRasterIO(
4467
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
4468
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
4469
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
4470
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
4471
4472
0
{
4473
0
    CPLAssert(nullptr != pData);
4474
4475
0
    GByte **papabySrcBlock = nullptr;
4476
0
    GDALRasterBlock *poBlock = nullptr;
4477
0
    GDALRasterBlock **papoBlocks = nullptr;
4478
0
    int nLBlockX = -1;
4479
0
    int nLBlockY = -1;
4480
0
    int iBufYOff;
4481
0
    int iBufXOff;
4482
0
    int nBlockXSize = 1;
4483
0
    int nBlockYSize = 1;
4484
0
    CPLErr eErr = CE_None;
4485
0
    GDALDataType eDataType = GDT_Byte;
4486
4487
0
    const bool bUseIntegerRequestCoords =
4488
0
        (!psExtraArg->bFloatingPointWindowValidity ||
4489
0
         (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
4490
0
          nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
4491
4492
    /* -------------------------------------------------------------------- */
4493
    /*      Ensure that all bands share a common block size and data type.  */
4494
    /* -------------------------------------------------------------------- */
4495
0
    for (int iBand = 0; iBand < nBandCount; iBand++)
4496
0
    {
4497
0
        GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4498
4499
0
        if (iBand == 0)
4500
0
        {
4501
0
            poBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4502
0
            eDataType = poBand->GetRasterDataType();
4503
0
        }
4504
0
        else
4505
0
        {
4506
0
            int nThisBlockXSize = 0;
4507
0
            int nThisBlockYSize = 0;
4508
0
            poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize);
4509
0
            if (nThisBlockXSize != nBlockXSize ||
4510
0
                nThisBlockYSize != nBlockYSize)
4511
0
            {
4512
0
                CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4513
0
                                 "mismatched block sizes, use std method.");
4514
0
                return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4515
0
                                         pData, nBufXSize, nBufYSize, eBufType,
4516
0
                                         nBandCount, panBandMap, nPixelSpace,
4517
0
                                         nLineSpace, nBandSpace, psExtraArg);
4518
0
            }
4519
4520
0
            if (eDataType != poBand->GetRasterDataType() &&
4521
0
                (nXSize != nBufXSize || nYSize != nBufYSize))
4522
0
            {
4523
0
                CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4524
0
                                 "mismatched band data types, use std method.");
4525
0
                return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4526
0
                                         pData, nBufXSize, nBufYSize, eBufType,
4527
0
                                         nBandCount, panBandMap, nPixelSpace,
4528
0
                                         nLineSpace, nBandSpace, psExtraArg);
4529
0
            }
4530
0
        }
4531
0
    }
4532
4533
    /* ==================================================================== */
4534
    /*      In this special case at full resolution we step through in      */
4535
    /*      blocks, turning the request over to the per-band                */
4536
    /*      IRasterIO(), but ensuring that all bands of one block are       */
4537
    /*      called before proceeding to the next.                           */
4538
    /* ==================================================================== */
4539
4540
0
    if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
4541
0
    {
4542
0
        GDALRasterIOExtraArg sDummyExtraArg;
4543
0
        INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);
4544
4545
0
        int nChunkYSize = 0;
4546
0
        int nChunkXSize = 0;
4547
4548
0
        for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize)
4549
0
        {
4550
0
            const int nChunkYOff = iBufYOff + nYOff;
4551
0
            nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
4552
0
            if (nChunkYOff + nChunkYSize > nYOff + nYSize)
4553
0
                nChunkYSize = (nYOff + nYSize) - nChunkYOff;
4554
4555
0
            for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize)
4556
0
            {
4557
0
                const int nChunkXOff = iBufXOff + nXOff;
4558
0
                nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
4559
0
                if (nChunkXOff + nChunkXSize > nXOff + nXSize)
4560
0
                    nChunkXSize = (nXOff + nXSize) - nChunkXOff;
4561
4562
0
                GByte *pabyChunkData =
4563
0
                    static_cast<GByte *>(pData) + iBufXOff * nPixelSpace +
4564
0
                    static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;
4565
4566
0
                for (int iBand = 0; iBand < nBandCount; iBand++)
4567
0
                {
4568
0
                    GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4569
4570
0
                    eErr = poBand->IRasterIO(
4571
0
                        eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize,
4572
0
                        nChunkYSize,
4573
0
                        pabyChunkData +
4574
0
                            static_cast<GPtrDiff_t>(iBand) * nBandSpace,
4575
0
                        nChunkXSize, nChunkYSize, eBufType, nPixelSpace,
4576
0
                        nLineSpace, &sDummyExtraArg);
4577
0
                    if (eErr != CE_None)
4578
0
                        return eErr;
4579
0
                }
4580
0
            }
4581
4582
0
            if (psExtraArg->pfnProgress != nullptr &&
4583
0
                !psExtraArg->pfnProgress(
4584
0
                    1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) /
4585
0
                        nBufYSize,
4586
0
                    "", psExtraArg->pProgressData))
4587
0
            {
4588
0
                return CE_Failure;
4589
0
            }
4590
0
        }
4591
4592
0
        return CE_None;
4593
0
    }
4594
4595
    /* Below code is not compatible with that case. It would need a complete */
4596
    /* separate code like done in GDALRasterBand::IRasterIO. */
4597
0
    if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
4598
0
    {
4599
0
        return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4600
0
                                 nBufXSize, nBufYSize, eBufType, nBandCount,
4601
0
                                 panBandMap, nPixelSpace, nLineSpace,
4602
0
                                 nBandSpace, psExtraArg);
4603
0
    }
4604
4605
    /* We could have a smarter implementation, but that will do for now */
4606
0
    if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
4607
0
        (nBufXSize != nXSize || nBufYSize != nYSize))
4608
0
    {
4609
0
        return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4610
0
                                 nBufXSize, nBufYSize, eBufType, nBandCount,
4611
0
                                 panBandMap, nPixelSpace, nLineSpace,
4612
0
                                 nBandSpace, psExtraArg);
4613
0
    }
4614
4615
    /* ==================================================================== */
4616
    /*      Loop reading required source blocks to satisfy output           */
4617
    /*      request.  This is the most general implementation.              */
4618
    /* ==================================================================== */
4619
4620
0
    const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
4621
4622
0
    papabySrcBlock =
4623
0
        static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount));
4624
0
    papoBlocks =
4625
0
        static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount));
4626
4627
    /* -------------------------------------------------------------------- */
4628
    /*      Select an overview level if appropriate.                        */
4629
    /* -------------------------------------------------------------------- */
4630
4631
0
    GDALRasterIOExtraArg sExtraArg;
4632
0
    GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
4633
0
    const int nOverviewLevel = GDALDatasetGetBestOverviewLevel(
4634
0
        this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount,
4635
0
        panBandMap, &sExtraArg);
4636
0
    if (nOverviewLevel >= 0)
4637
0
    {
4638
0
        GetRasterBand(panBandMap[0])
4639
0
            ->GetOverview(nOverviewLevel)
4640
0
            ->GetBlockSize(&nBlockXSize, &nBlockYSize);
4641
0
    }
4642
4643
0
    double dfXOff = nXOff;
4644
0
    double dfYOff = nYOff;
4645
0
    double dfXSize = nXSize;
4646
0
    double dfYSize = nYSize;
4647
0
    if (sExtraArg.bFloatingPointWindowValidity)
4648
0
    {
4649
0
        dfXOff = sExtraArg.dfXOff;
4650
0
        dfYOff = sExtraArg.dfYOff;
4651
0
        dfXSize = sExtraArg.dfXSize;
4652
0
        dfYSize = sExtraArg.dfYSize;
4653
0
    }
4654
4655
    /* -------------------------------------------------------------------- */
4656
    /*      Compute stepping increment.                                     */
4657
    /* -------------------------------------------------------------------- */
4658
0
    const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
4659
0
    const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
4660
4661
0
    constexpr double EPS = 1e-10;
4662
    /* -------------------------------------------------------------------- */
4663
    /*      Loop over buffer computing source locations.                    */
4664
    /* -------------------------------------------------------------------- */
4665
0
    for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
4666
0
    {
4667
0
        GPtrDiff_t iSrcOffset;
4668
4669
        // Add small epsilon to avoid some numeric precision issues.
4670
0
        const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
4671
0
        const int iSrcY = static_cast<int>(std::min(
4672
0
            std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
4673
4674
0
        GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
4675
0
                                static_cast<GPtrDiff_t>(nLineSpace);
4676
4677
0
        for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++)
4678
0
        {
4679
0
            const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
4680
0
            const int iSrcX = static_cast<int>(std::min(
4681
0
                std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1)));
4682
4683
            // FIXME: this code likely doesn't work if the dirty block gets
4684
            // flushed to disk before being completely written. In the meantime,
4685
            // bJustInitialize should probably be set to FALSE even if it is not
4686
            // ideal performance wise, and for lossy compression
4687
4688
            /* --------------------------------------------------------------------
4689
             */
4690
            /*      Ensure we have the appropriate block loaded. */
4691
            /* --------------------------------------------------------------------
4692
             */
4693
0
            if (iSrcX < nLBlockX * nBlockXSize ||
4694
0
                iSrcX - nBlockXSize >= nLBlockX * nBlockXSize ||
4695
0
                iSrcY < nLBlockY * nBlockYSize ||
4696
0
                iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
4697
0
            {
4698
0
                nLBlockX = iSrcX / nBlockXSize;
4699
0
                nLBlockY = iSrcY / nBlockYSize;
4700
4701
0
                const bool bJustInitialize =
4702
0
                    eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
4703
0
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
4704
0
                    nXOff <= nLBlockX * nBlockXSize &&
4705
0
                    nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
4706
                /*bool bMemZeroBuffer = FALSE;
4707
                if( eRWFlag == GF_Write && !bJustInitialize &&
4708
                    nXOff <= nLBlockX * nBlockXSize &&
4709
                    nYOff <= nLBlockY * nBlockYSize &&
4710
                    (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
4711
                     (nXOff + nXSize == GetRasterXSize() &&
4712
                     (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
4713
                    (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
4714
                     (nYOff + nYSize == GetRasterYSize() &&
4715
                     (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
4716
                {
4717
                    bJustInitialize = TRUE;
4718
                    bMemZeroBuffer = TRUE;
4719
                }*/
4720
0
                for (int iBand = 0; iBand < nBandCount; iBand++)
4721
0
                {
4722
0
                    GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4723
0
                    if (nOverviewLevel >= 0)
4724
0
                        poBand = poBand->GetOverview(nOverviewLevel);
4725
0
                    poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY,
4726
0
                                                        bJustInitialize);
4727
0
                    if (poBlock == nullptr)
4728
0
                    {
4729
0
                        eErr = CE_Failure;
4730
0
                        goto CleanupAndReturn;
4731
0
                    }
4732
4733
0
                    if (eRWFlag == GF_Write)
4734
0
                        poBlock->MarkDirty();
4735
4736
0
                    if (papoBlocks[iBand] != nullptr)
4737
0
                        papoBlocks[iBand]->DropLock();
4738
4739
0
                    papoBlocks[iBand] = poBlock;
4740
4741
0
                    papabySrcBlock[iBand] =
4742
0
                        static_cast<GByte *>(poBlock->GetDataRef());
4743
                    /*if( bMemZeroBuffer )
4744
                    {
4745
                        memset(papabySrcBlock[iBand], 0,
4746
                            static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
4747
                    * nBlockYSize);
4748
                    }*/
4749
0
                }
4750
0
            }
4751
4752
            /* --------------------------------------------------------------------
4753
             */
4754
            /*      Copy over this pixel of data. */
4755
            /* --------------------------------------------------------------------
4756
             */
4757
0
            iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) -
4758
0
                          static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
4759
0
                          (static_cast<GPtrDiff_t>(iSrcY) -
4760
0
                           static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
4761
0
                              nBlockXSize) *
4762
0
                         nBandDataSize;
4763
4764
0
            for (int iBand = 0; iBand < nBandCount; iBand++)
4765
0
            {
4766
0
                GByte *pabySrcBlock = papabySrcBlock[iBand];
4767
0
                GPtrDiff_t iBandBufOffset =
4768
0
                    iBufOffset + static_cast<GPtrDiff_t>(iBand) *
4769
0
                                     static_cast<GPtrDiff_t>(nBandSpace);
4770
4771
0
                if (eDataType == eBufType)
4772
0
                {
4773
0
                    if (eRWFlag == GF_Read)
4774
0
                        memcpy(static_cast<GByte *>(pData) + iBandBufOffset,
4775
0
                               pabySrcBlock + iSrcOffset, nBandDataSize);
4776
0
                    else
4777
0
                        memcpy(pabySrcBlock + iSrcOffset,
4778
0
                               static_cast<const GByte *>(pData) +
4779
0
                                   iBandBufOffset,
4780
0
                               nBandDataSize);
4781
0
                }
4782
0
                else
4783
0
                {
4784
                    /* type to type conversion ... ouch, this is expensive way
4785
                       of handling single words */
4786
4787
0
                    if (eRWFlag == GF_Read)
4788
0
                        GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
4789
0
                                        static_cast<GByte *>(pData) +
4790
0
                                            iBandBufOffset,
4791
0
                                        eBufType, 0, 1);
4792
0
                    else
4793
0
                        GDALCopyWords64(static_cast<const GByte *>(pData) +
4794
0
                                            iBandBufOffset,
4795
0
                                        eBufType, 0, pabySrcBlock + iSrcOffset,
4796
0
                                        eDataType, 0, 1);
4797
0
                }
4798
0
            }
4799
4800
0
            iBufOffset += static_cast<int>(nPixelSpace);
4801
0
        }
4802
0
    }
4803
4804
    /* -------------------------------------------------------------------- */
4805
    /*      CleanupAndReturn.                                               */
4806
    /* -------------------------------------------------------------------- */
4807
0
CleanupAndReturn:
4808
0
    CPLFree(papabySrcBlock);
4809
0
    if (papoBlocks != nullptr)
4810
0
    {
4811
0
        for (int iBand = 0; iBand < nBandCount; iBand++)
4812
0
        {
4813
0
            if (papoBlocks[iBand] != nullptr)
4814
0
                papoBlocks[iBand]->DropLock();
4815
0
        }
4816
0
        CPLFree(papoBlocks);
4817
0
    }
4818
4819
0
    return eErr;
4820
0
}
4821
4822
//! @endcond
4823
4824
/************************************************************************/
4825
/*                  GDALCopyWholeRasterGetSwathSize()                   */
4826
/************************************************************************/
4827
4828
static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand,
4829
                                            GDALRasterBand *poDstPrototypeBand,
4830
                                            int nBandCount,
4831
                                            int bDstIsCompressed,
4832
                                            int bInterleave, int *pnSwathCols,
4833
                                            int *pnSwathLines)
4834
0
{
4835
0
    GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4836
0
    int nSrcBlockXSize = 0;
4837
0
    int nSrcBlockYSize = 0;
4838
0
    int nBlockXSize = 0;
4839
0
    int nBlockYSize = 0;
4840
4841
0
    int nXSize = poSrcPrototypeBand->GetXSize();
4842
0
    int nYSize = poSrcPrototypeBand->GetYSize();
4843
4844
0
    poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize);
4845
0
    poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4846
4847
0
    const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
4848
0
    const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
4849
4850
0
    int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4851
0
    if (bInterleave)
4852
0
        nPixelSize *= nBandCount;
4853
4854
    // aim for one row of blocks.  Do not settle for less.
4855
0
    int nSwathCols = nXSize;
4856
0
    int nSwathLines = nMaxBlockYSize;
4857
4858
0
    const char *pszSrcCompression =
4859
0
        poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4860
0
    if (pszSrcCompression == nullptr)
4861
0
    {
4862
0
        auto poSrcDS = poSrcPrototypeBand->GetDataset();
4863
0
        if (poSrcDS)
4864
0
            pszSrcCompression =
4865
0
                poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4866
0
    }
4867
4868
    /* -------------------------------------------------------------------- */
4869
    /*      What will our swath size be?                                    */
4870
    /* -------------------------------------------------------------------- */
4871
    // When writing interleaved data in a compressed format, we want to be sure
4872
    // that each block will only be written once, so the swath size must not be
4873
    // greater than the block cache.
4874
0
    const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
4875
0
    int nTargetSwathSize;
4876
0
    if (pszSwathSize != nullptr)
4877
0
        nTargetSwathSize = static_cast<int>(
4878
0
            std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
4879
0
    else
4880
0
    {
4881
        // As a default, take one 1/4 of the cache size.
4882
0
        nTargetSwathSize = static_cast<int>(
4883
0
            std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
4884
4885
        // but if the minimum idal swath buf size is less, then go for it to
4886
        // avoid unnecessarily abusing RAM usage.
4887
        // but try to use 10 MB at least.
4888
0
        GIntBig nIdealSwathBufSize =
4889
0
            static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
4890
0
        int nMinTargetSwathSize = 10 * 1000 * 1000;
4891
4892
0
        if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() &
4893
0
             GSBAP_LARGEST_CHUNK_POSSIBLE) != 0)
4894
0
        {
4895
0
            nMinTargetSwathSize = nTargetSwathSize;
4896
0
        }
4897
4898
0
        if (nIdealSwathBufSize < nTargetSwathSize &&
4899
0
            nIdealSwathBufSize < nMinTargetSwathSize)
4900
0
        {
4901
0
            nIdealSwathBufSize = nMinTargetSwathSize;
4902
0
        }
4903
4904
0
        if (pszSrcCompression != nullptr &&
4905
0
            EQUAL(pszSrcCompression, "JPEG2000") &&
4906
0
            (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
4907
0
                                   (nSrcBlockYSize % nBlockYSize) == 0)))
4908
0
        {
4909
0
            nIdealSwathBufSize =
4910
0
                std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) *
4911
0
                                                 nSrcBlockYSize * nPixelSize);
4912
0
        }
4913
0
        if (nTargetSwathSize > nIdealSwathBufSize)
4914
0
            nTargetSwathSize = static_cast<int>(
4915
0
                std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
4916
0
    }
4917
4918
0
    if (nTargetSwathSize < 1000000)
4919
0
        nTargetSwathSize = 1000000;
4920
4921
    /* But let's check that  */
4922
0
    if (bDstIsCompressed && bInterleave &&
4923
0
        nTargetSwathSize > GDALGetCacheMax64())
4924
0
    {
4925
0
        CPLError(CE_Warning, CPLE_AppDefined,
4926
0
                 "When translating into a compressed interleave format, "
4927
0
                 "the block cache size (" CPL_FRMT_GIB ") "
4928
0
                 "should be at least the size of the swath (%d) "
4929
0
                 "(GDAL_SWATH_SIZE config. option)",
4930
0
                 GDALGetCacheMax64(), nTargetSwathSize);
4931
0
    }
4932
4933
0
#define IS_DIVIDER_OF(x, y) ((y) % (x) == 0)
4934
0
#define ROUND_TO(x, y) (((x) / (y)) * (y))
4935
4936
    // if both input and output datasets are tiled, that the tile dimensions
4937
    // are "compatible", try to stick  to a swath dimension that is a multiple
4938
    // of input and output block dimensions.
4939
0
    if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
4940
0
        IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
4941
0
        IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
4942
0
        IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4943
0
        IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4944
0
    {
4945
0
        if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize *
4946
0
                nPixelSize <=
4947
0
            static_cast<GIntBig>(nTargetSwathSize))
4948
0
        {
4949
0
            nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
4950
0
            nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
4951
0
            if (nSwathCols == 0)
4952
0
                nSwathCols = nMaxBlockXSize;
4953
0
            if (nSwathCols > nXSize)
4954
0
                nSwathCols = nXSize;
4955
0
            nSwathLines = nMaxBlockYSize;
4956
4957
0
            if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
4958
0
                static_cast<GIntBig>(nTargetSwathSize))
4959
0
            {
4960
0
                nSwathCols = nXSize;
4961
0
                nSwathLines = nBlockYSize;
4962
0
            }
4963
0
        }
4964
0
    }
4965
4966
0
    const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
4967
0
    const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
4968
0
    if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize))
4969
0
    {
4970
0
        nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
4971
0
        if (nSwathLines == 0)
4972
0
            nSwathLines = 1;
4973
4974
0
        CPLDebug(
4975
0
            "GDAL",
4976
0
            "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4977
0
            "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4978
0
            "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4979
0
            nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize);
4980
0
    }
4981
    // If we are processing single scans, try to handle several at once.
4982
    // If we are handling swaths already, only grow the swath if a row
4983
    // of blocks is substantially less than our target buffer size.
4984
0
    else if (nSwathLines == 1 ||
4985
0
             nMemoryPerCol * nSwathLines <
4986
0
                 static_cast<GIntBig>(nTargetSwathSize) / 10)
4987
0
    {
4988
0
        nSwathLines = std::min(
4989
0
            nYSize,
4990
0
            std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
4991
4992
        /* If possible try to align to source and target block height */
4993
0
        if ((nSwathLines % nMaxBlockYSize) != 0 &&
4994
0
            nSwathLines > nMaxBlockYSize &&
4995
0
            IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4996
0
            IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4997
0
            nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
4998
0
    }
4999
5000
0
    if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
5001
0
        (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
5002
0
                               IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))))
5003
0
    {
5004
        // Typical use case: converting from Pleaiades that is 2048x2048 tiled.
5005
0
        if (nSwathLines < nSrcBlockYSize)
5006
0
        {
5007
0
            nSwathLines = nSrcBlockYSize;
5008
5009
            // Number of pixels that can be read/write simultaneously.
5010
0
            nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
5011
0
            nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
5012
0
            if (nSwathCols == 0)
5013
0
                nSwathCols = nSrcBlockXSize;
5014
0
            if (nSwathCols > nXSize)
5015
0
                nSwathCols = nXSize;
5016
5017
0
            CPLDebug(
5018
0
                "GDAL",
5019
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression and "
5020
0
                "too high block, "
5021
0
                "use partial width at one time");
5022
0
        }
5023
0
        else if ((nSwathLines % nSrcBlockYSize) != 0)
5024
0
        {
5025
            /* Round on a multiple of nSrcBlockYSize */
5026
0
            nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
5027
0
            CPLDebug(
5028
0
                "GDAL",
5029
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression, "
5030
0
                "round nSwathLines to block height : %d",
5031
0
                nSwathLines);
5032
0
        }
5033
0
    }
5034
0
    else if (bDstIsCompressed)
5035
0
    {
5036
0
        if (nSwathLines < nBlockYSize)
5037
0
        {
5038
0
            nSwathLines = nBlockYSize;
5039
5040
            // Number of pixels that can be read/write simultaneously.
5041
0
            nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
5042
0
            nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
5043
0
            if (nSwathCols == 0)
5044
0
                nSwathCols = nBlockXSize;
5045
0
            if (nSwathCols > nXSize)
5046
0
                nSwathCols = nXSize;
5047
5048
0
            CPLDebug(
5049
0
                "GDAL",
5050
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression and "
5051
0
                "too high block, "
5052
0
                "use partial width at one time");
5053
0
        }
5054
0
        else if ((nSwathLines % nBlockYSize) != 0)
5055
0
        {
5056
            // Round on a multiple of nBlockYSize.
5057
0
            nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
5058
0
            CPLDebug(
5059
0
                "GDAL",
5060
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression, "
5061
0
                "round nSwathLines to block height : %d",
5062
0
                nSwathLines);
5063
0
        }
5064
0
    }
5065
5066
0
    *pnSwathCols = nSwathCols;
5067
0
    *pnSwathLines = nSwathLines;
5068
0
}
5069
5070
/************************************************************************/
5071
/*                     GDALDatasetCopyWholeRaster()                     */
5072
/************************************************************************/
5073
5074
/**
5075
 * \brief Copy all dataset raster data.
5076
 *
5077
 * This function copies the complete raster contents of one dataset to
5078
 * another similarly configured dataset.  The source and destination
5079
 * dataset must have the same number of bands, and the same width
5080
 * and height.  The bands do not have to have the same data type.
5081
 *
5082
 * This function is primarily intended to support implementation of
5083
 * driver specific CreateCopy() functions.  It implements efficient copying,
5084
 * in particular "chunking" the copy in substantial blocks and, if appropriate,
5085
 * performing the transfer in a pixel interleaved fashion.
5086
 *
5087
 * Currently the only papszOptions values supported are:
5088
 * <ul>
5089
 * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and
5090
 * write access pattern (this does not modify the layout of the destination
5091
 * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block
5092
 * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks
5093
 * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY
5094
 * (GDAL &gt;= 2.2)</li>
5095
 * </ul>
5096
 * More options may be supported in the future.
5097
 *
5098
 * @param hSrcDS the source dataset
5099
 * @param hDstDS the destination dataset
5100
 * @param papszOptions transfer hints in "StringList" Name=Value format.
5101
 * @param pfnProgress progress reporting function.
5102
 * @param pProgressData callback data for progress function.
5103
 *
5104
 * @return CE_None on success, or CE_Failure on failure.
5105
 */
5106
5107
CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,
5108
                                              GDALDatasetH hDstDS,
5109
                                              CSLConstList papszOptions,
5110
                                              GDALProgressFunc pfnProgress,
5111
                                              void *pProgressData)
5112
5113
0
{
5114
0
    VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure);
5115
0
    VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure);
5116
5117
0
    GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
5118
0
    GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
5119
5120
0
    if (pfnProgress == nullptr)
5121
0
        pfnProgress = GDALDummyProgress;
5122
5123
    /* -------------------------------------------------------------------- */
5124
    /*      Confirm the datasets match in size and band counts.             */
5125
    /* -------------------------------------------------------------------- */
5126
0
    const int nXSize = poDstDS->GetRasterXSize();
5127
0
    const int nYSize = poDstDS->GetRasterYSize();
5128
0
    const int nBandCount = poDstDS->GetRasterCount();
5129
5130
0
    if (poSrcDS->GetRasterXSize() != nXSize ||
5131
0
        poSrcDS->GetRasterYSize() != nYSize ||
5132
0
        poSrcDS->GetRasterCount() != nBandCount)
5133
0
    {
5134
0
        CPLError(CE_Failure, CPLE_AppDefined,
5135
0
                 "Input and output dataset sizes or band counts do not\n"
5136
0
                 "match in GDALDatasetCopyWholeRaster()");
5137
0
        return CE_Failure;
5138
0
    }
5139
5140
    /* -------------------------------------------------------------------- */
5141
    /*      Report preliminary (0) progress.                                */
5142
    /* -------------------------------------------------------------------- */
5143
0
    if (!pfnProgress(0.0, nullptr, pProgressData))
5144
0
    {
5145
0
        CPLError(CE_Failure, CPLE_UserInterrupt,
5146
0
                 "User terminated CreateCopy()");
5147
0
        return CE_Failure;
5148
0
    }
5149
5150
    /* -------------------------------------------------------------------- */
5151
    /*      Get our prototype band, and assume the others are similarly     */
5152
    /*      configured.                                                     */
5153
    /* -------------------------------------------------------------------- */
5154
0
    if (nBandCount == 0)
5155
0
        return CE_None;
5156
5157
0
    GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
5158
0
    GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
5159
0
    GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
5160
5161
    /* -------------------------------------------------------------------- */
5162
    /*      Do we want to try and do the operation in a pixel               */
5163
    /*      interleaved fashion?                                            */
5164
    /* -------------------------------------------------------------------- */
5165
0
    bool bInterleave = false;
5166
0
    const char *pszInterleave =
5167
0
        poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
5168
0
    if (pszInterleave != nullptr &&
5169
0
        (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
5170
0
        bInterleave = true;
5171
5172
0
    pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
5173
0
    if (pszInterleave != nullptr &&
5174
0
        (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
5175
0
        bInterleave = true;
5176
5177
0
    pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE");
5178
0
    if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL"))
5179
0
        bInterleave = true;
5180
0
    else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND"))
5181
0
        bInterleave = false;
5182
    // attributes is specific to the TileDB driver
5183
0
    else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES"))
5184
0
        bInterleave = true;
5185
0
    else if (pszInterleave != nullptr)
5186
0
    {
5187
0
        CPLError(CE_Warning, CPLE_NotSupported,
5188
0
                 "Unsupported value for option INTERLEAVE");
5189
0
    }
5190
5191
    // If the destination is compressed, we must try to write blocks just once,
5192
    // to save disk space (GTiff case for example), and to avoid data loss
5193
    // (JPEG compression for example).
5194
0
    bool bDstIsCompressed = false;
5195
0
    const char *pszDstCompressed =
5196
0
        CSLFetchNameValue(papszOptions, "COMPRESSED");
5197
0
    if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5198
0
        bDstIsCompressed = true;
5199
5200
    /* -------------------------------------------------------------------- */
5201
    /*      What will our swath size be?                                    */
5202
    /* -------------------------------------------------------------------- */
5203
5204
0
    int nSwathCols = 0;
5205
0
    int nSwathLines = 0;
5206
0
    GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand,
5207
0
                                    nBandCount, bDstIsCompressed, bInterleave,
5208
0
                                    &nSwathCols, &nSwathLines);
5209
5210
0
    int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5211
0
    if (bInterleave)
5212
0
        nPixelSize *= nBandCount;
5213
5214
0
    void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
5215
0
    if (pSwathBuf == nullptr)
5216
0
    {
5217
0
        return CE_Failure;
5218
0
    }
5219
5220
0
    CPLDebug("GDAL",
5221
0
             "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
5222
0
             nSwathCols, nSwathLines, static_cast<int>(bInterleave));
5223
5224
    // Advise the source raster that we are going to read it completely
5225
    // Note: this might already have been done by GDALCreateCopy() in the
5226
    // likely case this function is indirectly called by it
5227
0
    poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount,
5228
0
                        nullptr, nullptr);
5229
5230
    /* ==================================================================== */
5231
    /*      Band oriented (uninterleaved) case.                             */
5232
    /* ==================================================================== */
5233
0
    CPLErr eErr = CE_None;
5234
0
    const bool bCheckHoles =
5235
0
        CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
5236
5237
0
    if (!bInterleave)
5238
0
    {
5239
0
        GDALRasterIOExtraArg sExtraArg;
5240
0
        INIT_RASTERIO_EXTRA_ARG(sExtraArg);
5241
0
        CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress);  // to make cppcheck happy
5242
5243
0
        const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) *
5244
0
                                     DIV_ROUND_UP(nYSize, nSwathLines) *
5245
0
                                     DIV_ROUND_UP(nXSize, nSwathCols);
5246
0
        GIntBig nBlocksDone = 0;
5247
5248
0
        for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++)
5249
0
        {
5250
0
            int nBand = iBand + 1;
5251
5252
0
            for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5253
0
            {
5254
0
                int nThisLines = nSwathLines;
5255
5256
0
                if (iY + nThisLines > nYSize)
5257
0
                    nThisLines = nYSize - iY;
5258
5259
0
                for (int iX = 0; iX < nXSize && eErr == CE_None;
5260
0
                     iX += nSwathCols)
5261
0
                {
5262
0
                    int nThisCols = nSwathCols;
5263
5264
0
                    if (iX + nThisCols > nXSize)
5265
0
                        nThisCols = nXSize - iX;
5266
5267
0
                    int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5268
0
                    if (bCheckHoles)
5269
0
                    {
5270
0
                        nStatus = poSrcDS->GetRasterBand(nBand)
5271
0
                                      ->GetDataCoverageStatus(
5272
0
                                          iX, iY, nThisCols, nThisLines,
5273
0
                                          GDAL_DATA_COVERAGE_STATUS_DATA);
5274
0
                    }
5275
0
                    if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5276
0
                    {
5277
0
                        sExtraArg.pfnProgress = GDALScaledProgress;
5278
0
                        sExtraArg.pProgressData = GDALCreateScaledProgress(
5279
0
                            nBlocksDone / static_cast<double>(nTotalBlocks),
5280
0
                            (nBlocksDone + 0.5) /
5281
0
                                static_cast<double>(nTotalBlocks),
5282
0
                            pfnProgress, pProgressData);
5283
0
                        if (sExtraArg.pProgressData == nullptr)
5284
0
                            sExtraArg.pfnProgress = nullptr;
5285
5286
0
                        eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
5287
0
                                                 nThisLines, pSwathBuf,
5288
0
                                                 nThisCols, nThisLines, eDT, 1,
5289
0
                                                 &nBand, 0, 0, 0, &sExtraArg);
5290
5291
0
                        GDALDestroyScaledProgress(sExtraArg.pProgressData);
5292
5293
0
                        if (eErr == CE_None)
5294
0
                            eErr = poDstDS->RasterIO(
5295
0
                                GF_Write, iX, iY, nThisCols, nThisLines,
5296
0
                                pSwathBuf, nThisCols, nThisLines, eDT, 1,
5297
0
                                &nBand, 0, 0, 0, nullptr);
5298
0
                    }
5299
5300
0
                    nBlocksDone++;
5301
0
                    if (eErr == CE_None &&
5302
0
                        !pfnProgress(nBlocksDone /
5303
0
                                         static_cast<double>(nTotalBlocks),
5304
0
                                     nullptr, pProgressData))
5305
0
                    {
5306
0
                        eErr = CE_Failure;
5307
0
                        CPLError(CE_Failure, CPLE_UserInterrupt,
5308
0
                                 "User terminated CreateCopy()");
5309
0
                    }
5310
0
                }
5311
0
            }
5312
0
        }
5313
0
    }
5314
5315
    /* ==================================================================== */
5316
    /*      Pixel interleaved case.                                         */
5317
    /* ==================================================================== */
5318
0
    else /* if( bInterleave ) */
5319
0
    {
5320
0
        GDALRasterIOExtraArg sExtraArg;
5321
0
        INIT_RASTERIO_EXTRA_ARG(sExtraArg);
5322
0
        CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress);  // to make cppcheck happy
5323
5324
0
        const GIntBig nTotalBlocks =
5325
0
            static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
5326
0
            DIV_ROUND_UP(nXSize, nSwathCols);
5327
0
        GIntBig nBlocksDone = 0;
5328
5329
0
        for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5330
0
        {
5331
0
            int nThisLines = nSwathLines;
5332
5333
0
            if (iY + nThisLines > nYSize)
5334
0
                nThisLines = nYSize - iY;
5335
5336
0
            for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
5337
0
            {
5338
0
                int nThisCols = nSwathCols;
5339
5340
0
                if (iX + nThisCols > nXSize)
5341
0
                    nThisCols = nXSize - iX;
5342
5343
0
                int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5344
0
                if (bCheckHoles)
5345
0
                {
5346
0
                    nStatus = 0;
5347
0
                    for (int iBand = 0; iBand < nBandCount; iBand++)
5348
0
                    {
5349
0
                        nStatus |= poSrcDS->GetRasterBand(iBand + 1)
5350
0
                                       ->GetDataCoverageStatus(
5351
0
                                           iX, iY, nThisCols, nThisLines,
5352
0
                                           GDAL_DATA_COVERAGE_STATUS_DATA);
5353
0
                        if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5354
0
                            break;
5355
0
                    }
5356
0
                }
5357
0
                if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5358
0
                {
5359
0
                    sExtraArg.pfnProgress = GDALScaledProgress;
5360
0
                    sExtraArg.pProgressData = GDALCreateScaledProgress(
5361
0
                        nBlocksDone / static_cast<double>(nTotalBlocks),
5362
0
                        (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
5363
0
                        pfnProgress, pProgressData);
5364
0
                    if (sExtraArg.pProgressData == nullptr)
5365
0
                        sExtraArg.pfnProgress = nullptr;
5366
5367
0
                    eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
5368
0
                                             nThisLines, pSwathBuf, nThisCols,
5369
0
                                             nThisLines, eDT, nBandCount,
5370
0
                                             nullptr, 0, 0, 0, &sExtraArg);
5371
5372
0
                    GDALDestroyScaledProgress(sExtraArg.pProgressData);
5373
5374
0
                    if (eErr == CE_None)
5375
0
                        eErr = poDstDS->RasterIO(
5376
0
                            GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf,
5377
0
                            nThisCols, nThisLines, eDT, nBandCount, nullptr, 0,
5378
0
                            0, 0, nullptr);
5379
0
                }
5380
5381
0
                nBlocksDone++;
5382
0
                if (eErr == CE_None &&
5383
0
                    !pfnProgress(nBlocksDone /
5384
0
                                     static_cast<double>(nTotalBlocks),
5385
0
                                 nullptr, pProgressData))
5386
0
                {
5387
0
                    eErr = CE_Failure;
5388
0
                    CPLError(CE_Failure, CPLE_UserInterrupt,
5389
0
                             "User terminated CreateCopy()");
5390
0
                }
5391
0
            }
5392
0
        }
5393
0
    }
5394
5395
    /* -------------------------------------------------------------------- */
5396
    /*      Cleanup                                                         */
5397
    /* -------------------------------------------------------------------- */
5398
0
    CPLFree(pSwathBuf);
5399
5400
0
    return eErr;
5401
0
}
5402
5403
/************************************************************************/
5404
/*                     GDALRasterBandCopyWholeRaster()                  */
5405
/************************************************************************/
5406
5407
/**
5408
 * \brief Copy a whole raster band
5409
 *
5410
 * This function copies the complete raster contents of one band to
5411
 * another similarly configured band.  The source and destination
5412
 * bands must have the same width and height.  The bands do not have
5413
 * to have the same data type.
5414
 *
5415
 * It implements efficient copying, in particular "chunking" the copy in
5416
 * substantial blocks.
5417
 *
5418
 * Currently the only papszOptions value supported are :
5419
 * <ul>
5420
 * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
5421
 * achieve best compression.</li>
5422
 * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
5423
 * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL &gt;= 2.2)</li>
5424
 * </ul>
5425
 *
5426
 * @param hSrcBand the source band
5427
 * @param hDstBand the destination band
5428
 * @param papszOptions transfer hints in "StringList" Name=Value format.
5429
 * @param pfnProgress progress reporting function.
5430
 * @param pProgressData callback data for progress function.
5431
 *
5432
 * @return CE_None on success, or CE_Failure on failure.
5433
 */
5434
5435
CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
5436
    GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
5437
    const char *const *const papszOptions, GDALProgressFunc pfnProgress,
5438
    void *pProgressData)
5439
5440
0
{
5441
0
    VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5442
0
    VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5443
5444
0
    GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
5445
0
    GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand);
5446
0
    CPLErr eErr = CE_None;
5447
5448
0
    if (pfnProgress == nullptr)
5449
0
        pfnProgress = GDALDummyProgress;
5450
5451
    /* -------------------------------------------------------------------- */
5452
    /*      Confirm the datasets match in size and band counts.             */
5453
    /* -------------------------------------------------------------------- */
5454
0
    int nXSize = poSrcBand->GetXSize();
5455
0
    int nYSize = poSrcBand->GetYSize();
5456
5457
0
    if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize)
5458
0
    {
5459
0
        CPLError(CE_Failure, CPLE_AppDefined,
5460
0
                 "Input and output band sizes do not\n"
5461
0
                 "match in GDALRasterBandCopyWholeRaster()");
5462
0
        return CE_Failure;
5463
0
    }
5464
5465
    /* -------------------------------------------------------------------- */
5466
    /*      Report preliminary (0) progress.                                */
5467
    /* -------------------------------------------------------------------- */
5468
0
    if (!pfnProgress(0.0, nullptr, pProgressData))
5469
0
    {
5470
0
        CPLError(CE_Failure, CPLE_UserInterrupt,
5471
0
                 "User terminated CreateCopy()");
5472
0
        return CE_Failure;
5473
0
    }
5474
5475
0
    GDALDataType eDT = poDstBand->GetRasterDataType();
5476
5477
    // If the destination is compressed, we must try to write blocks just once,
5478
    // to save disk space (GTiff case for example), and to avoid data loss
5479
    // (JPEG compression for example).
5480
0
    bool bDstIsCompressed = false;
5481
0
    const char *pszDstCompressed =
5482
0
        CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED");
5483
0
    if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5484
0
        bDstIsCompressed = true;
5485
5486
    /* -------------------------------------------------------------------- */
5487
    /*      What will our swath size be?                                    */
5488
    /* -------------------------------------------------------------------- */
5489
5490
0
    int nSwathCols = 0;
5491
0
    int nSwathLines = 0;
5492
0
    GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed,
5493
0
                                    FALSE, &nSwathCols, &nSwathLines);
5494
5495
0
    const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5496
5497
0
    void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
5498
0
    if (pSwathBuf == nullptr)
5499
0
    {
5500
0
        return CE_Failure;
5501
0
    }
5502
5503
0
    CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
5504
0
             nSwathCols, nSwathLines);
5505
5506
0
    const bool bCheckHoles =
5507
0
        CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
5508
5509
    // Advise the source raster that we are going to read it completely
5510
0
    poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr);
5511
5512
    /* ==================================================================== */
5513
    /*      Band oriented (uninterleaved) case.                             */
5514
    /* ==================================================================== */
5515
5516
0
    for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5517
0
    {
5518
0
        int nThisLines = nSwathLines;
5519
5520
0
        if (iY + nThisLines > nYSize)
5521
0
            nThisLines = nYSize - iY;
5522
5523
0
        for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
5524
0
        {
5525
0
            int nThisCols = nSwathCols;
5526
5527
0
            if (iX + nThisCols > nXSize)
5528
0
                nThisCols = nXSize - iX;
5529
5530
0
            int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5531
0
            if (bCheckHoles)
5532
0
            {
5533
0
                nStatus = poSrcBand->GetDataCoverageStatus(
5534
0
                    iX, iY, nThisCols, nThisLines,
5535
0
                    GDAL_DATA_COVERAGE_STATUS_DATA);
5536
0
            }
5537
0
            if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5538
0
            {
5539
0
                eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols,
5540
0
                                           nThisLines, pSwathBuf, nThisCols,
5541
0
                                           nThisLines, eDT, 0, 0, nullptr);
5542
5543
0
                if (eErr == CE_None)
5544
0
                    eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols,
5545
0
                                               nThisLines, pSwathBuf, nThisCols,
5546
0
                                               nThisLines, eDT, 0, 0, nullptr);
5547
0
            }
5548
5549
0
            if (eErr == CE_None &&
5550
0
                !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize),
5551
0
                             nullptr, pProgressData))
5552
0
            {
5553
0
                eErr = CE_Failure;
5554
0
                CPLError(CE_Failure, CPLE_UserInterrupt,
5555
0
                         "User terminated CreateCopy()");
5556
0
            }
5557
0
        }
5558
0
    }
5559
5560
    /* -------------------------------------------------------------------- */
5561
    /*      Cleanup                                                         */
5562
    /* -------------------------------------------------------------------- */
5563
0
    CPLFree(pSwathBuf);
5564
5565
0
    return eErr;
5566
0
}
5567
5568
/************************************************************************/
5569
/*                      GDALCopyRasterIOExtraArg ()                     */
5570
/************************************************************************/
5571
5572
void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg,
5573
                              GDALRasterIOExtraArg *psSrcArg)
5574
0
{
5575
0
    INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5576
0
    if (psSrcArg)
5577
0
    {
5578
0
        psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5579
0
        psDestArg->pfnProgress = psSrcArg->pfnProgress;
5580
0
        psDestArg->pProgressData = psSrcArg->pProgressData;
5581
0
        psDestArg->bFloatingPointWindowValidity =
5582
0
            psSrcArg->bFloatingPointWindowValidity;
5583
0
        if (psSrcArg->bFloatingPointWindowValidity)
5584
0
        {
5585
0
            psDestArg->dfXOff = psSrcArg->dfXOff;
5586
0
            psDestArg->dfYOff = psSrcArg->dfYOff;
5587
0
            psDestArg->dfXSize = psSrcArg->dfXSize;
5588
0
            psDestArg->dfYSize = psSrcArg->dfYSize;
5589
0
        }
5590
0
        if (psSrcArg->nVersion >= 2)
5591
0
        {
5592
0
            psDestArg->bUseOnlyThisScale = psSrcArg->bUseOnlyThisScale;
5593
0
        }
5594
0
    }
5595
0
}
5596
5597
/************************************************************************/
5598
/*                         HasOnlyNoData()                              */
5599
/************************************************************************/
5600
5601
/** Tell whether a sample equals the nodata value.
 *
 * Generic version: plain operator== comparison. The floating point types
 * have dedicated specializations.
 */
template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
{
    const bool bIsNoData = (value == noDataValue);
    return bIsNoData;
}
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned char>(unsigned char, unsigned char)
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned short>(unsigned short, unsigned short)
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned int>(unsigned int, unsigned int)
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned long>(unsigned long, unsigned long)
5605
5606
/** Half-precision specialization: a NaN nodata value matches any NaN
 * sample; otherwise samples are compared with operator==.
 */
template <> bool IsEqualToNoData<GFloat16>(GFloat16 value, GFloat16 noDataValue)
{
    // Unqualified call so that the isnan() overload for GFloat16 is picked.
    using std::isnan;
    if (isnan(noDataValue))
        return isnan(value);
    return value == noDataValue;
}
5611
5612
/** Single-precision specialization: a NaN nodata value matches any NaN
 * sample (which operator== alone would never report); otherwise samples
 * are compared with operator==.
 */
template <> bool IsEqualToNoData<float>(float value, float noDataValue)
{
    if (std::isnan(noDataValue))
    {
        return std::isnan(value);
    }
    return value == noDataValue;
}
5616
5617
/** Double-precision specialization: a NaN nodata value matches any NaN
 * sample (which operator== alone would never report); otherwise samples
 * are compared with operator==.
 */
template <> bool IsEqualToNoData<double>(double value, double noDataValue)
{
    const bool bNoDataIsNaN = std::isnan(noDataValue);
    return bNoDataIsNaN ? std::isnan(value) : (value == noDataValue);
}
5621
5622
/** Tell whether every sample of a pixel-interleaved window is nodata.
 *
 * Samples are compared with IsEqualToNoData(). The sample of component c
 * of pixel (x, y) is read at index (y * nLineStride + x) * nComponents + c.
 *
 * @param pBuffer first sample of the window.
 * @param noDataValue nodata value, already converted to the sample type.
 * @param nWidth window width in pixels.
 * @param nHeight window height in pixels.
 * @param nLineStride distance between two consecutive lines, in pixels.
 * @param nComponents number of interleaved components per pixel.
 * @return true if all samples are nodata (including when the window is
 * empty), false as soon as one sample differs.
 */
template <class T>
static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
                           size_t nHeight, size_t nLineStride,
                           size_t nComponents)
{
    // An empty window has no sample that could differ from nodata. Return
    // before the corner probes below, whose (nWidth - 1) and (nHeight - 1)
    // terms would wrap around for a zero dimension and read out of bounds.
    if (nWidth == 0 || nHeight == 0)
        return true;

    // Fast test: check the 4 corners and the middle pixel.
    const size_t nTopLeft = 0;
    const size_t nTopRight = (nWidth - 1) * nComponents;
    const size_t nMiddle =
        ((nHeight - 1) / 2 * nLineStride + (nWidth - 1) / 2) * nComponents;
    const size_t nBottomLeft = (nHeight - 1) * nLineStride * nComponents;
    const size_t nBottomRight =
        ((nHeight - 1) * nLineStride + nWidth - 1) * nComponents;
    for (size_t iBand = 0; iBand < nComponents; iBand++)
    {
        if (!(IsEqualToNoData(pBuffer[nTopLeft + iBand], noDataValue) &&
              IsEqualToNoData(pBuffer[nTopRight + iBand], noDataValue) &&
              IsEqualToNoData(pBuffer[nMiddle + iBand], noDataValue) &&
              IsEqualToNoData(pBuffer[nBottomLeft + iBand], noDataValue) &&
              IsEqualToNoData(pBuffer[nBottomRight + iBand], noDataValue)))
        {
            return false;
        }
    }

    // Test all pixels.
    for (size_t iY = 0; iY < nHeight; iY++)
    {
        const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
        for (size_t iX = 0; iX < nWidth * nComponents; iX++)
        {
            if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
            {
                return false;
            }
        }
    }
    return true;
}
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned char>(unsigned char const*, unsigned char, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned short>(unsigned short const*, unsigned short, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned int>(unsigned int const*, unsigned int, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned long>(unsigned long const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<cpl::Float16>(cpl::Float16 const*, cpl::Float16, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<float>(float const*, float, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<double>(double const*, double, unsigned long, unsigned long, unsigned long, unsigned long)
5665
5666
/************************************************************************/
5667
/*                    GDALBufferHasOnlyNoData()                         */
5668
/************************************************************************/
5669
5670
bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue,
5671
                             size_t nWidth, size_t nHeight, size_t nLineStride,
5672
                             size_t nComponents, int nBitsPerSample,
5673
                             GDALBufferSampleFormat nSampleFormat)
5674
0
{
5675
    // In the case where the nodata is 0, we can compare several bytes at
5676
    // once. Select the largest natural integer type for the architecture.
5677
0
#if SIZEOF_VOIDP >= 8 || defined(__x86_64__)
5678
    // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4
5679
0
    typedef std::uint64_t WordType;
5680
#else
5681
    typedef std::uint32_t WordType;
5682
#endif
5683
0
    if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
5684
        // Do not use this optimized code path for floating point numbers,
5685
        // as it can't detect negative zero.
5686
0
        nSampleFormat != GSF_FLOATING_POINT)
5687
0
    {
5688
0
        const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
5689
0
        const size_t nSize =
5690
0
            (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8;
5691
0
        size_t i = 0;
5692
0
        const size_t nInitialIters =
5693
0
            std::min(sizeof(WordType) -
5694
0
                         static_cast<size_t>(
5695
0
                             reinterpret_cast<std::uintptr_t>(pabyBuffer) %
5696
0
                             sizeof(WordType)),
5697
0
                     nSize);
5698
0
        for (; i < nInitialIters; i++)
5699
0
        {
5700
0
            if (pabyBuffer[i])
5701
0
                return false;
5702
0
        }
5703
0
        for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType))
5704
0
        {
5705
0
            if (*(reinterpret_cast<const WordType *>(pabyBuffer + i)))
5706
0
                return false;
5707
0
        }
5708
0
        for (; i < nSize; i++)
5709
0
        {
5710
0
            if (pabyBuffer[i])
5711
0
                return false;
5712
0
        }
5713
0
        return true;
5714
0
    }
5715
5716
0
    if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT)
5717
0
    {
5718
0
        return GDALIsValueInRange<uint8_t>(dfNoDataValue) &&
5719
0
               HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer),
5720
0
                              static_cast<uint8_t>(dfNoDataValue), nWidth,
5721
0
                              nHeight, nLineStride, nComponents);
5722
0
    }
5723
0
    if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT)
5724
0
    {
5725
        // Use unsigned implementation by converting the nodatavalue to
5726
        // unsigned
5727
0
        return GDALIsValueInRange<int8_t>(dfNoDataValue) &&
5728
0
               HasOnlyNoDataT(
5729
0
                   static_cast<const uint8_t *>(pBuffer),
5730
0
                   static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)),
5731
0
                   nWidth, nHeight, nLineStride, nComponents);
5732
0
    }
5733
0
    if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT)
5734
0
    {
5735
0
        return GDALIsValueInRange<uint16_t>(dfNoDataValue) &&
5736
0
               HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer),
5737
0
                              static_cast<uint16_t>(dfNoDataValue), nWidth,
5738
0
                              nHeight, nLineStride, nComponents);
5739
0
    }
5740
0
    if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT)
5741
0
    {
5742
        // Use unsigned implementation by converting the nodatavalue to
5743
        // unsigned
5744
0
        return GDALIsValueInRange<int16_t>(dfNoDataValue) &&
5745
0
               HasOnlyNoDataT(
5746
0
                   static_cast<const uint16_t *>(pBuffer),
5747
0
                   static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)),
5748
0
                   nWidth, nHeight, nLineStride, nComponents);
5749
0
    }
5750
0
    if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT)
5751
0
    {
5752
0
        return GDALIsValueInRange<uint32_t>(dfNoDataValue) &&
5753
0
               HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer),
5754
0
                              static_cast<uint32_t>(dfNoDataValue), nWidth,
5755
0
                              nHeight, nLineStride, nComponents);
5756
0
    }
5757
0
    if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT)
5758
0
    {
5759
        // Use unsigned implementation by converting the nodatavalue to
5760
        // unsigned
5761
0
        return GDALIsValueInRange<int32_t>(dfNoDataValue) &&
5762
0
               HasOnlyNoDataT(
5763
0
                   static_cast<const uint32_t *>(pBuffer),
5764
0
                   static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)),
5765
0
                   nWidth, nHeight, nLineStride, nComponents);
5766
0
    }
5767
0
    if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT)
5768
0
    {
5769
0
        return GDALIsValueInRange<uint64_t>(dfNoDataValue) &&
5770
0
               HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer),
5771
0
                              static_cast<uint64_t>(dfNoDataValue), nWidth,
5772
0
                              nHeight, nLineStride, nComponents);
5773
0
    }
5774
0
    if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT)
5775
0
    {
5776
        // Use unsigned implementation by converting the nodatavalue to
5777
        // unsigned
5778
0
        return GDALIsValueInRange<int64_t>(dfNoDataValue) &&
5779
0
               HasOnlyNoDataT(
5780
0
                   static_cast<const uint64_t *>(pBuffer),
5781
0
                   static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)),
5782
0
                   nWidth, nHeight, nLineStride, nComponents);
5783
0
    }
5784
0
    if (nBitsPerSample == 16 && nSampleFormat == GSF_FLOATING_POINT)
5785
0
    {
5786
0
        return (std::isnan(dfNoDataValue) ||
5787
0
                GDALIsValueInRange<GFloat16>(dfNoDataValue)) &&
5788
0
               HasOnlyNoDataT(static_cast<const GFloat16 *>(pBuffer),
5789
0
                              static_cast<GFloat16>(dfNoDataValue), nWidth,
5790
0
                              nHeight, nLineStride, nComponents);
5791
0
    }
5792
0
    if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
5793
0
    {
5794
0
        return (std::isnan(dfNoDataValue) ||
5795
0
                GDALIsValueInRange<float>(dfNoDataValue)) &&
5796
0
               HasOnlyNoDataT(static_cast<const float *>(pBuffer),
5797
0
                              static_cast<float>(dfNoDataValue), nWidth,
5798
0
                              nHeight, nLineStride, nComponents);
5799
0
    }
5800
0
    if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
5801
0
    {
5802
0
        return HasOnlyNoDataT(static_cast<const double *>(pBuffer),
5803
0
                              dfNoDataValue, nWidth, nHeight, nLineStride,
5804
0
                              nComponents);
5805
0
    }
5806
0
    return false;
5807
0
}
5808
5809
#ifdef HAVE_SSE2
5810
5811
/************************************************************************/
5812
/*                    GDALDeinterleave3Byte()                           */
5813
/************************************************************************/
5814
5815
#if defined(__GNUC__) && !defined(__clang__)
5816
__attribute__((optimize("no-tree-vectorize")))
5817
#endif
5818
static void
5819
GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5820
                      GByte *CPL_RESTRICT pabyDest0,
5821
                      GByte *CPL_RESTRICT pabyDest1,
5822
                      GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5823
#ifdef USE_NEON_OPTIMIZATIONS
5824
{
5825
    return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5826
                                       nIters);
5827
}
5828
#else
5829
0
{
5830
0
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
5831
0
    if (CPLHaveRuntimeSSSE3())
5832
0
    {
5833
0
        return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5834
0
                                           pabyDest2, nIters);
5835
0
    }
5836
0
#endif
5837
5838
0
    size_t i = 0;
5839
0
    if (((reinterpret_cast<uintptr_t>(pabySrc) |
5840
0
          reinterpret_cast<uintptr_t>(pabyDest0) |
5841
0
          reinterpret_cast<uintptr_t>(pabyDest1) |
5842
0
          reinterpret_cast<uintptr_t>(pabyDest2)) %
5843
0
         sizeof(unsigned int)) == 0)
5844
0
    {
5845
        // Slightly better than GCC autovectorizer
5846
0
        for (size_t j = 0; i + 3 < nIters; i += 4, ++j)
5847
0
        {
5848
0
            unsigned int word0 =
5849
0
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i);
5850
0
            unsigned int word1 =
5851
0
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4);
5852
0
            unsigned int word2 =
5853
0
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8);
5854
0
            reinterpret_cast<unsigned int *>(pabyDest0)[j] =
5855
0
                (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) |
5856
0
                ((word2 >> 8) << 24);
5857
0
            reinterpret_cast<unsigned int *>(pabyDest1)[j] =
5858
0
                ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) |
5859
0
                (((word1 >> 24)) << 16) | ((word2 >> 16) << 24);
5860
0
            pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16);
5861
0
            pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8);
5862
0
            pabyDest2[j * 4 + 2] = static_cast<GByte>(word2);
5863
0
            pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24);
5864
0
        }
5865
0
    }
5866
0
#if defined(__clang__)
5867
0
#pragma clang loop vectorize(disable)
5868
0
#endif
5869
0
    for (; i < nIters; ++i)
5870
0
    {
5871
0
        pabyDest0[i] = pabySrc[3 * i + 0];
5872
0
        pabyDest1[i] = pabySrc[3 * i + 1];
5873
0
        pabyDest2[i] = pabySrc[3 * i + 2];
5874
0
    }
5875
0
}
5876
#endif
5877
5878
/************************************************************************/
5879
/*                    GDALDeinterleave4Byte()                           */
5880
/************************************************************************/
5881
5882
#if !defined(__GNUC__) || defined(__clang__)
5883
5884
/************************************************************************/
5885
/*                         deinterleave()                               */
5886
/************************************************************************/
5887
5888
// Extract one byte from each of the 16 int32 lanes held in the four
// registers, and return these 16 bytes packed in a single register (lanes of
// xmm0_ori first, then xmm1_ori, xmm2_ori and xmm3_ori).
// SHIFT: first shift every lane right by 8 bits. This modifies the input
// registers in place, so successive calls walk through the bytes of each
// lane.
// MASK: clear everything but the lowest byte of each lane before packing.
// It can be skipped when the upper 24 bits of the lanes are already zero.
template <bool SHIFT, bool MASK>
inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
                            __m128i &xmm2_ori, __m128i &xmm3_ori)
{
    if (SHIFT)
    {
        // Drop the lowest byte of each int32 lane (kept in the caller's
        // registers).
        xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
        xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
        xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
        xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
    }

    // Work on copies from here on: masking must not alter the inputs.
    __m128i lane0 = xmm0_ori;
    __m128i lane1 = xmm1_ori;
    __m128i lane2 = xmm2_ori;
    __m128i lane3 = xmm3_ori;
    if (MASK)
    {
        // Set higher 24bit of each int32 packed word to 0
        const __m128i lowByteMask = _mm_set1_epi32(0xff);
        lane0 = _mm_and_si128(lane0, lowByteMask);
        lane1 = _mm_and_si128(lane1, lowByteMask);
        lane2 = _mm_and_si128(lane2, lowByteMask);
        lane3 = _mm_and_si128(lane3, lowByteMask);
    }

    // Pack int32 to int16
    const __m128i packed01 = _mm_packs_epi32(lane0, lane1);
    const __m128i packed23 = _mm_packs_epi32(lane2, lane3);
    // Pack int16 to uint8
    return _mm_packus_epi16(packed01, packed23);
}
Unexecuted instantiation: long long __vector(2) deinterleave<false, true>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&)
Unexecuted instantiation: long long __vector(2) deinterleave<true, true>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&)
Unexecuted instantiation: long long __vector(2) deinterleave<true, false>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&)
5926
5927
static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5928
                                  GByte *CPL_RESTRICT pabyDest0,
5929
                                  GByte *CPL_RESTRICT pabyDest1,
5930
                                  GByte *CPL_RESTRICT pabyDest2,
5931
                                  GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5932
#ifdef USE_NEON_OPTIMIZATIONS
5933
{
5934
    return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5935
                                       pabyDest3, nIters);
5936
}
5937
#else
5938
0
{
5939
0
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
5940
0
    if (CPLHaveRuntimeSSSE3())
5941
0
    {
5942
0
        return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5943
0
                                           pabyDest2, pabyDest3, nIters);
5944
0
    }
5945
0
#endif
5946
5947
    // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to
5948
    // do something slightly better.
5949
0
    size_t i = 0;
5950
0
    for (; i + 15 < nIters; i += 16)
5951
0
    {
5952
0
        __m128i xmm0_ori = _mm_loadu_si128(
5953
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0));
5954
0
        __m128i xmm1_ori = _mm_loadu_si128(
5955
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16));
5956
0
        __m128i xmm2_ori = _mm_loadu_si128(
5957
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32));
5958
0
        __m128i xmm3_ori = _mm_loadu_si128(
5959
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48));
5960
5961
0
        _mm_storeu_si128(
5962
0
            reinterpret_cast<__m128i *>(pabyDest0 + i),
5963
0
            deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5964
0
        _mm_storeu_si128(
5965
0
            reinterpret_cast<__m128i *>(pabyDest1 + i),
5966
0
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5967
0
        _mm_storeu_si128(
5968
0
            reinterpret_cast<__m128i *>(pabyDest2 + i),
5969
0
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5970
0
        _mm_storeu_si128(
5971
0
            reinterpret_cast<__m128i *>(pabyDest3 + i),
5972
0
            deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5973
0
    }
5974
5975
0
#if defined(__clang__)
5976
0
#pragma clang loop vectorize(disable)
5977
0
#endif
5978
0
    for (; i < nIters; ++i)
5979
0
    {
5980
0
        pabyDest0[i] = pabySrc[4 * i + 0];
5981
0
        pabyDest1[i] = pabySrc[4 * i + 1];
5982
0
        pabyDest2[i] = pabySrc[4 * i + 2];
5983
0
        pabyDest3[i] = pabySrc[4 * i + 3];
5984
0
    }
5985
0
}
5986
#endif
5987
#else
5988
// GCC autovectorizer does an excellent job
5989
__attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte(
5990
    const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0,
5991
    GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2,
5992
    GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5993
{
5994
    for (size_t i = 0; i < nIters; ++i)
5995
    {
5996
        pabyDest0[i] = pabySrc[4 * i + 0];
5997
        pabyDest1[i] = pabySrc[4 * i + 1];
5998
        pabyDest2[i] = pabySrc[4 * i + 2];
5999
        pabyDest3[i] = pabySrc[4 * i + 3];
6000
    }
6001
}
6002
#endif
6003
6004
#else
6005
6006
/************************************************************************/
6007
/*                    GDALDeinterleave3Byte()                           */
6008
/************************************************************************/
6009
6010
// TODO: Enabling below could help on non-Intel architectures where GCC knows
6011
// how to auto-vectorize
6012
// #if defined(__GNUC__)
6013
//__attribute__((optimize("tree-vectorize")))
6014
// #endif
6015
static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
6016
                                  GByte *CPL_RESTRICT pabyDest0,
6017
                                  GByte *CPL_RESTRICT pabyDest1,
6018
                                  GByte *CPL_RESTRICT pabyDest2, size_t nIters)
6019
{
6020
    for (size_t i = 0; i < nIters; ++i)
6021
    {
6022
        pabyDest0[i] = pabySrc[3 * i + 0];
6023
        pabyDest1[i] = pabySrc[3 * i + 1];
6024
        pabyDest2[i] = pabySrc[3 * i + 2];
6025
    }
6026
}
6027
6028
/************************************************************************/
6029
/*                    GDALDeinterleave4Byte()                           */
6030
/************************************************************************/
6031
6032
// TODO: Enabling below could help on non-Intel architectures where gcc knows
6033
// how to auto-vectorize
6034
// #if defined(__GNUC__)
6035
//__attribute__((optimize("tree-vectorize")))
6036
// #endif
6037
static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
6038
                                  GByte *CPL_RESTRICT pabyDest0,
6039
                                  GByte *CPL_RESTRICT pabyDest1,
6040
                                  GByte *CPL_RESTRICT pabyDest2,
6041
                                  GByte *CPL_RESTRICT pabyDest3, size_t nIters)
6042
{
6043
    for (size_t i = 0; i < nIters; ++i)
6044
    {
6045
        pabyDest0[i] = pabySrc[4 * i + 0];
6046
        pabyDest1[i] = pabySrc[4 * i + 1];
6047
        pabyDest2[i] = pabySrc[4 * i + 2];
6048
        pabyDest3[i] = pabySrc[4 * i + 3];
6049
    }
6050
}
6051
6052
#endif
6053
6054
/************************************************************************/
6055
/*                      GDALDeinterleave()                              */
6056
/************************************************************************/
6057
6058
/*! Copy values from a pixel-interleave buffer to multiple per-component
6059
    buffers.
6060
6061
    In pseudo-code
6062
    \verbatim
6063
    for(size_t i = 0; i < nIters; ++i)
6064
        for(int iComp = 0; iComp < nComponents; iComp++ )
6065
            ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp]
6066
    \endverbatim
6067
6068
    The implementation is optimized for a few cases, like de-interleaving
6069
    of 3 or 4-components Byte buffers.
6070
6071
    \since GDAL 3.6
6072
 */
6073
void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT,
6074
                      int nComponents, void **ppDestBuffer,
6075
                      GDALDataType eDestDT, size_t nIters)
6076
0
{
6077
0
    if (eSourceDT == eDestDT)
6078
0
    {
6079
0
        if (eSourceDT == GDT_Byte || eSourceDT == GDT_Int8)
6080
0
        {
6081
0
            if (nComponents == 3)
6082
0
            {
6083
0
                const GByte *CPL_RESTRICT pabySrc =
6084
0
                    static_cast<const GByte *>(pSourceBuffer);
6085
0
                GByte *CPL_RESTRICT pabyDest0 =
6086
0
                    static_cast<GByte *>(ppDestBuffer[0]);
6087
0
                GByte *CPL_RESTRICT pabyDest1 =
6088
0
                    static_cast<GByte *>(ppDestBuffer[1]);
6089
0
                GByte *CPL_RESTRICT pabyDest2 =
6090
0
                    static_cast<GByte *>(ppDestBuffer[2]);
6091
0
                GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
6092
0
                                      nIters);
6093
0
                return;
6094
0
            }
6095
0
            else if (nComponents == 4)
6096
0
            {
6097
0
                const GByte *CPL_RESTRICT pabySrc =
6098
0
                    static_cast<const GByte *>(pSourceBuffer);
6099
0
                GByte *CPL_RESTRICT pabyDest0 =
6100
0
                    static_cast<GByte *>(ppDestBuffer[0]);
6101
0
                GByte *CPL_RESTRICT pabyDest1 =
6102
0
                    static_cast<GByte *>(ppDestBuffer[1]);
6103
0
                GByte *CPL_RESTRICT pabyDest2 =
6104
0
                    static_cast<GByte *>(ppDestBuffer[2]);
6105
0
                GByte *CPL_RESTRICT pabyDest3 =
6106
0
                    static_cast<GByte *>(ppDestBuffer[3]);
6107
0
                GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
6108
0
                                      pabyDest3, nIters);
6109
0
                return;
6110
0
            }
6111
0
        }
6112
#if ((defined(__GNUC__) && !defined(__clang__)) ||                             \
6113
     defined(__INTEL_CLANG_COMPILER)) &&                                       \
6114
    defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME)
6115
        else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) &&
6116
                 CPLHaveRuntimeSSSE3())
6117
        {
6118
            if (nComponents == 3)
6119
            {
6120
                const GUInt16 *CPL_RESTRICT panSrc =
6121
                    static_cast<const GUInt16 *>(pSourceBuffer);
6122
                GUInt16 *CPL_RESTRICT panDest0 =
6123
                    static_cast<GUInt16 *>(ppDestBuffer[0]);
6124
                GUInt16 *CPL_RESTRICT panDest1 =
6125
                    static_cast<GUInt16 *>(ppDestBuffer[1]);
6126
                GUInt16 *CPL_RESTRICT panDest2 =
6127
                    static_cast<GUInt16 *>(ppDestBuffer[2]);
6128
                GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1,
6129
                                              panDest2, nIters);
6130
                return;
6131
            }
6132
#if !defined(__INTEL_CLANG_COMPILER)
6133
            // ICC autovectorizer doesn't do a good job, at least with icx
6134
            // 2022.1.0.20220316
6135
            else if (nComponents == 4)
6136
            {
6137
                const GUInt16 *CPL_RESTRICT panSrc =
6138
                    static_cast<const GUInt16 *>(pSourceBuffer);
6139
                GUInt16 *CPL_RESTRICT panDest0 =
6140
                    static_cast<GUInt16 *>(ppDestBuffer[0]);
6141
                GUInt16 *CPL_RESTRICT panDest1 =
6142
                    static_cast<GUInt16 *>(ppDestBuffer[1]);
6143
                GUInt16 *CPL_RESTRICT panDest2 =
6144
                    static_cast<GUInt16 *>(ppDestBuffer[2]);
6145
                GUInt16 *CPL_RESTRICT panDest3 =
6146
                    static_cast<GUInt16 *>(ppDestBuffer[3]);
6147
                GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1,
6148
                                              panDest2, panDest3, nIters);
6149
                return;
6150
            }
6151
#endif
6152
        }
6153
#endif
6154
0
    }
6155
6156
0
    const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT);
6157
0
    const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT);
6158
0
    for (int iComp = 0; iComp < nComponents; iComp++)
6159
0
    {
6160
0
        GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) +
6161
0
                            iComp * nSourceDTSize,
6162
0
                        eSourceDT, nComponents * nSourceDTSize,
6163
0
                        ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters);
6164
0
    }
6165
0
}
6166
6167
/************************************************************************/
6168
/*                    GDALTranspose2DSingleToSingle()                   */
6169
/************************************************************************/
6170
/**
6171
 * Transpose a 2D array of non-complex values, in a efficient (cache-oblivious) way.
6172
 *
6173
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
6174
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
6175
 * @param nSrcWidth Width of pSrc array.
6176
 * @param nSrcHeight Height of pSrc array.
6177
 */
6178
6179
template <class DST, class SRC>
6180
void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc,
6181
                                   DST *CPL_RESTRICT pDst, size_t nSrcWidth,
6182
                                   size_t nSrcHeight)
6183
0
{
6184
0
    constexpr size_t blocksize = 32;
6185
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
6186
0
    {
6187
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
6188
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
6189
0
        {
6190
            // transpose the block beginning at [i,j]
6191
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
6192
0
            for (size_t k = i; k < max_k; ++k)
6193
0
            {
6194
0
                for (size_t l = j; l < max_l; ++l)
6195
0
                {
6196
0
                    GDALCopyWord(pSrc[l + k * nSrcWidth],
6197
0
                                 pDst[k + l * nSrcHeight]);
6198
0
                }
6199
0
            }
6200
0
        }
6201
0
    }
6202
0
}
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned char>(unsigned char const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, signed char>(signed char const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned short>(unsigned short const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned int>(unsigned int const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned long>(unsigned long const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, long>(long const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned char>(unsigned char const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, signed char>(signed char const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned short>(unsigned short const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned int>(unsigned int const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned long>(unsigned long const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, long>(long const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned char>(unsigned char const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, signed char>(signed char const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned short>(unsigned short const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned int>(unsigned int const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned long>(unsigned long const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, long>(long const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, float>(float const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, signed char>(signed char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, long>(long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned char>(unsigned char const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, signed char>(signed char const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned short>(unsigned short const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned int>(unsigned int const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned long>(unsigned long const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, long>(long const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, signed char>(signed char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, long>(long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned char>(unsigned char const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, signed char>(signed char const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned short>(unsigned short const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned int>(unsigned int const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned long>(unsigned long const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, long>(long const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned char>(unsigned char const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, signed char>(signed char const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned short>(unsigned short const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, short>(short const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned int>(unsigned int const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, int>(int const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned long>(unsigned long const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, long>(long const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, float>(float const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, double>(double const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned long>(unsigned long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, signed char>(signed char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, long>(long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, signed char>(signed char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, long>(long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, double>(double const*, double*, unsigned long, unsigned long)
6203
6204
/************************************************************************/
6205
/*                   GDALTranspose2DComplexToComplex()                  */
6206
/************************************************************************/
6207
/**
6208
 * Transpose a 2D array of complex values into an array of complex values,
6209
 * in a efficient (cache-oblivious) way.
6210
 *
6211
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
6212
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
6213
 * @param nSrcWidth Width of pSrc array.
6214
 * @param nSrcHeight Height of pSrc array.
6215
 */
6216
template <class DST, class SRC>
6217
void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc,
6218
                                     DST *CPL_RESTRICT pDst, size_t nSrcWidth,
6219
                                     size_t nSrcHeight)
6220
0
{
6221
0
    constexpr size_t blocksize = 32;
6222
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
6223
0
    {
6224
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
6225
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
6226
0
        {
6227
            // transpose the block beginning at [i,j]
6228
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
6229
0
            for (size_t k = i; k < max_k; ++k)
6230
0
            {
6231
0
                for (size_t l = j; l < max_l; ++l)
6232
0
                {
6233
0
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
6234
0
                                 pDst[2 * (k + l * nSrcHeight) + 0]);
6235
0
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1],
6236
0
                                 pDst[2 * (k + l * nSrcHeight) + 1]);
6237
0
                }
6238
0
            }
6239
0
        }
6240
0
    }
6241
0
}
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, double>(double const*, double*, unsigned long, unsigned long)
6242
6243
/************************************************************************/
6244
/*                   GDALTranspose2DComplexToSingle()                   */
6245
/************************************************************************/
6246
/**
6247
 * Transpose a 2D array of complex values into an array of non-complex values,
6248
 * in a efficient (cache-oblivious) way.
6249
 *
6250
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
6251
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
6252
 * @param nSrcWidth Width of pSrc array.
6253
 * @param nSrcHeight Height of pSrc array.
6254
 */
6255
template <class DST, class SRC>
6256
void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc,
6257
                                    DST *CPL_RESTRICT pDst, size_t nSrcWidth,
6258
                                    size_t nSrcHeight)
6259
0
{
6260
0
    constexpr size_t blocksize = 32;
6261
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
6262
0
    {
6263
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
6264
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
6265
0
        {
6266
            // transpose the block beginning at [i,j]
6267
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
6268
0
            for (size_t k = i; k < max_k; ++k)
6269
0
            {
6270
0
                for (size_t l = j; l < max_l; ++l)
6271
0
                {
6272
0
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
6273
0
                                 pDst[k + l * nSrcHeight]);
6274
0
                }
6275
0
            }
6276
0
        }
6277
0
    }
6278
0
}
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, float>(float const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, short>(short const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, int>(int const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, float>(float const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, double>(double const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, double>(double const*, double*, unsigned long, unsigned long)
6279
6280
/************************************************************************/
6281
/*                   GDALTranspose2DSingleToComplex()                  */
6282
/************************************************************************/
6283
/**
6284
 * Transpose a 2D array of non-complex values into an array of complex values,
6285
 * in an efficient (blocked, cache-friendly) way. The second component of each destination pair is set to 0.
6286
 *
6287
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
6288
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
6289
 * @param nSrcWidth Width of pSrc array.
6290
 * @param nSrcHeight Height of pSrc array.
6291
 */
6292
template <class DST, class SRC>
6293
void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc,
6294
                                    DST *CPL_RESTRICT pDst, size_t nSrcWidth,
6295
                                    size_t nSrcHeight)
6296
0
{
6297
0
    constexpr size_t blocksize = 32;
6298
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
6299
0
    {
6300
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
6301
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
6302
0
        {
6303
            // transpose the block beginning at [i,j]
6304
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
6305
0
            for (size_t k = i; k < max_k; ++k)
6306
0
            {
6307
0
                for (size_t l = j; l < max_l; ++l)
6308
0
                {
6309
0
                    GDALCopyWord(pSrc[l + k * nSrcWidth],
6310
0
                                 pDst[2 * (k + l * nSrcHeight) + 0]);
6311
0
                    pDst[2 * (k + l * nSrcHeight) + 1] = 0;
6312
0
                }
6313
0
            }
6314
0
        }
6315
0
    }
6316
0
}
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, signed char>(signed char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, long>(long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, signed char>(signed char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, long>(long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned long>(unsigned long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, signed char>(signed char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, long>(long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, signed char>(signed char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, long>(long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, double>(double const*, double*, unsigned long, unsigned long)
6317
6318
/************************************************************************/
6319
/*                        GDALTranspose2D()                             */
6320
/************************************************************************/
6321
6322
template <class DST, bool DST_IS_COMPLEX>
6323
static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst,
6324
                            size_t nSrcWidth, size_t nSrcHeight)
6325
0
{
6326
0
#define CALL_GDALTranspose2D_internal(SRC_TYPE)                                \
6327
0
    do                                                                         \
6328
0
    {                                                                          \
6329
0
        if constexpr (DST_IS_COMPLEX)                                          \
6330
0
        {                                                                      \
6331
0
            GDALTranspose2DSingleToComplex(                                    \
6332
0
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
6333
0
                nSrcHeight);                                                   \
6334
0
        }                                                                      \
6335
0
        else                                                                   \
6336
0
        {                                                                      \
6337
0
            GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \
6338
0
                                          pDst, nSrcWidth, nSrcHeight);        \
6339
0
        }                                                                      \
6340
0
    } while (0)
6341
6342
0
#define CALL_GDALTranspose2DComplex_internal(SRC_TYPE)                         \
6343
0
    do                                                                         \
6344
0
    {                                                                          \
6345
0
        if constexpr (DST_IS_COMPLEX)                                          \
6346
0
        {                                                                      \
6347
0
            GDALTranspose2DComplexToComplex(                                   \
6348
0
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
6349
0
                nSrcHeight);                                                   \
6350
0
        }                                                                      \
6351
0
        else                                                                   \
6352
0
        {                                                                      \
6353
0
            GDALTranspose2DComplexToSingle(                                    \
6354
0
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
6355
0
                nSrcHeight);                                                   \
6356
0
        }                                                                      \
6357
0
    } while (0)
6358
6359
    // clang-format off
6360
0
    switch (eSrcType)
6361
0
    {
6362
0
        case GDT_Byte:     CALL_GDALTranspose2D_internal(uint8_t); break;
6363
0
        case GDT_Int8:     CALL_GDALTranspose2D_internal(int8_t); break;
6364
0
        case GDT_UInt16:   CALL_GDALTranspose2D_internal(uint16_t); break;
6365
0
        case GDT_Int16:    CALL_GDALTranspose2D_internal(int16_t); break;
6366
0
        case GDT_UInt32:   CALL_GDALTranspose2D_internal(uint32_t); break;
6367
0
        case GDT_Int32:    CALL_GDALTranspose2D_internal(int32_t); break;
6368
0
        case GDT_UInt64:   CALL_GDALTranspose2D_internal(uint64_t); break;
6369
0
        case GDT_Int64:    CALL_GDALTranspose2D_internal(int64_t); break;
6370
0
        case GDT_Float16:  CALL_GDALTranspose2D_internal(GFloat16); break;
6371
0
        case GDT_Float32:  CALL_GDALTranspose2D_internal(float); break;
6372
0
        case GDT_Float64:  CALL_GDALTranspose2D_internal(double); break;
6373
0
        case GDT_CInt16:   CALL_GDALTranspose2DComplex_internal(int16_t); break;
6374
0
        case GDT_CInt32:   CALL_GDALTranspose2DComplex_internal(int32_t); break;
6375
0
        case GDT_CFloat16: CALL_GDALTranspose2DComplex_internal(GFloat16); break;
6376
0
        case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break;
6377
0
        case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break;
6378
0
        case GDT_Unknown:
6379
0
        case GDT_TypeCount:
6380
0
            break;
6381
0
    }
6382
        // clang-format on
6383
6384
0
#undef CALL_GDALTranspose2D_internal
6385
0
#undef CALL_GDALTranspose2DComplex_internal
6386
0
}
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned char, false>(void const*, GDALDataType, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<signed char, false>(void const*, GDALDataType, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned short, false>(void const*, GDALDataType, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, false>(void const*, GDALDataType, short*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned int, false>(void const*, GDALDataType, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, false>(void const*, GDALDataType, int*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned long, false>(void const*, GDALDataType, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<long, false>(void const*, GDALDataType, long*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, false>(void const*, GDALDataType, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, false>(void const*, GDALDataType, float*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, false>(void const*, GDALDataType, double*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, true>(void const*, GDALDataType, short*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, true>(void const*, GDALDataType, int*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, true>(void const*, GDALDataType, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, true>(void const*, GDALDataType, float*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, true>(void const*, GDALDataType, double*, unsigned long, unsigned long)
6387
6388
/************************************************************************/
6389
/*                      GDALInterleave2Byte()                           */
6390
/************************************************************************/
6391
6392
#if defined(HAVE_SSE2) &&                                                      \
6393
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
6394
6395
// ICC autovectorizer doesn't do a good job at generating good SSE code,
6396
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
6397
#if defined(__GNUC__)
6398
__attribute__((noinline))
6399
#endif
6400
static void
6401
GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
6402
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
6403
{
6404
    size_t i = 0;
6405
    constexpr size_t VALS_PER_ITER = 16;
6406
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
6407
    {
6408
        __m128i xmm0 =
6409
            _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i));
6410
        __m128i xmm1 = _mm_loadu_si128(
6411
            reinterpret_cast<__m128i const *>(pSrc + i + nIters));
6412
        _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i),
6413
                         _mm_unpacklo_epi8(xmm0, xmm1));
6414
        _mm_storeu_si128(
6415
            reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER),
6416
            _mm_unpackhi_epi8(xmm0, xmm1));
6417
    }
6418
#if defined(__clang__)
6419
#pragma clang loop vectorize(disable)
6420
#endif
6421
    for (; i < nIters; ++i)
6422
    {
6423
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
6424
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
6425
    }
6426
}
6427
6428
#else
6429
6430
#if defined(__GNUC__) && !defined(__clang__)
6431
__attribute__((optimize("tree-vectorize")))
6432
#endif
6433
#if defined(__GNUC__)
6434
__attribute__((noinline))
6435
#endif
6436
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6437
// clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
6438
#pragma clang diagnostic push
6439
#pragma clang diagnostic ignored "-Wpass-failed"
6440
#endif
6441
static void
6442
GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
6443
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
6444
0
{
6445
0
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6446
0
#pragma clang loop vectorize(enable)
6447
0
#endif
6448
0
    for (size_t i = 0; i < nIters; ++i)
6449
0
    {
6450
0
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
6451
0
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
6452
0
    }
6453
0
}
6454
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6455
#pragma clang diagnostic pop
6456
#endif
6457
6458
#endif
6459
6460
/************************************************************************/
6461
/*                      GDALInterleave4Byte()                           */
6462
/************************************************************************/
6463
6464
#if defined(HAVE_SSE2) &&                                                      \
6465
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
6466
6467
// ICC autovectorizer doesn't do a good job at generating good SSE code,
6468
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
6469
#if defined(__GNUC__)
6470
__attribute__((noinline))
6471
#endif
6472
static void
6473
GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6474
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
6475
{
6476
    size_t i = 0;
6477
    constexpr size_t VALS_PER_ITER = 16;
6478
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
6479
    {
6480
        __m128i xmm0 = _mm_loadu_si128(
6481
            reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters));
6482
        __m128i xmm1 = _mm_loadu_si128(
6483
            reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters));
6484
        __m128i xmm2 = _mm_loadu_si128(
6485
            reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters));
6486
        __m128i xmm3 = _mm_loadu_si128(
6487
            reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters));
6488
        auto tmp0 = _mm_unpacklo_epi8(
6489
            xmm0,
6490
            xmm1);  // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...)
6491
        auto tmp1 = _mm_unpackhi_epi8(
6492
            xmm0,
6493
            xmm1);  // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...)
6494
        auto tmp2 = _mm_unpacklo_epi8(
6495
            xmm2,
6496
            xmm3);  // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...)
6497
        auto tmp3 = _mm_unpackhi_epi8(
6498
            xmm2,
6499
            xmm3);  // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...)
6500
        auto tmp2_0 = _mm_unpacklo_epi16(
6501
            tmp0,
6502
            tmp2);  // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...)
6503
        auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2);
6504
        auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3);
6505
        auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3);
6506
        _mm_storeu_si128(
6507
            reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER),
6508
            tmp2_0);
6509
        _mm_storeu_si128(
6510
            reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER),
6511
            tmp2_1);
6512
        _mm_storeu_si128(
6513
            reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER),
6514
            tmp2_2);
6515
        _mm_storeu_si128(
6516
            reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER),
6517
            tmp2_3);
6518
    }
6519
#if defined(__clang__)
6520
#pragma clang loop vectorize(disable)
6521
#endif
6522
    for (; i < nIters; ++i)
6523
    {
6524
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6525
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6526
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6527
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6528
    }
6529
}
6530
6531
#else
6532
6533
#if defined(__GNUC__) && !defined(__clang__)
6534
__attribute__((optimize("tree-vectorize")))
6535
#endif
6536
#if defined(__GNUC__)
6537
__attribute__((noinline))
6538
#endif
6539
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6540
// clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
6541
#pragma clang diagnostic push
6542
#pragma clang diagnostic ignored "-Wpass-failed"
6543
#endif
6544
static void
6545
GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6546
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
6547
0
{
6548
0
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6549
0
#pragma clang loop vectorize(enable)
6550
0
#endif
6551
0
    for (size_t i = 0; i < nIters; ++i)
6552
0
    {
6553
0
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6554
0
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6555
0
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6556
0
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6557
0
    }
6558
0
}
6559
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6560
#pragma clang diagnostic pop
6561
#endif
6562
6563
#endif
6564
6565
/************************************************************************/
6566
/*                        GDALTranspose2D()                             */
6567
/************************************************************************/
6568
6569
/**
6570
 * Transpose a 2D array in a efficient (cache-oblivious) way.
6571
 *
6572
 * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight.
6573
 * @param eSrcType Data type of pSrc.
6574
 * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth.
6575
 * @param eDstType Data type of pDst.
6576
 * @param nSrcWidth Width of pSrc array.
6577
 * @param nSrcHeight Height of pSrc array.
6578
 * @since GDAL 3.11
6579
 */
6580
6581
void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst,
6582
                     GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight)
6583
0
{
6584
0
    if (eSrcType == eDstType && (eSrcType == GDT_Byte || eSrcType == GDT_Int8))
6585
0
    {
6586
0
        if (nSrcHeight == 2)
6587
0
        {
6588
0
            GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc),
6589
0
                                static_cast<uint8_t *>(pDst), nSrcWidth);
6590
0
            return;
6591
0
        }
6592
0
        if (nSrcHeight == 4)
6593
0
        {
6594
0
            GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc),
6595
0
                                static_cast<uint8_t *>(pDst), nSrcWidth);
6596
0
            return;
6597
0
        }
6598
0
#if (defined(HAVE_SSSE3_AT_COMPILE_TIME) &&                                    \
6599
0
     (defined(__x86_64) || defined(_M_X64)))
6600
0
        if (CPLHaveRuntimeSSSE3())
6601
0
        {
6602
0
            GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6603
0
                                       static_cast<uint8_t *>(pDst), nSrcWidth,
6604
0
                                       nSrcHeight);
6605
0
            return;
6606
0
        }
6607
#elif defined(USE_NEON_OPTIMIZATIONS)
6608
        {
6609
            GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6610
                                       static_cast<uint8_t *>(pDst), nSrcWidth,
6611
                                       nSrcHeight);
6612
            return;
6613
        }
6614
#endif
6615
0
    }
6616
6617
0
#define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX)                \
6618
0
    GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>(                                 \
6619
0
        pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight)
6620
6621
    // clang-format off
6622
0
    switch (eDstType)
6623
0
    {
6624
0
        case GDT_Byte:     CALL_GDALTranspose2D_internal(uint8_t, false); break;
6625
0
        case GDT_Int8:     CALL_GDALTranspose2D_internal(int8_t, false); break;
6626
0
        case GDT_UInt16:   CALL_GDALTranspose2D_internal(uint16_t, false); break;
6627
0
        case GDT_Int16:    CALL_GDALTranspose2D_internal(int16_t, false); break;
6628
0
        case GDT_UInt32:   CALL_GDALTranspose2D_internal(uint32_t, false); break;
6629
0
        case GDT_Int32:    CALL_GDALTranspose2D_internal(int32_t, false); break;
6630
0
        case GDT_UInt64:   CALL_GDALTranspose2D_internal(uint64_t, false); break;
6631
0
        case GDT_Int64:    CALL_GDALTranspose2D_internal(int64_t, false); break;
6632
0
        case GDT_Float16:  CALL_GDALTranspose2D_internal(GFloat16, false); break;
6633
0
        case GDT_Float32:  CALL_GDALTranspose2D_internal(float, false); break;
6634
0
        case GDT_Float64:  CALL_GDALTranspose2D_internal(double, false); break;
6635
0
        case GDT_CInt16:   CALL_GDALTranspose2D_internal(int16_t, true); break;
6636
0
        case GDT_CInt32:   CALL_GDALTranspose2D_internal(int32_t, true); break;
6637
0
        case GDT_CFloat16: CALL_GDALTranspose2D_internal(GFloat16, true); break;
6638
0
        case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break;
6639
0
        case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break;
6640
0
        case GDT_Unknown:
6641
0
        case GDT_TypeCount:
6642
0
            break;
6643
0
    }
6644
        // clang-format on
6645
6646
0
#undef CALL_GDALTranspose2D_internal
6647
0
}
6648
6649
/************************************************************************/
6650
/*                     ExtractBitAndConvertTo255()                      */
6651
/************************************************************************/
6652
6653
#if defined(__GNUC__) || defined(_MSC_VER)
6654
// Signedness of char implementation dependent, so be explicit.
6655
// Assumes 2-complement integer types and sign extension of right shifting
6656
// GCC guarantees such:
6657
// https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
6658
static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6659
0
{
6660
0
    return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >>
6661
0
                              7);
6662
0
}
6663
#else
6664
// Portable way
6665
static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6666
{
6667
    return (byVal & (1 << nBit)) ? 255 : 0;
6668
}
6669
#endif
6670
6671
/************************************************************************/
6672
/*                   ExpandEightPackedBitsToByteAt255()                 */
6673
/************************************************************************/
6674
6675
// Expands the 8 bits of byVal into 8 output bytes, each set to 0 or 255.
// abyOutput[0] receives the most-significant bit and abyOutput[7] the
// least-significant one.
static inline void ExpandEightPackedBitsToByteAt255(GByte byVal,
                                                    GByte abyOutput[8])
{
    for (int iOut = 0; iOut < 8; ++iOut)
    {
        abyOutput[iOut] = ExtractBitAndConvertTo255(byVal, 7 - iOut);
    }
}
6687
6688
/************************************************************************/
6689
/*                GDALExpandPackedBitsToByteAt0Or255()                  */
6690
/************************************************************************/
6691
6692
/** Expand packed-bits (ordered from most-significant bit to least one)
6693
  into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6694
  at 1 to a byte at 255.
6695
6696
 The function does (in a possibly more optimized way) the following:
6697
 \code{.cpp}
6698
 for (size_t i = 0; i < nInputBits; ++i )
6699
 {
6700
     pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0;
6701
 }
6702
 \endcode
6703
6704
 @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6705
 @param pabyOutput Output array of nInputBits bytes.
6706
 @param nInputBits Number of valid bits in pabyInput.
6707
6708
 @since 3.11
6709
*/
6710
6711
void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput,
6712
                                        GByte *CPL_RESTRICT pabyOutput,
6713
                                        size_t nInputBits)
6714
0
{
6715
0
    const size_t nInputWholeBytes = nInputBits / 8;
6716
0
    size_t iByte = 0;
6717
6718
0
#ifdef HAVE_SSE2
6719
    // Mask to isolate each bit
6720
0
    const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4,
6721
0
                                          8, 16, 32, 64, -128);
6722
0
    const __m128i zero = _mm_setzero_si128();
6723
0
    const __m128i all_ones = _mm_set1_epi8(-1);
6724
#ifdef __SSSE3__
6725
    const __m128i dispatch_two_bytes =
6726
        _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
6727
#endif
6728
0
    constexpr size_t SSE_REG_SIZE = sizeof(bit_mask);
6729
0
    for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE)
6730
0
    {
6731
0
        __m128i reg_ori = _mm_loadu_si128(
6732
0
            reinterpret_cast<const __m128i *>(pabyInput + iByte));
6733
6734
0
        constexpr int NUM_PROCESSED_BYTES_PER_REG = 2;
6735
0
        for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k)
6736
0
        {
6737
            // Given reg_ori = (A, B, ... 14 other bytes ...),
6738
            // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B)
6739
#ifdef __SSSE3__
6740
            __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes);
6741
#else
6742
0
            __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori);
6743
0
            reg = _mm_unpacklo_epi16(reg, reg);
6744
0
            reg = _mm_unpacklo_epi32(reg, reg);
6745
0
#endif
6746
6747
            // Test if bits of interest are set
6748
0
            reg = _mm_and_si128(reg, bit_mask);
6749
6750
            // Now test if those bits are set, by comparing to zero. So the
6751
            // result will be that bytes where bits are set will be at 0, and
6752
            // ones where they are cleared will be at 0xFF. So the inverse of
6753
            // the end result we want!
6754
0
            reg = _mm_cmpeq_epi8(reg, zero);
6755
6756
            // Invert the result
6757
0
            reg = _mm_andnot_si128(reg, all_ones);
6758
6759
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg);
6760
6761
0
            pabyOutput += SSE_REG_SIZE;
6762
6763
            // Right-shift of 2 bytes
6764
0
            reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG);
6765
0
        }
6766
0
    }
6767
6768
0
#endif  // HAVE_SSE2
6769
6770
0
    for (; iByte < nInputWholeBytes; ++iByte)
6771
0
    {
6772
0
        ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput);
6773
0
        pabyOutput += 8;
6774
0
    }
6775
0
    for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
6776
0
    {
6777
0
        *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit);
6778
0
        ++pabyOutput;
6779
0
    }
6780
0
}
6781
6782
/************************************************************************/
6783
/*                   ExpandEightPackedBitsToByteAt1()                   */
6784
/************************************************************************/
6785
6786
// Expands the 8 bits of byVal into 8 output bytes, each set to 0 or 1.
// abyOutput[0] receives the most-significant bit and abyOutput[7] the
// least-significant one.
static inline void ExpandEightPackedBitsToByteAt1(GByte byVal,
                                                  GByte abyOutput[8])
{
    for (int iOut = 0; iOut < 8; ++iOut)
    {
        abyOutput[iOut] = (byVal >> (7 - iOut)) & 0x1;
    }
}
6798
6799
/************************************************************************/
6800
/*                GDALExpandPackedBitsToByteAt0Or1()                    */
6801
/************************************************************************/
6802
6803
/** Expand packed-bits (ordered from most-significant bit to least one)
6804
  into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6805
  at 1 to a byte at 1.
6806
6807
 The function does (in a possibly more optimized way) the following:
6808
 \code{.cpp}
6809
 for (size_t i = 0; i < nInputBits; ++i )
6810
 {
6811
     pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
6812
 }
6813
 \endcode
6814
6815
 @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6816
 @param pabyOutput Output array of nInputBits bytes.
6817
 @param nInputBits Number of valid bits in pabyInput.
6818
6819
 @since 3.11
6820
*/
6821
6822
void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput,
6823
                                      GByte *CPL_RESTRICT pabyOutput,
6824
                                      size_t nInputBits)
6825
0
{
6826
0
    const size_t nInputWholeBytes = nInputBits / 8;
6827
0
    size_t iByte = 0;
6828
0
    for (; iByte < nInputWholeBytes; ++iByte)
6829
0
    {
6830
0
        ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput);
6831
0
        pabyOutput += 8;
6832
0
    }
6833
0
    for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
6834
0
    {
6835
0
        *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1;
6836
0
        ++pabyOutput;
6837
0
    }
6838
0
}