Coverage Report

Created: 2025-06-13 06:18

/src/gdal/gcore/rasterio.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  GDAL Core
4
 * Purpose:  Contains default implementation of GDALRasterBand::IRasterIO()
5
 *           and supporting functions of broader utility.
6
 * Author:   Frank Warmerdam, warmerdam@pobox.com
7
 *
8
 ******************************************************************************
9
 * Copyright (c) 1998, Frank Warmerdam
10
 * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11
 *
12
 * SPDX-License-Identifier: MIT
13
 ****************************************************************************/
14
15
#include "cpl_port.h"
16
#include "gdal.h"
17
#include "gdal_priv.h"
18
19
#include <cassert>
20
#include <climits>
21
#include <cmath>
22
#include <cstddef>
23
#include <cstdio>
24
#include <cstdlib>
25
#include <cstring>
26
27
#include <algorithm>
28
#include <limits>
29
#include <stdexcept>
30
#include <type_traits>
31
32
#include "cpl_conv.h"
33
#include "cpl_cpu_features.h"
34
#include "cpl_error.h"
35
#include "cpl_float.h"
36
#include "cpl_progress.h"
37
#include "cpl_string.h"
38
#include "cpl_vsi.h"
39
#include "gdal_priv_templates.hpp"
40
#include "gdal_vrt.h"
41
#include "gdalwarper.h"
42
#include "memdataset.h"
43
#include "vrtdataset.h"
44
45
#if defined(__x86_64) || defined(_M_X64)
46
#include <emmintrin.h>
47
#define HAVE_SSE2
48
#elif defined(USE_NEON_OPTIMIZATIONS)
49
#include "include_sse2neon.h"
50
#define HAVE_SSE2
51
#endif
52
53
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
54
#include "rasterio_ssse3.h"
55
#ifdef __SSSE3__
56
#include <tmmintrin.h>
57
#endif
58
#endif
59
60
static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
61
                             int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
62
                             int nDstPixelStride, GPtrDiff_t nWordCount);
63
64
/************************************************************************/
65
/*                    DownsamplingIntegerXFactor()                      */
66
/************************************************************************/
67
68
template <bool bSameDataType, int DATA_TYPE_SIZE>
69
static bool DownsamplingIntegerXFactor(
70
    GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst,
71
    GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize,
72
    GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX,
73
    int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY)
74
0
{
75
0
    const int nBandDataSize =
76
0
        bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType);
77
0
    int nOuterLoopIters = nBufXSize - 1;
78
0
    const int nIncSrcOffset = nSrcXInc * nBandDataSize;
79
0
    const GByte *CPL_RESTRICT pabySrcData;
80
0
    int nEndBlockX = nBlockXSize + nStartBlockX;
81
82
0
    if (iSrcX < nEndBlockX)
83
0
    {
84
0
        CPLAssert(poBlock);
85
0
        goto no_reload_block;
86
0
    }
87
0
    goto reload_block;
88
89
    // Don't do the last iteration in the loop, as iSrcX might go beyond
90
    // nRasterXSize - 1
91
0
    while (--nOuterLoopIters >= 1)
92
0
    {
93
0
        iSrcX += nSrcXInc;
94
0
        pabySrcData += nIncSrcOffset;
95
0
        pabyDstData += nPixelSpace;
96
97
        /* --------------------------------------------------------------------
98
         */
99
        /*      Ensure we have the appropriate block loaded. */
100
        /* --------------------------------------------------------------------
101
         */
102
0
        if (iSrcX >= nEndBlockX)
103
0
        {
104
0
        reload_block:
105
0
        {
106
0
            const int nLBlockX = iSrcX / nBlockXSize;
107
0
            nStartBlockX = nLBlockX * nBlockXSize;
108
0
            nEndBlockX = nStartBlockX + nBlockXSize;
109
110
0
            if (poBlock != nullptr)
111
0
                poBlock->DropLock();
112
113
0
            poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
114
0
            if (poBlock == nullptr)
115
0
            {
116
0
                return false;
117
0
            }
118
0
        }
119
120
0
        no_reload_block:
121
0
            const GByte *pabySrcBlock =
122
0
                static_cast<const GByte *>(poBlock->GetDataRef());
123
0
            GPtrDiff_t iSrcOffset =
124
0
                (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
125
0
            pabySrcData = pabySrcBlock + iSrcOffset;
126
0
        }
127
128
        /* --------------------------------------------------------------------
129
         */
130
        /*      Copy the maximum run of pixels. */
131
        /* --------------------------------------------------------------------
132
         */
133
134
0
        const int nIters = std::min(
135
0
            (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
136
0
        if (bSameDataType)
137
0
        {
138
0
            memcpy(pabyDstData, pabySrcData, nBandDataSize);
139
0
            if (nIters > 1)
140
0
            {
141
0
                if (DATA_TYPE_SIZE == 1)
142
0
                {
143
0
                    pabySrcData += nIncSrcOffset;
144
0
                    pabyDstData += nPixelSpace;
145
0
                    GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData,
146
0
                                     nPixelSpace, nIters - 1);
147
0
                    pabySrcData +=
148
0
                        static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
149
0
                    pabyDstData +=
150
0
                        static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
151
0
                }
152
0
                else
153
0
                {
154
0
                    for (int i = 0; i < nIters - 1; i++)
155
0
                    {
156
0
                        pabySrcData += nIncSrcOffset;
157
0
                        pabyDstData += nPixelSpace;
158
0
                        memcpy(pabyDstData, pabySrcData, nBandDataSize);
159
0
                    }
160
0
                }
161
0
                iSrcX += nSrcXInc * (nIters - 1);
162
0
                nOuterLoopIters -= nIters - 1;
163
0
            }
164
0
        }
165
0
        else
166
0
        {
167
            // Type to type conversion ...
168
0
            GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData,
169
0
                            eBufType, nPixelSpace, std::max(1, nIters));
170
0
            if (nIters > 1)
171
0
            {
172
0
                pabySrcData +=
173
0
                    static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
174
0
                pabyDstData +=
175
0
                    static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
176
0
                iSrcX += nSrcXInc * (nIters - 1);
177
0
                nOuterLoopIters -= nIters - 1;
178
0
            }
179
0
        }
180
0
    }
181
182
    // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1
183
0
    if (nOuterLoopIters == 0)
184
0
    {
185
0
        const int nRasterXSize = poBand->GetXSize();
186
0
        iSrcX =
187
0
            static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
188
0
                                      static_cast<GInt64>(nRasterXSize - 1)));
189
0
        pabyDstData += nPixelSpace;
190
0
        if (iSrcX < nEndBlockX)
191
0
        {
192
0
            goto no_reload_block;
193
0
        }
194
0
        goto reload_block;
195
0
    }
196
0
    return true;
197
0
}
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 1>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 2>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 4>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 8>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 16>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<false, 0>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int)
198
199
template <class A, class B>
200
CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW inline auto CPLUnsanitizedMul(A a, B b)
201
0
{
202
0
    return a * b;
203
0
}
204
205
/************************************************************************/
206
/*                             IRasterIO()                              */
207
/*                                                                      */
208
/*      Default internal implementation of RasterIO() ... utilizes      */
209
/*      the Block access methods to satisfy the request.  This would    */
210
/*      normally only be overridden by formats with overviews.          */
211
/************************************************************************/
212
213
CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
214
                                 int nXSize, int nYSize, void *pData,
215
                                 int nBufXSize, int nBufYSize,
216
                                 GDALDataType eBufType, GSpacing nPixelSpace,
217
                                 GSpacing nLineSpace,
218
                                 GDALRasterIOExtraArg *psExtraArg)
219
220
0
{
221
0
    if (eRWFlag == GF_Write && eFlushBlockErr != CE_None)
222
0
    {
223
0
        CPLError(eFlushBlockErr, CPLE_AppDefined,
224
0
                 "An error occurred while writing a dirty block "
225
0
                 "from GDALRasterBand::IRasterIO");
226
0
        CPLErr eErr = eFlushBlockErr;
227
0
        eFlushBlockErr = CE_None;
228
0
        return eErr;
229
0
    }
230
0
    if (nBlockXSize <= 0 || nBlockYSize <= 0)
231
0
    {
232
0
        CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size");
233
0
        return CE_Failure;
234
0
    }
235
236
0
    const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
237
0
    const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType);
238
0
    GByte dummyBlock[2] = {0, 0};
239
0
    GByte *pabySrcBlock =
240
0
        dummyBlock; /* to avoid Coverity warning about nullptr dereference */
241
0
    GDALRasterBlock *poBlock = nullptr;
242
0
    const bool bUseIntegerRequestCoords =
243
0
        (!psExtraArg->bFloatingPointWindowValidity ||
244
0
         (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
245
0
          nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
246
247
    /* ==================================================================== */
248
    /*      A common case is the data requested with the destination        */
249
    /*      is packed, and the block width is the raster width.             */
250
    /* ==================================================================== */
251
0
    if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize &&
252
0
        nBlockXSize == GetXSize() && nBufXSize == nXSize &&
253
0
        nBufYSize == nYSize && bUseIntegerRequestCoords)
254
0
    {
255
0
        CPLErr eErr = CE_None;
256
0
        int nLBlockY = -1;
257
258
0
        for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
259
0
        {
260
0
            const int iSrcY = iBufYOff + nYOff;
261
262
0
            if (iSrcY < nLBlockY * nBlockYSize ||
263
0
                iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
264
0
            {
265
0
                nLBlockY = iSrcY / nBlockYSize;
266
0
                bool bJustInitialize =
267
0
                    eRWFlag == GF_Write && nXOff == 0 &&
268
0
                    nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
269
0
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
270
271
                // Is this a partial tile at right and/or bottom edges of
272
                // the raster, and that is going to be completely written?
273
                // If so, do not load it from storage, but zero it so that
274
                // the content outside of the validity area is initialized.
275
0
                bool bMemZeroBuffer = false;
276
0
                if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 &&
277
0
                    nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
278
0
                    nYOff + nYSize == GetYSize() &&
279
0
                    nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)
280
0
                {
281
0
                    bJustInitialize = true;
282
0
                    bMemZeroBuffer = true;
283
0
                }
284
285
0
                if (poBlock)
286
0
                    poBlock->DropLock();
287
288
0
                const GUInt32 nErrorCounter = CPLGetErrorCounter();
289
0
                poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize);
290
0
                if (poBlock == nullptr)
291
0
                {
292
0
                    if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
293
0
                        nullptr)
294
0
                    {
295
0
                        CPLError(CE_Failure, CPLE_AppDefined,
296
0
                                 "GetBlockRef failed at X block offset %d, "
297
0
                                 "Y block offset %d%s",
298
0
                                 0, nLBlockY,
299
0
                                 (nErrorCounter != CPLGetErrorCounter())
300
0
                                     ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
301
0
                                     : "");
302
0
                    }
303
0
                    eErr = CE_Failure;
304
0
                    break;
305
0
                }
306
307
0
                if (eRWFlag == GF_Write)
308
0
                    poBlock->MarkDirty();
309
310
0
                pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
311
0
                if (bMemZeroBuffer)
312
0
                {
313
0
                    memset(pabySrcBlock, 0,
314
0
                           static_cast<GPtrDiff_t>(nBandDataSize) *
315
0
                               nBlockXSize * nBlockYSize);
316
0
                }
317
0
            }
318
319
0
            const auto nSrcByteOffset =
320
0
                (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) *
321
0
                     nBlockXSize +
322
0
                 nXOff) *
323
0
                nBandDataSize;
324
325
0
            if (eDataType == eBufType)
326
0
            {
327
0
                if (eRWFlag == GF_Read)
328
0
                    memcpy(static_cast<GByte *>(pData) +
329
0
                               static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
330
0
                           pabySrcBlock + nSrcByteOffset,
331
0
                           static_cast<size_t>(nLineSpace));
332
0
                else
333
0
                    memcpy(pabySrcBlock + nSrcByteOffset,
334
0
                           static_cast<GByte *>(pData) +
335
0
                               static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
336
0
                           static_cast<size_t>(nLineSpace));
337
0
            }
338
0
            else
339
0
            {
340
                // Type to type conversion.
341
0
                if (eRWFlag == GF_Read)
342
0
                    GDALCopyWords64(
343
0
                        pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize,
344
0
                        static_cast<GByte *>(pData) +
345
0
                            static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
346
0
                        eBufType, static_cast<int>(nPixelSpace), nBufXSize);
347
0
                else
348
0
                    GDALCopyWords64(static_cast<GByte *>(pData) +
349
0
                                        static_cast<GPtrDiff_t>(iBufYOff) *
350
0
                                            nLineSpace,
351
0
                                    eBufType, static_cast<int>(nPixelSpace),
352
0
                                    pabySrcBlock + nSrcByteOffset, eDataType,
353
0
                                    nBandDataSize, nBufXSize);
354
0
            }
355
356
0
            if (psExtraArg->pfnProgress != nullptr &&
357
0
                !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
358
0
                                         psExtraArg->pProgressData))
359
0
            {
360
0
                eErr = CE_Failure;
361
0
                break;
362
0
            }
363
0
        }
364
365
0
        if (poBlock)
366
0
            poBlock->DropLock();
367
368
0
        return eErr;
369
0
    }
370
371
    /* ==================================================================== */
372
    /*      Do we have overviews that would be appropriate to satisfy       */
373
    /*      this request?                                                   */
374
    /* ==================================================================== */
375
0
    if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 &&
376
0
        eRWFlag == GF_Read)
377
0
    {
378
0
        GDALRasterIOExtraArg sExtraArg;
379
0
        GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
380
381
0
        const int nOverview =
382
0
            GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
383
0
                                          nBufXSize, nBufYSize, &sExtraArg);
384
0
        if (nOverview >= 0)
385
0
        {
386
0
            GDALRasterBand *poOverviewBand = GetOverview(nOverview);
387
0
            if (poOverviewBand == nullptr)
388
0
                return CE_Failure;
389
390
0
            return poOverviewBand->RasterIO(
391
0
                eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize,
392
0
                nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg);
393
0
        }
394
0
    }
395
396
0
    if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 &&
397
0
        nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize &&
398
0
        nLineSpace == nPixelSpace * nBufXSize &&
399
0
        CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")))
400
0
    {
401
0
        memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize));
402
0
        return CE_None;
403
0
    }
404
405
    /* ==================================================================== */
406
    /*      The second case when we don't need subsample data but likely    */
407
    /*      need data type conversion.                                      */
408
    /* ==================================================================== */
409
0
    if (  // nPixelSpace == nBufDataSize &&
410
0
        nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
411
0
    {
412
#if DEBUG_VERBOSE
413
        printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/
414
               nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag));
415
#endif
416
417
        /* --------------------------------------------------------------------
418
         */
419
        /*      Loop over buffer computing source locations. */
420
        /* --------------------------------------------------------------------
421
         */
422
        // Calculate starting values out of loop
423
0
        const int nLBlockXStart = nXOff / nBlockXSize;
424
0
        const int nXSpanEnd = nBufXSize + nXOff;
425
426
0
        int nYInc = 0;
427
0
        for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize;
428
0
             iBufYOff += nYInc, iSrcY += nYInc)
429
0
        {
430
0
            GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
431
0
                                    static_cast<GPtrDiff_t>(nLineSpace);
432
0
            int nLBlockY = iSrcY / nBlockYSize;
433
0
            int nLBlockX = nLBlockXStart;
434
0
            int iSrcX = nXOff;
435
0
            while (iSrcX < nXSpanEnd)
436
0
            {
437
0
                int nXSpan = nLBlockX * nBlockXSize;
438
0
                if (nXSpan < INT_MAX - nBlockXSize)
439
0
                    nXSpan += nBlockXSize;
440
0
                else
441
0
                    nXSpan = INT_MAX;
442
0
                const int nXRight = nXSpan;
443
0
                nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX;
444
445
0
                const size_t nXSpanSize =
446
0
                    CPLUnsanitizedMul(nXSpan, static_cast<size_t>(nPixelSpace));
447
448
0
                bool bJustInitialize =
449
0
                    eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
450
0
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
451
0
                    nXOff <= nLBlockX * nBlockXSize &&
452
0
                    nXOff + nXSize >= nXRight;
453
454
                // Is this a partial tile at right and/or bottom edges of
455
                // the raster, and that is going to be completely written?
456
                // If so, do not load it from storage, but zero it so that
457
                // the content outside of the validity area is initialized.
458
0
                bool bMemZeroBuffer = false;
459
0
                if (eRWFlag == GF_Write && !bJustInitialize &&
460
0
                    nXOff <= nLBlockX * nBlockXSize &&
461
0
                    nYOff <= nLBlockY * nBlockYSize &&
462
0
                    (nXOff + nXSize >= nXRight ||
463
                     // cppcheck-suppress knownConditionTrueFalse
464
0
                     (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) &&
465
0
                    (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
466
0
                     (nYOff + nYSize == GetYSize() &&
467
0
                      nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)))
468
0
                {
469
0
                    bJustInitialize = true;
470
0
                    bMemZeroBuffer = true;
471
0
                }
472
473
                /* --------------------------------------------------------------------
474
                 */
475
                /*      Ensure we have the appropriate block loaded. */
476
                /* --------------------------------------------------------------------
477
                 */
478
0
                const GUInt32 nErrorCounter = CPLGetErrorCounter();
479
0
                poBlock =
480
0
                    GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
481
0
                if (!poBlock)
482
0
                {
483
0
                    if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
484
0
                        nullptr)
485
0
                    {
486
0
                        CPLError(CE_Failure, CPLE_AppDefined,
487
0
                                 "GetBlockRef failed at X block offset %d, "
488
0
                                 "Y block offset %d%s",
489
0
                                 nLBlockX, nLBlockY,
490
0
                                 (nErrorCounter != CPLGetErrorCounter())
491
0
                                     ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
492
0
                                     : "");
493
0
                    }
494
0
                    return (CE_Failure);
495
0
                }
496
497
0
                if (eRWFlag == GF_Write)
498
0
                    poBlock->MarkDirty();
499
500
0
                pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
501
0
                if (bMemZeroBuffer)
502
0
                {
503
0
                    memset(pabySrcBlock, 0,
504
0
                           static_cast<GPtrDiff_t>(nBandDataSize) *
505
0
                               nBlockXSize * nBlockYSize);
506
0
                }
507
                /* --------------------------------------------------------------------
508
                 */
509
                /*      Copy over this chunk of data. */
510
                /* --------------------------------------------------------------------
511
                 */
512
0
                GPtrDiff_t iSrcOffset =
513
0
                    (static_cast<GPtrDiff_t>(iSrcX) -
514
0
                     static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) +
515
0
                     (static_cast<GPtrDiff_t>(iSrcY) -
516
0
                      static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
517
0
                         nBlockXSize) *
518
0
                    nBandDataSize;
519
                // Fill up as many rows as possible for the loaded block.
520
0
                const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize),
521
0
                                          nBufYSize - iBufYOff);
522
0
                for (int k = 0; k < kmax; k++)
523
0
                {
524
0
                    if (eDataType == eBufType && nPixelSpace == nBufDataSize)
525
0
                    {
526
0
                        if (eRWFlag == GF_Read)
527
0
                            memcpy(static_cast<GByte *>(pData) + iBufOffset +
528
0
                                       static_cast<GPtrDiff_t>(k) * nLineSpace,
529
0
                                   pabySrcBlock + iSrcOffset, nXSpanSize);
530
0
                        else
531
0
                            memcpy(pabySrcBlock + iSrcOffset,
532
0
                                   static_cast<GByte *>(pData) + iBufOffset +
533
0
                                       static_cast<GPtrDiff_t>(k) * nLineSpace,
534
0
                                   nXSpanSize);
535
0
                    }
536
0
                    else
537
0
                    {
538
                        /* type to type conversion */
539
0
                        if (eRWFlag == GF_Read)
540
0
                            GDALCopyWords64(
541
0
                                pabySrcBlock + iSrcOffset, eDataType,
542
0
                                nBandDataSize,
543
0
                                static_cast<GByte *>(pData) + iBufOffset +
544
0
                                    static_cast<GPtrDiff_t>(k) * nLineSpace,
545
0
                                eBufType, static_cast<int>(nPixelSpace),
546
0
                                nXSpan);
547
0
                        else
548
0
                            GDALCopyWords64(
549
0
                                static_cast<GByte *>(pData) + iBufOffset +
550
0
                                    static_cast<GPtrDiff_t>(k) * nLineSpace,
551
0
                                eBufType, static_cast<int>(nPixelSpace),
552
0
                                pabySrcBlock + iSrcOffset, eDataType,
553
0
                                nBandDataSize, nXSpan);
554
0
                    }
555
556
0
                    iSrcOffset +=
557
0
                        static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize;
558
0
                }
559
560
0
                iBufOffset =
561
0
                    CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
562
0
                nLBlockX++;
563
0
                iSrcX += nXSpan;
564
565
0
                poBlock->DropLock();
566
0
                poBlock = nullptr;
567
0
            }
568
569
            /* Compute the increment to go on a block boundary */
570
0
            nYInc = nBlockYSize - (iSrcY % nBlockYSize);
571
572
0
            if (psExtraArg->pfnProgress != nullptr &&
573
0
                !psExtraArg->pfnProgress(
574
0
                    1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
575
0
                    psExtraArg->pProgressData))
576
0
            {
577
0
                return CE_Failure;
578
0
            }
579
0
        }
580
581
0
        return CE_None;
582
0
    }
583
584
    /* ==================================================================== */
585
    /*      Loop reading required source blocks to satisfy output           */
586
    /*      request.  This is the most general implementation.              */
587
    /* ==================================================================== */
588
589
0
    double dfXOff = nXOff;
590
0
    double dfYOff = nYOff;
591
0
    double dfXSize = nXSize;
592
0
    double dfYSize = nYSize;
593
0
    if (psExtraArg->bFloatingPointWindowValidity)
594
0
    {
595
0
        dfXOff = psExtraArg->dfXOff;
596
0
        dfYOff = psExtraArg->dfYOff;
597
0
        dfXSize = psExtraArg->dfXSize;
598
0
        dfYSize = psExtraArg->dfYSize;
599
0
    }
600
601
    /* -------------------------------------------------------------------- */
602
    /*      Compute stepping increment.                                     */
603
    /* -------------------------------------------------------------------- */
604
0
    const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
605
0
    const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
606
0
    CPLErr eErr = CE_None;
607
608
0
    if (eRWFlag == GF_Write)
609
0
    {
610
        /* --------------------------------------------------------------------
611
         */
612
        /*    Write case */
613
        /*    Loop over raster window computing source locations in the buffer.
614
         */
615
        /* --------------------------------------------------------------------
616
         */
617
0
        GByte *pabyDstBlock = nullptr;
618
0
        int nLBlockX = -1;
619
0
        int nLBlockY = -1;
620
621
0
        for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++)
622
0
        {
623
0
            const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
624
625
0
            for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++)
626
0
            {
627
0
                const int iBufXOff =
628
0
                    static_cast<int>((iDstX - nXOff) / dfSrcXInc);
629
0
                GPtrDiff_t iBufOffset =
630
0
                    static_cast<GPtrDiff_t>(iBufYOff) *
631
0
                        static_cast<GPtrDiff_t>(nLineSpace) +
632
0
                    iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
633
634
                // FIXME: this code likely doesn't work if the dirty block gets
635
                // flushed to disk before being completely written.
636
                // In the meantime, bJustInitialize should probably be set to
637
                // FALSE even if it is not ideal performance wise, and for
638
                // lossy compression.
639
640
                /* --------------------------------------------------------------------
641
                 */
642
                /*      Ensure we have the appropriate block loaded. */
643
                /* --------------------------------------------------------------------
644
                 */
645
0
                if (iDstX < nLBlockX * nBlockXSize ||
646
0
                    iDstX - nBlockXSize >= nLBlockX * nBlockXSize ||
647
0
                    iDstY < nLBlockY * nBlockYSize ||
648
0
                    iDstY - nBlockYSize >= nLBlockY * nBlockYSize)
649
0
                {
650
0
                    nLBlockX = iDstX / nBlockXSize;
651
0
                    nLBlockY = iDstY / nBlockYSize;
652
653
0
                    const bool bJustInitialize =
654
0
                        nYOff <= nLBlockY * nBlockYSize &&
655
0
                        nYOff + nYSize - nBlockYSize >=
656
0
                            nLBlockY * nBlockYSize &&
657
0
                        nXOff <= nLBlockX * nBlockXSize &&
658
0
                        nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
659
                    /*bool bMemZeroBuffer = FALSE;
660
                    if( !bJustInitialize &&
661
                        nXOff <= nLBlockX * nBlockXSize &&
662
                        nYOff <= nLBlockY * nBlockYSize &&
663
                        (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
664
                         (nXOff + nXSize == GetXSize() &&
665
                         (nLBlockX+1) * nBlockXSize > GetXSize())) &&
666
                        (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
667
                         (nYOff + nYSize == GetYSize() &&
668
                         (nLBlockY+1) * nBlockYSize > GetYSize())) )
669
                    {
670
                        bJustInitialize = TRUE;
671
                        bMemZeroBuffer = TRUE;
672
                    }*/
673
0
                    if (poBlock != nullptr)
674
0
                        poBlock->DropLock();
675
676
0
                    poBlock =
677
0
                        GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
678
0
                    if (poBlock == nullptr)
679
0
                    {
680
0
                        return (CE_Failure);
681
0
                    }
682
683
0
                    poBlock->MarkDirty();
684
685
0
                    pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
686
                    /*if( bMemZeroBuffer )
687
                    {
688
                        memset(pabyDstBlock, 0,
689
                            static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
690
                    * nBlockYSize);
691
                    }*/
692
0
                }
693
694
                // To make Coverity happy. Should not happen by design.
695
0
                if (pabyDstBlock == nullptr)
696
0
                {
697
0
                    CPLAssert(false);
698
0
                    eErr = CE_Failure;
699
0
                    break;
700
0
                }
701
702
                /* --------------------------------------------------------------------
703
                 */
704
                /*      Copy over this pixel of data. */
705
                /* --------------------------------------------------------------------
706
                 */
707
0
                GPtrDiff_t iDstOffset =
708
0
                    (static_cast<GPtrDiff_t>(iDstX) -
709
0
                     static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
710
0
                     (static_cast<GPtrDiff_t>(iDstY) -
711
0
                      static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
712
0
                         nBlockXSize) *
713
0
                    nBandDataSize;
714
715
0
                if (eDataType == eBufType)
716
0
                {
717
0
                    memcpy(pabyDstBlock + iDstOffset,
718
0
                           static_cast<GByte *>(pData) + iBufOffset,
719
0
                           nBandDataSize);
720
0
                }
721
0
                else
722
0
                {
723
                    /* type to type conversion ... ouch, this is expensive way
724
                    of handling single words */
725
0
                    GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset,
726
0
                                    eBufType, 0, pabyDstBlock + iDstOffset,
727
0
                                    eDataType, 0, 1);
728
0
                }
729
0
            }
730
731
0
            if (psExtraArg->pfnProgress != nullptr &&
732
0
                !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
733
0
                                         psExtraArg->pProgressData))
734
0
            {
735
0
                eErr = CE_Failure;
736
0
                break;
737
0
            }
738
0
        }
739
0
    }
740
0
    else
741
0
    {
742
0
        if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
743
0
        {
744
0
            if ((psExtraArg->eResampleAlg == GRIORA_Cubic ||
745
0
                 psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
746
0
                 psExtraArg->eResampleAlg == GRIORA_Bilinear ||
747
0
                 psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
748
0
                GetColorTable() != nullptr)
749
0
            {
750
0
                CPLError(CE_Warning, CPLE_NotSupported,
751
0
                         "Resampling method not supported on paletted band. "
752
0
                         "Falling back to nearest neighbour");
753
0
            }
754
0
            else if (psExtraArg->eResampleAlg == GRIORA_Gauss &&
755
0
                     GDALDataTypeIsComplex(eDataType))
756
0
            {
757
0
                CPLError(CE_Warning, CPLE_NotSupported,
758
0
                         "Resampling method not supported on complex data type "
759
0
                         "band. Falling back to nearest neighbour");
760
0
            }
761
0
            else
762
0
            {
763
0
                return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize,
764
0
                                         pData, nBufXSize, nBufYSize, eBufType,
765
0
                                         nPixelSpace, nLineSpace, psExtraArg);
766
0
            }
767
0
        }
768
769
0
        int nLimitBlockY = 0;
770
0
        const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
771
0
        int nStartBlockX = -nBlockXSize;
772
0
        const double EPS = 1e-10;
773
0
        int nLBlockY = -1;
774
0
        const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
775
0
        const bool bIntegerXFactor =
776
0
            bUseIntegerRequestCoords &&
777
0
            static_cast<int>(dfSrcXInc) == dfSrcXInc &&
778
0
            static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
779
780
        /* --------------------------------------------------------------------
781
         */
782
        /*      Read case */
783
        /*      Loop over buffer computing source locations. */
784
        /* --------------------------------------------------------------------
785
         */
786
0
        for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
787
0
        {
788
            // Add small epsilon to avoid some numeric precision issues.
789
0
            const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
790
0
            const int iSrcY = static_cast<int>(std::min(
791
0
                std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
792
793
0
            GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
794
0
                                    static_cast<GPtrDiff_t>(nLineSpace);
795
796
0
            if (iSrcY >= nLimitBlockY)
797
0
            {
798
0
                nLBlockY = iSrcY / nBlockYSize;
799
0
                nLimitBlockY = nLBlockY * nBlockYSize;
800
0
                if (nLimitBlockY < INT_MAX - nBlockYSize)
801
0
                    nLimitBlockY += nBlockYSize;
802
0
                else
803
0
                    nLimitBlockY = INT_MAX;
804
                // Make sure a new block is loaded.
805
0
                nStartBlockX = -nBlockXSize;
806
0
            }
807
0
            else if (static_cast<int>(dfSrcXStart) < nStartBlockX)
808
0
            {
809
                // Make sure a new block is loaded.
810
0
                nStartBlockX = -nBlockXSize;
811
0
            }
812
813
0
            GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) *
814
0
                                       static_cast<GPtrDiff_t>(nBlockXSize);
815
816
0
            if (bIntegerXFactor)
817
0
            {
818
0
                int iSrcX = static_cast<int>(dfSrcXStart);
819
0
                const int nSrcXInc = static_cast<int>(dfSrcXInc);
820
0
                GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset;
821
0
                bool bRet = false;
822
0
                if (bByteCopy)
823
0
                {
824
0
                    bRet = DownsamplingIntegerXFactor<true, 1>(
825
0
                        this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
826
0
                        static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte,
827
0
                        GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
828
0
                }
829
0
                else if (eDataType == eBufType)
830
0
                {
831
0
                    switch (nBandDataSize)
832
0
                    {
833
0
                        case 2:
834
0
                            bRet = DownsamplingIntegerXFactor<true, 2>(
835
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
836
0
                                pabyDstData, static_cast<int>(nPixelSpace),
837
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
838
0
                                nBlockXSize, poBlock, nLBlockY);
839
0
                            break;
840
0
                        case 4:
841
0
                            bRet = DownsamplingIntegerXFactor<true, 4>(
842
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
843
0
                                pabyDstData, static_cast<int>(nPixelSpace),
844
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
845
0
                                nBlockXSize, poBlock, nLBlockY);
846
0
                            break;
847
0
                        case 8:
848
0
                            bRet = DownsamplingIntegerXFactor<true, 8>(
849
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
850
0
                                pabyDstData, static_cast<int>(nPixelSpace),
851
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
852
0
                                nBlockXSize, poBlock, nLBlockY);
853
0
                            break;
854
0
                        case 16:
855
0
                            bRet = DownsamplingIntegerXFactor<true, 16>(
856
0
                                this, iSrcX, nSrcXInc, iSrcOffsetCst,
857
0
                                pabyDstData, static_cast<int>(nPixelSpace),
858
0
                                nBufXSize, eDataType, eDataType, nStartBlockX,
859
0
                                nBlockXSize, poBlock, nLBlockY);
860
0
                            break;
861
0
                        default:
862
0
                            CPLAssert(false);
863
0
                            break;
864
0
                    }
865
0
                }
866
0
                else
867
0
                {
868
0
                    bRet = DownsamplingIntegerXFactor<false, 0>(
869
0
                        this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
870
0
                        static_cast<int>(nPixelSpace), nBufXSize, eDataType,
871
0
                        eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
872
0
                }
873
0
                if (!bRet)
874
0
                    eErr = CE_Failure;
875
0
            }
876
0
            else
877
0
            {
878
0
                double dfSrcX = dfSrcXStart;
879
0
                for (int iBufXOff = 0; iBufXOff < nBufXSize;
880
0
                     iBufXOff++, dfSrcX += dfSrcXInc)
881
0
                {
882
                    // TODO?: try to avoid the clamping for most iterations
883
0
                    const int iSrcX = static_cast<int>(
884
0
                        std::min(std::max(0.0, dfSrcX),
885
0
                                 static_cast<double>(nRasterXSize - 1)));
886
887
                    /* --------------------------------------------------------------------
888
                     */
889
                    /*      Ensure we have the appropriate block loaded. */
890
                    /* --------------------------------------------------------------------
891
                     */
892
0
                    if (iSrcX >= nBlockXSize + nStartBlockX)
893
0
                    {
894
0
                        const int nLBlockX = iSrcX / nBlockXSize;
895
0
                        nStartBlockX = nLBlockX * nBlockXSize;
896
897
0
                        if (poBlock != nullptr)
898
0
                            poBlock->DropLock();
899
900
0
                        poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
901
0
                        if (poBlock == nullptr)
902
0
                        {
903
0
                            eErr = CE_Failure;
904
0
                            break;
905
0
                        }
906
907
0
                        pabySrcBlock =
908
0
                            static_cast<GByte *>(poBlock->GetDataRef());
909
0
                    }
910
0
                    const GPtrDiff_t nDiffX =
911
0
                        static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
912
913
                    /* --------------------------------------------------------------------
914
                     */
915
                    /*      Copy over this pixel of data. */
916
                    /* --------------------------------------------------------------------
917
                     */
918
919
0
                    if (bByteCopy)
920
0
                    {
921
0
                        GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
922
0
                        static_cast<GByte *>(pData)[iBufOffset] =
923
0
                            pabySrcBlock[iSrcOffset];
924
0
                    }
925
0
                    else if (eDataType == eBufType)
926
0
                    {
927
0
                        GPtrDiff_t iSrcOffset =
928
0
                            (nDiffX + iSrcOffsetCst) * nBandDataSize;
929
0
                        memcpy(static_cast<GByte *>(pData) + iBufOffset,
930
0
                               pabySrcBlock + iSrcOffset, nBandDataSize);
931
0
                    }
932
0
                    else
933
0
                    {
934
                        // Type to type conversion ...
935
0
                        GPtrDiff_t iSrcOffset =
936
0
                            (nDiffX + iSrcOffsetCst) * nBandDataSize;
937
0
                        GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
938
0
                                        static_cast<GByte *>(pData) +
939
0
                                            iBufOffset,
940
0
                                        eBufType, 0, 1);
941
0
                    }
942
943
0
                    iBufOffset += static_cast<int>(nPixelSpace);
944
0
                }
945
0
            }
946
0
            if (eErr == CE_Failure)
947
0
                break;
948
949
0
            if (psExtraArg->pfnProgress != nullptr &&
950
0
                !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
951
0
                                         psExtraArg->pProgressData))
952
0
            {
953
0
                eErr = CE_Failure;
954
0
                break;
955
0
            }
956
0
        }
957
0
    }
958
959
0
    if (poBlock != nullptr)
960
0
        poBlock->DropLock();
961
962
0
    return eErr;
963
0
}
964
965
/************************************************************************/
966
/*                         GDALRasterIOTransformer()                    */
967
/************************************************************************/
968
969
/** Parameters of the per-axis linear mapping applied by
 * GDALRasterIOTransformer().
 *
 * Destination to source:  src = dst * df{X,Y}RatioDstToSrc + df{X,Y}Off
 * Source to destination:  dst = (src - df{X,Y}Off) / df{X,Y}RatioDstToSrc
 *
 * Members default to the identity mapping (offset 0, ratio 1) so that a
 * default-constructed instance never carries indeterminate values and the
 * inverse direction never divides by an unset ratio. Callers are still
 * expected to assign the fields they need before use.
 */
struct GDALRasterIOTransformerStruct
{
    // Offset added on the X axis in the destination-to-source direction.
    double dfXOff = 0.0;
    // Offset added on the Y axis in the destination-to-source direction.
    double dfYOff = 0.0;
    // Multiplicative factor on the X axis, destination-to-source.
    double dfXRatioDstToSrc = 1.0;
    // Multiplicative factor on the Y axis, destination-to-source.
    double dfYRatioDstToSrc = 1.0;
};
976
977
static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
978
                                   int nPointCount, double *x, double *y,
979
                                   double * /* z */, int *panSuccess)
980
0
{
981
0
    GDALRasterIOTransformerStruct *psParams =
982
0
        static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
983
0
    if (bDstToSrc)
984
0
    {
985
0
        for (int i = 0; i < nPointCount; i++)
986
0
        {
987
0
            x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
988
0
            y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
989
0
            panSuccess[i] = TRUE;
990
0
        }
991
0
    }
992
0
    else
993
0
    {
994
0
        for (int i = 0; i < nPointCount; i++)
995
0
        {
996
0
            x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
997
0
            y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
998
0
            panSuccess[i] = TRUE;
999
0
        }
1000
0
    }
1001
0
    return TRUE;
1002
0
}
1003
1004
/************************************************************************/
1005
/*                          RasterIOResampled()                         */
1006
/************************************************************************/
1007
1008
//! @cond Doxygen_Suppress
1009
CPLErr GDALRasterBand::RasterIOResampled(
1010
    GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1011
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1012
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
1013
0
{
1014
    // Determine if we use warping resampling or overview resampling
1015
0
    const bool bUseWarp =
1016
0
        (GDALDataTypeIsComplex(eDataType) &&
1017
0
         psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
1018
0
         psExtraArg->eResampleAlg != GRIORA_Mode);
1019
1020
0
    double dfXOff = nXOff;
1021
0
    double dfYOff = nYOff;
1022
0
    double dfXSize = nXSize;
1023
0
    double dfYSize = nYSize;
1024
0
    if (psExtraArg->bFloatingPointWindowValidity)
1025
0
    {
1026
0
        dfXOff = psExtraArg->dfXOff;
1027
0
        dfYOff = psExtraArg->dfYOff;
1028
0
        dfXSize = psExtraArg->dfXSize;
1029
0
        dfYSize = psExtraArg->dfYSize;
1030
0
    }
1031
1032
0
    const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1033
0
    const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1034
1035
    // Determine the coordinates in the "virtual" output raster to see
1036
    // if they are integers, in which case we will use them as a shift
1037
    // so that subwindow extracts give the exact same results as entire raster
1038
    // scaling.
1039
0
    double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1040
0
    bool bHasXOffVirtual = false;
1041
0
    int nDestXOffVirtual = 0;
1042
0
    if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1043
0
    {
1044
0
        bHasXOffVirtual = true;
1045
0
        dfXOff = nXOff;
1046
0
        nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1047
0
    }
1048
1049
0
    double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1050
0
    bool bHasYOffVirtual = false;
1051
0
    int nDestYOffVirtual = 0;
1052
0
    if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1053
0
    {
1054
0
        bHasYOffVirtual = true;
1055
0
        dfYOff = nYOff;
1056
0
        nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1057
0
    }
1058
1059
    // Create a MEM dataset that wraps the output buffer.
1060
0
    GDALDataset *poMEMDS;
1061
0
    void *pTempBuffer = nullptr;
1062
0
    GSpacing nPSMem = nPixelSpace;
1063
0
    GSpacing nLSMem = nLineSpace;
1064
0
    void *pDataMem = pData;
1065
0
    GDALDataType eDTMem = eBufType;
1066
0
    if (eBufType != eDataType)
1067
0
    {
1068
0
        nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1069
0
        nLSMem = nPSMem * nBufXSize;
1070
0
        pTempBuffer =
1071
0
            VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem));
1072
0
        if (pTempBuffer == nullptr)
1073
0
            return CE_Failure;
1074
0
        pDataMem = pTempBuffer;
1075
0
        eDTMem = eDataType;
1076
0
    }
1077
1078
0
    poMEMDS =
1079
0
        MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1080
0
                           nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr);
1081
0
    GByte *pabyData = static_cast<GByte *>(pDataMem) -
1082
0
                      nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual;
1083
0
    GDALRasterBandH hMEMBand = MEMCreateRasterBandEx(
1084
0
        poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false);
1085
0
    poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand));
1086
1087
0
    const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1088
0
    const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0;
1089
0
    if (pszNBITS)
1090
0
        GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem(
1091
0
            "NBITS", pszNBITS, "IMAGE_STRUCTURE");
1092
1093
0
    CPLErr eErr = CE_None;
1094
1095
    // Do the resampling.
1096
0
    if (bUseWarp)
1097
0
    {
1098
0
        int bHasNoData = FALSE;
1099
0
        double dfNoDataValue = GetNoDataValue(&bHasNoData);
1100
1101
0
        VRTDatasetH hVRTDS = nullptr;
1102
0
        GDALRasterBandH hVRTBand = nullptr;
1103
0
        if (GetDataset() == nullptr)
1104
0
        {
1105
            /* Create VRT dataset that wraps the whole dataset */
1106
0
            hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1107
0
            VRTAddBand(hVRTDS, eDataType, nullptr);
1108
0
            hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1109
0
            VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize,
1110
0
                               0, 0, nRasterXSize, nRasterYSize, nullptr,
1111
0
                               VRT_NODATA_UNSET);
1112
1113
            /* Add a mask band if needed */
1114
0
            if (GetMaskFlags() != GMF_ALL_VALID)
1115
0
            {
1116
0
                GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0);
1117
0
                VRTSourcedRasterBand *poVRTMaskBand =
1118
0
                    reinterpret_cast<VRTSourcedRasterBand *>(
1119
0
                        reinterpret_cast<GDALRasterBand *>(hVRTBand)
1120
0
                            ->GetMaskBand());
1121
0
                poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize,
1122
0
                                                 nRasterYSize, 0, 0,
1123
0
                                                 nRasterXSize, nRasterYSize);
1124
0
            }
1125
0
        }
1126
1127
0
        GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions();
1128
0
        switch (psExtraArg->eResampleAlg)
1129
0
        {
1130
0
            case GRIORA_NearestNeighbour:
1131
0
                psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1132
0
                break;
1133
0
            case GRIORA_Bilinear:
1134
0
                psWarpOptions->eResampleAlg = GRA_Bilinear;
1135
0
                break;
1136
0
            case GRIORA_Cubic:
1137
0
                psWarpOptions->eResampleAlg = GRA_Cubic;
1138
0
                break;
1139
0
            case GRIORA_CubicSpline:
1140
0
                psWarpOptions->eResampleAlg = GRA_CubicSpline;
1141
0
                break;
1142
0
            case GRIORA_Lanczos:
1143
0
                psWarpOptions->eResampleAlg = GRA_Lanczos;
1144
0
                break;
1145
0
            case GRIORA_Average:
1146
0
                psWarpOptions->eResampleAlg = GRA_Average;
1147
0
                break;
1148
0
            case GRIORA_RMS:
1149
0
                psWarpOptions->eResampleAlg = GRA_RMS;
1150
0
                break;
1151
0
            case GRIORA_Mode:
1152
0
                psWarpOptions->eResampleAlg = GRA_Mode;
1153
0
                break;
1154
0
            default:
1155
0
                CPLAssert(false);
1156
0
                psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1157
0
                break;
1158
0
        }
1159
0
        psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1160
0
        psWarpOptions->hDstDS = poMEMDS;
1161
0
        psWarpOptions->nBandCount = 1;
1162
0
        int nSrcBandNumber = hVRTDS ? 1 : nBand;
1163
0
        int nDstBandNumber = 1;
1164
0
        psWarpOptions->panSrcBands = &nSrcBandNumber;
1165
0
        psWarpOptions->panDstBands = &nDstBandNumber;
1166
0
        psWarpOptions->pfnProgress = psExtraArg->pfnProgress
1167
0
                                         ? psExtraArg->pfnProgress
1168
0
                                         : GDALDummyProgress;
1169
0
        psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1170
0
        psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1171
0
        if (bHasNoData)
1172
0
        {
1173
0
            psWarpOptions->papszWarpOptions = CSLSetNameValue(
1174
0
                psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA");
1175
0
            if (psWarpOptions->padfSrcNoDataReal == nullptr)
1176
0
            {
1177
0
                psWarpOptions->padfSrcNoDataReal =
1178
0
                    static_cast<double *>(CPLMalloc(sizeof(double)));
1179
0
                psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1180
0
            }
1181
1182
0
            if (psWarpOptions->padfDstNoDataReal == nullptr)
1183
0
            {
1184
0
                psWarpOptions->padfDstNoDataReal =
1185
0
                    static_cast<double *>(CPLMalloc(sizeof(double)));
1186
0
                psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1187
0
            }
1188
0
        }
1189
1190
0
        GDALRasterIOTransformerStruct sTransformer;
1191
0
        sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1192
0
        sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1193
0
        sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1194
0
        sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1195
0
        psWarpOptions->pTransformerArg = &sTransformer;
1196
1197
0
        GDALWarpOperationH hWarpOperation =
1198
0
            GDALCreateWarpOperation(psWarpOptions);
1199
0
        eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual,
1200
0
                                     nDestYOffVirtual, nBufXSize, nBufYSize);
1201
0
        GDALDestroyWarpOperation(hWarpOperation);
1202
1203
0
        psWarpOptions->panSrcBands = nullptr;
1204
0
        psWarpOptions->panDstBands = nullptr;
1205
0
        GDALDestroyWarpOptions(psWarpOptions);
1206
1207
0
        if (hVRTDS)
1208
0
            GDALClose(hVRTDS);
1209
0
    }
1210
0
    else
1211
0
    {
1212
0
        const char *pszResampling =
1213
0
            (psExtraArg->eResampleAlg == GRIORA_Bilinear)      ? "BILINEAR"
1214
0
            : (psExtraArg->eResampleAlg == GRIORA_Cubic)       ? "CUBIC"
1215
0
            : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1216
0
            : (psExtraArg->eResampleAlg == GRIORA_Lanczos)     ? "LANCZOS"
1217
0
            : (psExtraArg->eResampleAlg == GRIORA_Average)     ? "AVERAGE"
1218
0
            : (psExtraArg->eResampleAlg == GRIORA_RMS)         ? "RMS"
1219
0
            : (psExtraArg->eResampleAlg == GRIORA_Mode)        ? "MODE"
1220
0
            : (psExtraArg->eResampleAlg == GRIORA_Gauss)       ? "GAUSS"
1221
0
                                                               : "UNKNOWN";
1222
1223
0
        int nKernelRadius = 0;
1224
0
        GDALResampleFunction pfnResampleFunc =
1225
0
            GDALGetResampleFunction(pszResampling, &nKernelRadius);
1226
0
        CPLAssert(pfnResampleFunc);
1227
0
        GDALDataType eWrkDataType =
1228
0
            GDALGetOvrWorkDataType(pszResampling, eDataType);
1229
0
        int nHasNoData = 0;
1230
0
        double dfNoDataValue = GetNoDataValue(&nHasNoData);
1231
0
        const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
1232
0
        if (!bHasNoData)
1233
0
            dfNoDataValue = 0.0;
1234
1235
0
        int nDstBlockXSize = nBufXSize;
1236
0
        int nDstBlockYSize = nBufYSize;
1237
0
        int nFullResXChunk = 0;
1238
0
        int nFullResYChunk = 0;
1239
0
        while (true)
1240
0
        {
1241
0
            nFullResXChunk =
1242
0
                3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1243
0
            nFullResYChunk =
1244
0
                3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1245
0
            if (nFullResXChunk > nRasterXSize)
1246
0
                nFullResXChunk = nRasterXSize;
1247
0
            if (nFullResYChunk > nRasterYSize)
1248
0
                nFullResYChunk = nRasterYSize;
1249
0
            if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1250
0
                (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1251
0
                 1024 * 1024))
1252
0
                break;
1253
            // When operating on the full width of a raster whose block width is
1254
            // the raster width, prefer doing chunks in height.
1255
0
            if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1256
0
                nDstBlockYSize > 1)
1257
0
                nDstBlockYSize /= 2;
1258
            /* Otherwise cut the maximal dimension */
1259
0
            else if (nDstBlockXSize > 1 &&
1260
0
                     (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1261
0
                nDstBlockXSize /= 2;
1262
0
            else
1263
0
                nDstBlockYSize /= 2;
1264
0
        }
1265
1266
0
        int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1267
0
        int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1268
0
        if (nOvrXFactor == 0)
1269
0
            nOvrXFactor = 1;
1270
0
        if (nOvrYFactor == 0)
1271
0
            nOvrYFactor = 1;
1272
0
        int nFullResXSizeQueried =
1273
0
            nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1274
0
        int nFullResYSizeQueried =
1275
0
            nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1276
1277
0
        if (nFullResXSizeQueried > nRasterXSize)
1278
0
            nFullResXSizeQueried = nRasterXSize;
1279
0
        if (nFullResYSizeQueried > nRasterYSize)
1280
0
            nFullResYSizeQueried = nRasterYSize;
1281
1282
0
        void *pChunk =
1283
0
            VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
1284
0
                                nFullResXSizeQueried, nFullResYSizeQueried);
1285
0
        GByte *pabyChunkNoDataMask = nullptr;
1286
1287
0
        GDALRasterBand *poMaskBand = GetMaskBand();
1288
0
        int l_nMaskFlags = GetMaskFlags();
1289
1290
0
        bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1291
0
        if (bUseNoDataMask)
1292
0
        {
1293
0
            pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1294
0
                nFullResXSizeQueried, nFullResYSizeQueried));
1295
0
        }
1296
0
        if (pChunk == nullptr ||
1297
0
            (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1298
0
        {
1299
0
            GDALClose(poMEMDS);
1300
0
            CPLFree(pChunk);
1301
0
            CPLFree(pabyChunkNoDataMask);
1302
0
            VSIFree(pTempBuffer);
1303
0
            return CE_Failure;
1304
0
        }
1305
1306
0
        const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) *
1307
0
                                 DIV_ROUND_UP(nBufYSize, nDstBlockYSize);
1308
0
        int nBlocksDone = 0;
1309
1310
0
        int nDstYOff;
1311
0
        for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1312
0
             nDstYOff += nDstBlockYSize)
1313
0
        {
1314
0
            int nDstYCount;
1315
0
            if (nDstYOff + nDstBlockYSize <= nBufYSize)
1316
0
                nDstYCount = nDstBlockYSize;
1317
0
            else
1318
0
                nDstYCount = nBufYSize - nDstYOff;
1319
1320
0
            int nChunkYOff =
1321
0
                nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1322
0
            int nChunkYOff2 = nYOff + 1 +
1323
0
                              static_cast<int>(ceil((nDstYOff + nDstYCount) *
1324
0
                                                    dfYRatioDstToSrc));
1325
0
            if (nChunkYOff2 > nRasterYSize)
1326
0
                nChunkYOff2 = nRasterYSize;
1327
0
            int nYCount = nChunkYOff2 - nChunkYOff;
1328
0
            CPLAssert(nYCount <= nFullResYChunk);
1329
1330
0
            int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1331
0
            int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1332
0
            if (nChunkYOffQueried < 0)
1333
0
            {
1334
0
                nChunkYSizeQueried += nChunkYOffQueried;
1335
0
                nChunkYOffQueried = 0;
1336
0
            }
1337
0
            if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1338
0
                nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1339
0
            CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1340
1341
0
            int nDstXOff = 0;
1342
0
            for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1343
0
                 nDstXOff += nDstBlockXSize)
1344
0
            {
1345
0
                int nDstXCount = 0;
1346
0
                if (nDstXOff + nDstBlockXSize <= nBufXSize)
1347
0
                    nDstXCount = nDstBlockXSize;
1348
0
                else
1349
0
                    nDstXCount = nBufXSize - nDstXOff;
1350
1351
0
                int nChunkXOff =
1352
0
                    nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1353
0
                int nChunkXOff2 =
1354
0
                    nXOff + 1 +
1355
0
                    static_cast<int>(
1356
0
                        ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1357
0
                if (nChunkXOff2 > nRasterXSize)
1358
0
                    nChunkXOff2 = nRasterXSize;
1359
0
                int nXCount = nChunkXOff2 - nChunkXOff;
1360
0
                CPLAssert(nXCount <= nFullResXChunk);
1361
1362
0
                int nChunkXOffQueried =
1363
0
                    nChunkXOff - nKernelRadius * nOvrXFactor;
1364
0
                int nChunkXSizeQueried =
1365
0
                    nXCount + 2 * nKernelRadius * nOvrXFactor;
1366
0
                if (nChunkXOffQueried < 0)
1367
0
                {
1368
0
                    nChunkXSizeQueried += nChunkXOffQueried;
1369
0
                    nChunkXOffQueried = 0;
1370
0
                }
1371
0
                if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1372
0
                    nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1373
0
                CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1374
1375
                // Read the source buffers.
1376
0
                eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1377
0
                                nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1378
0
                                nChunkXSizeQueried, nChunkYSizeQueried,
1379
0
                                eWrkDataType, 0, 0, nullptr);
1380
1381
0
                bool bSkipResample = false;
1382
0
                bool bNoDataMaskFullyOpaque = false;
1383
0
                if (eErr == CE_None && bUseNoDataMask)
1384
0
                {
1385
0
                    eErr = poMaskBand->RasterIO(
1386
0
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1387
0
                        nChunkXSizeQueried, nChunkYSizeQueried,
1388
0
                        pabyChunkNoDataMask, nChunkXSizeQueried,
1389
0
                        nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1390
1391
                    /* Optimizations if mask is fully opaque or transparent */
1392
0
                    int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1393
0
                    GByte bVal = pabyChunkNoDataMask[0];
1394
0
                    int i = 1;
1395
0
                    for (; i < nPixels; i++)
1396
0
                    {
1397
0
                        if (pabyChunkNoDataMask[i] != bVal)
1398
0
                            break;
1399
0
                    }
1400
0
                    if (i == nPixels)
1401
0
                    {
1402
0
                        if (bVal == 0)
1403
0
                        {
1404
0
                            for (int j = 0; j < nDstYCount; j++)
1405
0
                            {
1406
0
                                GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0,
1407
0
                                                static_cast<GByte *>(pDataMem) +
1408
0
                                                    nLSMem * (j + nDstYOff) +
1409
0
                                                    nDstXOff * nPSMem,
1410
0
                                                eDTMem,
1411
0
                                                static_cast<int>(nPSMem),
1412
0
                                                nDstXCount);
1413
0
                            }
1414
0
                            bSkipResample = true;
1415
0
                        }
1416
0
                        else
1417
0
                        {
1418
0
                            bNoDataMaskFullyOpaque = true;
1419
0
                        }
1420
0
                    }
1421
0
                }
1422
1423
0
                if (!bSkipResample && eErr == CE_None)
1424
0
                {
1425
0
                    const bool bPropagateNoData = false;
1426
0
                    void *pDstBuffer = nullptr;
1427
0
                    GDALDataType eDstBufferDataType = GDT_Unknown;
1428
0
                    GDALRasterBand *poMEMBand =
1429
0
                        GDALRasterBand::FromHandle(hMEMBand);
1430
0
                    GDALOverviewResampleArgs args;
1431
0
                    args.eSrcDataType = eDataType;
1432
0
                    args.eOvrDataType = poMEMBand->GetRasterDataType();
1433
0
                    args.nOvrXSize = poMEMBand->GetXSize();
1434
0
                    args.nOvrYSize = poMEMBand->GetYSize();
1435
0
                    args.nOvrNBITS = nNBITS;
1436
0
                    args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1437
0
                    args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1438
0
                    args.dfSrcXDelta =
1439
0
                        dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1440
0
                    args.dfSrcYDelta =
1441
0
                        dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1442
0
                    args.eWrkDataType = eWrkDataType;
1443
0
                    args.pabyChunkNodataMask =
1444
0
                        bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask;
1445
0
                    args.nChunkXOff =
1446
0
                        nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1447
0
                    args.nChunkXSize = nChunkXSizeQueried;
1448
0
                    args.nChunkYOff =
1449
0
                        nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1450
0
                    args.nChunkYSize = nChunkYSizeQueried;
1451
0
                    args.nDstXOff = nDstXOff + nDestXOffVirtual;
1452
0
                    args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount;
1453
0
                    args.nDstYOff = nDstYOff + nDestYOffVirtual;
1454
0
                    args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount;
1455
0
                    args.pszResampling = pszResampling;
1456
0
                    args.bHasNoData = bHasNoData;
1457
0
                    args.dfNoDataValue = dfNoDataValue;
1458
0
                    args.poColorTable = GetColorTable();
1459
0
                    args.bPropagateNoData = bPropagateNoData;
1460
0
                    eErr = pfnResampleFunc(args, pChunk, &pDstBuffer,
1461
0
                                           &eDstBufferDataType);
1462
0
                    if (eErr == CE_None)
1463
0
                    {
1464
0
                        eErr = poMEMBand->RasterIO(
1465
0
                            GF_Write, nDstXOff + nDestXOffVirtual,
1466
0
                            nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount,
1467
0
                            pDstBuffer, nDstXCount, nDstYCount,
1468
0
                            eDstBufferDataType, 0, 0, nullptr);
1469
0
                    }
1470
0
                    CPLFree(pDstBuffer);
1471
0
                }
1472
1473
0
                nBlocksDone++;
1474
0
                if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1475
0
                    !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1476
0
                                             "", psExtraArg->pProgressData))
1477
0
                {
1478
0
                    eErr = CE_Failure;
1479
0
                }
1480
0
            }
1481
0
        }
1482
1483
0
        CPLFree(pChunk);
1484
0
        CPLFree(pabyChunkNoDataMask);
1485
0
    }
1486
1487
0
    if (eBufType != eDataType)
1488
0
    {
1489
0
        CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(
1490
0
            GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
1491
0
            pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
1492
0
            nullptr));
1493
0
    }
1494
0
    GDALClose(poMEMDS);
1495
0
    VSIFree(pTempBuffer);
1496
1497
0
    return eErr;
1498
0
}
1499
1500
/************************************************************************/
1501
/*                          RasterIOResampled()                         */
1502
/************************************************************************/
1503
1504
CPLErr GDALDataset::RasterIOResampled(
1505
    GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1506
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1507
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
1508
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
1509
1510
0
{
1511
#if 0
1512
    // Determine if we use warping resampling or overview resampling
1513
    bool bUseWarp = false;
1514
    if( GDALDataTypeIsComplex( eDataType ) )
1515
        bUseWarp = true;
1516
#endif
1517
1518
0
    double dfXOff = nXOff;
1519
0
    double dfYOff = nYOff;
1520
0
    double dfXSize = nXSize;
1521
0
    double dfYSize = nYSize;
1522
0
    if (psExtraArg->bFloatingPointWindowValidity)
1523
0
    {
1524
0
        dfXOff = psExtraArg->dfXOff;
1525
0
        dfYOff = psExtraArg->dfYOff;
1526
0
        dfXSize = psExtraArg->dfXSize;
1527
0
        dfYSize = psExtraArg->dfYSize;
1528
0
    }
1529
1530
0
    const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1531
0
    const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1532
1533
    // Determine the coordinates in the "virtual" output raster to see
1534
    // if they are integers, in which case we will use them as a shift
1535
    // so that subwindow extracts give the exact same results as entire raster
1536
    // scaling.
1537
0
    double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1538
0
    bool bHasXOffVirtual = false;
1539
0
    int nDestXOffVirtual = 0;
1540
0
    if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1541
0
    {
1542
0
        bHasXOffVirtual = true;
1543
0
        dfXOff = nXOff;
1544
0
        nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1545
0
    }
1546
1547
0
    double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1548
0
    bool bHasYOffVirtual = false;
1549
0
    int nDestYOffVirtual = 0;
1550
0
    if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1551
0
    {
1552
0
        bHasYOffVirtual = true;
1553
0
        dfYOff = nYOff;
1554
0
        nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1555
0
    }
1556
1557
    // Create a MEM dataset that wraps the output buffer.
1558
0
    GDALDataset *poMEMDS =
1559
0
        MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1560
0
                           nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr);
1561
0
    GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>(
1562
0
        CPLMalloc(nBandCount * sizeof(GDALRasterBand *)));
1563
0
    int nNBITS = 0;
1564
0
    for (int i = 0; i < nBandCount; i++)
1565
0
    {
1566
0
        char szBuffer[32] = {'\0'};
1567
0
        int nRet = CPLPrintPointer(
1568
0
            szBuffer,
1569
0
            static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual -
1570
0
                nLineSpace * nDestYOffVirtual + nBandSpace * i,
1571
0
            sizeof(szBuffer));
1572
0
        szBuffer[nRet] = 0;
1573
1574
0
        char szBuffer0[64] = {'\0'};
1575
0
        snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
1576
1577
0
        char szBuffer1[64] = {'\0'};
1578
0
        snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB,
1579
0
                 static_cast<GIntBig>(nPixelSpace));
1580
1581
0
        char szBuffer2[64] = {'\0'};
1582
0
        snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB,
1583
0
                 static_cast<GIntBig>(nLineSpace));
1584
1585
0
        char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr};
1586
1587
0
        poMEMDS->AddBand(eBufType, apszOptions);
1588
1589
0
        GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]);
1590
0
        papoDstBands[i] = poMEMDS->GetRasterBand(i + 1);
1591
0
        const char *pszNBITS =
1592
0
            poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1593
0
        if (pszNBITS)
1594
0
        {
1595
0
            nNBITS = atoi(pszNBITS);
1596
0
            poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS,
1597
0
                                                           "IMAGE_STRUCTURE");
1598
0
        }
1599
0
    }
1600
1601
0
    CPLErr eErr = CE_None;
1602
1603
    // TODO(schwehr): Why disabled?  Why not just delete?
1604
    // Looks like this code was initially added as disabled by copying
1605
    // from RasterIO here:
1606
    // https://trac.osgeo.org/gdal/changeset/29572
1607
#if 0
1608
    // Do the resampling.
1609
    if( bUseWarp )
1610
    {
1611
        VRTDatasetH hVRTDS = nullptr;
1612
        GDALRasterBandH hVRTBand = nullptr;
1613
        if( GetDataset() == nullptr )
1614
        {
1615
            /* Create VRT dataset that wraps the whole dataset */
1616
            hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1617
            VRTAddBand( hVRTDS, eDataType, nullptr );
1618
            hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1619
            VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1620
                                (GDALRasterBandH)this,
1621
                                0, 0,
1622
                                nRasterXSize, nRasterYSize,
1623
                                0, 0,
1624
                                nRasterXSize, nRasterYSize,
1625
                                nullptr, VRT_NODATA_UNSET );
1626
1627
            /* Add a mask band if needed */
1628
            if( GetMaskFlags() != GMF_ALL_VALID )
1629
            {
1630
                ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1631
                VRTSourcedRasterBand* poVRTMaskBand =
1632
                    (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1633
                poVRTMaskBand->
1634
                    AddMaskBandSource( this,
1635
                                    0, 0,
1636
                                    nRasterXSize, nRasterYSize,
1637
                                    0, 0,
1638
                                    nRasterXSize, nRasterYSize);
1639
            }
1640
        }
1641
1642
        GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1643
        psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1644
        psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1645
        psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1646
        psWarpOptions->nBandCount = 1;
1647
        int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1648
        int nDstBandNumber = 1;
1649
        psWarpOptions->panSrcBands = &nSrcBandNumber;
1650
        psWarpOptions->panDstBands = &nDstBandNumber;
1651
        psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1652
                    psExtraArg->pfnProgress : GDALDummyProgress;
1653
        psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1654
        psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1655
        GDALRasterIOTransformerStruct sTransformer;
1656
        sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1657
        sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1658
        sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1659
        sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1660
        psWarpOptions->pTransformerArg = &sTransformer;
1661
1662
        GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1663
        eErr = GDALChunkAndWarpImage( hWarpOperation,
1664
                                      nDestXOffVirtual, nDestYOffVirtual,
1665
                                      nBufXSize, nBufYSize );
1666
        GDALDestroyWarpOperation( hWarpOperation );
1667
1668
        psWarpOptions->panSrcBands = nullptr;
1669
        psWarpOptions->panDstBands = nullptr;
1670
        GDALDestroyWarpOptions( psWarpOptions );
1671
1672
        if( hVRTDS )
1673
            GDALClose(hVRTDS);
1674
    }
1675
    else
1676
#endif
1677
0
    {
1678
0
        const char *pszResampling =
1679
0
            (psExtraArg->eResampleAlg == GRIORA_Bilinear)      ? "BILINEAR"
1680
0
            : (psExtraArg->eResampleAlg == GRIORA_Cubic)       ? "CUBIC"
1681
0
            : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1682
0
            : (psExtraArg->eResampleAlg == GRIORA_Lanczos)     ? "LANCZOS"
1683
0
            : (psExtraArg->eResampleAlg == GRIORA_Average)     ? "AVERAGE"
1684
0
            : (psExtraArg->eResampleAlg == GRIORA_RMS)         ? "RMS"
1685
0
            : (psExtraArg->eResampleAlg == GRIORA_Mode)        ? "MODE"
1686
0
            : (psExtraArg->eResampleAlg == GRIORA_Gauss)       ? "GAUSS"
1687
0
                                                               : "UNKNOWN";
1688
1689
0
        GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]);
1690
0
        GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1691
0
        int nBlockXSize, nBlockYSize;
1692
0
        poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1693
1694
0
        int nKernelRadius;
1695
0
        GDALResampleFunction pfnResampleFunc =
1696
0
            GDALGetResampleFunction(pszResampling, &nKernelRadius);
1697
0
        CPLAssert(pfnResampleFunc);
1698
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1699
        GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1700
            GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1701
#endif
1702
0
        GDALDataType eWrkDataType =
1703
0
            GDALGetOvrWorkDataType(pszResampling, eDataType);
1704
1705
0
        int nDstBlockXSize = nBufXSize;
1706
0
        int nDstBlockYSize = nBufYSize;
1707
0
        int nFullResXChunk, nFullResYChunk;
1708
0
        while (true)
1709
0
        {
1710
0
            nFullResXChunk =
1711
0
                3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1712
0
            nFullResYChunk =
1713
0
                3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1714
0
            if (nFullResXChunk > nRasterXSize)
1715
0
                nFullResXChunk = nRasterXSize;
1716
0
            if (nFullResYChunk > nRasterYSize)
1717
0
                nFullResYChunk = nRasterYSize;
1718
0
            if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1719
0
                (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1720
0
                 1024 * 1024))
1721
0
                break;
1722
            // When operating on the full width of a raster whose block width is
1723
            // the raster width, prefer doing chunks in height.
1724
0
            if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1725
0
                nDstBlockYSize > 1)
1726
0
                nDstBlockYSize /= 2;
1727
            /* Otherwise cut the maximal dimension */
1728
0
            else if (nDstBlockXSize > 1 &&
1729
0
                     (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1730
0
                nDstBlockXSize /= 2;
1731
0
            else
1732
0
                nDstBlockYSize /= 2;
1733
0
        }
1734
1735
0
        int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc),
1736
0
                                  static_cast<int>(0.5 + dfYRatioDstToSrc));
1737
0
        if (nOvrFactor == 0)
1738
0
            nOvrFactor = 1;
1739
0
        int nFullResXSizeQueried =
1740
0
            nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1741
0
        int nFullResYSizeQueried =
1742
0
            nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1743
1744
0
        if (nFullResXSizeQueried > nRasterXSize)
1745
0
            nFullResXSizeQueried = nRasterXSize;
1746
0
        if (nFullResYSizeQueried > nRasterYSize)
1747
0
            nFullResYSizeQueried = nRasterYSize;
1748
1749
0
        void *pChunk = VSI_MALLOC3_VERBOSE(
1750
0
            cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) *
1751
0
                              nBandCount),
1752
0
            nFullResXSizeQueried, nFullResYSizeQueried);
1753
0
        GByte *pabyChunkNoDataMask = nullptr;
1754
1755
0
        GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand();
1756
0
        int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1757
1758
0
        bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1759
0
        if (bUseNoDataMask)
1760
0
        {
1761
0
            pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1762
0
                nFullResXSizeQueried, nFullResYSizeQueried));
1763
0
        }
1764
0
        if (pChunk == nullptr ||
1765
0
            (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1766
0
        {
1767
0
            GDALClose(poMEMDS);
1768
0
            CPLFree(pChunk);
1769
0
            CPLFree(pabyChunkNoDataMask);
1770
0
            CPLFree(papoDstBands);
1771
0
            return CE_Failure;
1772
0
        }
1773
1774
0
        const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) *
1775
0
                                 DIV_ROUND_UP(nBufYSize, nDstBlockYSize);
1776
0
        int nBlocksDone = 0;
1777
1778
0
        int nDstYOff;
1779
0
        for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1780
0
             nDstYOff += nDstBlockYSize)
1781
0
        {
1782
0
            int nDstYCount;
1783
0
            if (nDstYOff + nDstBlockYSize <= nBufYSize)
1784
0
                nDstYCount = nDstBlockYSize;
1785
0
            else
1786
0
                nDstYCount = nBufYSize - nDstYOff;
1787
1788
0
            int nChunkYOff =
1789
0
                nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1790
0
            int nChunkYOff2 = nYOff + 1 +
1791
0
                              static_cast<int>(ceil((nDstYOff + nDstYCount) *
1792
0
                                                    dfYRatioDstToSrc));
1793
0
            if (nChunkYOff2 > nRasterYSize)
1794
0
                nChunkYOff2 = nRasterYSize;
1795
0
            int nYCount = nChunkYOff2 - nChunkYOff;
1796
0
            CPLAssert(nYCount <= nFullResYChunk);
1797
1798
0
            int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1799
0
            int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1800
0
            if (nChunkYOffQueried < 0)
1801
0
            {
1802
0
                nChunkYSizeQueried += nChunkYOffQueried;
1803
0
                nChunkYOffQueried = 0;
1804
0
            }
1805
0
            if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1806
0
                nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1807
0
            CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1808
1809
0
            int nDstXOff;
1810
0
            for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1811
0
                 nDstXOff += nDstBlockXSize)
1812
0
            {
1813
0
                int nDstXCount;
1814
0
                if (nDstXOff + nDstBlockXSize <= nBufXSize)
1815
0
                    nDstXCount = nDstBlockXSize;
1816
0
                else
1817
0
                    nDstXCount = nBufXSize - nDstXOff;
1818
1819
0
                int nChunkXOff =
1820
0
                    nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1821
0
                int nChunkXOff2 =
1822
0
                    nXOff + 1 +
1823
0
                    static_cast<int>(
1824
0
                        ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1825
0
                if (nChunkXOff2 > nRasterXSize)
1826
0
                    nChunkXOff2 = nRasterXSize;
1827
0
                int nXCount = nChunkXOff2 - nChunkXOff;
1828
0
                CPLAssert(nXCount <= nFullResXChunk);
1829
1830
0
                int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1831
0
                int nChunkXSizeQueried =
1832
0
                    nXCount + 2 * nKernelRadius * nOvrFactor;
1833
0
                if (nChunkXOffQueried < 0)
1834
0
                {
1835
0
                    nChunkXSizeQueried += nChunkXOffQueried;
1836
0
                    nChunkXOffQueried = 0;
1837
0
                }
1838
0
                if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1839
0
                    nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1840
0
                CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1841
1842
0
                bool bSkipResample = false;
1843
0
                bool bNoDataMaskFullyOpaque = false;
1844
0
                if (eErr == CE_None && bUseNoDataMask)
1845
0
                {
1846
0
                    eErr = poMaskBand->RasterIO(
1847
0
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1848
0
                        nChunkXSizeQueried, nChunkYSizeQueried,
1849
0
                        pabyChunkNoDataMask, nChunkXSizeQueried,
1850
0
                        nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1851
1852
                    /* Optimizations if mask is fully opaque or transparent */
1853
0
                    const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1854
0
                    const GByte bVal = pabyChunkNoDataMask[0];
1855
0
                    int i = 1;  // Used after for.
1856
0
                    for (; i < nPixels; i++)
1857
0
                    {
1858
0
                        if (pabyChunkNoDataMask[i] != bVal)
1859
0
                            break;
1860
0
                    }
1861
0
                    if (i == nPixels)
1862
0
                    {
1863
0
                        if (bVal == 0)
1864
0
                        {
1865
0
                            GByte abyZero[16] = {0};
1866
0
                            for (int iBand = 0; iBand < nBandCount; iBand++)
1867
0
                            {
1868
0
                                for (int j = 0; j < nDstYCount; j++)
1869
0
                                {
1870
0
                                    GDALCopyWords64(
1871
0
                                        abyZero, GDT_Byte, 0,
1872
0
                                        static_cast<GByte *>(pData) +
1873
0
                                            iBand * nBandSpace +
1874
0
                                            nLineSpace * (j + nDstYOff) +
1875
0
                                            nDstXOff * nPixelSpace,
1876
0
                                        eBufType, static_cast<int>(nPixelSpace),
1877
0
                                        nDstXCount);
1878
0
                                }
1879
0
                            }
1880
0
                            bSkipResample = true;
1881
0
                        }
1882
0
                        else
1883
0
                        {
1884
0
                            bNoDataMaskFullyOpaque = true;
1885
0
                        }
1886
0
                    }
1887
0
                }
1888
1889
0
                if (!bSkipResample && eErr == CE_None)
1890
0
                {
1891
                    /* Read the source buffers */
1892
0
                    eErr = RasterIO(
1893
0
                        GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1894
0
                        nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1895
0
                        nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType,
1896
0
                        nBandCount, panBandMap, 0, 0, 0, nullptr);
1897
0
                }
1898
1899
#ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1900
                if (pfnResampleFuncMultiBands && !bSkipResample &&
1901
                    eErr == CE_None)
1902
                {
1903
                    eErr = pfnResampleFuncMultiBands(
1904
                        dfXRatioDstToSrc, dfYRatioDstToSrc,
1905
                        dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1906
                        dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1907
                        eWrkDataType, (GByte *)pChunk, nBandCount,
1908
                        bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1909
                        nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1910
                        nChunkXSizeQueried,
1911
                        nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1912
                        nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1913
                        nDstXOff + nDestXOffVirtual + nDstXCount,
1914
                        nDstYOff + nDestYOffVirtual,
1915
                        nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands,
1916
                        pszResampling, FALSE /*bHasNoData*/,
1917
                        0.0 /* dfNoDataValue */, nullptr /* color table*/,
1918
                        eDataType);
1919
                }
1920
                else
1921
#endif
1922
0
                {
1923
0
                    size_t nChunkBandOffset =
1924
0
                        static_cast<size_t>(nChunkXSizeQueried) *
1925
0
                        nChunkYSizeQueried *
1926
0
                        GDALGetDataTypeSizeBytes(eWrkDataType);
1927
0
                    for (int i = 0;
1928
0
                         i < nBandCount && !bSkipResample && eErr == CE_None;
1929
0
                         i++)
1930
0
                    {
1931
0
                        const bool bPropagateNoData = false;
1932
0
                        void *pDstBuffer = nullptr;
1933
0
                        GDALDataType eDstBufferDataType = GDT_Unknown;
1934
0
                        GDALRasterBand *poMEMBand =
1935
0
                            poMEMDS->GetRasterBand(i + 1);
1936
0
                        GDALOverviewResampleArgs args;
1937
0
                        args.eSrcDataType = eDataType;
1938
0
                        args.eOvrDataType = poMEMBand->GetRasterDataType();
1939
0
                        args.nOvrXSize = poMEMBand->GetXSize();
1940
0
                        args.nOvrYSize = poMEMBand->GetYSize();
1941
0
                        args.nOvrNBITS = nNBITS;
1942
0
                        args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1943
0
                        args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1944
0
                        args.dfSrcXDelta =
1945
0
                            dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1946
0
                        args.dfSrcYDelta =
1947
0
                            dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1948
0
                        args.eWrkDataType = eWrkDataType;
1949
0
                        args.pabyChunkNodataMask = bNoDataMaskFullyOpaque
1950
0
                                                       ? nullptr
1951
0
                                                       : pabyChunkNoDataMask;
1952
0
                        args.nChunkXOff =
1953
0
                            nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1954
0
                        args.nChunkXSize = nChunkXSizeQueried;
1955
0
                        args.nChunkYOff =
1956
0
                            nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1957
0
                        args.nChunkYSize = nChunkYSizeQueried;
1958
0
                        args.nDstXOff = nDstXOff + nDestXOffVirtual;
1959
0
                        args.nDstXOff2 =
1960
0
                            nDstXOff + nDestXOffVirtual + nDstXCount;
1961
0
                        args.nDstYOff = nDstYOff + nDestYOffVirtual;
1962
0
                        args.nDstYOff2 =
1963
0
                            nDstYOff + nDestYOffVirtual + nDstYCount;
1964
0
                        args.pszResampling = pszResampling;
1965
0
                        args.bHasNoData = false;
1966
0
                        args.dfNoDataValue = 0.0;
1967
0
                        args.poColorTable = nullptr;
1968
0
                        args.bPropagateNoData = bPropagateNoData;
1969
1970
0
                        eErr =
1971
0
                            pfnResampleFunc(args,
1972
0
                                            reinterpret_cast<GByte *>(pChunk) +
1973
0
                                                i * nChunkBandOffset,
1974
0
                                            &pDstBuffer, &eDstBufferDataType);
1975
0
                        if (eErr == CE_None)
1976
0
                        {
1977
0
                            eErr = poMEMBand->RasterIO(
1978
0
                                GF_Write, nDstXOff + nDestXOffVirtual,
1979
0
                                nDstYOff + nDestYOffVirtual, nDstXCount,
1980
0
                                nDstYCount, pDstBuffer, nDstXCount, nDstYCount,
1981
0
                                eDstBufferDataType, 0, 0, nullptr);
1982
0
                        }
1983
0
                        CPLFree(pDstBuffer);
1984
0
                    }
1985
0
                }
1986
1987
0
                nBlocksDone++;
1988
0
                if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1989
0
                    !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1990
0
                                             "", psExtraArg->pProgressData))
1991
0
                {
1992
0
                    eErr = CE_Failure;
1993
0
                }
1994
0
            }
1995
0
        }
1996
1997
0
        CPLFree(pChunk);
1998
0
        CPLFree(pabyChunkNoDataMask);
1999
0
    }
2000
2001
0
    CPLFree(papoDstBands);
2002
0
    GDALClose(poMEMDS);
2003
2004
0
    return eErr;
2005
0
}
2006
2007
//! @endcond
2008
2009
/************************************************************************/
2010
/*                           GDALSwapWords()                            */
2011
/************************************************************************/
2012
2013
/**
2014
 * Byte swap words in-place.
2015
 *
2016
 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2017
 * a memory array.  No assumption is made that the words being swapped are
2018
 * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2019
 * to determine if the current platform is big endian or little endian.  Use
2020
 * The macros like CPL_SWAP32() to byte swap single values without the overhead
2021
 * of a function call.
2022
 *
2023
 * @param pData pointer to start of data buffer.
2024
 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2025
 * @param nWordCount the number of words to be swapped in this call.
2026
 * @param nWordSkip the byte offset from the start of one word to the start of
2027
 * the next. For packed buffers this is the same as nWordSize.
2028
 */
2029
2030
void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount,
2031
                               int nWordSkip)
2032
2033
0
{
2034
0
    if (nWordCount > 0)
2035
0
        VALIDATE_POINTER0(pData, "GDALSwapWords");
2036
2037
0
    GByte *pabyData = static_cast<GByte *>(pData);
2038
2039
0
    switch (nWordSize)
2040
0
    {
2041
0
        case 1:
2042
0
            break;
2043
2044
0
        case 2:
2045
0
            CPLAssert(nWordSkip >= 2 || nWordCount == 1);
2046
0
            for (int i = 0; i < nWordCount; i++)
2047
0
            {
2048
0
                CPL_SWAP16PTR(pabyData);
2049
0
                pabyData += nWordSkip;
2050
0
            }
2051
0
            break;
2052
2053
0
        case 4:
2054
0
            CPLAssert(nWordSkip >= 4 || nWordCount == 1);
2055
0
            if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0)
2056
0
            {
2057
0
                for (int i = 0; i < nWordCount; i++)
2058
0
                {
2059
0
                    *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32(
2060
0
                        *reinterpret_cast<const GUInt32 *>(pabyData));
2061
0
                    pabyData += nWordSkip;
2062
0
                }
2063
0
            }
2064
0
            else
2065
0
            {
2066
0
                for (int i = 0; i < nWordCount; i++)
2067
0
                {
2068
0
                    CPL_SWAP32PTR(pabyData);
2069
0
                    pabyData += nWordSkip;
2070
0
                }
2071
0
            }
2072
0
            break;
2073
2074
0
        case 8:
2075
0
            CPLAssert(nWordSkip >= 8 || nWordCount == 1);
2076
0
            if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0)
2077
0
            {
2078
0
                for (int i = 0; i < nWordCount; i++)
2079
0
                {
2080
0
                    *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64(
2081
0
                        *reinterpret_cast<const GUInt64 *>(pabyData));
2082
0
                    pabyData += nWordSkip;
2083
0
                }
2084
0
            }
2085
0
            else
2086
0
            {
2087
0
                for (int i = 0; i < nWordCount; i++)
2088
0
                {
2089
0
                    CPL_SWAP64PTR(pabyData);
2090
0
                    pabyData += nWordSkip;
2091
0
                }
2092
0
            }
2093
0
            break;
2094
2095
0
        default:
2096
0
            CPLAssert(false);
2097
0
    }
2098
0
}
2099
2100
/************************************************************************/
2101
/*                           GDALSwapWordsEx()                          */
2102
/************************************************************************/
2103
2104
/**
2105
 * Byte swap words in-place.
2106
 *
2107
 * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2108
 * a memory array.  No assumption is made that the words being swapped are
2109
 * word aligned in memory.  Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2110
 * to determine if the current platform is big endian or little endian.  Use
2111
 * The macros like CPL_SWAP32() to byte swap single values without the overhead
2112
 * of a function call.
2113
 *
2114
 * @param pData pointer to start of data buffer.
2115
 * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2116
 * @param nWordCount the number of words to be swapped in this call.
2117
 * @param nWordSkip the byte offset from the start of one word to the start of
2118
 * the next. For packed buffers this is the same as nWordSize.
2119
 * @since GDAL 2.1
2120
 */
2121
void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount,
2122
                                 int nWordSkip)
2123
0
{
2124
0
    GByte *pabyData = static_cast<GByte *>(pData);
2125
0
    while (nWordCount)
2126
0
    {
2127
        // Pick-up a multiple of 8 as max chunk size.
2128
0
        const int nWordCountSmall =
2129
0
            (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
2130
0
        GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
2131
0
        pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
2132
0
        nWordCount -= nWordCountSmall;
2133
0
    }
2134
0
}
2135
2136
// Place the new GDALCopyWords helpers in an anonymous namespace
2137
namespace
2138
{
2139
2140
/************************************************************************/
2141
/*                           GDALCopyWordsT()                           */
2142
/************************************************************************/
2143
/**
2144
 * Template function, used to copy data from pSrcData into buffer
2145
 * pDstData, with stride nSrcPixelStride in the source data and
2146
 * stride nDstPixelStride in the destination data. This template can
2147
 * deal with the case where the input data type is real or complex and
2148
 * the output is real.
2149
 *
2150
 * @param pSrcData the source data buffer
2151
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2152
 *                      of interest.
2153
 * @param pDstData the destination buffer.
2154
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2155
 *                      interest.
2156
 * @param nWordCount the total number of pixel words to copy
2157
 *
2158
 * @code
2159
 * // Assume an input buffer of type GUInt16 named pBufferIn
2160
 * GByte *pBufferOut = new GByte[numBytesOut];
2161
 * GDALCopyWordsT<GUInt16, GByte>(pBufferIn, 2, pBufferOut, 1, numBytesOut);
2162
 * @endcode
2163
 * @note
2164
 * This is a private function, and should not be exposed outside of
2165
 * rasterio.cpp. External users should call the GDALCopyWords driver function.
2166
 */
2167
2168
template <class Tin, class Tout>
2169
static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData,
2170
                                         int nSrcPixelStride,
2171
                                         Tout *const CPL_RESTRICT pDstData,
2172
                                         int nDstPixelStride,
2173
                                         GPtrDiff_t nWordCount)
2174
0
{
2175
0
    decltype(nWordCount) nDstOffset = 0;
2176
2177
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2178
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2179
0
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2180
0
    {
2181
0
        const Tin tValue =
2182
0
            *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2183
0
        Tout *const pOutPixel =
2184
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2185
2186
0
        GDALCopyWord(tValue, *pOutPixel);
2187
2188
0
        nDstOffset += nDstPixelStride;
2189
0
    }
2190
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, signed char>(unsigned char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned short>(unsigned char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, short>(unsigned char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned int>(unsigned char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, int>(unsigned char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, long>(unsigned char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, float>(unsigned char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, double>(unsigned char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned char>(signed char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, signed char>(signed char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, long>(signed char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned char>(unsigned short const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, signed char>(unsigned short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, short>(unsigned short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, long>(unsigned short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, float>(unsigned short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, double>(unsigned short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned char>(short const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, signed char>(short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned short>(short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned int>(short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned long>(short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, long>(short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, long>(unsigned int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned char>(int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, signed char>(int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned short>(int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned int>(int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned long>(int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, long>(int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned long>(unsigned long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, long>(unsigned long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned char>(long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, signed char>(long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned short>(long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned int>(long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned long>(long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, long>(long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, signed char>(float const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned int>(float const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned long>(float const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, long>(float const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned char>(double const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, signed char>(double const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned int>(double const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned long>(double const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, long>(double const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, float>(double const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, double>(double const*, int, double*, int, long long)
2191
2192
template <class Tin, class Tout>
2193
static void inline GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
2194
                                  int nSrcPixelStride,
2195
                                  Tout *const CPL_RESTRICT pDstData,
2196
                                  int nDstPixelStride, GPtrDiff_t nWordCount)
2197
0
{
2198
0
    GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
2199
0
                          nWordCount);
2200
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, signed char>(unsigned char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, long>(unsigned char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned char>(signed char const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, signed char>(signed char const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, long>(signed char const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, signed char>(unsigned short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, long>(unsigned short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned char>(short const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, signed char>(short const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned short>(short const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned int>(short const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned long>(short const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, long>(short const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, long>(unsigned int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned char>(int const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, signed char>(int const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned short>(int const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned int>(int const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned long>(int const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, long>(int const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned long>(unsigned long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, long>(unsigned long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned char>(long const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, signed char>(long const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned short>(long const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned int>(long const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned long>(long const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, long>(long const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, signed char>(float const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned int>(float const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned long>(float const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, long>(float const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned char>(double const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, signed char>(double const*, int, signed char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned int>(double const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned long>(double const*, int, unsigned long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, long>(double const*, int, long*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, float>(double const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, double>(double const*, int, double*, int, long long)
2201
2202
template <class Tin, class Tout>
2203
static void inline GDALCopyWordsT_8atatime(
2204
    const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2205
    Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2206
    GPtrDiff_t nWordCount)
2207
0
{
2208
0
    decltype(nWordCount) nDstOffset = 0;
2209
2210
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2211
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2212
0
    decltype(nWordCount) n = 0;
2213
0
    if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2214
0
        nDstPixelStride == static_cast<int>(sizeof(Tout)))
2215
0
    {
2216
0
        for (; n < nWordCount - 7; n += 8)
2217
0
        {
2218
0
            const Tin *pInValues = reinterpret_cast<const Tin *>(
2219
0
                pSrcDataPtr + (n * nSrcPixelStride));
2220
0
            Tout *const pOutPixels =
2221
0
                reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2222
2223
0
            GDALCopy8Words(pInValues, pOutPixels);
2224
2225
0
            nDstOffset += 8 * nDstPixelStride;
2226
0
        }
2227
0
    }
2228
0
    for (; n < nWordCount; n++)
2229
0
    {
2230
0
        const Tin tValue =
2231
0
            *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2232
0
        Tout *const pOutPixel =
2233
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2234
2235
0
        GDALCopyWord(tValue, *pOutPixel);
2236
2237
0
        nDstOffset += nDstPixelStride;
2238
0
    }
2239
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned char>(float const*, int, unsigned char*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned short>(float const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, short>(float const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, unsigned short>(double const*, int, unsigned short*, int, long long)
2240
2241
#ifdef HAVE_SSE2
2242
2243
template <class Tout>
2244
void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
2245
                              int nSrcPixelStride,
2246
                              Tout *const CPL_RESTRICT pDstData,
2247
                              int nDstPixelStride, GPtrDiff_t nWordCount)
2248
0
{
2249
0
    static_assert(std::is_integral<Tout>::value &&
2250
0
                      sizeof(Tout) == sizeof(uint16_t),
2251
0
                  "Bad Tout");
2252
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2253
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2254
0
    {
2255
0
        decltype(nWordCount) n = 0;
2256
0
        const __m128i xmm_zero = _mm_setzero_si128();
2257
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2258
0
            reinterpret_cast<GByte *>(pDstData);
2259
0
        for (; n < nWordCount - 15; n += 16)
2260
0
        {
2261
0
            __m128i xmm = _mm_loadu_si128(
2262
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2263
0
            __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2264
0
            __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2265
0
            _mm_storeu_si128(
2266
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
2267
0
            _mm_storeu_si128(
2268
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
2269
0
        }
2270
0
        for (; n < nWordCount; n++)
2271
0
        {
2272
0
            pDstData[n] = pSrcData[n];
2273
0
        }
2274
0
    }
2275
0
    else
2276
0
    {
2277
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2278
0
                              nDstPixelStride, nWordCount);
2279
0
    }
2280
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<unsigned short>(unsigned char const*, int, unsigned short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<short>(unsigned char const*, int, short*, int, long long)
2281
2282
template <>
2283
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2284
                    int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2285
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2286
0
{
2287
0
    GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2288
0
                             nDstPixelStride, nWordCount);
2289
0
}
2290
2291
template <>
2292
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2293
                    int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2294
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2295
0
{
2296
0
    GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2297
0
                             nDstPixelStride, nWordCount);
2298
0
}
2299
2300
template <class Tout>
2301
void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
2302
                              int nSrcPixelStride,
2303
                              Tout *const CPL_RESTRICT pDstData,
2304
                              int nDstPixelStride, GPtrDiff_t nWordCount)
2305
0
{
2306
0
    static_assert(std::is_integral<Tout>::value &&
2307
0
                      sizeof(Tout) == sizeof(uint32_t),
2308
0
                  "Bad Tout");
2309
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2310
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2311
0
    {
2312
0
        decltype(nWordCount) n = 0;
2313
0
        const __m128i xmm_zero = _mm_setzero_si128();
2314
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2315
0
            reinterpret_cast<GByte *>(pDstData);
2316
0
        for (; n < nWordCount - 15; n += 16)
2317
0
        {
2318
0
            __m128i xmm = _mm_loadu_si128(
2319
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2320
0
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2321
0
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2322
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2323
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2324
0
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2325
0
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2326
0
            _mm_storeu_si128(
2327
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
2328
0
            _mm_storeu_si128(
2329
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
2330
0
            _mm_storeu_si128(
2331
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
2332
0
            _mm_storeu_si128(
2333
0
                reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
2334
0
        }
2335
0
        for (; n < nWordCount; n++)
2336
0
        {
2337
0
            pDstData[n] = pSrcData[n];
2338
0
        }
2339
0
    }
2340
0
    else
2341
0
    {
2342
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2343
0
                              nDstPixelStride, nWordCount);
2344
0
    }
2345
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<unsigned int>(unsigned char const*, int, unsigned int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<int>(unsigned char const*, int, int*, int, long long)
2346
2347
template <>
2348
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2349
                    int nSrcPixelStride, GUInt32 *const CPL_RESTRICT pDstData,
2350
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2351
0
{
2352
0
    GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2353
0
                             nDstPixelStride, nWordCount);
2354
0
}
2355
2356
template <>
2357
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2358
                    int nSrcPixelStride, GInt32 *const CPL_RESTRICT pDstData,
2359
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2360
0
{
2361
0
    GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2362
0
                             nDstPixelStride, nWordCount);
2363
0
}
2364
2365
template <>
2366
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2367
                    int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2368
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2369
0
{
2370
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2371
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2372
0
    {
2373
0
        decltype(nWordCount) n = 0;
2374
0
        const __m128i xmm_zero = _mm_setzero_si128();
2375
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2376
0
            reinterpret_cast<GByte *>(pDstData);
2377
0
        for (; n < nWordCount - 15; n += 16)
2378
0
        {
2379
0
            __m128i xmm = _mm_loadu_si128(
2380
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2381
0
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2382
0
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2383
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2384
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2385
0
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2386
0
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2387
0
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2388
0
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2389
0
            __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2390
0
            __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2391
0
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2392
0
                          xmm0_f);
2393
0
            _mm_storeu_ps(
2394
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2395
0
            _mm_storeu_ps(
2396
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f);
2397
0
            _mm_storeu_ps(
2398
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f);
2399
0
        }
2400
0
        for (; n < nWordCount; n++)
2401
0
        {
2402
0
            pDstData[n] = pSrcData[n];
2403
0
        }
2404
0
    }
2405
0
    else
2406
0
    {
2407
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2408
0
                              nDstPixelStride, nWordCount);
2409
0
    }
2410
0
}
2411
2412
template <>
2413
void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2414
                    int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2415
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2416
0
{
2417
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2418
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2419
0
    {
2420
0
        decltype(nWordCount) n = 0;
2421
0
        const __m128i xmm_zero = _mm_setzero_si128();
2422
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2423
0
            reinterpret_cast<GByte *>(pDstData);
2424
0
        for (; n < nWordCount - 15; n += 16)
2425
0
        {
2426
0
            __m128i xmm = _mm_loadu_si128(
2427
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2428
0
            __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2429
0
            __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2430
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2431
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2432
0
            __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2433
0
            __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2434
2435
0
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2436
0
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2437
0
            __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2438
0
            __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2439
0
            xmm0 = _mm_srli_si128(xmm0, 8);
2440
0
            xmm1 = _mm_srli_si128(xmm1, 8);
2441
0
            xmm2 = _mm_srli_si128(xmm2, 8);
2442
0
            xmm3 = _mm_srli_si128(xmm3, 8);
2443
0
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2444
0
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2445
0
            __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2446
0
            __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2447
2448
0
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2449
0
                          xmm0_low_d);
2450
0
            _mm_storeu_pd(
2451
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2452
0
                xmm0_high_d);
2453
0
            _mm_storeu_pd(
2454
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2455
0
                xmm1_low_d);
2456
0
            _mm_storeu_pd(
2457
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2458
0
                xmm1_high_d);
2459
0
            _mm_storeu_pd(
2460
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2461
0
                xmm2_low_d);
2462
0
            _mm_storeu_pd(
2463
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80),
2464
0
                xmm2_high_d);
2465
0
            _mm_storeu_pd(
2466
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2467
0
                xmm3_low_d);
2468
0
            _mm_storeu_pd(
2469
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112),
2470
0
                xmm3_high_d);
2471
0
        }
2472
0
        for (; n < nWordCount; n++)
2473
0
        {
2474
0
            pDstData[n] = pSrcData[n];
2475
0
        }
2476
0
    }
2477
0
    else
2478
0
    {
2479
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2480
0
                              nDstPixelStride, nWordCount);
2481
0
    }
2482
0
}
2483
2484
template <>
2485
void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2486
                    int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2487
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2488
0
{
2489
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2490
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2491
0
    {
2492
0
        decltype(nWordCount) n = 0;
2493
        // In SSE2, min_epu16 does not exist, so shift from
2494
        // UInt16 to SInt16 to be able to use min_epi16
2495
0
        const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2496
0
        const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768);
2497
0
        for (; n < nWordCount - 7; n += 8)
2498
0
        {
2499
0
            __m128i xmm = _mm_loadu_si128(
2500
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2501
0
            xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2502
0
            xmm = _mm_min_epi16(xmm, xmm_m255_shifted);
2503
0
            xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2504
0
            xmm = _mm_packus_epi16(xmm, xmm);
2505
0
            GDALCopyXMMToInt64(xmm,
2506
0
                               reinterpret_cast<GPtrDiff_t *>(pDstData + n));
2507
0
        }
2508
0
        for (; n < nWordCount; n++)
2509
0
        {
2510
0
            pDstData[n] =
2511
0
                pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]);
2512
0
        }
2513
0
    }
2514
0
    else
2515
0
    {
2516
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2517
0
                              nDstPixelStride, nWordCount);
2518
0
    }
2519
0
}
2520
2521
template <>
2522
void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2523
                    int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2524
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2525
0
{
2526
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2527
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2528
0
    {
2529
0
        decltype(nWordCount) n = 0;
2530
        // In SSE2, min_epu16 does not exist, so shift from
2531
        // UInt16 to SInt16 to be able to use min_epi16
2532
0
        const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2533
0
        const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768);
2534
0
        for (; n < nWordCount - 7; n += 8)
2535
0
        {
2536
0
            __m128i xmm = _mm_loadu_si128(
2537
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2538
0
            xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2539
0
            xmm = _mm_min_epi16(xmm, xmm_32767_shifted);
2540
0
            xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2541
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm);
2542
0
        }
2543
0
        for (; n < nWordCount; n++)
2544
0
        {
2545
0
            pDstData[n] =
2546
0
                pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]);
2547
0
        }
2548
0
    }
2549
0
    else
2550
0
    {
2551
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2552
0
                              nDstPixelStride, nWordCount);
2553
0
    }
2554
0
}
2555
2556
template <>
2557
void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2558
                    int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2559
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2560
0
{
2561
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2562
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2563
0
    {
2564
0
        decltype(nWordCount) n = 0;
2565
0
        const __m128i xmm_zero = _mm_setzero_si128();
2566
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2567
0
            reinterpret_cast<GByte *>(pDstData);
2568
0
        for (; n < nWordCount - 7; n += 8)
2569
0
        {
2570
0
            __m128i xmm = _mm_loadu_si128(
2571
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2572
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2573
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2574
0
            __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2575
0
            __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2576
0
            _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2577
0
                          xmm0_f);
2578
0
            _mm_storeu_ps(
2579
0
                reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2580
0
        }
2581
0
        for (; n < nWordCount; n++)
2582
0
        {
2583
0
            pDstData[n] = pSrcData[n];
2584
0
        }
2585
0
    }
2586
0
    else
2587
0
    {
2588
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2589
0
                              nDstPixelStride, nWordCount);
2590
0
    }
2591
0
}
2592
2593
template <>
2594
void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2595
                    int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2596
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2597
0
{
2598
0
    if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2599
0
        nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2600
0
    {
2601
0
        decltype(nWordCount) n = 0;
2602
0
        const __m128i xmm_zero = _mm_setzero_si128();
2603
0
        GByte *CPL_RESTRICT pabyDstDataPtr =
2604
0
            reinterpret_cast<GByte *>(pDstData);
2605
0
        for (; n < nWordCount - 7; n += 8)
2606
0
        {
2607
0
            __m128i xmm = _mm_loadu_si128(
2608
0
                reinterpret_cast<const __m128i *>(pSrcData + n));
2609
0
            __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2610
0
            __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2611
2612
0
            __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2613
0
            __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2614
0
            xmm0 = _mm_srli_si128(xmm0, 8);
2615
0
            xmm1 = _mm_srli_si128(xmm1, 8);
2616
0
            __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2617
0
            __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2618
2619
0
            _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2620
0
                          xmm0_low_d);
2621
0
            _mm_storeu_pd(
2622
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2623
0
                xmm0_high_d);
2624
0
            _mm_storeu_pd(
2625
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2626
0
                xmm1_low_d);
2627
0
            _mm_storeu_pd(
2628
0
                reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2629
0
                xmm1_high_d);
2630
0
        }
2631
0
        for (; n < nWordCount; n++)
2632
0
        {
2633
0
            pDstData[n] = pSrcData[n];
2634
0
        }
2635
0
    }
2636
0
    else
2637
0
    {
2638
0
        GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2639
0
                              nDstPixelStride, nWordCount);
2640
0
    }
2641
0
}
2642
2643
// Specialization of GDALCopyWordsT() for double -> GUInt16. It forwards all
// of its arguments unchanged to GDALCopyWordsT_8atatime().
// NOTE(review): GDALCopyWordsT_8atatime() is not shown here; going by its
// name it converts words in groups of eight - confirm against its definition.
// This specialization sits just before the "#endif  // HAVE_SSE2" line.
template <>
2644
void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
2645
                    int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2646
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2647
0
{
2648
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2649
0
                            nDstPixelStride, nWordCount);
2650
0
}
2651
2652
#endif  // HAVE_SSE2
2653
2654
// Specialization of GDALCopyWordsT() for float -> GByte. It forwards all of
// its arguments unchanged to GDALCopyWordsT_8atatime().
// NOTE(review): GDALCopyWordsT_8atatime() is not shown here; going by its
// name it converts words in groups of eight - confirm against its definition.
template <>
2655
void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2656
                    int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2657
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2658
0
{
2659
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2660
0
                            nDstPixelStride, nWordCount);
2661
0
}
2662
2663
// Specialization of GDALCopyWordsT() for float -> GInt16. It forwards all of
// its arguments unchanged to GDALCopyWordsT_8atatime().
// NOTE(review): GDALCopyWordsT_8atatime() is not shown here; going by its
// name it converts words in groups of eight - confirm against its definition.
template <>
2664
void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2665
                    int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2666
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2667
0
{
2668
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2669
0
                            nDstPixelStride, nWordCount);
2670
0
}
2671
2672
// Specialization of GDALCopyWordsT() for float -> GUInt16. It forwards all
// of its arguments unchanged to GDALCopyWordsT_8atatime().
// NOTE(review): GDALCopyWordsT_8atatime() is not shown here; going by its
// name it converts words in groups of eight - confirm against its definition.
template <>
2673
void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2674
                    int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2675
                    int nDstPixelStride, GPtrDiff_t nWordCount)
2676
0
{
2677
0
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2678
0
                            nDstPixelStride, nWordCount);
2679
0
}
2680
2681
/************************************************************************/
2682
/*                   GDALCopyWordsComplexT()                            */
2683
/************************************************************************/
2684
/**
2685
 * Template function, used to copy data from pSrcData into buffer
2686
 * pDstData, with stride nSrcPixelStride in the source data and
2687
 * stride nDstPixelStride in the destination data. Deals with the
2688
 * complex case, where input is complex and output is complex.
2689
 *
2690
 * @param pSrcData the source data buffer
2691
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2692
 *                      of interest.
2693
 * @param pDstData the destination buffer.
2694
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2695
 *                      interest.
2696
 * @param nWordCount the total number of pixel words to copy
2697
 *
2698
 */
2699
template <class Tin, class Tout>
2700
inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData,
2701
                                  int nSrcPixelStride,
2702
                                  Tout *const CPL_RESTRICT pDstData,
2703
                                  int nDstPixelStride, GPtrDiff_t nWordCount)
2704
0
{
2705
0
    decltype(nWordCount) nDstOffset = 0;
2706
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2707
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2708
2709
0
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2710
0
    {
2711
0
        const Tin *const pPixelIn =
2712
0
            reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2713
0
        Tout *const pPixelOut =
2714
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2715
2716
0
        GDALCopyWord(pPixelIn[0], pPixelOut[0]);
2717
0
        GDALCopyWord(pPixelIn[1], pPixelOut[1]);
2718
2719
0
        nDstOffset += nDstPixelStride;
2720
0
    }
2721
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, short>(unsigned char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, int>(unsigned char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, float>(unsigned char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, double>(unsigned char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, short>(unsigned short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, float>(unsigned short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, double>(unsigned short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, short>(float const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, float>(double const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, double>(double const*, int, double*, int, long long)
2722
2723
/************************************************************************/
2724
/*                   GDALCopyWordsComplexOutT()                         */
2725
/************************************************************************/
2726
/**
2727
 * Template function, used to copy data from pSrcData into buffer
2728
 * pDstData, with stride nSrcPixelStride in the source data and
2729
 * stride nDstPixelStride in the destination data. Deals with the
2730
 * case where the value is real coming in, but complex going out.
2731
 *
2732
 * @param pSrcData the source data buffer
2733
 * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2734
 *                      of interest, in bytes.
2735
 * @param pDstData the destination buffer.
2736
 * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2737
 *                      interest, in bytes.
2738
 * @param nWordCount the total number of pixel words to copy
2739
 *
2740
 */
2741
template <class Tin, class Tout>
2742
inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData,
2743
                                     int nSrcPixelStride,
2744
                                     Tout *const CPL_RESTRICT pDstData,
2745
                                     int nDstPixelStride, GPtrDiff_t nWordCount)
2746
0
{
2747
0
    decltype(nWordCount) nDstOffset = 0;
2748
2749
0
    const Tout tOutZero = static_cast<Tout>(0);
2750
2751
0
    const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2752
0
    char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2753
2754
0
    for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2755
0
    {
2756
0
        const Tin tValue =
2757
0
            *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2758
0
        Tout *const pPixelOut =
2759
0
            reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2760
0
        GDALCopyWord(tValue, *pPixelOut);
2761
2762
0
        pPixelOut[1] = tOutZero;
2763
2764
0
        nDstOffset += nDstPixelStride;
2765
0
    }
2766
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, short>(unsigned char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, int>(unsigned char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, float>(unsigned char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, double>(unsigned char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, short>(signed char const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, int>(signed char const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, float>(signed char const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, double>(signed char const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, short>(unsigned short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, int>(unsigned short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, float>(unsigned short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, double>(unsigned short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, short>(short const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, int>(short const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, float>(short const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, double>(short const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, short>(unsigned int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, int>(unsigned int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, float>(unsigned int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, double>(unsigned int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, short>(int const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, int>(int const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, float>(int const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, double>(int const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, short>(unsigned long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, int>(unsigned long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, float>(unsigned long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, double>(unsigned long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, short>(long const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, int>(long const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, float>(long const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, double>(long const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, short>(float const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, int>(float const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, float>(float const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, double>(float const*, int, double*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, short>(double const*, int, short*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, int>(double const*, int, int*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, float>(double const*, int, float*, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, double>(double const*, int, double*, int, long long)
2767
2768
/************************************************************************/
2769
/*                           GDALCopyWordsFromT()                       */
2770
/************************************************************************/
2771
/**
2772
 * Template driver function. Given the input type T, call the appropriate
2773
 * GDALCopyWordsT function template for the desired output type. You should
2774
 * never call this function directly (call GDALCopyWords instead).
2775
 *
2776
 * @param pSrcData source data buffer
2777
 * @param nSrcPixelStride pixel stride in input buffer, in bytes
2778
 * @param bInComplex input is complex
2779
 * @param pDstData destination data buffer
2780
 * @param eDstType destination data type
2781
 * @param nDstPixelStride pixel stride in output buffer, in bytes
2782
 * @param nWordCount number of pixel words to be copied
2783
 */
2784
template <class T>
2785
inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
2786
                               int nSrcPixelStride, bool bInComplex,
2787
                               void *CPL_RESTRICT pDstData,
2788
                               GDALDataType eDstType, int nDstPixelStride,
2789
                               GPtrDiff_t nWordCount)
2790
0
{
2791
0
    switch (eDstType)
2792
0
    {
2793
0
        case GDT_Byte:
2794
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2795
0
                           static_cast<unsigned char *>(pDstData),
2796
0
                           nDstPixelStride, nWordCount);
2797
0
            break;
2798
0
        case GDT_Int8:
2799
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2800
0
                           static_cast<signed char *>(pDstData),
2801
0
                           nDstPixelStride, nWordCount);
2802
0
            break;
2803
0
        case GDT_UInt16:
2804
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2805
0
                           static_cast<unsigned short *>(pDstData),
2806
0
                           nDstPixelStride, nWordCount);
2807
0
            break;
2808
0
        case GDT_Int16:
2809
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2810
0
                           static_cast<short *>(pDstData), nDstPixelStride,
2811
0
                           nWordCount);
2812
0
            break;
2813
0
        case GDT_UInt32:
2814
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2815
0
                           static_cast<unsigned int *>(pDstData),
2816
0
                           nDstPixelStride, nWordCount);
2817
0
            break;
2818
0
        case GDT_Int32:
2819
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2820
0
                           static_cast<int *>(pDstData), nDstPixelStride,
2821
0
                           nWordCount);
2822
0
            break;
2823
0
        case GDT_UInt64:
2824
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2825
0
                           static_cast<std::uint64_t *>(pDstData),
2826
0
                           nDstPixelStride, nWordCount);
2827
0
            break;
2828
0
        case GDT_Int64:
2829
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2830
0
                           static_cast<std::int64_t *>(pDstData),
2831
0
                           nDstPixelStride, nWordCount);
2832
0
            break;
2833
0
        case GDT_Float16:
2834
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2835
0
                           static_cast<GFloat16 *>(pDstData), nDstPixelStride,
2836
0
                           nWordCount);
2837
0
            break;
2838
0
        case GDT_Float32:
2839
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2840
0
                           static_cast<float *>(pDstData), nDstPixelStride,
2841
0
                           nWordCount);
2842
0
            break;
2843
0
        case GDT_Float64:
2844
0
            GDALCopyWordsT(pSrcData, nSrcPixelStride,
2845
0
                           static_cast<double *>(pDstData), nDstPixelStride,
2846
0
                           nWordCount);
2847
0
            break;
2848
0
        case GDT_CInt16:
2849
0
            if (bInComplex)
2850
0
            {
2851
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2852
0
                                      static_cast<short *>(pDstData),
2853
0
                                      nDstPixelStride, nWordCount);
2854
0
            }
2855
0
            else  // input is not complex, so we need to promote to a complex
2856
                  // buffer
2857
0
            {
2858
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2859
0
                                         static_cast<short *>(pDstData),
2860
0
                                         nDstPixelStride, nWordCount);
2861
0
            }
2862
0
            break;
2863
0
        case GDT_CInt32:
2864
0
            if (bInComplex)
2865
0
            {
2866
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2867
0
                                      static_cast<int *>(pDstData),
2868
0
                                      nDstPixelStride, nWordCount);
2869
0
            }
2870
0
            else  // input is not complex, so we need to promote to a complex
2871
                  // buffer
2872
0
            {
2873
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2874
0
                                         static_cast<int *>(pDstData),
2875
0
                                         nDstPixelStride, nWordCount);
2876
0
            }
2877
0
            break;
2878
0
        case GDT_CFloat16:
2879
0
            if (bInComplex)
2880
0
            {
2881
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2882
0
                                      static_cast<GFloat16 *>(pDstData),
2883
0
                                      nDstPixelStride, nWordCount);
2884
0
            }
2885
0
            else  // input is not complex, so we need to promote to a complex
2886
                  // buffer
2887
0
            {
2888
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2889
0
                                         static_cast<GFloat16 *>(pDstData),
2890
0
                                         nDstPixelStride, nWordCount);
2891
0
            }
2892
0
            break;
2893
0
        case GDT_CFloat32:
2894
0
            if (bInComplex)
2895
0
            {
2896
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2897
0
                                      static_cast<float *>(pDstData),
2898
0
                                      nDstPixelStride, nWordCount);
2899
0
            }
2900
0
            else  // input is not complex, so we need to promote to a complex
2901
                  // buffer
2902
0
            {
2903
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2904
0
                                         static_cast<float *>(pDstData),
2905
0
                                         nDstPixelStride, nWordCount);
2906
0
            }
2907
0
            break;
2908
0
        case GDT_CFloat64:
2909
0
            if (bInComplex)
2910
0
            {
2911
0
                GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2912
0
                                      static_cast<double *>(pDstData),
2913
0
                                      nDstPixelStride, nWordCount);
2914
0
            }
2915
0
            else  // input is not complex, so we need to promote to a complex
2916
                  // buffer
2917
0
            {
2918
0
                GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2919
0
                                         static_cast<double *>(pDstData),
2920
0
                                         nDstPixelStride, nWordCount);
2921
0
            }
2922
0
            break;
2923
0
        case GDT_Unknown:
2924
0
        case GDT_TypeCount:
2925
0
            CPLAssert(false);
2926
0
    }
2927
0
}
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned char>(unsigned char const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<signed char>(signed char const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned short>(unsigned short const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<short>(short const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned int>(unsigned int const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<int>(int const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned long>(unsigned long const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<long>(long const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<cpl::Float16>(cpl::Float16 const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<float>(float const*, int, bool, void*, GDALDataType, int, long long)
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<double>(double const*, int, bool, void*, GDALDataType, int, long long)
2928
2929
}  // end anonymous namespace
2930
2931
/************************************************************************/
2932
/*                          GDALReplicateWord()                         */
2933
/************************************************************************/
2934
2935
template <class T>
2936
inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
2937
                               GPtrDiff_t nWordCount)
2938
0
{
2939
0
    const T valSet = *static_cast<const T *>(pDstData);
2940
0
    if (nDstPixelStride == static_cast<int>(sizeof(T)))
2941
0
    {
2942
0
        T *pDstPtr = static_cast<T *>(pDstData) + 1;
2943
0
        while (nWordCount >= 4)
2944
0
        {
2945
0
            nWordCount -= 4;
2946
0
            pDstPtr[0] = valSet;
2947
0
            pDstPtr[1] = valSet;
2948
0
            pDstPtr[2] = valSet;
2949
0
            pDstPtr[3] = valSet;
2950
0
            pDstPtr += 4;
2951
0
        }
2952
0
        while (nWordCount > 0)
2953
0
        {
2954
0
            --nWordCount;
2955
0
            *pDstPtr = valSet;
2956
0
            pDstPtr++;
2957
0
        }
2958
0
    }
2959
0
    else
2960
0
    {
2961
0
        GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
2962
0
        while (nWordCount > 0)
2963
0
        {
2964
0
            --nWordCount;
2965
0
            *reinterpret_cast<T *>(pabyDstPtr) = valSet;
2966
0
            pabyDstPtr += nDstPixelStride;
2967
0
        }
2968
0
    }
2969
0
}
Unexecuted instantiation: void GDALReplicateWordT<unsigned short>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<short>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<unsigned int>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<int>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<unsigned long>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<long>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<cpl::Float16>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<float>(void*, int, long long)
Unexecuted instantiation: void GDALReplicateWordT<double>(void*, int, long long)
2970
2971
static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData,
2972
                              GDALDataType eSrcType,
2973
                              void *CPL_RESTRICT pDstData,
2974
                              GDALDataType eDstType, int nDstPixelStride,
2975
                              GPtrDiff_t nWordCount)
2976
0
{
2977
    /* -----------------------------------------------------------------------
2978
     */
2979
    /* Special case when the source data is always the same value */
2980
    /* (for VRTSourcedRasterBand::IRasterIO and
2981
     * VRTDerivedRasterBand::IRasterIO*/
2982
    /*  for example) */
2983
    /* -----------------------------------------------------------------------
2984
     */
2985
    // Let the general translation case do the necessary conversions
2986
    // on the first destination element.
2987
0
    GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
2988
2989
    // Now copy the first element to the nWordCount - 1 following destination
2990
    // elements.
2991
0
    nWordCount--;
2992
0
    GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
2993
2994
0
    switch (eDstType)
2995
0
    {
2996
0
        case GDT_Byte:
2997
0
        case GDT_Int8:
2998
0
        {
2999
0
            if (nDstPixelStride == 1)
3000
0
            {
3001
0
                if (nWordCount > 0)
3002
0
                    memset(pabyDstWord,
3003
0
                           *reinterpret_cast<const GByte *>(pDstData),
3004
0
                           nWordCount);
3005
0
            }
3006
0
            else
3007
0
            {
3008
0
                GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
3009
0
                while (nWordCount > 0)
3010
0
                {
3011
0
                    --nWordCount;
3012
0
                    *pabyDstWord = valSet;
3013
0
                    pabyDstWord += nDstPixelStride;
3014
0
                }
3015
0
            }
3016
0
            break;
3017
0
        }
3018
3019
0
#define CASE_DUPLICATE_SIMPLE(enum_type, c_type)                               \
3020
0
    case enum_type:                                                            \
3021
0
    {                                                                          \
3022
0
        GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount);     \
3023
0
        break;                                                                 \
3024
0
    }
3025
3026
0
            CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
3027
0
            CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
3028
0
            CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3029
0
            CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
3030
0
            CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
3031
0
            CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
3032
0
            CASE_DUPLICATE_SIMPLE(GDT_Float16, GFloat16)
3033
0
            CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3034
0
            CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3035
3036
0
#define CASE_DUPLICATE_COMPLEX(enum_type, c_type)                              \
3037
0
    case enum_type:                                                            \
3038
0
    {                                                                          \
3039
0
        c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0];        \
3040
0
        c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1];        \
3041
0
        while (nWordCount > 0)                                                 \
3042
0
        {                                                                      \
3043
0
            --nWordCount;                                                      \
3044
0
            reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1;              \
3045
0
            reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2;              \
3046
0
            pabyDstWord += nDstPixelStride;                                    \
3047
0
        }                                                                      \
3048
0
        break;                                                                 \
3049
0
    }
3050
3051
0
            CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
3052
0
            CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3053
0
            CASE_DUPLICATE_COMPLEX(GDT_CFloat16, GFloat16)
3054
0
            CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3055
0
            CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3056
3057
0
        case GDT_Unknown:
3058
0
        case GDT_TypeCount:
3059
0
            CPLAssert(false);
3060
0
    }
3061
0
}
3062
3063
/************************************************************************/
3064
/*                        GDALUnrolledCopy()                            */
3065
/************************************************************************/
3066
3067
template <class T, int srcStride, int dstStride>
3068
static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
3069
                                           const T *CPL_RESTRICT pSrc,
3070
                                           GPtrDiff_t nIters)
3071
0
{
3072
0
    if (nIters >= 16)
3073
0
    {
3074
0
        for (GPtrDiff_t i = nIters / 16; i != 0; i--)
3075
0
        {
3076
0
            pDest[0 * dstStride] = pSrc[0 * srcStride];
3077
0
            pDest[1 * dstStride] = pSrc[1 * srcStride];
3078
0
            pDest[2 * dstStride] = pSrc[2 * srcStride];
3079
0
            pDest[3 * dstStride] = pSrc[3 * srcStride];
3080
0
            pDest[4 * dstStride] = pSrc[4 * srcStride];
3081
0
            pDest[5 * dstStride] = pSrc[5 * srcStride];
3082
0
            pDest[6 * dstStride] = pSrc[6 * srcStride];
3083
0
            pDest[7 * dstStride] = pSrc[7 * srcStride];
3084
0
            pDest[8 * dstStride] = pSrc[8 * srcStride];
3085
0
            pDest[9 * dstStride] = pSrc[9 * srcStride];
3086
0
            pDest[10 * dstStride] = pSrc[10 * srcStride];
3087
0
            pDest[11 * dstStride] = pSrc[11 * srcStride];
3088
0
            pDest[12 * dstStride] = pSrc[12 * srcStride];
3089
0
            pDest[13 * dstStride] = pSrc[13 * srcStride];
3090
0
            pDest[14 * dstStride] = pSrc[14 * srcStride];
3091
0
            pDest[15 * dstStride] = pSrc[15 * srcStride];
3092
0
            pDest += 16 * dstStride;
3093
0
            pSrc += 16 * srcStride;
3094
0
        }
3095
0
        nIters = nIters % 16;
3096
0
    }
3097
0
    for (GPtrDiff_t i = 0; i < nIters; i++)
3098
0
    {
3099
0
        pDest[i * dstStride] = *pSrc;
3100
0
        pSrc += srcStride;
3101
0
    }
3102
0
}
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 3, 1>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 2, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 3, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 4, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 2>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 3>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 4>(short*, short const*, long long)
3103
3104
template <class T, int srcStride, int dstStride>
3105
static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
3106
                                    const T *CPL_RESTRICT pSrc,
3107
                                    GPtrDiff_t nIters)
3108
0
{
3109
0
    GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters);
3110
0
}
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 2, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 3, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 4, 1>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 2>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 3>(short*, short const*, long long)
Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 4>(short*, short const*, long long)
3111
3112
#ifdef HAVE_SSE2
3113
3114
template <>
3115
void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
3116
                                   const GByte *CPL_RESTRICT pSrc,
3117
                                   GPtrDiff_t nIters)
3118
0
{
3119
0
    decltype(nIters) i = 0;
3120
0
    if (nIters > 16)
3121
0
    {
3122
0
        const __m128i xmm_mask = _mm_set1_epi16(0xff);
3123
        // If we were sure that there would always be 1 trailing byte, we could
3124
        // check against nIters - 15
3125
0
        for (; i < nIters - 16; i += 16)
3126
0
        {
3127
0
            __m128i xmm0 =
3128
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3129
0
            __m128i xmm1 =
3130
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3131
            // Set higher 8bit of each int16 packed word to 0
3132
0
            xmm0 = _mm_and_si128(xmm0, xmm_mask);
3133
0
            xmm1 = _mm_and_si128(xmm1, xmm_mask);
3134
            // Pack int16 to uint8 and merge back both vector
3135
0
            xmm0 = _mm_packus_epi16(xmm0, xmm1);
3136
3137
            // Store result
3138
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3139
3140
0
            pSrc += 2 * 16;
3141
0
        }
3142
0
    }
3143
0
    for (; i < nIters; i++)
3144
0
    {
3145
0
        pDest[i] = *pSrc;
3146
0
        pSrc += 2;
3147
0
    }
3148
0
}
3149
3150
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
3151
3152
template <>
3153
void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3154
                                   const GByte *CPL_RESTRICT pSrc,
3155
                                   GPtrDiff_t nIters)
3156
0
{
3157
0
    if (nIters > 16 && CPLHaveRuntimeSSSE3())
3158
0
    {
3159
0
        GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3160
0
    }
3161
0
    else
3162
0
    {
3163
0
        GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters);
3164
0
    }
3165
0
}
3166
3167
#endif
3168
3169
template <>
3170
void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
3171
                                   const GByte *CPL_RESTRICT pSrc,
3172
                                   GPtrDiff_t nIters)
3173
0
{
3174
0
    decltype(nIters) i = 0;
3175
0
    if (nIters > 16)
3176
0
    {
3177
0
        const __m128i xmm_mask = _mm_set1_epi32(0xff);
3178
        // If we were sure that there would always be 3 trailing bytes, we could
3179
        // check against nIters - 15
3180
0
        for (; i < nIters - 16; i += 16)
3181
0
        {
3182
0
            __m128i xmm0 =
3183
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3184
0
            __m128i xmm1 =
3185
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3186
0
            __m128i xmm2 =
3187
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3188
0
            __m128i xmm3 =
3189
0
                _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
3190
            // Set higher 24bit of each int32 packed word to 0
3191
0
            xmm0 = _mm_and_si128(xmm0, xmm_mask);
3192
0
            xmm1 = _mm_and_si128(xmm1, xmm_mask);
3193
0
            xmm2 = _mm_and_si128(xmm2, xmm_mask);
3194
0
            xmm3 = _mm_and_si128(xmm3, xmm_mask);
3195
            // Pack int32 to int16
3196
0
            xmm0 = _mm_packs_epi32(xmm0, xmm1);
3197
0
            xmm2 = _mm_packs_epi32(xmm2, xmm3);
3198
            // Pack int16 to uint8
3199
0
            xmm0 = _mm_packus_epi16(xmm0, xmm2);
3200
3201
            // Store result
3202
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3203
3204
0
            pSrc += 4 * 16;
3205
0
        }
3206
0
    }
3207
0
    for (; i < nIters; i++)
3208
0
    {
3209
0
        pDest[i] = *pSrc;
3210
0
        pSrc += 4;
3211
0
    }
3212
0
}
3213
#endif  // HAVE_SSE2
3214
3215
/************************************************************************/
3216
/*                         GDALFastCopy()                               */
3217
/************************************************************************/
3218
3219
template <class T>
3220
static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
3221
                                const T *CPL_RESTRICT pSrc, int nSrcStride,
3222
                                GPtrDiff_t nIters)
3223
0
{
3224
0
    constexpr int sizeofT = static_cast<int>(sizeof(T));
3225
0
    if (nIters == 1)
3226
0
    {
3227
0
        *pDest = *pSrc;
3228
0
    }
3229
0
    else if (nDestStride == sizeofT)
3230
0
    {
3231
0
        if (nSrcStride == sizeofT)
3232
0
        {
3233
0
            memcpy(pDest, pSrc, nIters * sizeof(T));
3234
0
        }
3235
0
        else if (nSrcStride == 2 * sizeofT)
3236
0
        {
3237
0
            GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
3238
0
        }
3239
0
        else if (nSrcStride == 3 * sizeofT)
3240
0
        {
3241
0
            GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
3242
0
        }
3243
0
        else if (nSrcStride == 4 * sizeofT)
3244
0
        {
3245
0
            GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
3246
0
        }
3247
0
        else
3248
0
        {
3249
0
            while (nIters-- > 0)
3250
0
            {
3251
0
                *pDest = *pSrc;
3252
0
                pSrc += nSrcStride / sizeofT;
3253
0
                pDest++;
3254
0
            }
3255
0
        }
3256
0
    }
3257
0
    else if (nSrcStride == sizeofT)
3258
0
    {
3259
0
        if (nDestStride == 2 * sizeofT)
3260
0
        {
3261
0
            GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
3262
0
        }
3263
0
        else if (nDestStride == 3 * sizeofT)
3264
0
        {
3265
0
            GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
3266
0
        }
3267
0
        else if (nDestStride == 4 * sizeofT)
3268
0
        {
3269
0
            GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
3270
0
        }
3271
0
        else
3272
0
        {
3273
0
            while (nIters-- > 0)
3274
0
            {
3275
0
                *pDest = *pSrc;
3276
0
                pSrc++;
3277
0
                pDest += nDestStride / sizeofT;
3278
0
            }
3279
0
        }
3280
0
    }
3281
0
    else
3282
0
    {
3283
0
        while (nIters-- > 0)
3284
0
        {
3285
0
            *pDest = *pSrc;
3286
0
            pSrc += nSrcStride / sizeofT;
3287
0
            pDest += nDestStride / sizeofT;
3288
0
        }
3289
0
    }
3290
0
}
Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<unsigned char>(unsigned char*, int, unsigned char const*, int, long long)
Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<short>(short*, int, short const*, int, long long)
3291
3292
/************************************************************************/
3293
/*                         GDALFastCopyByte()                           */
3294
/************************************************************************/
3295
3296
static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
3297
                             int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
3298
                             int nDstPixelStride, GPtrDiff_t nWordCount)
3299
0
{
3300
0
    GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride,
3301
0
                 nWordCount);
3302
0
}
3303
3304
/************************************************************************/
3305
/*                           GDALCopyWords()                            */
3306
/************************************************************************/
3307
3308
/**
3309
 * Copy pixel words from buffer to buffer.
3310
 *
3311
 * @see GDALCopyWords64()
3312
 */
3313
void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
                               GDALDataType eSrcType, int nSrcPixelStride,
                               void *CPL_RESTRICT pDstData,
                               GDALDataType eDstType, int nDstPixelStride,
                               int nWordCount)
{
    // Variant taking an int word count: widen it and defer to
    // GDALCopyWords64(), which does all the work.
    const GPtrDiff_t nWordCount64 = nWordCount;
    GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
                    nDstPixelStride, nWordCount64);
}
3322
3323
/************************************************************************/
3324
/*                          GDALCopyWords64()                           */
3325
/************************************************************************/
3326
3327
/**
3328
 * Copy pixel words from buffer to buffer.
3329
 *
3330
 * This function is used to copy pixel word values from one memory buffer
3331
 * to another, with support for conversion between data types, and differing
3332
 * step factors. The data type conversion is done using the following
3333
 * rules:
3334
 * <ul>
3335
 * <li>Values assigned to a lower range integer type are clipped. For
3336
 * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3337
 * less than 0 to be set to 0, and values larger than 255 to be set to 255.
3338
 * </li>
3339
 * <li>
3340
 * Assignment from floating point to integer rounds to closest integer.
3341
 * +Infinity is mapped to the largest integer. -Infinity is mapped to the
3342
 * smallest integer. NaN is mapped to 0.
3343
 * </li>
3344
 * <li>
3345
 * Assignment from non-complex to complex will result in the imaginary part
3346
 * being set to zero on output.
3347
 * </li>
3348
 * <li> Assignment from complex to
3349
 * non-complex will result in the complex portion being lost and the real
3350
 * component being preserved (<i>not magnitude!</i>).
3351
 * </li>
3352
 * </ul>
3353
 *
3354
 * No assumptions are made about the source or destination words occurring
3355
 * on word boundaries.  It is assumed that all values are in native machine
3356
 * byte order.
3357
 *
3358
 * @param pSrcData Pointer to source data to be converted.
3359
 * @param eSrcType the source data type (see GDALDataType enum)
3360
 * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3361
 * in bytes
3362
 * @param pDstData Pointer to buffer where destination data should go
3363
 * @param eDstType the destination data type (see GDALDataType enum)
3364
 * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3365
 * words), in bytes
3366
 * @param nWordCount number of words to be copied
3367
 *
3368
 * @note
3369
 * When adding a new data type to GDAL, you must do the following to
3370
 * support it properly within the GDALCopyWords function:
3371
 * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3372
 *    This should invoke the appropriate GDALCopyWordsFromT wrapper.
3373
 * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3374
 *    This should call the appropriate GDALCopyWordsT template.
3375
 * 3. If appropriate, overload the appropriate CopyWord template in the
3376
 *    above namespace. This will ensure that any conversion issues are
3377
 *    handled (cases like the float -> int32 case, where the min/max
3378
 *    values are subject to roundoff error).
3379
 */
3380
3381
void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
                                 GDALDataType eSrcType, int nSrcPixelStride,
                                 void *CPL_RESTRICT pDstData,
                                 GDALDataType eDstType, int nDstPixelStride,
                                 GPtrDiff_t nWordCount)
{
    // Size in bytes of one source / destination word. A size of zero is
    // reported as an unsupported data type and nothing is copied.
    const int nSrcWordSize = GDALGetDataTypeSizeBytes(eSrcType);
    const int nDstWordSize = GDALGetDataTypeSizeBytes(eDstType);
    if (CPL_UNLIKELY(nSrcWordSize == 0 || nDstWordSize == 0))
    {
        CPLError(CE_Failure, CPLE_NotSupported,
                 "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount "
                 "argument");
        return;
    }

    const GByte *const pabySrc = static_cast<const GByte *>(pSrcData);
    GByte *const pabyDst = static_cast<GByte *>(pDstData);

    // On platforms where alignment matters, be careful: when a pointer or a
    // stride is not a multiple of its word size, and we are not in the
    // "same type, same stride" situation, go through memcpy() word by word.
    const bool bSameTypeAndStride =
        eSrcType == eDstType && nSrcPixelStride == nDstPixelStride;
    const bool bMisaligned =
        (reinterpret_cast<uintptr_t>(pSrcData) % nSrcWordSize) != 0 ||
        (reinterpret_cast<uintptr_t>(pDstData) % nDstWordSize) != 0 ||
        (nSrcPixelStride % nSrcWordSize) != 0 ||
        (nDstPixelStride % nDstWordSize) != 0;
    if (!bSameTypeAndStride && bMisaligned)
    {
        if (eSrcType == eDstType)
        {
            // No conversion needed: plain byte copy of each word.
            for (GPtrDiff_t iWord = 0; iWord < nWordCount; ++iWord)
            {
                memcpy(pabyDst + nDstPixelStride * iWord,
                       pabySrc + nSrcPixelStride * iWord, nDstWordSize);
            }
            return;
        }

        // Returns the first address at or after pabyBase that is a multiple
        // of nAlign.
        const auto alignUp = [](GByte *pabyBase, int nAlign)
        {
            const uintptr_t nAlignment = static_cast<uintptr_t>(nAlign);
            const uintptr_t nRemainder =
                reinterpret_cast<uintptr_t>(pabyBase) % nAlignment;
            return nRemainder == 0 ? pabyBase
                                   : pabyBase + (nAlignment - nRemainder);
        };

        // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
        // be sure to get correctly aligned pointer.
        constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double);
        GByte abySrcScratch[2 * SIZEOF_CFLOAT64];
        GByte abyDstScratch[2 * SIZEOF_CFLOAT64];
        GByte *const pabyAlignedSrc = alignUp(abySrcScratch, nSrcWordSize);
        GByte *const pabyAlignedDst = alignUp(abyDstScratch, nDstWordSize);

        // Convert one word at a time: copy it into the aligned source
        // scratch, convert scratch to scratch, then copy the result out.
        for (GPtrDiff_t iWord = 0; iWord < nWordCount; ++iWord)
        {
            memcpy(pabyAlignedSrc, pabySrc + nSrcPixelStride * iWord,
                   nSrcWordSize);
            GDALCopyWords64(pabyAlignedSrc, eSrcType, 0, pabyAlignedDst,
                            eDstType, 0, 1);
            memcpy(pabyDst + nDstPixelStride * iWord, pabyAlignedDst,
                   nDstWordSize);
        }
        return;
    }

    // A zero source stride with several words means a single word is
    // replicated into the provided buffer.
    if (nSrcPixelStride == 0 && nWordCount > 1)
    {
        GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType,
                          nDstPixelStride, nWordCount);
        return;
    }

    // Shortcuts that apply when no data type conversion is involved.
    if (eSrcType == eDstType)
    {
        // One-byte words.
        if (eSrcType == GDT_Byte || eSrcType == GDT_Int8)
        {
            GDALFastCopy(pabyDst, nDstPixelStride, pabySrc, nSrcPixelStride,
                         nWordCount);
            return;
        }

        // Two-byte words with even strides.
        const bool bEvenStrides =
            (nSrcPixelStride % 2) == 0 && (nDstPixelStride % 2) == 0;
        if (nSrcWordSize == 2 && bEvenStrides)
        {
            GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride,
                         static_cast<const short *>(pSrcData), nSrcPixelStride,
                         nWordCount);
            return;
        }

        // A single word: memcpy() with a literal size where possible.
        if (nWordCount == 1)
        {
#if defined(CSA_BUILD) || defined(__COVERITY__)
            // Avoid false positives...
            memcpy(pDstData, pSrcData, nSrcWordSize);
#else
            switch (nSrcWordSize)
            {
                case 2:
                    memcpy(pDstData, pSrcData, 2);
                    break;
                case 4:
                    memcpy(pDstData, pSrcData, 4);
                    break;
                case 8:
                    memcpy(pDstData, pSrcData, 8);
                    break;
                default: /* eSrcType == GDT_CFloat64 */
                    memcpy(pDstData, pSrcData, 16);
                    break;
            }
#endif
            return;
        }

        // Let memcpy() handle the case where we're copying a packed buffer
        // of pixels.
        if (nSrcPixelStride == nDstPixelStride &&
            nSrcPixelStride == nSrcWordSize)
        {
            memcpy(pDstData, pSrcData, nWordCount * nSrcWordSize);
            return;
        }
    }

    // Handle the more general case -- deals with conversion of data types
    // directly. The flag passed to GDALCopyWordsFromT is true for the complex
    // source types and false otherwise.
    constexpr bool bRealSource = false;
    constexpr bool bComplexSource = true;
    switch (eSrcType)
    {
        case GDT_Byte:
            GDALCopyWordsFromT<unsigned char>(
                static_cast<const unsigned char *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Int8:
            GDALCopyWordsFromT<signed char>(
                static_cast<const signed char *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_UInt16:
            GDALCopyWordsFromT<unsigned short>(
                static_cast<const unsigned short *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Int16:
            GDALCopyWordsFromT<short>(
                static_cast<const short *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_UInt32:
            GDALCopyWordsFromT<unsigned int>(
                static_cast<const unsigned int *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Int32:
            GDALCopyWordsFromT<int>(
                static_cast<const int *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_UInt64:
            GDALCopyWordsFromT<std::uint64_t>(
                static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Int64:
            GDALCopyWordsFromT<std::int64_t>(
                static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Float16:
            GDALCopyWordsFromT<GFloat16>(
                static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Float32:
            GDALCopyWordsFromT<float>(
                static_cast<const float *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_Float64:
            GDALCopyWordsFromT<double>(
                static_cast<const double *>(pSrcData), nSrcPixelStride,
                bRealSource, pDstData, eDstType, nDstPixelStride, nWordCount);
            break;
        case GDT_CInt16:
            GDALCopyWordsFromT<short>(
                static_cast<const short *>(pSrcData), nSrcPixelStride,
                bComplexSource, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_CInt32:
            GDALCopyWordsFromT<int>(
                static_cast<const int *>(pSrcData), nSrcPixelStride,
                bComplexSource, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_CFloat16:
            GDALCopyWordsFromT<GFloat16>(
                static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride,
                bComplexSource, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_CFloat32:
            GDALCopyWordsFromT<float>(
                static_cast<const float *>(pSrcData), nSrcPixelStride,
                bComplexSource, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_CFloat64:
            GDALCopyWordsFromT<double>(
                static_cast<const double *>(pSrcData), nSrcPixelStride,
                bComplexSource, pDstData, eDstType, nDstPixelStride,
                nWordCount);
            break;
        case GDT_Unknown:
        case GDT_TypeCount:
            // Already rejected above through the zero word size check.
            CPLAssert(false);
            break;
    }
}
3594
3595
/************************************************************************/
3596
/*                            GDALCopyBits()                            */
3597
/************************************************************************/
3598
3599
/**
3600
 * Bitwise word copying.
3601
 *
3602
 * A function for moving sets of partial bytes around.  Loosely
3603
 * speaking this is a bitwise analog to GDALCopyWords().
3604
 *
3605
 * It copies nStepCount "words" where each word is nBitCount bits long.
3606
 * The nSrcStep and nDstStep are the number of bits from the start of one
3607
 * word to the next (same as nBitCount if they are packed).  The nSrcOffset
3608
 * and nDstOffset are the offset into the source and destination buffers
3609
 * to start at, also measured in bits.
3610
 *
3611
 * All bit offsets are assumed to start from the high order bit in a byte
3612
 * (i.e. most significant bit first).  Currently this function is not very
3613
 * optimized, but it may be improved for some common cases in the future
3614
 * as needed.
3615
 *
3616
 * @param pabySrcData the source data buffer.
3617
 * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
3618
 * first word to copy.
3619
 * @param nSrcStep the offset in bits from the start one source word to the
3620
 * start of the next.
3621
 * @param pabyDstData the destination data buffer.
3622
 * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
3623
 * first word to copy over.
3624
 * @param nDstStep the offset in bits from the start one word to the
3625
 * start of the next.
3626
 * @param nBitCount the number of bits in a word to be copied.
3627
 * @param nStepCount the number of words to copy.
3628
 */
3629
3630
void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
3631
                  GByte *pabyDstData, int nDstOffset, int nDstStep,
3632
                  int nBitCount, int nStepCount)
3633
3634
0
{
3635
0
    VALIDATE_POINTER0(pabySrcData, "GDALCopyBits");
3636
3637
0
    for (int iStep = 0; iStep < nStepCount; iStep++)
3638
0
    {
3639
0
        for (int iBit = 0; iBit < nBitCount; iBit++)
3640
0
        {
3641
0
            if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7)))
3642
0
                pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7));
3643
0
            else
3644
0
                pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7));
3645
3646
0
            nSrcOffset++;
3647
0
            nDstOffset++;
3648
0
        }
3649
3650
0
        nSrcOffset += (nSrcStep - nBitCount);
3651
0
        nDstOffset += (nDstStep - nBitCount);
3652
0
    }
3653
0
}
3654
3655
/************************************************************************/
3656
/*                  GDALBandGetBestOverviewLevel()                      */
3657
/*                                                                      */
3658
/* Returns the best overview level to satisfy the query or -1 if none   */
3659
/* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when        */
3660
/* returning a valid overview level                                     */
3661
/************************************************************************/
3662
3663
// Convenience form of GDALBandGetBestOverviewLevel2() for callers that have
// no GDALRasterIOExtraArg: forwards every argument unchanged and passes a
// null extra-argument pointer. Returns whatever the callee returns.
int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff,
                                 int &nXSize, int &nYSize, int nBufXSize,
                                 int nBufYSize)
{
    GDALRasterIOExtraArg *const psNoExtraArg = nullptr;
    return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize,
                                         nBufXSize, nBufYSize, psNoExtraArg);
}
3670
3671
int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff,
3672
                                  int &nYOff, int &nXSize, int &nYSize,
3673
                                  int nBufXSize, int nBufYSize,
3674
                                  GDALRasterIOExtraArg *psExtraArg)
3675
0
{
3676
0
    if (psExtraArg != nullptr && psExtraArg->nVersion > 1 &&
3677
0
        psExtraArg->bUseOnlyThisScale)
3678
0
        return -1;
3679
    /* -------------------------------------------------------------------- */
3680
    /*      Compute the desired downsampling factor.  It is                 */
3681
    /*      based on the least reduced axis, and represents the number      */
3682
    /*      of source pixels to one destination pixel.                      */
3683
    /* -------------------------------------------------------------------- */
3684
0
    const double dfDesiredDownsamplingFactor =
3685
0
        ((nXSize / static_cast<double>(nBufXSize)) <
3686
0
             (nYSize / static_cast<double>(nBufYSize)) ||
3687
0
         nBufYSize == 1)
3688
0
            ? nXSize / static_cast<double>(nBufXSize)
3689
0
            : nYSize / static_cast<double>(nBufYSize);
3690
3691
    /* -------------------------------------------------------------------- */
3692
    /*      Find the overview level that largest downsampling factor (most  */
3693
    /*      downsampled) that is still less than (or only a little more)    */
3694
    /*      downsampled than the request.                                   */
3695
    /* -------------------------------------------------------------------- */
3696
0
    const int nOverviewCount = poBand->GetOverviewCount();
3697
0
    GDALRasterBand *poBestOverview = nullptr;
3698
0
    double dfBestDownsamplingFactor = 0;
3699
0
    int nBestOverviewLevel = -1;
3700
3701
0
    const char *pszOversampligThreshold =
3702
0
        CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr);
3703
3704
    // Note: keep this logic for overview selection in sync between
3705
    // gdalwarp_lib.cpp and rasterio.cpp
3706
    // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693
3707
0
    const double dfOversamplingThreshold =
3708
0
        pszOversampligThreshold ? CPLAtof(pszOversampligThreshold)
3709
0
        : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour
3710
0
            ? 1.0
3711
0
            : 1.2;
3712
0
    for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3713
0
    {
3714
0
        GDALRasterBand *poOverview = poBand->GetOverview(iOverview);
3715
0
        if (poOverview == nullptr ||
3716
0
            poOverview->GetXSize() > poBand->GetXSize() ||
3717
0
            poOverview->GetYSize() > poBand->GetYSize())
3718
0
        {
3719
0
            continue;
3720
0
        }
3721
3722
        // Compute downsampling factor of this overview
3723
0
        const double dfDownsamplingFactor = std::min(
3724
0
            poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()),
3725
0
            poBand->GetYSize() / static_cast<double>(poOverview->GetYSize()));
3726
3727
        // Is it nearly the requested factor and better (lower) than
3728
        // the current best factor?
3729
        // Use an epsilon because of numerical instability.
3730
0
        constexpr double EPSILON = 1e-1;
3731
0
        if (dfDownsamplingFactor >=
3732
0
                dfDesiredDownsamplingFactor * dfOversamplingThreshold +
3733
0
                    EPSILON ||
3734
0
            dfDownsamplingFactor <= dfBestDownsamplingFactor)
3735
0
        {
3736
0
            continue;
3737
0
        }
3738
3739
        // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
3740
0
        const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING");
3741
3742
0
        if (pszResampling != nullptr &&
3743
0
            STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2"))
3744
0
            continue;
3745
3746
        // OK, this is our new best overview.
3747
0
        poBestOverview = poOverview;
3748
0
        nBestOverviewLevel = iOverview;
3749
0
        dfBestDownsamplingFactor = dfDownsamplingFactor;
3750
3751
0
        if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) <
3752
0
            EPSILON)
3753
0
        {
3754
0
            break;
3755
0
        }
3756
0
    }
3757
3758
    /* -------------------------------------------------------------------- */
3759
    /*      If we didn't find an overview that helps us, just return        */
3760
    /*      indicating failure and the full resolution image will be used.  */
3761
    /* -------------------------------------------------------------------- */
3762
0
    if (nBestOverviewLevel < 0)
3763
0
        return -1;
3764
3765
    /* -------------------------------------------------------------------- */
3766
    /*      Recompute the source window in terms of the selected            */
3767
    /*      overview.                                                       */
3768
    /* -------------------------------------------------------------------- */
3769
0
    const double dfXFactor =
3770
0
        poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize());
3771
0
    const double dfYFactor =
3772
0
        poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize());
3773
0
    CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(),
3774
0
             poBestOverview->GetYSize());
3775
3776
0
    const int nOXOff = std::min(poBestOverview->GetXSize() - 1,
3777
0
                                static_cast<int>(nXOff / dfXFactor + 0.5));
3778
0
    const int nOYOff = std::min(poBestOverview->GetYSize() - 1,
3779
0
                                static_cast<int>(nYOff / dfYFactor + 0.5));
3780
0
    int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5));
3781
0
    int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5));
3782
0
    if (nOXOff + nOXSize > poBestOverview->GetXSize())
3783
0
        nOXSize = poBestOverview->GetXSize() - nOXOff;
3784
0
    if (nOYOff + nOYSize > poBestOverview->GetYSize())
3785
0
        nOYSize = poBestOverview->GetYSize() - nOYOff;
3786
3787
0
    if (psExtraArg)
3788
0
    {
3789
0
        if (psExtraArg->bFloatingPointWindowValidity)
3790
0
        {
3791
0
            psExtraArg->dfXOff /= dfXFactor;
3792
0
            psExtraArg->dfXSize /= dfXFactor;
3793
0
            psExtraArg->dfYOff /= dfYFactor;
3794
0
            psExtraArg->dfYSize /= dfYFactor;
3795
0
        }
3796
0
        else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
3797
0
        {
3798
0
            psExtraArg->bFloatingPointWindowValidity = true;
3799
0
            psExtraArg->dfXOff = nXOff / dfXFactor;
3800
0
            psExtraArg->dfXSize = nXSize / dfXFactor;
3801
0
            psExtraArg->dfYOff = nYOff / dfYFactor;
3802
0
            psExtraArg->dfYSize = nYSize / dfYFactor;
3803
0
        }
3804
0
    }
3805
3806
0
    nXOff = nOXOff;
3807
0
    nYOff = nOYOff;
3808
0
    nXSize = nOXSize;
3809
0
    nYSize = nOYSize;
3810
3811
0
    return nBestOverviewLevel;
3812
0
}
3813
3814
/************************************************************************/
3815
/*                          OverviewRasterIO()                          */
3816
/*                                                                      */
3817
/*      Special work function to utilize available overviews to         */
3818
/*      more efficiently satisfy downsampled requests.  It will         */
3819
/*      return CE_Failure if there are no appropriate overviews         */
3820
/*      available but it doesn't emit any error messages.               */
3821
/************************************************************************/
3822
3823
//! @cond Doxygen_Suppress
3824
CPLErr GDALRasterBand::OverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
{
    // Overview selection is run on a copy of the extra arguments, so the
    // structure pointed to by psExtraArg is not the one handed to it.
    GDALRasterIOExtraArg sOvrExtraArg;
    GDALCopyRasterIOExtraArg(&sOvrExtraArg, psExtraArg);

    // The window arguments are by-value parameters here; the call below
    // rewrites them in overview coordinates when it selects an overview.
    const int iBestOverview =
        GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
                                      nBufXSize, nBufYSize, &sOvrExtraArg);

    /* -------------------------------------------------------------------- */
    /*      Recast the call in terms of the new raster layer, or fail       */
    /*      when no overview was selected or it cannot be fetched.          */
    /* -------------------------------------------------------------------- */
    GDALRasterBand *const poOvrBand =
        iBestOverview >= 0 ? GetOverview(iBestOverview) : nullptr;
    if (poOvrBand == nullptr)
    {
        return CE_Failure;
    }

    return poOvrBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
                               nBufXSize, nBufYSize, eBufType, nPixelSpace,
                               nLineSpace, &sOvrExtraArg);
}
3849
3850
/************************************************************************/
3851
/*                      TryOverviewRasterIO()                           */
3852
/************************************************************************/
3853
3854
CPLErr GDALRasterBand::TryOverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg,
    int *pbTried)
{
    // Overview selection works on copies of the window and of the extra
    // arguments.
    GDALRasterIOExtraArg sOvrExtraArg;
    GDALCopyRasterIOExtraArg(&sOvrExtraArg, psExtraArg);

    int nOvrXOff = nXOff;
    int nOvrYOff = nYOff;
    int nOvrXSize = nXSize;
    int nOvrYSize = nYSize;
    const int iBestOverview = GDALBandGetBestOverviewLevel2(
        this, nOvrXOff, nOvrYOff, nOvrXSize, nOvrYSize, nBufXSize, nBufYSize,
        &sOvrExtraArg);

    GDALRasterBand *const poOvrBand =
        iBestOverview >= 0 ? GetOverview(iBestOverview) : nullptr;
    if (poOvrBand == nullptr)
    {
        // Nothing attempted: *pbTried is FALSE and CE_None is returned.
        *pbTried = FALSE;
        return CE_None;
    }

    // The request is delegated to the overview; its result is returned.
    *pbTried = TRUE;
    return poOvrBand->RasterIO(eRWFlag, nOvrXOff, nOvrYOff, nOvrXSize,
                               nOvrYSize, pData, nBufXSize, nBufYSize,
                               eBufType, nPixelSpace, nLineSpace,
                               &sOvrExtraArg);
}
3888
3889
/************************************************************************/
3890
/*                      TryOverviewRasterIO()                           */
3891
/************************************************************************/
3892
3893
CPLErr GDALDataset::TryOverviewRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg,
    int *pbTried)
{
    // Overview selection works on copies of the window and of the extra
    // arguments.
    int nXOffMod = nXOff;
    int nYOffMod = nYOff;
    int nXSizeMod = nXSize;
    int nYSizeMod = nYSize;
    GDALRasterIOExtraArg sExtraArg;
    GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);

    // The overview level is chosen from the first band of the dataset.
    GDALRasterBand *const poFirstBand = papoBands[0];
    const int iOvrLevel = GDALBandGetBestOverviewLevel2(
        poFirstBand, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize,
        nBufYSize, &sExtraArg);

    // Resolve the overview band and its owning dataset once each, instead of
    // repeating the GetOverview() / GetDataset() lookups for every use.
    GDALRasterBand *const poOverviewBand =
        iOvrLevel >= 0 ? poFirstBand->GetOverview(iOvrLevel) : nullptr;
    GDALDataset *const poOverviewDS =
        poOverviewBand != nullptr ? poOverviewBand->GetDataset() : nullptr;
    if (poOverviewDS == nullptr)
    {
        // Nothing attempted: *pbTried is FALSE and CE_None is returned.
        *pbTried = FALSE;
        return CE_None;
    }

    // The request is delegated to the overview dataset; its result is
    // returned.
    *pbTried = TRUE;
    return poOverviewDS->RasterIO(
        eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, nBufXSize,
        nBufYSize, eBufType, nBandCount, panBandMap, nPixelSpace, nLineSpace,
        nBandSpace, &sExtraArg);
}
3926
3927
/************************************************************************/
3928
/*                        GetBestOverviewLevel()                        */
3929
/*                                                                      */
3930
/* Returns the best overview level to satisfy the query or -1 if none   */
3931
/* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid     */
3932
/* overview level                                                       */
3933
/************************************************************************/
3934
3935
static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff,
3936
                                           int &nYOff, int &nXSize, int &nYSize,
3937
                                           int nBufXSize, int nBufYSize,
3938
                                           int nBandCount,
3939
                                           const int *panBandMap,
3940
                                           GDALRasterIOExtraArg *psExtraArg)
3941
0
{
3942
0
    int nOverviewCount = 0;
3943
0
    GDALRasterBand *poFirstBand = nullptr;
3944
3945
    /* -------------------------------------------------------------------- */
3946
    /* Check that all bands have the same number of overviews and           */
3947
    /* that they have all the same size and block dimensions                */
3948
    /* -------------------------------------------------------------------- */
3949
0
    for (int iBand = 0; iBand < nBandCount; iBand++)
3950
0
    {
3951
0
        GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]);
3952
0
        if (poBand == nullptr)
3953
0
            return -1;
3954
0
        if (iBand == 0)
3955
0
        {
3956
0
            poFirstBand = poBand;
3957
0
            nOverviewCount = poBand->GetOverviewCount();
3958
0
        }
3959
0
        else if (nOverviewCount != poBand->GetOverviewCount())
3960
0
        {
3961
0
            CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3962
0
                             "mismatched overview count, use std method.");
3963
0
            return -1;
3964
0
        }
3965
0
        else
3966
0
        {
3967
0
            for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3968
0
            {
3969
0
                GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview);
3970
0
                GDALRasterBand *poOvrFirstBand =
3971
0
                    poFirstBand->GetOverview(iOverview);
3972
0
                if (poOvrBand == nullptr || poOvrFirstBand == nullptr)
3973
0
                    continue;
3974
3975
0
                if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
3976
0
                    poOvrFirstBand->GetYSize() != poOvrBand->GetYSize())
3977
0
                {
3978
0
                    CPLDebug("GDAL",
3979
0
                             "GDALDataset::GetBestOverviewLevel() ... "
3980
0
                             "mismatched overview sizes, use std method.");
3981
0
                    return -1;
3982
0
                }
3983
0
                int nBlockXSizeFirst = 0;
3984
0
                int nBlockYSizeFirst = 0;
3985
0
                poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst,
3986
0
                                             &nBlockYSizeFirst);
3987
3988
0
                int nBlockXSizeCurrent = 0;
3989
0
                int nBlockYSizeCurrent = 0;
3990
0
                poOvrBand->GetBlockSize(&nBlockXSizeCurrent,
3991
0
                                        &nBlockYSizeCurrent);
3992
3993
0
                if (nBlockXSizeFirst != nBlockXSizeCurrent ||
3994
0
                    nBlockYSizeFirst != nBlockYSizeCurrent)
3995
0
                {
3996
0
                    CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3997
0
                                     "mismatched block sizes, use std method.");
3998
0
                    return -1;
3999
0
                }
4000
0
            }
4001
0
        }
4002
0
    }
4003
0
    if (poFirstBand == nullptr)
4004
0
        return -1;
4005
4006
0
    return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize,
4007
0
                                         nYSize, nBufXSize, nBufYSize,
4008
0
                                         psExtraArg);
4009
0
}
4010
4011
/************************************************************************/
4012
/*                         BlockBasedRasterIO()                         */
4013
/*                                                                      */
4014
/*      This convenience function implements a dataset level            */
4015
/*      RasterIO() interface based on calling down to fetch blocks,     */
4016
/*      much like the GDALRasterBand::IRasterIO(), but it handles       */
4017
/*      all bands at once, so that a format driver that handles a       */
4018
/*      request for different bands of the same block efficiently       */
4019
/*      (i.e. without re-reading interleaved data) is used efficiently. */
4020
/*                                                                      */
4021
/*      This method is intended to be called by an overridden           */
4022
/*      IRasterIO() method in the driver specific GDALDataset           */
4023
/*      derived class.                                                  */
4024
/*                                                                      */
4025
/*      Default internal implementation of RasterIO() ... utilizes      */
4026
/*      the Block access methods to satisfy the request.  This would    */
4027
/*      normally only be overridden by formats with overviews.          */
4028
/*                                                                      */
4029
/*      To keep things relatively simple, this method does not          */
4030
/*      currently take advantage of some special cases addressed in     */
4031
/*      GDALRasterBand::IRasterIO(), so it is likely best to only       */
4032
/*      call it when you know it will help.  That is in cases where     */
4033
/*      data is at 1:1 to the buffer, and you know the driver is        */
4034
/*      implementing interleaved IO efficiently on a block by block     */
4035
/*      basis. Overviews will be used when possible.                    */
4036
/************************************************************************/
4037
4038
// Block-oriented, all-bands-at-once implementation of dataset RasterIO().
//
// Strategy, in order:
//  - if the requested bands do not share a block size, or have different data
//    types and the request cannot be served by the chunked full resolution
//    path, defer to BandBasedRasterIO();
//  - for a 1:1 request on integer coordinates, walk the window block by block
//    and call each band's IRasterIO() for every chunk;
//  - for writes into a buffer smaller than the window, or for resampled
//    requests that do not use nearest neighbour, defer to
//    BandBasedRasterIO();
//  - otherwise copy pixel by pixel from/to locked blocks, possibly taken from
//    an overview level selected by GDALDatasetGetBestOverviewLevel().
//
// Returns CE_None on success, CE_Failure if a block cannot be obtained or the
// progress callback requests cancellation, or the error returned by the
// delegated call.
CPLErr GDALDataset::BlockBasedRasterIO(
    GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
    void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
    int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
    GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)

{
    CPLAssert(nullptr != pData);

    GByte **papabySrcBlock = nullptr;
    GDALRasterBlock *poBlock = nullptr;
    GDALRasterBlock **papoBlocks = nullptr;
    int nLBlockX = -1;
    int nLBlockY = -1;
    int iBufYOff;
    int iBufXOff;
    int nBlockXSize = 1;
    int nBlockYSize = 1;
    CPLErr eErr = CE_None;
    GDALDataType eDataType = GDT_Byte;

    const bool bUseIntegerRequestCoords =
        (!psExtraArg->bFloatingPointWindowValidity ||
         (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
          nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));

    // Whether the request is served by the chunked full resolution path,
    // which goes through each band's own IRasterIO() and therefore tolerates
    // bands of different data types.
    const bool bChunkedFullResPath = nXSize == nBufXSize &&
                                     nYSize == nBufYSize &&
                                     bUseIntegerRequestCoords;

    /* -------------------------------------------------------------------- */
    /*      Ensure that all bands share a common block size and data type.  */
    /* -------------------------------------------------------------------- */
    for (int iBand = 0; iBand < nBandCount; iBand++)
    {
        GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);

        if (iBand == 0)
        {
            poBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
            eDataType = poBand->GetRasterDataType();
        }
        else
        {
            int nThisBlockXSize = 0;
            int nThisBlockYSize = 0;
            poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize);
            if (nThisBlockXSize != nBlockXSize ||
                nThisBlockYSize != nBlockYSize)
            {
                CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
                                 "mismatched block sizes, use std method.");
                return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
                                         pData, nBufXSize, nBufYSize, eBufType,
                                         nBandCount, panBandMap, nPixelSpace,
                                         nLineSpace, nBandSpace, psExtraArg);
            }

            // The generic per-pixel loop below applies the data type of the
            // first band to the blocks of every band, so mixed data types
            // are only acceptable when the chunked path is going to be used.
            // This includes same-size requests with a non-integer window.
            if (eDataType != poBand->GetRasterDataType() &&
                !bChunkedFullResPath)
            {
                CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
                                 "mismatched band data types, use std method.");
                return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
                                         pData, nBufXSize, nBufYSize, eBufType,
                                         nBandCount, panBandMap, nPixelSpace,
                                         nLineSpace, nBandSpace, psExtraArg);
            }
        }
    }

    /* ==================================================================== */
    /*      In this special case at full resolution we step through in      */
    /*      blocks, turning the request over to the per-band                */
    /*      IRasterIO(), but ensuring that all bands of one block are       */
    /*      called before proceeding to the next.                           */
    /* ==================================================================== */

    if (bChunkedFullResPath)
    {
        GDALRasterIOExtraArg sDummyExtraArg;
        INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);

        int nChunkYSize = 0;
        int nChunkXSize = 0;

        for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize)
        {
            // Rows from the current position up to the end of the block row,
            // limited to the end of the requested window.
            const int nChunkYOff = iBufYOff + nYOff;
            nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
            if (nChunkYOff + nChunkYSize > nYOff + nYSize)
                nChunkYSize = (nYOff + nYSize) - nChunkYOff;

            for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize)
            {
                const int nChunkXOff = iBufXOff + nXOff;
                nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
                if (nChunkXOff + nChunkXSize > nXOff + nXSize)
                    nChunkXSize = (nXOff + nXSize) - nChunkXOff;

                GByte *pabyChunkData =
                    static_cast<GByte *>(pData) + iBufXOff * nPixelSpace +
                    static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;

                for (int iBand = 0; iBand < nBandCount; iBand++)
                {
                    GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);

                    eErr = poBand->IRasterIO(
                        eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize,
                        nChunkYSize,
                        pabyChunkData +
                            static_cast<GPtrDiff_t>(iBand) * nBandSpace,
                        nChunkXSize, nChunkYSize, eBufType, nPixelSpace,
                        nLineSpace, &sDummyExtraArg);
                    if (eErr != CE_None)
                        return eErr;
                }
            }

            // Progress is reported once per row of blocks; a FALSE return
            // from the callback aborts the operation.
            if (psExtraArg->pfnProgress != nullptr &&
                !psExtraArg->pfnProgress(
                    1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) /
                        nBufYSize,
                    "", psExtraArg->pProgressData))
            {
                return CE_Failure;
            }
        }

        return CE_None;
    }

    /* Below code is not compatible with that case. It would need a complete */
    /* separate code like done in GDALRasterBand::IRasterIO. */
    if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
    {
        return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
                                 nBufXSize, nBufYSize, eBufType, nBandCount,
                                 panBandMap, nPixelSpace, nLineSpace,
                                 nBandSpace, psExtraArg);
    }

    /* We could have a smarter implementation, but that will do for now */
    if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
        (nBufXSize != nXSize || nBufYSize != nYSize))
    {
        return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
                                 nBufXSize, nBufYSize, eBufType, nBandCount,
                                 panBandMap, nPixelSpace, nLineSpace,
                                 nBandSpace, psExtraArg);
    }

    /* ==================================================================== */
    /*      Loop reading required source blocks to satisfy output           */
    /*      request.  This is the most general implementation.              */
    /* ==================================================================== */

    const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);

    // Per-band pointer to the data of the currently locked block, and the
    // locked block itself (released in CleanupAndReturn).
    papabySrcBlock =
        static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount));
    papoBlocks =
        static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount));

    /* -------------------------------------------------------------------- */
    /*      Select an overview level if appropriate.                        */
    /* -------------------------------------------------------------------- */

    GDALRasterIOExtraArg sExtraArg;
    GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
    const int nOverviewLevel = GDALDatasetGetBestOverviewLevel(
        this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount,
        panBandMap, &sExtraArg);

    // Dimensions of the raster the blocks are actually fetched from: the
    // dataset itself, or the selected overview level.
    int nSrcRasterXSize = nRasterXSize;
    int nSrcRasterYSize = nRasterYSize;
    if (nOverviewLevel >= 0)
    {
        GDALRasterBand *poFirstOvrBand =
            GetRasterBand(panBandMap[0])->GetOverview(nOverviewLevel);
        poFirstOvrBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
        nSrcRasterXSize = poFirstOvrBand->GetXSize();
        nSrcRasterYSize = poFirstOvrBand->GetYSize();
    }

    double dfXOff = nXOff;
    double dfYOff = nYOff;
    double dfXSize = nXSize;
    double dfYSize = nYSize;
    if (sExtraArg.bFloatingPointWindowValidity)
    {
        dfXOff = sExtraArg.dfXOff;
        dfYOff = sExtraArg.dfYOff;
        dfXSize = sExtraArg.dfXSize;
        dfYSize = sExtraArg.dfYSize;
    }

    /* -------------------------------------------------------------------- */
    /*      Compute stepping increment.                                     */
    /* -------------------------------------------------------------------- */
    const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
    const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);

    constexpr double EPS = 1e-10;
    /* -------------------------------------------------------------------- */
    /*      Loop over buffer computing source locations.                    */
    /* -------------------------------------------------------------------- */
    for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
    {
        GPtrDiff_t iSrcOffset;

        // Add small epsilon to avoid some numeric precision issues.
        // The coordinate is clamped to the raster the blocks come from, so
        // that it cannot fall outside of the selected overview.
        const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
        const int iSrcY = static_cast<int>(std::min(
            std::max(0.0, dfSrcY), static_cast<double>(nSrcRasterYSize - 1)));

        GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
                                static_cast<GPtrDiff_t>(nLineSpace);

        for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++)
        {
            const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
            const int iSrcX = static_cast<int>(std::min(
                std::max(0.0, dfSrcX),
                static_cast<double>(nSrcRasterXSize - 1)));

            // FIXME: this code likely doesn't work if the dirty block gets
            // flushed to disk before being completely written. In the meantime,
            // bJustInitialize should probably be set to FALSE even if it is not
            // ideal performance wise, and for lossy compression

            /* ------------------------------------------------------------ */
            /*      Ensure we have the appropriate block loaded.            */
            /* ------------------------------------------------------------ */
            if (iSrcX < nLBlockX * nBlockXSize ||
                iSrcX - nBlockXSize >= nLBlockX * nBlockXSize ||
                iSrcY < nLBlockY * nBlockYSize ||
                iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
            {
                nLBlockX = iSrcX / nBlockXSize;
                nLBlockY = iSrcY / nBlockYSize;

                // When writing a block entirely contained in the window,
                // its previous content does not need to be loaded.
                const bool bJustInitialize =
                    eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
                    nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
                    nXOff <= nLBlockX * nBlockXSize &&
                    nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
                /*bool bMemZeroBuffer = FALSE;
                if( eRWFlag == GF_Write && !bJustInitialize &&
                    nXOff <= nLBlockX * nBlockXSize &&
                    nYOff <= nLBlockY * nBlockYSize &&
                    (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
                     (nXOff + nXSize == GetRasterXSize() &&
                     (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
                    (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
                     (nYOff + nYSize == GetRasterYSize() &&
                     (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
                {
                    bJustInitialize = TRUE;
                    bMemZeroBuffer = TRUE;
                }*/
                for (int iBand = 0; iBand < nBandCount; iBand++)
                {
                    GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
                    if (nOverviewLevel >= 0)
                    {
                        poBand = poBand->GetOverview(nOverviewLevel);
                        // The overview consistency check skips levels whose
                        // overview band is unavailable, so guard against a
                        // null band here.
                        if (poBand == nullptr)
                        {
                            CPLError(CE_Failure, CPLE_AppDefined,
                                     "GDALDataset::BlockBasedRasterIO(): "
                                     "cannot get overview %d of band %d",
                                     nOverviewLevel, panBandMap[iBand]);
                            eErr = CE_Failure;
                            goto CleanupAndReturn;
                        }
                    }
                    poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY,
                                                        bJustInitialize);
                    if (poBlock == nullptr)
                    {
                        eErr = CE_Failure;
                        goto CleanupAndReturn;
                    }

                    if (eRWFlag == GF_Write)
                        poBlock->MarkDirty();

                    // Release the block previously held for this band.
                    if (papoBlocks[iBand] != nullptr)
                        papoBlocks[iBand]->DropLock();

                    papoBlocks[iBand] = poBlock;

                    papabySrcBlock[iBand] =
                        static_cast<GByte *>(poBlock->GetDataRef());
                    /*if( bMemZeroBuffer )
                    {
                        memset(papabySrcBlock[iBand], 0,
                            static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
                    * nBlockYSize);
                    }*/
                }
            }

            /* ------------------------------------------------------------ */
            /*      Copy over this pixel of data.                           */
            /* ------------------------------------------------------------ */
            // Byte offset of the pixel inside its block.
            iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) -
                          static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
                          (static_cast<GPtrDiff_t>(iSrcY) -
                           static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
                              nBlockXSize) *
                         nBandDataSize;

            for (int iBand = 0; iBand < nBandCount; iBand++)
            {
                GByte *pabySrcBlock = papabySrcBlock[iBand];
                GPtrDiff_t iBandBufOffset =
                    iBufOffset + static_cast<GPtrDiff_t>(iBand) *
                                     static_cast<GPtrDiff_t>(nBandSpace);

                if (eDataType == eBufType)
                {
                    if (eRWFlag == GF_Read)
                        memcpy(static_cast<GByte *>(pData) + iBandBufOffset,
                               pabySrcBlock + iSrcOffset, nBandDataSize);
                    else
                        memcpy(pabySrcBlock + iSrcOffset,
                               static_cast<const GByte *>(pData) +
                                   iBandBufOffset,
                               nBandDataSize);
                }
                else
                {
                    /* type to type conversion ... ouch, this is expensive way
                       of handling single words */

                    if (eRWFlag == GF_Read)
                        GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
                                        static_cast<GByte *>(pData) +
                                            iBandBufOffset,
                                        eBufType, 0, 1);
                    else
                        GDALCopyWords64(static_cast<const GByte *>(pData) +
                                            iBandBufOffset,
                                        eBufType, 0, pabySrcBlock + iSrcOffset,
                                        eDataType, 0, 1);
                }
            }

            // Keep the full width of the spacing: narrowing it to int would
            // truncate pixel spacings that do not fit in 32 bits.
            iBufOffset += static_cast<GPtrDiff_t>(nPixelSpace);
        }
    }

    /* -------------------------------------------------------------------- */
    /*      CleanupAndReturn.                                               */
    /* -------------------------------------------------------------------- */
CleanupAndReturn:
    CPLFree(papabySrcBlock);
    if (papoBlocks != nullptr)
    {
        for (int iBand = 0; iBand < nBandCount; iBand++)
        {
            if (papoBlocks[iBand] != nullptr)
                papoBlocks[iBand]->DropLock();
        }
        CPLFree(papoBlocks);
    }

    return eErr;
}
4393
4394
//! @endcond
4395
4396
/************************************************************************/
4397
/*                  GDALCopyWholeRasterGetSwathSize()                   */
4398
/************************************************************************/
4399
4400
static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand,
4401
                                            GDALRasterBand *poDstPrototypeBand,
4402
                                            int nBandCount,
4403
                                            int bDstIsCompressed,
4404
                                            int bInterleave, int *pnSwathCols,
4405
                                            int *pnSwathLines)
4406
0
{
4407
0
    GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4408
0
    int nSrcBlockXSize = 0;
4409
0
    int nSrcBlockYSize = 0;
4410
0
    int nBlockXSize = 0;
4411
0
    int nBlockYSize = 0;
4412
4413
0
    int nXSize = poSrcPrototypeBand->GetXSize();
4414
0
    int nYSize = poSrcPrototypeBand->GetYSize();
4415
4416
0
    poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize);
4417
0
    poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4418
4419
0
    const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
4420
0
    const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
4421
4422
0
    int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4423
0
    if (bInterleave)
4424
0
        nPixelSize *= nBandCount;
4425
4426
    // aim for one row of blocks.  Do not settle for less.
4427
0
    int nSwathCols = nXSize;
4428
0
    int nSwathLines = nMaxBlockYSize;
4429
4430
0
    const char *pszSrcCompression =
4431
0
        poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4432
0
    if (pszSrcCompression == nullptr)
4433
0
    {
4434
0
        auto poSrcDS = poSrcPrototypeBand->GetDataset();
4435
0
        if (poSrcDS)
4436
0
            pszSrcCompression =
4437
0
                poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4438
0
    }
4439
4440
    /* -------------------------------------------------------------------- */
4441
    /*      What will our swath size be?                                    */
4442
    /* -------------------------------------------------------------------- */
4443
    // When writing interleaved data in a compressed format, we want to be sure
4444
    // that each block will only be written once, so the swath size must not be
4445
    // greater than the block cache.
4446
0
    const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
4447
0
    int nTargetSwathSize;
4448
0
    if (pszSwathSize != nullptr)
4449
0
        nTargetSwathSize = static_cast<int>(
4450
0
            std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
4451
0
    else
4452
0
    {
4453
        // As a default, take one 1/4 of the cache size.
4454
0
        nTargetSwathSize = static_cast<int>(
4455
0
            std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
4456
4457
        // but if the minimum ideal swath buf size is less, then go for it to
4458
        // avoid unnecessarily abusing RAM usage.
4459
        // but try to use 10 MB at least.
4460
0
        GIntBig nIdealSwathBufSize =
4461
0
            static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
4462
0
        int nMinTargetSwathSize = 10 * 1000 * 1000;
4463
4464
0
        if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() &
4465
0
             GSBAP_LARGEST_CHUNK_POSSIBLE) != 0)
4466
0
        {
4467
0
            nMinTargetSwathSize = nTargetSwathSize;
4468
0
        }
4469
4470
0
        if (nIdealSwathBufSize < nTargetSwathSize &&
4471
0
            nIdealSwathBufSize < nMinTargetSwathSize)
4472
0
        {
4473
0
            nIdealSwathBufSize = nMinTargetSwathSize;
4474
0
        }
4475
4476
0
        if (pszSrcCompression != nullptr &&
4477
0
            EQUAL(pszSrcCompression, "JPEG2000") &&
4478
0
            (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
4479
0
                                   (nSrcBlockYSize % nBlockYSize) == 0)))
4480
0
        {
4481
0
            nIdealSwathBufSize =
4482
0
                std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) *
4483
0
                                                 nSrcBlockYSize * nPixelSize);
4484
0
        }
4485
0
        if (nTargetSwathSize > nIdealSwathBufSize)
4486
0
            nTargetSwathSize = static_cast<int>(
4487
0
                std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
4488
0
    }
4489
4490
0
    if (nTargetSwathSize < 1000000)
4491
0
        nTargetSwathSize = 1000000;
4492
4493
    /* But let's check that  */
4494
0
    if (bDstIsCompressed && bInterleave &&
4495
0
        nTargetSwathSize > GDALGetCacheMax64())
4496
0
    {
4497
0
        CPLError(CE_Warning, CPLE_AppDefined,
4498
0
                 "When translating into a compressed interleave format, "
4499
0
                 "the block cache size (" CPL_FRMT_GIB ") "
4500
0
                 "should be at least the size of the swath (%d) "
4501
0
                 "(GDAL_SWATH_SIZE config. option)",
4502
0
                 GDALGetCacheMax64(), nTargetSwathSize);
4503
0
    }
4504
4505
0
#define IS_DIVIDER_OF(x, y) ((y) % (x) == 0)
4506
0
#define ROUND_TO(x, y) (((x) / (y)) * (y))
4507
4508
    // If both input and output datasets are tiled, and the tile dimensions
4509
    // are "compatible", try to stick  to a swath dimension that is a multiple
4510
    // of input and output block dimensions.
4511
0
    if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
4512
0
        IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
4513
0
        IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
4514
0
        IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4515
0
        IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4516
0
    {
4517
0
        if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize *
4518
0
                nPixelSize <=
4519
0
            static_cast<GIntBig>(nTargetSwathSize))
4520
0
        {
4521
0
            nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
4522
0
            nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
4523
0
            if (nSwathCols == 0)
4524
0
                nSwathCols = nMaxBlockXSize;
4525
0
            if (nSwathCols > nXSize)
4526
0
                nSwathCols = nXSize;
4527
0
            nSwathLines = nMaxBlockYSize;
4528
4529
0
            if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
4530
0
                static_cast<GIntBig>(nTargetSwathSize))
4531
0
            {
4532
0
                nSwathCols = nXSize;
4533
0
                nSwathLines = nBlockYSize;
4534
0
            }
4535
0
        }
4536
0
    }
4537
4538
0
    const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
4539
0
    const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
4540
0
    if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize))
4541
0
    {
4542
0
        nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
4543
0
        if (nSwathLines == 0)
4544
0
            nSwathLines = 1;
4545
4546
0
        CPLDebug(
4547
0
            "GDAL",
4548
0
            "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4549
0
            "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4550
0
            "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4551
0
            nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize);
4552
0
    }
4553
    // If we are processing single scans, try to handle several at once.
4554
    // If we are handling swaths already, only grow the swath if a row
4555
    // of blocks is substantially less than our target buffer size.
4556
0
    else if (nSwathLines == 1 ||
4557
0
             nMemoryPerCol * nSwathLines <
4558
0
                 static_cast<GIntBig>(nTargetSwathSize) / 10)
4559
0
    {
4560
0
        nSwathLines = std::min(
4561
0
            nYSize,
4562
0
            std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
4563
4564
        /* If possible try to align to source and target block height */
4565
0
        if ((nSwathLines % nMaxBlockYSize) != 0 &&
4566
0
            nSwathLines > nMaxBlockYSize &&
4567
0
            IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4568
0
            IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4569
0
            nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
4570
0
    }
4571
4572
0
    if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4573
0
        (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
4574
0
                               IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))))
4575
0
    {
4576
        // Typical use case: converting from Pleiades that is 2048x2048 tiled.
4577
0
        if (nSwathLines < nSrcBlockYSize)
4578
0
        {
4579
0
            nSwathLines = nSrcBlockYSize;
4580
4581
            // Number of pixels that can be read/write simultaneously.
4582
0
            nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
4583
0
            nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
4584
0
            if (nSwathCols == 0)
4585
0
                nSwathCols = nSrcBlockXSize;
4586
0
            if (nSwathCols > nXSize)
4587
0
                nSwathCols = nXSize;
4588
4589
0
            CPLDebug(
4590
0
                "GDAL",
4591
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4592
0
                "too high block, "
4593
0
                "use partial width at one time");
4594
0
        }
4595
0
        else if ((nSwathLines % nSrcBlockYSize) != 0)
4596
0
        {
4597
            /* Round on a multiple of nSrcBlockYSize */
4598
0
            nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
4599
0
            CPLDebug(
4600
0
                "GDAL",
4601
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4602
0
                "round nSwathLines to block height : %d",
4603
0
                nSwathLines);
4604
0
        }
4605
0
    }
4606
0
    else if (bDstIsCompressed)
4607
0
    {
4608
0
        if (nSwathLines < nBlockYSize)
4609
0
        {
4610
0
            nSwathLines = nBlockYSize;
4611
4612
            // Number of pixels that can be read/write simultaneously.
4613
0
            nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
4614
0
            nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
4615
0
            if (nSwathCols == 0)
4616
0
                nSwathCols = nBlockXSize;
4617
0
            if (nSwathCols > nXSize)
4618
0
                nSwathCols = nXSize;
4619
4620
0
            CPLDebug(
4621
0
                "GDAL",
4622
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4623
0
                "too high block, "
4624
0
                "use partial width at one time");
4625
0
        }
4626
0
        else if ((nSwathLines % nBlockYSize) != 0)
4627
0
        {
4628
            // Round on a multiple of nBlockYSize.
4629
0
            nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
4630
0
            CPLDebug(
4631
0
                "GDAL",
4632
0
                "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4633
0
                "round nSwathLines to block height : %d",
4634
0
                nSwathLines);
4635
0
        }
4636
0
    }
4637
4638
0
    *pnSwathCols = nSwathCols;
4639
0
    *pnSwathLines = nSwathLines;
4640
0
}
4641
4642
/************************************************************************/
4643
/*                     GDALDatasetCopyWholeRaster()                     */
4644
/************************************************************************/
4645
4646
/**
4647
 * \brief Copy all dataset raster data.
4648
 *
4649
 * This function copies the complete raster contents of one dataset to
4650
 * another similarly configured dataset.  The source and destination
4651
 * dataset must have the same number of bands, and the same width
4652
 * and height.  The bands do not have to have the same data type.
4653
 *
4654
 * This function is primarily intended to support implementation of
4655
 * driver specific CreateCopy() functions.  It implements efficient copying,
4656
 * in particular "chunking" the copy in substantial blocks and, if appropriate,
4657
 * performing the transfer in a pixel interleaved fashion.
4658
 *
4659
 * Currently the only papszOptions value supported are :
4660
 * <ul>
4661
 * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and
4662
 * write access pattern (this does not modify the layout of the destination
4663
 * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block
4664
 * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks
4665
 * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY
4666
 * (GDAL &gt;= 2.2)</li>
4667
 * </ul>
4668
 * More options may be supported in the future.
4669
 *
4670
 * @param hSrcDS the source dataset
4671
 * @param hDstDS the destination dataset
4672
 * @param papszOptions transfer hints in "StringList" Name=Value format.
4673
 * @param pfnProgress progress reporting function.
4674
 * @param pProgressData callback data for progress function.
4675
 *
4676
 * @return CE_None on success, or CE_Failure on failure.
4677
 */
4678
4679
CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,
4680
                                              GDALDatasetH hDstDS,
4681
                                              CSLConstList papszOptions,
4682
                                              GDALProgressFunc pfnProgress,
4683
                                              void *pProgressData)
4684
4685
0
{
4686
0
    VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4687
0
    VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4688
4689
0
    GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
4690
0
    GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
4691
4692
0
    if (pfnProgress == nullptr)
4693
0
        pfnProgress = GDALDummyProgress;
4694
4695
    /* -------------------------------------------------------------------- */
4696
    /*      Confirm the datasets match in size and band counts.             */
4697
    /* -------------------------------------------------------------------- */
4698
0
    const int nXSize = poDstDS->GetRasterXSize();
4699
0
    const int nYSize = poDstDS->GetRasterYSize();
4700
0
    const int nBandCount = poDstDS->GetRasterCount();
4701
4702
0
    if (poSrcDS->GetRasterXSize() != nXSize ||
4703
0
        poSrcDS->GetRasterYSize() != nYSize ||
4704
0
        poSrcDS->GetRasterCount() != nBandCount)
4705
0
    {
4706
0
        CPLError(CE_Failure, CPLE_AppDefined,
4707
0
                 "Input and output dataset sizes or band counts do not\n"
4708
0
                 "match in GDALDatasetCopyWholeRaster()");
4709
0
        return CE_Failure;
4710
0
    }
4711
4712
    /* -------------------------------------------------------------------- */
4713
    /*      Report preliminary (0) progress.                                */
4714
    /* -------------------------------------------------------------------- */
4715
0
    if (!pfnProgress(0.0, nullptr, pProgressData))
4716
0
    {
4717
0
        CPLError(CE_Failure, CPLE_UserInterrupt,
4718
0
                 "User terminated CreateCopy()");
4719
0
        return CE_Failure;
4720
0
    }
4721
4722
    /* -------------------------------------------------------------------- */
4723
    /*      Get our prototype band, and assume the others are similarly     */
4724
    /*      configured.                                                     */
4725
    /* -------------------------------------------------------------------- */
4726
0
    if (nBandCount == 0)
4727
0
        return CE_None;
4728
4729
0
    GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
4730
0
    GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
4731
0
    GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4732
4733
    /* -------------------------------------------------------------------- */
4734
    /*      Do we want to try and do the operation in a pixel               */
4735
    /*      interleaved fashion?                                            */
4736
    /* -------------------------------------------------------------------- */
4737
0
    bool bInterleave = false;
4738
0
    const char *pszInterleave =
4739
0
        poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4740
0
    if (pszInterleave != nullptr &&
4741
0
        (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4742
0
        bInterleave = true;
4743
4744
0
    pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4745
0
    if (pszInterleave != nullptr &&
4746
0
        (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4747
0
        bInterleave = true;
4748
4749
0
    pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE");
4750
0
    if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL"))
4751
0
        bInterleave = true;
4752
0
    else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND"))
4753
0
        bInterleave = false;
4754
    // attributes is specific to the TileDB driver
4755
0
    else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES"))
4756
0
        bInterleave = true;
4757
0
    else if (pszInterleave != nullptr)
4758
0
    {
4759
0
        CPLError(CE_Warning, CPLE_NotSupported,
4760
0
                 "Unsupported value for option INTERLEAVE");
4761
0
    }
4762
4763
    // If the destination is compressed, we must try to write blocks just once,
4764
    // to save disk space (GTiff case for example), and to avoid data loss
4765
    // (JPEG compression for example).
4766
0
    bool bDstIsCompressed = false;
4767
0
    const char *pszDstCompressed =
4768
0
        CSLFetchNameValue(papszOptions, "COMPRESSED");
4769
0
    if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
4770
0
        bDstIsCompressed = true;
4771
4772
    /* -------------------------------------------------------------------- */
4773
    /*      What will our swath size be?                                    */
4774
    /* -------------------------------------------------------------------- */
4775
4776
0
    int nSwathCols = 0;
4777
0
    int nSwathLines = 0;
4778
0
    GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand,
4779
0
                                    nBandCount, bDstIsCompressed, bInterleave,
4780
0
                                    &nSwathCols, &nSwathLines);
4781
4782
0
    int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4783
0
    if (bInterleave)
4784
0
        nPixelSize *= nBandCount;
4785
4786
0
    void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
4787
0
    if (pSwathBuf == nullptr)
4788
0
    {
4789
0
        return CE_Failure;
4790
0
    }
4791
4792
0
    CPLDebug("GDAL",
4793
0
             "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
4794
0
             nSwathCols, nSwathLines, static_cast<int>(bInterleave));
4795
4796
    // Advise the source raster that we are going to read it completely
4797
    // Note: this might already have been done by GDALCreateCopy() in the
4798
    // likely case this function is indirectly called by it
4799
0
    poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount,
4800
0
                        nullptr, nullptr);
4801
4802
    /* ==================================================================== */
4803
    /*      Band oriented (uninterleaved) case.                             */
4804
    /* ==================================================================== */
4805
0
    CPLErr eErr = CE_None;
4806
0
    const bool bCheckHoles =
4807
0
        CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
4808
4809
0
    if (!bInterleave)
4810
0
    {
4811
0
        GDALRasterIOExtraArg sExtraArg;
4812
0
        INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4813
0
        CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress);  // to make cppcheck happy
4814
4815
0
        const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) *
4816
0
                                     DIV_ROUND_UP(nYSize, nSwathLines) *
4817
0
                                     DIV_ROUND_UP(nXSize, nSwathCols);
4818
0
        GIntBig nBlocksDone = 0;
4819
4820
0
        for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++)
4821
0
        {
4822
0
            int nBand = iBand + 1;
4823
4824
0
            for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4825
0
            {
4826
0
                int nThisLines = nSwathLines;
4827
4828
0
                if (iY + nThisLines > nYSize)
4829
0
                    nThisLines = nYSize - iY;
4830
4831
0
                for (int iX = 0; iX < nXSize && eErr == CE_None;
4832
0
                     iX += nSwathCols)
4833
0
                {
4834
0
                    int nThisCols = nSwathCols;
4835
4836
0
                    if (iX + nThisCols > nXSize)
4837
0
                        nThisCols = nXSize - iX;
4838
4839
0
                    int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4840
0
                    if (bCheckHoles)
4841
0
                    {
4842
0
                        nStatus = poSrcDS->GetRasterBand(nBand)
4843
0
                                      ->GetDataCoverageStatus(
4844
0
                                          iX, iY, nThisCols, nThisLines,
4845
0
                                          GDAL_DATA_COVERAGE_STATUS_DATA);
4846
0
                    }
4847
0
                    if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4848
0
                    {
4849
0
                        sExtraArg.pfnProgress = GDALScaledProgress;
4850
0
                        sExtraArg.pProgressData = GDALCreateScaledProgress(
4851
0
                            nBlocksDone / static_cast<double>(nTotalBlocks),
4852
0
                            (nBlocksDone + 0.5) /
4853
0
                                static_cast<double>(nTotalBlocks),
4854
0
                            pfnProgress, pProgressData);
4855
0
                        if (sExtraArg.pProgressData == nullptr)
4856
0
                            sExtraArg.pfnProgress = nullptr;
4857
4858
0
                        eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4859
0
                                                 nThisLines, pSwathBuf,
4860
0
                                                 nThisCols, nThisLines, eDT, 1,
4861
0
                                                 &nBand, 0, 0, 0, &sExtraArg);
4862
4863
0
                        GDALDestroyScaledProgress(sExtraArg.pProgressData);
4864
4865
0
                        if (eErr == CE_None)
4866
0
                            eErr = poDstDS->RasterIO(
4867
0
                                GF_Write, iX, iY, nThisCols, nThisLines,
4868
0
                                pSwathBuf, nThisCols, nThisLines, eDT, 1,
4869
0
                                &nBand, 0, 0, 0, nullptr);
4870
0
                    }
4871
4872
0
                    nBlocksDone++;
4873
0
                    if (eErr == CE_None &&
4874
0
                        !pfnProgress(nBlocksDone /
4875
0
                                         static_cast<double>(nTotalBlocks),
4876
0
                                     nullptr, pProgressData))
4877
0
                    {
4878
0
                        eErr = CE_Failure;
4879
0
                        CPLError(CE_Failure, CPLE_UserInterrupt,
4880
0
                                 "User terminated CreateCopy()");
4881
0
                    }
4882
0
                }
4883
0
            }
4884
0
        }
4885
0
    }
4886
4887
    /* ==================================================================== */
4888
    /*      Pixel interleaved case.                                         */
4889
    /* ==================================================================== */
4890
0
    else /* if( bInterleave ) */
4891
0
    {
4892
0
        GDALRasterIOExtraArg sExtraArg;
4893
0
        INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4894
0
        CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress);  // to make cppcheck happy
4895
4896
0
        const GIntBig nTotalBlocks =
4897
0
            static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
4898
0
            DIV_ROUND_UP(nXSize, nSwathCols);
4899
0
        GIntBig nBlocksDone = 0;
4900
4901
0
        for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4902
0
        {
4903
0
            int nThisLines = nSwathLines;
4904
4905
0
            if (iY + nThisLines > nYSize)
4906
0
                nThisLines = nYSize - iY;
4907
4908
0
            for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
4909
0
            {
4910
0
                int nThisCols = nSwathCols;
4911
4912
0
                if (iX + nThisCols > nXSize)
4913
0
                    nThisCols = nXSize - iX;
4914
4915
0
                int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4916
0
                if (bCheckHoles)
4917
0
                {
4918
0
                    nStatus = 0;
4919
0
                    for (int iBand = 0; iBand < nBandCount; iBand++)
4920
0
                    {
4921
0
                        nStatus |= poSrcDS->GetRasterBand(iBand + 1)
4922
0
                                       ->GetDataCoverageStatus(
4923
0
                                           iX, iY, nThisCols, nThisLines,
4924
0
                                           GDAL_DATA_COVERAGE_STATUS_DATA);
4925
0
                        if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4926
0
                            break;
4927
0
                    }
4928
0
                }
4929
0
                if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4930
0
                {
4931
0
                    sExtraArg.pfnProgress = GDALScaledProgress;
4932
0
                    sExtraArg.pProgressData = GDALCreateScaledProgress(
4933
0
                        nBlocksDone / static_cast<double>(nTotalBlocks),
4934
0
                        (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
4935
0
                        pfnProgress, pProgressData);
4936
0
                    if (sExtraArg.pProgressData == nullptr)
4937
0
                        sExtraArg.pfnProgress = nullptr;
4938
4939
0
                    eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4940
0
                                             nThisLines, pSwathBuf, nThisCols,
4941
0
                                             nThisLines, eDT, nBandCount,
4942
0
                                             nullptr, 0, 0, 0, &sExtraArg);
4943
4944
0
                    GDALDestroyScaledProgress(sExtraArg.pProgressData);
4945
4946
0
                    if (eErr == CE_None)
4947
0
                        eErr = poDstDS->RasterIO(
4948
0
                            GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf,
4949
0
                            nThisCols, nThisLines, eDT, nBandCount, nullptr, 0,
4950
0
                            0, 0, nullptr);
4951
0
                }
4952
4953
0
                nBlocksDone++;
4954
0
                if (eErr == CE_None &&
4955
0
                    !pfnProgress(nBlocksDone /
4956
0
                                     static_cast<double>(nTotalBlocks),
4957
0
                                 nullptr, pProgressData))
4958
0
                {
4959
0
                    eErr = CE_Failure;
4960
0
                    CPLError(CE_Failure, CPLE_UserInterrupt,
4961
0
                             "User terminated CreateCopy()");
4962
0
                }
4963
0
            }
4964
0
        }
4965
0
    }
4966
4967
    /* -------------------------------------------------------------------- */
4968
    /*      Cleanup                                                         */
4969
    /* -------------------------------------------------------------------- */
4970
0
    CPLFree(pSwathBuf);
4971
4972
0
    return eErr;
4973
0
}
4974
4975
/************************************************************************/
4976
/*                     GDALRasterBandCopyWholeRaster()                  */
4977
/************************************************************************/
4978
4979
/**
4980
 * \brief Copy a whole raster band
4981
 *
4982
 * This function copies the complete raster contents of one band to
4983
 * another similarly configured band.  The source and destination
4984
 * bands must have the same width and height.  The bands do not have
4985
 * to have the same data type.
4986
 *
4987
 * It implements efficient copying, in particular "chunking" the copy in
4988
 * substantial blocks.
4989
 *
4990
 * Currently the only papszOptions value supported are :
4991
 * <ul>
4992
 * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4993
 * achieve best compression.</li>
4994
 * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4995
 * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL &gt;= 2.2)</li>
4996
 * </ul>
4997
 *
4998
 * @param hSrcBand the source band
4999
 * @param hDstBand the destination band
5000
 * @param papszOptions transfer hints in "StringList" Name=Value format.
5001
 * @param pfnProgress progress reporting function.
5002
 * @param pProgressData callback data for progress function.
5003
 *
5004
 * @return CE_None on success, or CE_Failure on failure.
5005
 */
5006
5007
CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
5008
    GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
5009
    const char *const *const papszOptions, GDALProgressFunc pfnProgress,
5010
    void *pProgressData)
5011
5012
0
{
5013
0
    VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5014
0
    VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5015
5016
0
    GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
5017
0
    GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand);
5018
0
    CPLErr eErr = CE_None;
5019
5020
0
    if (pfnProgress == nullptr)
5021
0
        pfnProgress = GDALDummyProgress;
5022
5023
    /* -------------------------------------------------------------------- */
5024
    /*      Confirm the datasets match in size and band counts.             */
5025
    /* -------------------------------------------------------------------- */
5026
0
    int nXSize = poSrcBand->GetXSize();
5027
0
    int nYSize = poSrcBand->GetYSize();
5028
5029
0
    if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize)
5030
0
    {
5031
0
        CPLError(CE_Failure, CPLE_AppDefined,
5032
0
                 "Input and output band sizes do not\n"
5033
0
                 "match in GDALRasterBandCopyWholeRaster()");
5034
0
        return CE_Failure;
5035
0
    }
5036
5037
    /* -------------------------------------------------------------------- */
5038
    /*      Report preliminary (0) progress.                                */
5039
    /* -------------------------------------------------------------------- */
5040
0
    if (!pfnProgress(0.0, nullptr, pProgressData))
5041
0
    {
5042
0
        CPLError(CE_Failure, CPLE_UserInterrupt,
5043
0
                 "User terminated CreateCopy()");
5044
0
        return CE_Failure;
5045
0
    }
5046
5047
0
    GDALDataType eDT = poDstBand->GetRasterDataType();
5048
5049
    // If the destination is compressed, we must try to write blocks just once,
5050
    // to save disk space (GTiff case for example), and to avoid data loss
5051
    // (JPEG compression for example).
5052
0
    bool bDstIsCompressed = false;
5053
0
    const char *pszDstCompressed =
5054
0
        CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED");
5055
0
    if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5056
0
        bDstIsCompressed = true;
5057
5058
    /* -------------------------------------------------------------------- */
5059
    /*      What will our swath size be?                                    */
5060
    /* -------------------------------------------------------------------- */
5061
5062
0
    int nSwathCols = 0;
5063
0
    int nSwathLines = 0;
5064
0
    GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed,
5065
0
                                    FALSE, &nSwathCols, &nSwathLines);
5066
5067
0
    const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5068
5069
0
    void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
5070
0
    if (pSwathBuf == nullptr)
5071
0
    {
5072
0
        return CE_Failure;
5073
0
    }
5074
5075
0
    CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
5076
0
             nSwathCols, nSwathLines);
5077
5078
0
    const bool bCheckHoles =
5079
0
        CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
5080
5081
    // Advise the source raster that we are going to read it completely
5082
0
    poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr);
5083
5084
    /* ==================================================================== */
5085
    /*      Band oriented (uninterleaved) case.                             */
5086
    /* ==================================================================== */
5087
5088
0
    for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5089
0
    {
5090
0
        int nThisLines = nSwathLines;
5091
5092
0
        if (iY + nThisLines > nYSize)
5093
0
            nThisLines = nYSize - iY;
5094
5095
0
        for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
5096
0
        {
5097
0
            int nThisCols = nSwathCols;
5098
5099
0
            if (iX + nThisCols > nXSize)
5100
0
                nThisCols = nXSize - iX;
5101
5102
0
            int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5103
0
            if (bCheckHoles)
5104
0
            {
5105
0
                nStatus = poSrcBand->GetDataCoverageStatus(
5106
0
                    iX, iY, nThisCols, nThisLines,
5107
0
                    GDAL_DATA_COVERAGE_STATUS_DATA);
5108
0
            }
5109
0
            if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5110
0
            {
5111
0
                eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols,
5112
0
                                           nThisLines, pSwathBuf, nThisCols,
5113
0
                                           nThisLines, eDT, 0, 0, nullptr);
5114
5115
0
                if (eErr == CE_None)
5116
0
                    eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols,
5117
0
                                               nThisLines, pSwathBuf, nThisCols,
5118
0
                                               nThisLines, eDT, 0, 0, nullptr);
5119
0
            }
5120
5121
0
            if (eErr == CE_None &&
5122
0
                !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize),
5123
0
                             nullptr, pProgressData))
5124
0
            {
5125
0
                eErr = CE_Failure;
5126
0
                CPLError(CE_Failure, CPLE_UserInterrupt,
5127
0
                         "User terminated CreateCopy()");
5128
0
            }
5129
0
        }
5130
0
    }
5131
5132
    /* -------------------------------------------------------------------- */
5133
    /*      Cleanup                                                         */
5134
    /* -------------------------------------------------------------------- */
5135
0
    CPLFree(pSwathBuf);
5136
5137
0
    return eErr;
5138
0
}
5139
5140
/************************************************************************/
5141
/*                      GDALCopyRasterIOExtraArg ()                     */
5142
/************************************************************************/
5143
5144
void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg,
5145
                              GDALRasterIOExtraArg *psSrcArg)
5146
0
{
5147
0
    INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5148
0
    if (psSrcArg)
5149
0
    {
5150
0
        psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5151
0
        psDestArg->pfnProgress = psSrcArg->pfnProgress;
5152
0
        psDestArg->pProgressData = psSrcArg->pProgressData;
5153
0
        psDestArg->bFloatingPointWindowValidity =
5154
0
            psSrcArg->bFloatingPointWindowValidity;
5155
0
        if (psSrcArg->bFloatingPointWindowValidity)
5156
0
        {
5157
0
            psDestArg->dfXOff = psSrcArg->dfXOff;
5158
0
            psDestArg->dfYOff = psSrcArg->dfYOff;
5159
0
            psDestArg->dfXSize = psSrcArg->dfXSize;
5160
0
            psDestArg->dfYSize = psSrcArg->dfYSize;
5161
0
        }
5162
0
        if (psSrcArg->nVersion >= 2)
5163
0
        {
5164
0
            psDestArg->bUseOnlyThisScale = psSrcArg->bUseOnlyThisScale;
5165
0
        }
5166
0
    }
5167
0
}
5168
5169
/************************************************************************/
5170
/*                         HasOnlyNoData()                              */
5171
/************************************************************************/
5172
5173
/** Generic nodata comparison: plain equality of the two values. */
template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
{
    const bool bMatches = (value == noDataValue);
    return bMatches;
}
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned char>(unsigned char, unsigned char)
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned short>(unsigned short, unsigned short)
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned int>(unsigned int, unsigned int)
Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned long>(unsigned long, unsigned long)
5177
5178
/** Half-precision nodata comparison: a NaN nodata value matches any NaN
 *  sample; otherwise plain equality is used. */
template <> bool IsEqualToNoData<GFloat16>(GFloat16 value, GFloat16 noDataValue)
{
    // Unqualified call so that an isnan() overload for GFloat16 can be
    // picked up in addition to the std:: ones.
    using std::isnan;
    if (isnan(noDataValue))
        return isnan(value);
    return value == noDataValue;
}
5183
5184
/** Single-precision nodata comparison: a NaN nodata value matches any NaN
 *  sample; otherwise plain equality is used. */
template <> bool IsEqualToNoData<float>(float value, float noDataValue)
{
    if (std::isnan(noDataValue))
        return std::isnan(value);
    return value == noDataValue;
}
5188
5189
/** Double-precision nodata comparison: a NaN nodata value matches any NaN
 *  sample; otherwise plain equality is used. */
template <> bool IsEqualToNoData<double>(double value, double noDataValue)
{
    if (std::isnan(noDataValue))
        return std::isnan(value);
    return value == noDataValue;
}
5193
5194
/** Tell whether every sample of a window of a pixel-interleaved buffer is
 *  equal to the nodata value, as decided by IsEqualToNoData().
 *
 * Sample (iX, iY, iBand) is read at
 * pBuffer[(iY * nLineStride + iX) * nComponents + iBand].
 *
 * @param pBuffer      First sample of the window.
 * @param noDataValue  Value every sample is compared against.
 * @param nWidth       Number of pixels per line to examine.
 * @param nHeight      Number of lines to examine.
 * @param nLineStride  Distance between the starts of two consecutive lines,
 *                     in pixels.
 * @param nComponents  Number of interleaved samples per pixel.
 * @return true if no examined sample differs from the nodata value. An empty
 *         window (nWidth == 0 or nHeight == 0) yields true.
 */
template <class T>
static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
                           size_t nHeight, size_t nLineStride,
                           size_t nComponents)
{
    // An empty window contains no sample that could differ from nodata.
    // Returning here also protects the corner probes below: with unsigned
    // arithmetic, nWidth - 1 or nHeight - 1 would wrap around and index far
    // outside the buffer.
    if (nWidth == 0 || nHeight == 0)
        return true;

    // Fast test: check the 4 corners and the middle pixel, which rejects
    // most non-empty blocks without scanning them entirely.
    const size_t nLastCol = nWidth - 1;
    const size_t nLastRow = nHeight - 1;
    const size_t anProbeOffsets[] = {
        0,                                                          // top-left
        nLastCol * nComponents,                                     // top-right
        (nLastRow / 2 * nLineStride + nLastCol / 2) * nComponents,  // middle
        nLastRow * nLineStride * nComponents,                       // bottom-left
        (nLastRow * nLineStride + nLastCol) * nComponents};         // bottom-right
    for (size_t iBand = 0; iBand < nComponents; iBand++)
    {
        for (const size_t nOffset : anProbeOffsets)
        {
            if (!IsEqualToNoData(pBuffer[nOffset + iBand], noDataValue))
                return false;
        }
    }

    // Test all pixels.
    const size_t nSamplesPerLine = nWidth * nComponents;
    for (size_t iY = 0; iY < nHeight; iY++)
    {
        const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
        for (size_t iX = 0; iX < nSamplesPerLine; iX++)
        {
            if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
                return false;
        }
    }
    return true;
}
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned char>(unsigned char const*, unsigned char, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned short>(unsigned short const*, unsigned short, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned int>(unsigned int const*, unsigned int, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned long>(unsigned long const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<cpl::Float16>(cpl::Float16 const*, cpl::Float16, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<float>(float const*, float, unsigned long, unsigned long, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<double>(double const*, double, unsigned long, unsigned long, unsigned long, unsigned long)
5237
5238
/************************************************************************/
5239
/*                    GDALBufferHasOnlyNoData()                         */
5240
/************************************************************************/
5241
5242
bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue,
5243
                             size_t nWidth, size_t nHeight, size_t nLineStride,
5244
                             size_t nComponents, int nBitsPerSample,
5245
                             GDALBufferSampleFormat nSampleFormat)
5246
0
{
5247
    // In the case where the nodata is 0, we can compare several bytes at
5248
    // once. Select the largest natural integer type for the architecture.
5249
0
#if SIZEOF_VOIDP >= 8 || defined(__x86_64__)
5250
    // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4
5251
0
    typedef std::uint64_t WordType;
5252
#else
5253
    typedef std::uint32_t WordType;
5254
#endif
5255
0
    if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
5256
        // Do not use this optimized code path for floating point numbers,
5257
        // as it can't detect negative zero.
5258
0
        nSampleFormat != GSF_FLOATING_POINT)
5259
0
    {
5260
0
        const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
5261
0
        const size_t nSize =
5262
0
            (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8;
5263
0
        size_t i = 0;
5264
0
        const size_t nInitialIters =
5265
0
            std::min(sizeof(WordType) -
5266
0
                         static_cast<size_t>(
5267
0
                             reinterpret_cast<std::uintptr_t>(pabyBuffer) %
5268
0
                             sizeof(WordType)),
5269
0
                     nSize);
5270
0
        for (; i < nInitialIters; i++)
5271
0
        {
5272
0
            if (pabyBuffer[i])
5273
0
                return false;
5274
0
        }
5275
0
        for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType))
5276
0
        {
5277
0
            if (*(reinterpret_cast<const WordType *>(pabyBuffer + i)))
5278
0
                return false;
5279
0
        }
5280
0
        for (; i < nSize; i++)
5281
0
        {
5282
0
            if (pabyBuffer[i])
5283
0
                return false;
5284
0
        }
5285
0
        return true;
5286
0
    }
5287
5288
0
    if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT)
5289
0
    {
5290
0
        return GDALIsValueInRange<uint8_t>(dfNoDataValue) &&
5291
0
               HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer),
5292
0
                              static_cast<uint8_t>(dfNoDataValue), nWidth,
5293
0
                              nHeight, nLineStride, nComponents);
5294
0
    }
5295
0
    if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT)
5296
0
    {
5297
        // Use unsigned implementation by converting the nodatavalue to
5298
        // unsigned
5299
0
        return GDALIsValueInRange<int8_t>(dfNoDataValue) &&
5300
0
               HasOnlyNoDataT(
5301
0
                   static_cast<const uint8_t *>(pBuffer),
5302
0
                   static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)),
5303
0
                   nWidth, nHeight, nLineStride, nComponents);
5304
0
    }
5305
0
    if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT)
5306
0
    {
5307
0
        return GDALIsValueInRange<uint16_t>(dfNoDataValue) &&
5308
0
               HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer),
5309
0
                              static_cast<uint16_t>(dfNoDataValue), nWidth,
5310
0
                              nHeight, nLineStride, nComponents);
5311
0
    }
5312
0
    if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT)
5313
0
    {
5314
        // Use unsigned implementation by converting the nodatavalue to
5315
        // unsigned
5316
0
        return GDALIsValueInRange<int16_t>(dfNoDataValue) &&
5317
0
               HasOnlyNoDataT(
5318
0
                   static_cast<const uint16_t *>(pBuffer),
5319
0
                   static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)),
5320
0
                   nWidth, nHeight, nLineStride, nComponents);
5321
0
    }
5322
0
    if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT)
5323
0
    {
5324
0
        return GDALIsValueInRange<uint32_t>(dfNoDataValue) &&
5325
0
               HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer),
5326
0
                              static_cast<uint32_t>(dfNoDataValue), nWidth,
5327
0
                              nHeight, nLineStride, nComponents);
5328
0
    }
5329
0
    if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT)
5330
0
    {
5331
        // Use unsigned implementation by converting the nodatavalue to
5332
        // unsigned
5333
0
        return GDALIsValueInRange<int32_t>(dfNoDataValue) &&
5334
0
               HasOnlyNoDataT(
5335
0
                   static_cast<const uint32_t *>(pBuffer),
5336
0
                   static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)),
5337
0
                   nWidth, nHeight, nLineStride, nComponents);
5338
0
    }
5339
0
    if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT)
5340
0
    {
5341
0
        return GDALIsValueInRange<uint64_t>(dfNoDataValue) &&
5342
0
               HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer),
5343
0
                              static_cast<uint64_t>(dfNoDataValue), nWidth,
5344
0
                              nHeight, nLineStride, nComponents);
5345
0
    }
5346
0
    if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT)
5347
0
    {
5348
        // Use unsigned implementation by converting the nodatavalue to
5349
        // unsigned
5350
0
        return GDALIsValueInRange<int64_t>(dfNoDataValue) &&
5351
0
               HasOnlyNoDataT(
5352
0
                   static_cast<const uint64_t *>(pBuffer),
5353
0
                   static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)),
5354
0
                   nWidth, nHeight, nLineStride, nComponents);
5355
0
    }
5356
0
    if (nBitsPerSample == 16 && nSampleFormat == GSF_FLOATING_POINT)
5357
0
    {
5358
0
        return (std::isnan(dfNoDataValue) ||
5359
0
                GDALIsValueInRange<GFloat16>(dfNoDataValue)) &&
5360
0
               HasOnlyNoDataT(static_cast<const GFloat16 *>(pBuffer),
5361
0
                              static_cast<GFloat16>(dfNoDataValue), nWidth,
5362
0
                              nHeight, nLineStride, nComponents);
5363
0
    }
5364
0
    if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
5365
0
    {
5366
0
        return (std::isnan(dfNoDataValue) ||
5367
0
                GDALIsValueInRange<float>(dfNoDataValue)) &&
5368
0
               HasOnlyNoDataT(static_cast<const float *>(pBuffer),
5369
0
                              static_cast<float>(dfNoDataValue), nWidth,
5370
0
                              nHeight, nLineStride, nComponents);
5371
0
    }
5372
0
    if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
5373
0
    {
5374
0
        return HasOnlyNoDataT(static_cast<const double *>(pBuffer),
5375
0
                              dfNoDataValue, nWidth, nHeight, nLineStride,
5376
0
                              nComponents);
5377
0
    }
5378
0
    return false;
5379
0
}
5380
5381
#ifdef HAVE_SSE2
5382
5383
/************************************************************************/
5384
/*                    GDALDeinterleave3Byte()                           */
5385
/************************************************************************/
5386
5387
#if defined(__GNUC__) && !defined(__clang__)
5388
__attribute__((optimize("no-tree-vectorize")))
5389
#endif
5390
static void
5391
GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5392
                      GByte *CPL_RESTRICT pabyDest0,
5393
                      GByte *CPL_RESTRICT pabyDest1,
5394
                      GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5395
#ifdef USE_NEON_OPTIMIZATIONS
5396
{
5397
    return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5398
                                       nIters);
5399
}
5400
#else
5401
0
{
5402
0
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
5403
0
    if (CPLHaveRuntimeSSSE3())
5404
0
    {
5405
0
        return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5406
0
                                           pabyDest2, nIters);
5407
0
    }
5408
0
#endif
5409
5410
0
    size_t i = 0;
5411
0
    if (((reinterpret_cast<uintptr_t>(pabySrc) |
5412
0
          reinterpret_cast<uintptr_t>(pabyDest0) |
5413
0
          reinterpret_cast<uintptr_t>(pabyDest1) |
5414
0
          reinterpret_cast<uintptr_t>(pabyDest2)) %
5415
0
         sizeof(unsigned int)) == 0)
5416
0
    {
5417
        // Slightly better than GCC autovectorizer
5418
0
        for (size_t j = 0; i + 3 < nIters; i += 4, ++j)
5419
0
        {
5420
0
            unsigned int word0 =
5421
0
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i);
5422
0
            unsigned int word1 =
5423
0
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4);
5424
0
            unsigned int word2 =
5425
0
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8);
5426
0
            reinterpret_cast<unsigned int *>(pabyDest0)[j] =
5427
0
                (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) |
5428
0
                ((word2 >> 8) << 24);
5429
0
            reinterpret_cast<unsigned int *>(pabyDest1)[j] =
5430
0
                ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) |
5431
0
                (((word1 >> 24)) << 16) | ((word2 >> 16) << 24);
5432
0
            pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16);
5433
0
            pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8);
5434
0
            pabyDest2[j * 4 + 2] = static_cast<GByte>(word2);
5435
0
            pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24);
5436
0
        }
5437
0
    }
5438
0
#if defined(__clang__)
5439
0
#pragma clang loop vectorize(disable)
5440
0
#endif
5441
0
    for (; i < nIters; ++i)
5442
0
    {
5443
0
        pabyDest0[i] = pabySrc[3 * i + 0];
5444
0
        pabyDest1[i] = pabySrc[3 * i + 1];
5445
0
        pabyDest2[i] = pabySrc[3 * i + 2];
5446
0
    }
5447
0
}
5448
#endif
5449
5450
/************************************************************************/
5451
/*                    GDALDeinterleave4Byte()                           */
5452
/************************************************************************/
5453
5454
#if !defined(__GNUC__) || defined(__clang__)
5455
5456
/************************************************************************/
5457
/*                         deinterleave()                               */
5458
/************************************************************************/
5459
5460
template <bool SHIFT, bool MASK>
5461
inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
5462
                            __m128i &xmm2_ori, __m128i &xmm3_ori)
5463
0
{
5464
    // Set higher 24bit of each int32 packed word to 0
5465
0
    if (SHIFT)
5466
0
    {
5467
0
        xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
5468
0
        xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
5469
0
        xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
5470
0
        xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
5471
0
    }
5472
0
    __m128i xmm0;
5473
0
    __m128i xmm1;
5474
0
    __m128i xmm2;
5475
0
    __m128i xmm3;
5476
0
    if (MASK)
5477
0
    {
5478
0
        const __m128i xmm_mask = _mm_set1_epi32(0xff);
5479
0
        xmm0 = _mm_and_si128(xmm0_ori, xmm_mask);
5480
0
        xmm1 = _mm_and_si128(xmm1_ori, xmm_mask);
5481
0
        xmm2 = _mm_and_si128(xmm2_ori, xmm_mask);
5482
0
        xmm3 = _mm_and_si128(xmm3_ori, xmm_mask);
5483
0
    }
5484
0
    else
5485
0
    {
5486
0
        xmm0 = xmm0_ori;
5487
0
        xmm1 = xmm1_ori;
5488
0
        xmm2 = xmm2_ori;
5489
0
        xmm3 = xmm3_ori;
5490
0
    }
5491
    // Pack int32 to int16
5492
0
    xmm0 = _mm_packs_epi32(xmm0, xmm1);
5493
0
    xmm2 = _mm_packs_epi32(xmm2, xmm3);
5494
    // Pack int16 to uint8
5495
0
    xmm0 = _mm_packus_epi16(xmm0, xmm2);
5496
0
    return xmm0;
5497
0
}
Unexecuted instantiation: long long __vector(2) deinterleave<false, true>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&)
Unexecuted instantiation: long long __vector(2) deinterleave<true, true>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&)
Unexecuted instantiation: long long __vector(2) deinterleave<true, false>(long long __vector(2)&, long long __vector(2)&, long long __vector(2)&, long long __vector(2)&)
5498
5499
static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5500
                                  GByte *CPL_RESTRICT pabyDest0,
5501
                                  GByte *CPL_RESTRICT pabyDest1,
5502
                                  GByte *CPL_RESTRICT pabyDest2,
5503
                                  GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5504
#ifdef USE_NEON_OPTIMIZATIONS
5505
{
5506
    return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5507
                                       pabyDest3, nIters);
5508
}
5509
#else
5510
0
{
5511
0
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
5512
0
    if (CPLHaveRuntimeSSSE3())
5513
0
    {
5514
0
        return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5515
0
                                           pabyDest2, pabyDest3, nIters);
5516
0
    }
5517
0
#endif
5518
5519
    // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to
5520
    // do something slightly better.
5521
0
    size_t i = 0;
5522
0
    for (; i + 15 < nIters; i += 16)
5523
0
    {
5524
0
        __m128i xmm0_ori = _mm_loadu_si128(
5525
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0));
5526
0
        __m128i xmm1_ori = _mm_loadu_si128(
5527
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16));
5528
0
        __m128i xmm2_ori = _mm_loadu_si128(
5529
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32));
5530
0
        __m128i xmm3_ori = _mm_loadu_si128(
5531
0
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48));
5532
5533
0
        _mm_storeu_si128(
5534
0
            reinterpret_cast<__m128i *>(pabyDest0 + i),
5535
0
            deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5536
0
        _mm_storeu_si128(
5537
0
            reinterpret_cast<__m128i *>(pabyDest1 + i),
5538
0
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5539
0
        _mm_storeu_si128(
5540
0
            reinterpret_cast<__m128i *>(pabyDest2 + i),
5541
0
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5542
0
        _mm_storeu_si128(
5543
0
            reinterpret_cast<__m128i *>(pabyDest3 + i),
5544
0
            deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5545
0
    }
5546
5547
0
#if defined(__clang__)
5548
0
#pragma clang loop vectorize(disable)
5549
0
#endif
5550
0
    for (; i < nIters; ++i)
5551
0
    {
5552
0
        pabyDest0[i] = pabySrc[4 * i + 0];
5553
0
        pabyDest1[i] = pabySrc[4 * i + 1];
5554
0
        pabyDest2[i] = pabySrc[4 * i + 2];
5555
0
        pabyDest3[i] = pabySrc[4 * i + 3];
5556
0
    }
5557
0
}
5558
#endif
5559
#else
5560
// GCC autovectorizer does an excellent job
5561
__attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte(
5562
    const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0,
5563
    GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2,
5564
    GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5565
{
5566
    for (size_t i = 0; i < nIters; ++i)
5567
    {
5568
        pabyDest0[i] = pabySrc[4 * i + 0];
5569
        pabyDest1[i] = pabySrc[4 * i + 1];
5570
        pabyDest2[i] = pabySrc[4 * i + 2];
5571
        pabyDest3[i] = pabySrc[4 * i + 3];
5572
    }
5573
}
5574
#endif
5575
5576
#else
5577
5578
/************************************************************************/
5579
/*                    GDALDeinterleave3Byte()                           */
5580
/************************************************************************/
5581
5582
// TODO: Enabling below could help on non-Intel architectures where GCC knows
5583
// how to auto-vectorize
5584
// #if defined(__GNUC__)
5585
//__attribute__((optimize("tree-vectorize")))
5586
// #endif
5587
static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5588
                                  GByte *CPL_RESTRICT pabyDest0,
5589
                                  GByte *CPL_RESTRICT pabyDest1,
5590
                                  GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5591
{
5592
    for (size_t i = 0; i < nIters; ++i)
5593
    {
5594
        pabyDest0[i] = pabySrc[3 * i + 0];
5595
        pabyDest1[i] = pabySrc[3 * i + 1];
5596
        pabyDest2[i] = pabySrc[3 * i + 2];
5597
    }
5598
}
5599
5600
/************************************************************************/
5601
/*                    GDALDeinterleave4Byte()                           */
5602
/************************************************************************/
5603
5604
// TODO: Enabling below could help on non-Intel architectures where gcc knows
5605
// how to auto-vectorize
5606
// #if defined(__GNUC__)
5607
//__attribute__((optimize("tree-vectorize")))
5608
// #endif
5609
static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5610
                                  GByte *CPL_RESTRICT pabyDest0,
5611
                                  GByte *CPL_RESTRICT pabyDest1,
5612
                                  GByte *CPL_RESTRICT pabyDest2,
5613
                                  GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5614
{
5615
    for (size_t i = 0; i < nIters; ++i)
5616
    {
5617
        pabyDest0[i] = pabySrc[4 * i + 0];
5618
        pabyDest1[i] = pabySrc[4 * i + 1];
5619
        pabyDest2[i] = pabySrc[4 * i + 2];
5620
        pabyDest3[i] = pabySrc[4 * i + 3];
5621
    }
5622
}
5623
5624
#endif
5625
5626
/************************************************************************/
5627
/*                      GDALDeinterleave()                              */
5628
/************************************************************************/
5629
5630
/*! Copy values from a pixel-interleave buffer to multiple per-component
5631
    buffers.
5632
5633
    In pseudo-code
5634
    \verbatim
5635
    for(size_t i = 0; i < nIters; ++i)
5636
        for(int iComp = 0; iComp < nComponents; iComp++ )
5637
            ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp]
5638
    \endverbatim
5639
5640
    The implementation is optimized for a few cases, like de-interleaving
5641
    of 3 or 4-components Byte buffers.
5642
5643
    \since GDAL 3.6
5644
 */
5645
void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT,
5646
                      int nComponents, void **ppDestBuffer,
5647
                      GDALDataType eDestDT, size_t nIters)
5648
0
{
5649
0
    if (eSourceDT == eDestDT)
5650
0
    {
5651
0
        if (eSourceDT == GDT_Byte || eSourceDT == GDT_Int8)
5652
0
        {
5653
0
            if (nComponents == 3)
5654
0
            {
5655
0
                const GByte *CPL_RESTRICT pabySrc =
5656
0
                    static_cast<const GByte *>(pSourceBuffer);
5657
0
                GByte *CPL_RESTRICT pabyDest0 =
5658
0
                    static_cast<GByte *>(ppDestBuffer[0]);
5659
0
                GByte *CPL_RESTRICT pabyDest1 =
5660
0
                    static_cast<GByte *>(ppDestBuffer[1]);
5661
0
                GByte *CPL_RESTRICT pabyDest2 =
5662
0
                    static_cast<GByte *>(ppDestBuffer[2]);
5663
0
                GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5664
0
                                      nIters);
5665
0
                return;
5666
0
            }
5667
0
            else if (nComponents == 4)
5668
0
            {
5669
0
                const GByte *CPL_RESTRICT pabySrc =
5670
0
                    static_cast<const GByte *>(pSourceBuffer);
5671
0
                GByte *CPL_RESTRICT pabyDest0 =
5672
0
                    static_cast<GByte *>(ppDestBuffer[0]);
5673
0
                GByte *CPL_RESTRICT pabyDest1 =
5674
0
                    static_cast<GByte *>(ppDestBuffer[1]);
5675
0
                GByte *CPL_RESTRICT pabyDest2 =
5676
0
                    static_cast<GByte *>(ppDestBuffer[2]);
5677
0
                GByte *CPL_RESTRICT pabyDest3 =
5678
0
                    static_cast<GByte *>(ppDestBuffer[3]);
5679
0
                GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5680
0
                                      pabyDest3, nIters);
5681
0
                return;
5682
0
            }
5683
0
        }
5684
#if ((defined(__GNUC__) && !defined(__clang__)) ||                             \
5685
     defined(__INTEL_CLANG_COMPILER)) &&                                       \
5686
    defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME)
5687
        else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) &&
5688
                 CPLHaveRuntimeSSSE3())
5689
        {
5690
            if (nComponents == 3)
5691
            {
5692
                const GUInt16 *CPL_RESTRICT panSrc =
5693
                    static_cast<const GUInt16 *>(pSourceBuffer);
5694
                GUInt16 *CPL_RESTRICT panDest0 =
5695
                    static_cast<GUInt16 *>(ppDestBuffer[0]);
5696
                GUInt16 *CPL_RESTRICT panDest1 =
5697
                    static_cast<GUInt16 *>(ppDestBuffer[1]);
5698
                GUInt16 *CPL_RESTRICT panDest2 =
5699
                    static_cast<GUInt16 *>(ppDestBuffer[2]);
5700
                GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1,
5701
                                              panDest2, nIters);
5702
                return;
5703
            }
5704
#if !defined(__INTEL_CLANG_COMPILER)
5705
            // ICC autovectorizer doesn't do a good job, at least with icx
5706
            // 2022.1.0.20220316
5707
            else if (nComponents == 4)
5708
            {
5709
                const GUInt16 *CPL_RESTRICT panSrc =
5710
                    static_cast<const GUInt16 *>(pSourceBuffer);
5711
                GUInt16 *CPL_RESTRICT panDest0 =
5712
                    static_cast<GUInt16 *>(ppDestBuffer[0]);
5713
                GUInt16 *CPL_RESTRICT panDest1 =
5714
                    static_cast<GUInt16 *>(ppDestBuffer[1]);
5715
                GUInt16 *CPL_RESTRICT panDest2 =
5716
                    static_cast<GUInt16 *>(ppDestBuffer[2]);
5717
                GUInt16 *CPL_RESTRICT panDest3 =
5718
                    static_cast<GUInt16 *>(ppDestBuffer[3]);
5719
                GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1,
5720
                                              panDest2, panDest3, nIters);
5721
                return;
5722
            }
5723
#endif
5724
        }
5725
#endif
5726
0
    }
5727
5728
0
    const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT);
5729
0
    const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT);
5730
0
    for (int iComp = 0; iComp < nComponents; iComp++)
5731
0
    {
5732
0
        GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) +
5733
0
                            iComp * nSourceDTSize,
5734
0
                        eSourceDT, nComponents * nSourceDTSize,
5735
0
                        ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters);
5736
0
    }
5737
0
}
5738
5739
/************************************************************************/
5740
/*                    GDALTranspose2DSingleToSingle()                   */
5741
/************************************************************************/
5742
/**
5743
 * Transpose a 2D array of non-complex values, in a efficient (cache-oblivious) way.
5744
 *
5745
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5746
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5747
 * @param nSrcWidth Width of pSrc array.
5748
 * @param nSrcHeight Height of pSrc array.
5749
 */
5750
5751
template <class DST, class SRC>
5752
void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc,
5753
                                   DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5754
                                   size_t nSrcHeight)
5755
0
{
5756
0
    constexpr size_t blocksize = 32;
5757
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
5758
0
    {
5759
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
5760
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
5761
0
        {
5762
            // transpose the block beginning at [i,j]
5763
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
5764
0
            for (size_t k = i; k < max_k; ++k)
5765
0
            {
5766
0
                for (size_t l = j; l < max_l; ++l)
5767
0
                {
5768
0
                    GDALCopyWord(pSrc[l + k * nSrcWidth],
5769
0
                                 pDst[k + l * nSrcHeight]);
5770
0
                }
5771
0
            }
5772
0
        }
5773
0
    }
5774
0
}
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned char>(unsigned char const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, signed char>(signed char const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned short>(unsigned short const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned int>(unsigned int const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned long>(unsigned long const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, long>(long const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned char>(unsigned char const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, signed char>(signed char const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned short>(unsigned short const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned int>(unsigned int const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned long>(unsigned long const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, long>(long const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned char>(unsigned char const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, signed char>(signed char const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned short>(unsigned short const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned int>(unsigned int const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned long>(unsigned long const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, long>(long const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, float>(float const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, signed char>(signed char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, long>(long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned char>(unsigned char const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, signed char>(signed char const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned short>(unsigned short const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned int>(unsigned int const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned long>(unsigned long const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, long>(long const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, signed char>(signed char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, long>(long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned char>(unsigned char const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, signed char>(signed char const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned short>(unsigned short const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned int>(unsigned int const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned long>(unsigned long const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, long>(long const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned char>(unsigned char const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, signed char>(signed char const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned short>(unsigned short const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, short>(short const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned int>(unsigned int const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, int>(int const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned long>(unsigned long const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, long>(long const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, float>(float const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, double>(double const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned long>(unsigned long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, signed char>(signed char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, long>(long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, signed char>(signed char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, long>(long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, double>(double const*, double*, unsigned long, unsigned long)
5775
5776
/************************************************************************/
5777
/*                   GDALTranspose2DComplexToComplex()                  */
5778
/************************************************************************/
5779
/**
5780
 * Transpose a 2D array of complex values into an array of complex values,
5781
 * in a efficient (cache-oblivious) way.
5782
 *
5783
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5784
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5785
 * @param nSrcWidth Width of pSrc array.
5786
 * @param nSrcHeight Height of pSrc array.
5787
 */
5788
template <class DST, class SRC>
5789
void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc,
5790
                                     DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5791
                                     size_t nSrcHeight)
5792
0
{
5793
0
    constexpr size_t blocksize = 32;
5794
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
5795
0
    {
5796
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
5797
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
5798
0
        {
5799
            // transpose the block beginning at [i,j]
5800
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
5801
0
            for (size_t k = i; k < max_k; ++k)
5802
0
            {
5803
0
                for (size_t l = j; l < max_l; ++l)
5804
0
                {
5805
0
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
5806
0
                                 pDst[2 * (k + l * nSrcHeight) + 0]);
5807
0
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1],
5808
0
                                 pDst[2 * (k + l * nSrcHeight) + 1]);
5809
0
                }
5810
0
            }
5811
0
        }
5812
0
    }
5813
0
}
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, double>(double const*, double*, unsigned long, unsigned long)
5814
5815
/************************************************************************/
5816
/*                   GDALTranspose2DComplexToSingle()                  */
5817
/************************************************************************/
5818
/**
5819
 * Transpose a 2D array of complex values into an array of non-complex values,
5820
 * in a efficient (cache-oblivious) way.
5821
 *
5822
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5823
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5824
 * @param nSrcWidth Width of pSrc array.
5825
 * @param nSrcHeight Height of pSrc array.
5826
 */
5827
template <class DST, class SRC>
5828
void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc,
5829
                                    DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5830
                                    size_t nSrcHeight)
5831
0
{
5832
0
    constexpr size_t blocksize = 32;
5833
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
5834
0
    {
5835
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
5836
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
5837
0
        {
5838
            // transpose the block beginning at [i,j]
5839
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
5840
0
            for (size_t k = i; k < max_k; ++k)
5841
0
            {
5842
0
                for (size_t l = j; l < max_l; ++l)
5843
0
                {
5844
0
                    GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
5845
0
                                 pDst[k + l * nSrcHeight]);
5846
0
                }
5847
0
            }
5848
0
        }
5849
0
    }
5850
0
}
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, float>(float const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, short>(short const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, int>(int const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, float>(float const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, double>(double const*, long*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, double>(double const*, double*, unsigned long, unsigned long)
5851
5852
/************************************************************************/
5853
/*                   GDALTranspose2DSingleToComplex()                  */
5854
/************************************************************************/
5855
/**
5856
 * Transpose a 2D array of non-complex values into an array of complex values,
5857
 * in a efficient (cache-oblivious) way.
5858
 *
5859
 * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5860
 * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5861
 * @param nSrcWidth Width of pSrc array.
5862
 * @param nSrcHeight Height of pSrc array.
5863
 */
5864
template <class DST, class SRC>
5865
void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc,
5866
                                    DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5867
                                    size_t nSrcHeight)
5868
0
{
5869
0
    constexpr size_t blocksize = 32;
5870
0
    for (size_t i = 0; i < nSrcHeight; i += blocksize)
5871
0
    {
5872
0
        const size_t max_k = std::min(i + blocksize, nSrcHeight);
5873
0
        for (size_t j = 0; j < nSrcWidth; j += blocksize)
5874
0
        {
5875
            // transpose the block beginning at [i,j]
5876
0
            const size_t max_l = std::min(j + blocksize, nSrcWidth);
5877
0
            for (size_t k = i; k < max_k; ++k)
5878
0
            {
5879
0
                for (size_t l = j; l < max_l; ++l)
5880
0
                {
5881
0
                    GDALCopyWord(pSrc[l + k * nSrcWidth],
5882
0
                                 pDst[2 * (k + l * nSrcHeight) + 0]);
5883
0
                    pDst[2 * (k + l * nSrcHeight) + 1] = 0;
5884
0
                }
5885
0
            }
5886
0
        }
5887
0
    }
5888
0
}
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, signed char>(signed char const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, short>(short const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, int>(int const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, long>(long const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, float>(float const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, double>(double const*, short*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, signed char>(signed char const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, short>(short const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, int>(int const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, long>(long const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, float>(float const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, double>(double const*, int*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned long>(unsigned long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, signed char>(signed char const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, short>(short const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, int>(int const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, long>(long const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, float>(float const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, double>(double const*, float*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, signed char>(signed char const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, short>(short const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, int>(int const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, long>(long const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, float>(float const*, double*, unsigned long, unsigned long)
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, double>(double const*, double*, unsigned long, unsigned long)
5889
5890
/************************************************************************/
5891
/*                        GDALTranspose2D()                             */
5892
/************************************************************************/
5893
5894
template <class DST, bool DST_IS_COMPLEX>
5895
static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst,
5896
                            size_t nSrcWidth, size_t nSrcHeight)
5897
0
{
5898
0
#define CALL_GDALTranspose2D_internal(SRC_TYPE)                                \
5899
0
    do                                                                         \
5900
0
    {                                                                          \
5901
0
        if constexpr (DST_IS_COMPLEX)                                          \
5902
0
        {                                                                      \
5903
0
            GDALTranspose2DSingleToComplex(                                    \
5904
0
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
5905
0
                nSrcHeight);                                                   \
5906
0
        }                                                                      \
5907
0
        else                                                                   \
5908
0
        {                                                                      \
5909
0
            GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \
5910
0
                                          pDst, nSrcWidth, nSrcHeight);        \
5911
0
        }                                                                      \
5912
0
    } while (0)
5913
5914
0
#define CALL_GDALTranspose2DComplex_internal(SRC_TYPE)                         \
5915
0
    do                                                                         \
5916
0
    {                                                                          \
5917
0
        if constexpr (DST_IS_COMPLEX)                                          \
5918
0
        {                                                                      \
5919
0
            GDALTranspose2DComplexToComplex(                                   \
5920
0
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
5921
0
                nSrcHeight);                                                   \
5922
0
        }                                                                      \
5923
0
        else                                                                   \
5924
0
        {                                                                      \
5925
0
            GDALTranspose2DComplexToSingle(                                    \
5926
0
                static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth,          \
5927
0
                nSrcHeight);                                                   \
5928
0
        }                                                                      \
5929
0
    } while (0)
5930
5931
    // clang-format off
5932
0
    switch (eSrcType)
5933
0
    {
5934
0
        case GDT_Byte:     CALL_GDALTranspose2D_internal(uint8_t); break;
5935
0
        case GDT_Int8:     CALL_GDALTranspose2D_internal(int8_t); break;
5936
0
        case GDT_UInt16:   CALL_GDALTranspose2D_internal(uint16_t); break;
5937
0
        case GDT_Int16:    CALL_GDALTranspose2D_internal(int16_t); break;
5938
0
        case GDT_UInt32:   CALL_GDALTranspose2D_internal(uint32_t); break;
5939
0
        case GDT_Int32:    CALL_GDALTranspose2D_internal(int32_t); break;
5940
0
        case GDT_UInt64:   CALL_GDALTranspose2D_internal(uint64_t); break;
5941
0
        case GDT_Int64:    CALL_GDALTranspose2D_internal(int64_t); break;
5942
0
        case GDT_Float16:  CALL_GDALTranspose2D_internal(GFloat16); break;
5943
0
        case GDT_Float32:  CALL_GDALTranspose2D_internal(float); break;
5944
0
        case GDT_Float64:  CALL_GDALTranspose2D_internal(double); break;
5945
0
        case GDT_CInt16:   CALL_GDALTranspose2DComplex_internal(int16_t); break;
5946
0
        case GDT_CInt32:   CALL_GDALTranspose2DComplex_internal(int32_t); break;
5947
0
        case GDT_CFloat16: CALL_GDALTranspose2DComplex_internal(GFloat16); break;
5948
0
        case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break;
5949
0
        case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break;
5950
0
        case GDT_Unknown:
5951
0
        case GDT_TypeCount:
5952
0
            break;
5953
0
    }
5954
        // clang-format on
5955
5956
0
#undef CALL_GDALTranspose2D_internal
5957
0
#undef CALL_GDALTranspose2DComplex_internal
5958
0
}
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned char, false>(void const*, GDALDataType, unsigned char*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<signed char, false>(void const*, GDALDataType, signed char*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned short, false>(void const*, GDALDataType, unsigned short*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, false>(void const*, GDALDataType, short*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned int, false>(void const*, GDALDataType, unsigned int*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, false>(void const*, GDALDataType, int*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned long, false>(void const*, GDALDataType, unsigned long*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<long, false>(void const*, GDALDataType, long*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, false>(void const*, GDALDataType, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, false>(void const*, GDALDataType, float*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, false>(void const*, GDALDataType, double*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, true>(void const*, GDALDataType, short*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, true>(void const*, GDALDataType, int*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, true>(void const*, GDALDataType, cpl::Float16*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, true>(void const*, GDALDataType, float*, unsigned long, unsigned long)
Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, true>(void const*, GDALDataType, double*, unsigned long, unsigned long)
5959
5960
/************************************************************************/
5961
/*                      GDALInterleave2Byte()                           */
5962
/************************************************************************/
5963
5964
#if defined(HAVE_SSE2) &&                                                      \
5965
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
5966
5967
// ICC autovectorizer doesn't do a good job at generating good SSE code,
5968
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
5969
#if defined(__GNUC__)
5970
__attribute__((noinline))
5971
#endif
5972
static void
5973
GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
5974
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
5975
{
5976
    size_t i = 0;
5977
    constexpr size_t VALS_PER_ITER = 16;
5978
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
5979
    {
5980
        __m128i xmm0 =
5981
            _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i));
5982
        __m128i xmm1 = _mm_loadu_si128(
5983
            reinterpret_cast<__m128i const *>(pSrc + i + nIters));
5984
        _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i),
5985
                         _mm_unpacklo_epi8(xmm0, xmm1));
5986
        _mm_storeu_si128(
5987
            reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER),
5988
            _mm_unpackhi_epi8(xmm0, xmm1));
5989
    }
5990
#if defined(__clang__)
5991
#pragma clang loop vectorize(disable)
5992
#endif
5993
    for (; i < nIters; ++i)
5994
    {
5995
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
5996
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
5997
    }
5998
}
5999
6000
#else
6001
6002
#if defined(__GNUC__) && !defined(__clang__)
6003
__attribute__((optimize("tree-vectorize")))
6004
#endif
6005
#if defined(__GNUC__)
6006
__attribute__((noinline))
6007
#endif
6008
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6009
// clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
6010
#pragma clang diagnostic push
6011
#pragma clang diagnostic ignored "-Wpass-failed"
6012
#endif
6013
static void
6014
GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
6015
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
6016
0
{
6017
0
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6018
0
#pragma clang loop vectorize(enable)
6019
0
#endif
6020
0
    for (size_t i = 0; i < nIters; ++i)
6021
0
    {
6022
0
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
6023
0
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
6024
0
    }
6025
0
}
6026
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6027
#pragma clang diagnostic pop
6028
#endif
6029
6030
#endif
6031
6032
/************************************************************************/
6033
/*                      GDALInterleave4Byte()                           */
6034
/************************************************************************/
6035
6036
#if defined(HAVE_SSE2) &&                                                      \
6037
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
6038
6039
// ICC autovectorizer doesn't do a good job at generating good SSE code,
6040
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
6041
#if defined(__GNUC__)
6042
__attribute__((noinline))
6043
#endif
6044
static void
6045
GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6046
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
6047
{
6048
    size_t i = 0;
6049
    constexpr size_t VALS_PER_ITER = 16;
6050
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
6051
    {
6052
        __m128i xmm0 = _mm_loadu_si128(
6053
            reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters));
6054
        __m128i xmm1 = _mm_loadu_si128(
6055
            reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters));
6056
        __m128i xmm2 = _mm_loadu_si128(
6057
            reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters));
6058
        __m128i xmm3 = _mm_loadu_si128(
6059
            reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters));
6060
        auto tmp0 = _mm_unpacklo_epi8(
6061
            xmm0,
6062
            xmm1);  // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...)
6063
        auto tmp1 = _mm_unpackhi_epi8(
6064
            xmm0,
6065
            xmm1);  // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...)
6066
        auto tmp2 = _mm_unpacklo_epi8(
6067
            xmm2,
6068
            xmm3);  // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...)
6069
        auto tmp3 = _mm_unpackhi_epi8(
6070
            xmm2,
6071
            xmm3);  // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...)
6072
        auto tmp2_0 = _mm_unpacklo_epi16(
6073
            tmp0,
6074
            tmp2);  // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...)
6075
        auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2);
6076
        auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3);
6077
        auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3);
6078
        _mm_storeu_si128(
6079
            reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER),
6080
            tmp2_0);
6081
        _mm_storeu_si128(
6082
            reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER),
6083
            tmp2_1);
6084
        _mm_storeu_si128(
6085
            reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER),
6086
            tmp2_2);
6087
        _mm_storeu_si128(
6088
            reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER),
6089
            tmp2_3);
6090
    }
6091
#if defined(__clang__)
6092
#pragma clang loop vectorize(disable)
6093
#endif
6094
    for (; i < nIters; ++i)
6095
    {
6096
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6097
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6098
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6099
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6100
    }
6101
}
6102
6103
#else
6104
6105
#if defined(__GNUC__) && !defined(__clang__)
6106
__attribute__((optimize("tree-vectorize")))
6107
#endif
6108
#if defined(__GNUC__)
6109
__attribute__((noinline))
6110
#endif
6111
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6112
// clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
6113
#pragma clang diagnostic push
6114
#pragma clang diagnostic ignored "-Wpass-failed"
6115
#endif
6116
static void
6117
GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6118
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
6119
0
{
6120
0
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6121
0
#pragma clang loop vectorize(enable)
6122
0
#endif
6123
0
    for (size_t i = 0; i < nIters; ++i)
6124
0
    {
6125
0
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6126
0
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6127
0
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6128
0
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6129
0
    }
6130
0
}
6131
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6132
#pragma clang diagnostic pop
6133
#endif
6134
6135
#endif
6136
6137
/************************************************************************/
6138
/*                        GDALTranspose2D()                             */
6139
/************************************************************************/
6140
6141
/**
6142
 * Transpose a 2D array in a efficient (cache-oblivious) way.
6143
 *
6144
 * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight.
6145
 * @param eSrcType Data type of pSrc.
6146
 * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth.
6147
 * @param eDstType Data type of pDst.
6148
 * @param nSrcWidth Width of pSrc array.
6149
 * @param nSrcHeight Height of pSrc array.
6150
 * @since GDAL 3.11
6151
 */
6152
6153
void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst,
6154
                     GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight)
6155
0
{
6156
0
    if (eSrcType == eDstType && (eSrcType == GDT_Byte || eSrcType == GDT_Int8))
6157
0
    {
6158
0
        if (nSrcHeight == 2)
6159
0
        {
6160
0
            GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc),
6161
0
                                static_cast<uint8_t *>(pDst), nSrcWidth);
6162
0
            return;
6163
0
        }
6164
0
        if (nSrcHeight == 4)
6165
0
        {
6166
0
            GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc),
6167
0
                                static_cast<uint8_t *>(pDst), nSrcWidth);
6168
0
            return;
6169
0
        }
6170
0
#if (defined(HAVE_SSSE3_AT_COMPILE_TIME) &&                                    \
6171
0
     (defined(__x86_64) || defined(_M_X64)))
6172
0
        if (CPLHaveRuntimeSSSE3())
6173
0
        {
6174
0
            GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6175
0
                                       static_cast<uint8_t *>(pDst), nSrcWidth,
6176
0
                                       nSrcHeight);
6177
0
            return;
6178
0
        }
6179
#elif defined(USE_NEON_OPTIMIZATIONS)
6180
        {
6181
            GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6182
                                       static_cast<uint8_t *>(pDst), nSrcWidth,
6183
                                       nSrcHeight);
6184
            return;
6185
        }
6186
#endif
6187
0
    }
6188
6189
0
#define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX)                \
6190
0
    GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>(                                 \
6191
0
        pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight)
6192
6193
    // clang-format off
6194
0
    switch (eDstType)
6195
0
    {
6196
0
        case GDT_Byte:     CALL_GDALTranspose2D_internal(uint8_t, false); break;
6197
0
        case GDT_Int8:     CALL_GDALTranspose2D_internal(int8_t, false); break;
6198
0
        case GDT_UInt16:   CALL_GDALTranspose2D_internal(uint16_t, false); break;
6199
0
        case GDT_Int16:    CALL_GDALTranspose2D_internal(int16_t, false); break;
6200
0
        case GDT_UInt32:   CALL_GDALTranspose2D_internal(uint32_t, false); break;
6201
0
        case GDT_Int32:    CALL_GDALTranspose2D_internal(int32_t, false); break;
6202
0
        case GDT_UInt64:   CALL_GDALTranspose2D_internal(uint64_t, false); break;
6203
0
        case GDT_Int64:    CALL_GDALTranspose2D_internal(int64_t, false); break;
6204
0
        case GDT_Float16:  CALL_GDALTranspose2D_internal(GFloat16, false); break;
6205
0
        case GDT_Float32:  CALL_GDALTranspose2D_internal(float, false); break;
6206
0
        case GDT_Float64:  CALL_GDALTranspose2D_internal(double, false); break;
6207
0
        case GDT_CInt16:   CALL_GDALTranspose2D_internal(int16_t, true); break;
6208
0
        case GDT_CInt32:   CALL_GDALTranspose2D_internal(int32_t, true); break;
6209
0
        case GDT_CFloat16: CALL_GDALTranspose2D_internal(GFloat16, true); break;
6210
0
        case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break;
6211
0
        case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break;
6212
0
        case GDT_Unknown:
6213
0
        case GDT_TypeCount:
6214
0
            break;
6215
0
    }
6216
        // clang-format on
6217
6218
0
#undef CALL_GDALTranspose2D_internal
6219
0
}
6220
6221
/************************************************************************/
6222
/*                     ExtractBitAndConvertTo255()                      */
6223
/************************************************************************/
6224
6225
#if defined(__GNUC__) || defined(_MSC_VER)
6226
// Signedness of char implementation dependent, so be explicit.
6227
// Assumes 2-complement integer types and sign extension of right shifting
6228
// GCC guarantees such:
6229
// https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
6230
static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6231
0
{
6232
0
    return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >>
6233
0
                              7);
6234
0
}
6235
#else
6236
// Portable way
6237
static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6238
{
6239
    return (byVal & (1 << nBit)) ? 255 : 0;
6240
}
6241
#endif
6242
6243
/************************************************************************/
6244
/*                   ExpandEightPackedBitsToByteAt255()                 */
6245
/************************************************************************/
6246
6247
// Expands the 8 bits of byVal, from the most significant one to the least
// significant one, into the 8 bytes of abyOutput: a bit at 1 gives a byte at
// 255 and a bit at 0 gives a byte at 0.
static inline void ExpandEightPackedBitsToByteAt255(GByte byVal,
                                                    GByte abyOutput[8])
{
    for (int iOut = 0; iOut < 8; ++iOut)
    {
        // Output byte 0 maps to bit 7, output byte 7 maps to bit 0.
        abyOutput[iOut] = ExtractBitAndConvertTo255(byVal, 7 - iOut);
    }
}
6259
6260
/************************************************************************/
6261
/*                GDALExpandPackedBitsToByteAt0Or255()                  */
6262
/************************************************************************/
6263
6264
/** Expand packed-bits (ordered from most-significant bit to least one)
6265
  into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6266
  at 1 to a byte at 255.
6267
6268
 The function does (in a possibly more optimized way) the following:
6269
 \code{.cpp}
6270
 for (size_t i = 0; i < nInputBits; ++i )
6271
 {
6272
     pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0;
6273
 }
6274
 \endcode
6275
6276
 @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6277
 @param pabyOutput Output array of nInputBits bytes.
6278
 @param nInputBits Number of valid bits in pabyInput.
6279
6280
 @since 3.11
6281
*/
6282
6283
void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput,
6284
                                        GByte *CPL_RESTRICT pabyOutput,
6285
                                        size_t nInputBits)
6286
0
{
6287
0
    const size_t nInputWholeBytes = nInputBits / 8;
6288
0
    size_t iByte = 0;
6289
6290
0
#ifdef HAVE_SSE2
6291
    // Mask to isolate each bit
6292
0
    const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4,
6293
0
                                          8, 16, 32, 64, -128);
6294
0
    const __m128i zero = _mm_setzero_si128();
6295
0
    const __m128i all_ones = _mm_set1_epi8(-1);
6296
#ifdef __SSSE3__
6297
    const __m128i dispatch_two_bytes =
6298
        _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
6299
#endif
6300
0
    constexpr size_t SSE_REG_SIZE = sizeof(bit_mask);
6301
0
    for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE)
6302
0
    {
6303
0
        __m128i reg_ori = _mm_loadu_si128(
6304
0
            reinterpret_cast<const __m128i *>(pabyInput + iByte));
6305
6306
0
        constexpr int NUM_PROCESSED_BYTES_PER_REG = 2;
6307
0
        for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k)
6308
0
        {
6309
            // Given reg_ori = (A, B, ... 14 other bytes ...),
6310
            // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B)
6311
#ifdef __SSSE3__
6312
            __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes);
6313
#else
6314
0
            __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori);
6315
0
            reg = _mm_unpacklo_epi16(reg, reg);
6316
0
            reg = _mm_unpacklo_epi32(reg, reg);
6317
0
#endif
6318
6319
            // Test if bits of interest are set
6320
0
            reg = _mm_and_si128(reg, bit_mask);
6321
6322
            // Now test if those bits are set, by comparing to zero. So the
6323
            // result will be that bytes where bits are set will be at 0, and
6324
            // ones where they are cleared will be at 0xFF. So the inverse of
6325
            // the end result we want!
6326
0
            reg = _mm_cmpeq_epi8(reg, zero);
6327
6328
            // Invert the result
6329
0
            reg = _mm_andnot_si128(reg, all_ones);
6330
6331
0
            _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg);
6332
6333
0
            pabyOutput += SSE_REG_SIZE;
6334
6335
            // Right-shift of 2 bytes
6336
0
            reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG);
6337
0
        }
6338
0
    }
6339
6340
0
#endif  // HAVE_SSE2
6341
6342
0
    for (; iByte < nInputWholeBytes; ++iByte)
6343
0
    {
6344
0
        ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput);
6345
0
        pabyOutput += 8;
6346
0
    }
6347
0
    for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
6348
0
    {
6349
0
        *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit);
6350
0
        ++pabyOutput;
6351
0
    }
6352
0
}
6353
6354
/************************************************************************/
6355
/*                   ExpandEightPackedBitsToByteAt1()                   */
6356
/************************************************************************/
6357
6358
static inline void ExpandEightPackedBitsToByteAt1(GByte byVal,
6359
                                                  GByte abyOutput[8])
6360
0
{
6361
0
    abyOutput[0] = (byVal >> 7) & 0x1;
6362
0
    abyOutput[1] = (byVal >> 6) & 0x1;
6363
0
    abyOutput[2] = (byVal >> 5) & 0x1;
6364
0
    abyOutput[3] = (byVal >> 4) & 0x1;
6365
0
    abyOutput[4] = (byVal >> 3) & 0x1;
6366
0
    abyOutput[5] = (byVal >> 2) & 0x1;
6367
0
    abyOutput[6] = (byVal >> 1) & 0x1;
6368
0
    abyOutput[7] = (byVal >> 0) & 0x1;
6369
0
}
6370
6371
/************************************************************************/
6372
/*                GDALExpandPackedBitsToByteAt0Or1()                    */
6373
/************************************************************************/
6374
6375
/** Expand packed-bits (ordered from most-significant bit to least one)
6376
  into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6377
  at 1 to a byte at 1.
6378
6379
 The function does (in a possibly more optimized way) the following:
6380
 \code{.cpp}
6381
 for (size_t i = 0; i < nInputBits; ++i )
6382
 {
6383
     pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
6384
 }
6385
 \endcode
6386
6387
 @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6388
 @param pabyOutput Output array of nInputBits bytes.
6389
 @param nInputBits Number of valid bits in pabyInput.
6390
6391
 @since 3.11
6392
*/
6393
6394
void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput,
6395
                                      GByte *CPL_RESTRICT pabyOutput,
6396
                                      size_t nInputBits)
6397
0
{
6398
0
    const size_t nInputWholeBytes = nInputBits / 8;
6399
0
    size_t iByte = 0;
6400
0
    for (; iByte < nInputWholeBytes; ++iByte)
6401
0
    {
6402
0
        ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput);
6403
0
        pabyOutput += 8;
6404
0
    }
6405
0
    for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
6406
0
    {
6407
0
        *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1;
6408
0
        ++pabyOutput;
6409
0
    }
6410
0
}