Coverage Report

Created: 2025-06-13 06:18

/src/gdal/alg/gdalwarpkernel.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  High Performance Image Reprojector
4
 * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
5
 *           image warping for a "chunk" of input and output imagery already
6
 *           loaded into memory.
7
 * Author:   Frank Warmerdam, warmerdam@pobox.com
8
 *
9
 ******************************************************************************
10
 * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11
 * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12
 *
13
 * SPDX-License-Identifier: MIT
14
 ****************************************************************************/
15
16
#include "cpl_port.h"
17
#include "gdalwarper.h"
18
19
#include <cfloat>
20
#include <cmath>
21
#include <cstddef>
22
#include <cstdlib>
23
#include <cstring>
24
25
#include <algorithm>
26
#include <limits>
27
#include <mutex>
28
#include <new>
29
#include <utility>
30
#include <vector>
31
32
#include "cpl_atomic_ops.h"
33
#include "cpl_conv.h"
34
#include "cpl_error.h"
35
#include "cpl_float.h"
36
#include "cpl_mask.h"
37
#include "cpl_multiproc.h"
38
#include "cpl_progress.h"
39
#include "cpl_string.h"
40
#include "cpl_vsi.h"
41
#include "cpl_worker_thread_pool.h"
42
#include "cpl_quad_tree.h"
43
#include "gdal.h"
44
#include "gdal_alg.h"
45
#include "gdal_alg_priv.h"
46
#include "gdal_thread_pool.h"
47
#include "gdalresamplingkernels.h"
48
49
// #define CHECK_SUM_WITH_GEOS
50
#ifdef CHECK_SUM_WITH_GEOS
51
#include "ogr_geometry.h"
52
#include "ogr_geos.h"
53
#endif
54
55
#ifdef USE_NEON_OPTIMIZATIONS
56
#include "include_sse2neon.h"
57
#define USE_SSE2
58
59
#include "gdalsse_priv.h"
60
61
// We restrict to 64bit processors because they are guaranteed to have SSE2.
62
// Could possibly be used too on 32bit, but we would need to check at runtime.
63
#elif defined(__x86_64) || defined(_M_X64)
64
#define USE_SSE2
65
66
#include "gdalsse_priv.h"
67
68
#if __SSE4_1__
69
#include <smmintrin.h>
70
#endif
71
72
#if __SSE3__
73
#include <pmmintrin.h>
74
#endif
75
76
#endif
77
78
// Density cut-offs: 1e-10 (double) for band density and 1e-9 (float) for
// source density. Their uses lie outside this part of the file; presumably
// densities below these values are treated as contributing nothing -- TODO
// confirm against the kernels that reference them.
constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
79
constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;
80
81
// #define INSTANTIATE_FLOAT64_SSE2_IMPL
82
83
// Kernel radius for each resampling algorithm, one entry per GDALResampleAlg
// value. GWKGetFilterRadius() indexes this table with the enum value and
// statically asserts that it holds exactly GRA_LAST_VALUE + 1 entries, so a
// new algorithm requires a new row here.
static const int anGWKFilterRadius[] = {
84
    0,  // Nearest neighbour
85
    1,  // Bilinear
86
    2,  // Cubic Convolution (Catmull-Rom)
87
    2,  // Cubic B-Spline
88
    3,  // Lanczos windowed sinc
89
    0,  // Average
90
    0,  // Mode
91
    0,  // Reserved GRA_Gauss=7
92
    0,  // Max
93
    0,  // Min
94
    0,  // Med
95
    0,  // Q1
96
    0,  // Q3
97
    0,  // Sum
98
    0,  // RMS
99
};
100
101
static double GWKBilinear(double dfX);
102
static double GWKCubic(double dfX);
103
static double GWKBSpline(double dfX);
104
static double GWKLanczosSinc(double dfX);
105
106
// Single-value filter function for each resampling algorithm, one entry per
// GDALResampleAlg value (same row order as anGWKFilterRadius). Algorithms
// with no entry here are stored as nullptr. GWKGetFilterFunc() indexes this
// table and statically asserts that it holds GRA_LAST_VALUE + 1 entries.
static const FilterFuncType apfGWKFilter[] = {
107
    nullptr,         // Nearest neighbour
108
    GWKBilinear,     // Bilinear
109
    GWKCubic,        // Cubic Convolution (Catmull-Rom)
110
    GWKBSpline,      // Cubic B-Spline
111
    GWKLanczosSinc,  // Lanczos windowed sinc
112
    nullptr,         // Average
113
    nullptr,         // Mode
114
    nullptr,         // Reserved GRA_Gauss=7
115
    nullptr,         // Max
116
    nullptr,         // Min
117
    nullptr,         // Med
118
    nullptr,         // Q1
119
    nullptr,         // Q3
120
    nullptr,         // Sum
121
    nullptr,         // RMS
122
};
123
124
// TODO(schwehr): Can we make these functions have a const * const arg?
125
static double GWKBilinear4Values(double *padfVals);
126
static double GWKCubic4Values(double *padfVals);
127
static double GWKBSpline4Values(double *padfVals);
128
static double GWKLanczosSinc4Values(double *padfVals);
129
130
// Variant of apfGWKFilter whose functions take a double array argument
// (padfVals), one entry per GDALResampleAlg value. Algorithms with no entry
// are stored as nullptr. GWKGetFilterFunc4Values() indexes this table and
// statically asserts that it holds GRA_LAST_VALUE + 1 entries.
static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
131
    nullptr,                // Nearest neighbour
132
    GWKBilinear4Values,     // Bilinear
133
    GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
134
    GWKBSpline4Values,      // Cubic B-Spline
135
    GWKLanczosSinc4Values,  // Lanczos windowed sinc
136
    nullptr,                // Average
137
    nullptr,                // Mode
138
    nullptr,                // Reserved GRA_Gauss=7
139
    nullptr,                // Max
140
    nullptr,                // Min
141
    nullptr,                // Med
142
    nullptr,                // Q1
143
    nullptr,                // Q3
144
    nullptr,                // Sum
145
    nullptr,                // RMS
146
};
147
148
int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
{
    // The radius table must carry exactly one row per GDALResampleAlg value,
    // otherwise the lookup below would read the wrong row (or past the end).
    static_assert(GRA_LAST_VALUE + 1 == CPL_ARRAYSIZE(anGWKFilterRadius),
                  "Bad size of anGWKFilterRadius");

    // The enum value is used directly as the row index.
    const int nRadius = anGWKFilterRadius[eResampleAlg];
    return nRadius;
}
154
155
FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
{
    // The filter table must carry exactly one row per GDALResampleAlg value.
    static_assert(GRA_LAST_VALUE + 1 == CPL_ARRAYSIZE(apfGWKFilter),
                  "Bad size of apfGWKFilter");

    // May be nullptr: several algorithms have no entry in the table.
    const FilterFuncType pfnFilter = apfGWKFilter[eResampleAlg];
    return pfnFilter;
}
161
162
FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
{
    // The table must carry exactly one row per GDALResampleAlg value.
    static_assert(GRA_LAST_VALUE + 1 == CPL_ARRAYSIZE(apfGWKFilter4Values),
                  "Bad size of apfGWKFilter4Values");

    // May be nullptr: several algorithms have no entry in the table.
    const FilterFunc4ValuesType pfnFilter4Values =
        apfGWKFilter4Values[eResampleAlg];
    return pfnFilter4Values;
}
168
169
static CPLErr GWKGeneralCase(GDALWarpKernel *);
170
static CPLErr GWKRealCase(GDALWarpKernel *poWK);
171
static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
172
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173
static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174
static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
175
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
176
static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
177
#endif
178
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
179
static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
180
static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
181
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
183
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
184
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
185
#endif
186
static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
187
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188
static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
189
static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
190
static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
191
static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
192
static CPLErr GWKAverageOrMode(GDALWarpKernel *);
193
static CPLErr GWKSumPreserving(GDALWarpKernel *);
194
static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
195
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
196
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197
198
/************************************************************************/
199
/*                           GWKJobStruct                               */
200
/************************************************************************/
201
202
// Description of one unit of work handed to a warp worker function, either
// directly (GWKGenericMonoThread) or through ThreadFuncAdapter (GWKRun).
// The reference members alias state owned by a GWKThreadData instance, so a
// job must not outlive the GWKThreadData it was constructed from.
struct GWKJobStruct
203
{
204
    // Shared with all jobs of the same GWKThreadData; protects counter and
    // stopFlag in the multi-threaded path.
    std::mutex &mutex;
205
    // Notified by GWKProgressThread() each time counter is incremented.
    std::condition_variable &cv;
206
    // Progress counter used by GWKProgressMonoThread() only.
    int counterSingleThreaded = 0;
207
    // Shared progress counter, incremented by GWKProgressThread().
    int &counter;
208
    // Shared flag; once true, the progress callbacks report "interrupt".
    bool &stopFlag;
209
    GDALWarpKernel *poWK = nullptr;
210
    // Row range assigned to this job. GWKRun() assigns consecutive ranges
    // that partition [0, nDstYSize) -- presumably iYMax is exclusive; confirm
    // against the worker functions.
    int iYMin = 0;
211
    int iYMax = 0;
212
    // Progress callback; returns TRUE if the computation must be interrupted.
    int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
213
    // Transformer argument to use for this job (set by the dispatching code).
    void *pTransformerArg = nullptr;
214
    // used by GWKRun() to assign the proper pTransformerArg
215
    void (*pfnFunc)(void *) = nullptr;
216
217
    // Binds the four reference members; all other members keep their
    // default values until the dispatching code fills them in.
    GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
218
                 int &counter_, bool &stopFlag_)
219
0
        : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
220
0
    {
221
0
    }
222
};
223
224
// State shared between GWKRun() and its worker jobs. Allocated by
// GWKThreadsCreate() and released by GWKThreadsEnd().
struct GWKThreadData
225
{
226
    // Null when multi-threading is disabled; GWKRun() then falls back to
    // GWKGenericMonoThread().
    std::unique_ptr<CPLJobQueue> poJobQueue{};
227
    // One pre-built job per potential worker (nMaxThreads entries).
    std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
228
    int nMaxThreads{0};
229
    // Progress counter and stop flag aliased by every GWKJobStruct.
    int counter{0};
230
    bool stopFlag{false};
231
    // Protects counter, stopFlag, the transformer map and the run counters.
    std::mutex mutex{};
232
    std::condition_variable cv{};
233
    // True while a worker is borrowing pTransformerArgInput.
    bool bTransformerArgInputAssignedToThread{false};
234
    void *pTransformerArgInput{
235
        nullptr};  // owned by calling layer. Not to be destroyed
236
    // Transformer argument per thread id (as returned by CPLGetPID()).
    // Entries other than the borrowed input are destroyed by GWKThreadsEnd().
    std::map<GIntBig, void *> mapThreadToTransformerArg{};
237
    // Number of jobs submitted / started for the current GWKRun() call.
    int nTotalThreadCountForThisRun = 0;
238
    int nCurThreadCountForThisRun = 0;
239
};
240
241
/************************************************************************/
242
/*                        GWKProgressThread()                           */
243
/************************************************************************/
244
245
// Return TRUE if the computation must be interrupted.
246
static int GWKProgressThread(GWKJobStruct *psJob)
247
0
{
248
0
    bool stop = false;
249
0
    {
250
0
        std::lock_guard<std::mutex> lock(psJob->mutex);
251
0
        psJob->counter++;
252
0
        stop = psJob->stopFlag;
253
0
    }
254
0
    psJob->cv.notify_one();
255
256
0
    return stop;
257
0
}
258
259
/************************************************************************/
260
/*                      GWKProgressMonoThread()                         */
261
/************************************************************************/
262
263
// Return TRUE if the computation must be interrupted.
264
static int GWKProgressMonoThread(GWKJobStruct *psJob)
265
0
{
266
0
    GDALWarpKernel *poWK = psJob->poWK;
267
0
    if (!poWK->pfnProgress(poWK->dfProgressBase +
268
0
                               poWK->dfProgressScale *
269
0
                                   (++psJob->counterSingleThreaded /
270
0
                                    static_cast<double>(psJob->iYMax)),
271
0
                           "", poWK->pProgress))
272
0
    {
273
0
        CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
274
0
        psJob->stopFlag = true;
275
0
        return TRUE;
276
0
    }
277
0
    return FALSE;
278
0
}
279
280
/************************************************************************/
281
/*                       GWKGenericMonoThread()                         */
282
/************************************************************************/
283
284
static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
                                   void (*pfnFunc)(void *pUserData))
{
    // Runs pfnFunc synchronously in the calling thread over the whole
    // destination height [0, nDstYSize), using the kernel's own transformer
    // argument. Returns CE_Failure if the stop flag was raised during the
    // run, CE_None otherwise.

    // Throw-away shared state: it only exists to give the job something to
    // bind its reference members to.
    GWKThreadData oLocalState;

    // NOTE: the mutex is not used.
    GWKJobStruct oJob(oLocalState.mutex, oLocalState.cv, oLocalState.counter,
                      oLocalState.stopFlag);
    oJob.counterSingleThreaded = oLocalState.counter;
    oJob.pTransformerArg = poWK->pTransformerArg;
    oJob.pfnProgress = GWKProgressMonoThread;
    oJob.iYMax = poWK->nDstYSize;
    oJob.iYMin = 0;
    oJob.poWK = poWK;

    pfnFunc(&oJob);

    oLocalState.counter = oJob.counterSingleThreaded;

    if (oLocalState.stopFlag)
        return CE_Failure;
    return CE_None;
}
302
303
/************************************************************************/
304
/*                          GWKThreadsCreate()                          */
305
/************************************************************************/
306
307
void *GWKThreadsCreate(char **papszWarpOptions,
308
                       GDALTransformerFunc /* pfnTransformer */,
309
                       void *pTransformerArg)
310
0
{
311
0
    const char *pszWarpThreads =
312
0
        CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
313
0
    if (pszWarpThreads == nullptr)
314
0
        pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
315
316
0
    int nThreads = 0;
317
0
    if (EQUAL(pszWarpThreads, "ALL_CPUS"))
318
0
        nThreads = CPLGetNumCPUs();
319
0
    else
320
0
        nThreads = atoi(pszWarpThreads);
321
0
    if (nThreads <= 1)
322
0
        nThreads = 0;
323
0
    if (nThreads > 128)
324
0
        nThreads = 128;
325
326
0
    GWKThreadData *psThreadData = new GWKThreadData();
327
0
    auto poThreadPool =
328
0
        nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
329
0
    if (nThreads && poThreadPool)
330
0
    {
331
0
        psThreadData->nMaxThreads = nThreads;
332
0
        psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
333
0
            nThreads,
334
0
            GWKJobStruct(psThreadData->mutex, psThreadData->cv,
335
0
                         psThreadData->counter, psThreadData->stopFlag)));
336
337
0
        psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
338
0
        psThreadData->pTransformerArgInput = pTransformerArg;
339
0
    }
340
341
0
    return psThreadData;
342
0
}
343
344
/************************************************************************/
345
/*                             GWKThreadsEnd()                          */
346
/************************************************************************/
347
348
void GWKThreadsEnd(void *psThreadDataIn)
349
0
{
350
0
    if (psThreadDataIn == nullptr)
351
0
        return;
352
353
0
    GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
354
0
    if (psThreadData->poJobQueue)
355
0
    {
356
        // cppcheck-suppress constVariableReference
357
0
        for (auto &pair : psThreadData->mapThreadToTransformerArg)
358
0
        {
359
0
            CPLAssert(pair.second != psThreadData->pTransformerArgInput);
360
0
            GDALDestroyTransformer(pair.second);
361
0
        }
362
0
        psThreadData->poJobQueue.reset();
363
0
    }
364
0
    delete psThreadData;
365
0
}
366
367
/************************************************************************/
368
/*                         ThreadFuncAdapter()                          */
369
/************************************************************************/
370
371
static void ThreadFuncAdapter(void *pData)
372
0
{
373
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
374
0
    GWKThreadData *psThreadData =
375
0
        static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
376
377
    // Look if we have already a per-thread transformer
378
0
    void *pTransformerArg = nullptr;
379
0
    const GIntBig nThreadId = CPLGetPID();
380
381
0
    {
382
0
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
383
0
        ++psThreadData->nCurThreadCountForThisRun;
384
385
0
        auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
386
0
        if (oIter != psThreadData->mapThreadToTransformerArg.end())
387
0
        {
388
0
            pTransformerArg = oIter->second;
389
0
        }
390
0
        else if (!psThreadData->bTransformerArgInputAssignedToThread &&
391
0
                 psThreadData->nCurThreadCountForThisRun ==
392
0
                     psThreadData->nTotalThreadCountForThisRun)
393
0
        {
394
            // If we are the last thread to be started, temporarily borrow the
395
            // original transformer
396
0
            psThreadData->bTransformerArgInputAssignedToThread = true;
397
0
            pTransformerArg = psThreadData->pTransformerArgInput;
398
0
            psThreadData->mapThreadToTransformerArg[nThreadId] =
399
0
                pTransformerArg;
400
0
        }
401
402
0
        if (pTransformerArg == nullptr)
403
0
        {
404
0
            CPLAssert(psThreadData->pTransformerArgInput != nullptr);
405
0
            CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
406
0
        }
407
0
    }
408
409
    // If no transformer assigned to current thread, instantiate one
410
0
    if (pTransformerArg == nullptr)
411
0
    {
412
        // This somehow assumes that GDALCloneTransformer() is thread-safe
413
        // which should normally be the case.
414
0
        pTransformerArg =
415
0
            GDALCloneTransformer(psThreadData->pTransformerArgInput);
416
417
        // Lock for the stop flag and the transformer map.
418
0
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
419
0
        if (!pTransformerArg)
420
0
        {
421
0
            psJob->stopFlag = true;
422
0
            return;
423
0
        }
424
0
        psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
425
0
    }
426
427
0
    psJob->pTransformerArg = pTransformerArg;
428
0
    psJob->pfnFunc(pData);
429
430
    // Give back original transformer, if borrowed.
431
0
    {
432
0
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
433
0
        if (psThreadData->bTransformerArgInputAssignedToThread &&
434
0
            pTransformerArg == psThreadData->pTransformerArgInput)
435
0
        {
436
0
            psThreadData->mapThreadToTransformerArg.erase(
437
0
                psThreadData->mapThreadToTransformerArg.find(nThreadId));
438
0
            psThreadData->bTransformerArgInputAssignedToThread = false;
439
0
        }
440
0
    }
441
0
}
442
443
/************************************************************************/
444
/*                                GWKRun()                              */
445
/************************************************************************/
446
447
static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
448
                     void (*pfnFunc)(void *pUserData))
449
450
0
{
451
0
    const int nDstYSize = poWK->nDstYSize;
452
453
0
    CPLDebug("GDAL",
454
0
             "GDALWarpKernel()::%s() "
455
0
             "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
456
0
             pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
457
0
             poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
458
0
             poWK->nDstYSize);
459
460
0
    if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
461
0
    {
462
0
        CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
463
0
        return CE_Failure;
464
0
    }
465
466
0
    GWKThreadData *psThreadData =
467
0
        static_cast<GWKThreadData *>(poWK->psThreadData);
468
0
    if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
469
0
    {
470
0
        return GWKGenericMonoThread(poWK, pfnFunc);
471
0
    }
472
473
0
    int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
474
    // Config option mostly useful for tests to be able to test multithreading
475
    // with small rasters
476
0
    const int nWarpChunkSize =
477
0
        atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
478
0
    if (nWarpChunkSize > 0)
479
0
    {
480
0
        GIntBig nChunks =
481
0
            static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
482
0
        if (nThreads > nChunks)
483
0
            nThreads = static_cast<int>(nChunks);
484
0
    }
485
0
    if (nThreads <= 0)
486
0
        nThreads = 1;
487
488
0
    CPLDebug("WARP", "Using %d threads", nThreads);
489
490
0
    auto &jobs = *psThreadData->threadJobs;
491
0
    CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
492
    // Fill-in job structures.
493
0
    for (int i = 0; i < nThreads; ++i)
494
0
    {
495
0
        auto &job = jobs[i];
496
0
        job.poWK = poWK;
497
0
        job.iYMin =
498
0
            static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
499
0
        job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
500
0
                                     nThreads);
501
0
        if (poWK->pfnProgress != GDALDummyProgress)
502
0
            job.pfnProgress = GWKProgressThread;
503
0
        job.pfnFunc = pfnFunc;
504
0
    }
505
506
0
    bool bStopFlag;
507
0
    {
508
0
        std::unique_lock<std::mutex> lock(psThreadData->mutex);
509
510
0
        psThreadData->nTotalThreadCountForThisRun = nThreads;
511
        // coverity[missing_lock]
512
0
        psThreadData->nCurThreadCountForThisRun = 0;
513
514
        // Start jobs.
515
0
        for (int i = 0; i < nThreads; ++i)
516
0
        {
517
0
            auto &job = jobs[i];
518
0
            psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
519
0
                                                static_cast<void *>(&job));
520
0
        }
521
522
        /* --------------------------------------------------------------------
523
         */
524
        /*      Report progress. */
525
        /* --------------------------------------------------------------------
526
         */
527
0
        if (poWK->pfnProgress != GDALDummyProgress)
528
0
        {
529
0
            while (psThreadData->counter < nDstYSize)
530
0
            {
531
0
                psThreadData->cv.wait(lock);
532
0
                if (!poWK->pfnProgress(poWK->dfProgressBase +
533
0
                                           poWK->dfProgressScale *
534
0
                                               (psThreadData->counter /
535
0
                                                static_cast<double>(nDstYSize)),
536
0
                                       "", poWK->pProgress))
537
0
                {
538
0
                    CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
539
0
                    psThreadData->stopFlag = true;
540
0
                    break;
541
0
                }
542
0
            }
543
0
        }
544
545
0
        bStopFlag = psThreadData->stopFlag;
546
0
    }
547
548
    /* -------------------------------------------------------------------- */
549
    /*      Wait for all jobs to complete.                                  */
550
    /* -------------------------------------------------------------------- */
551
0
    psThreadData->poJobQueue->WaitCompletion();
552
553
0
    return bStopFlag ? CE_Failure : CE_None;
554
0
}
555
556
/************************************************************************/
557
/* ==================================================================== */
558
/*                            GDALWarpKernel                            */
559
/* ==================================================================== */
560
/************************************************************************/
561
562
/**
563
 * \class GDALWarpKernel "gdalwarper.h"
564
 *
565
 * Low level image warping class.
566
 *
567
 * This class is responsible for low level image warping for one
568
 * "chunk" of imagery.  The class is essentially a structure with all
569
 * data members public - primarily so that new special-case functions
570
 * can be added without changing the class declaration.
571
 *
572
 * Applications are normally intended to interact with warping facilities
573
 * through the GDALWarpOperation class, though the GDALWarpKernel can in
574
 * theory be used directly if great care is taken in setting up the
575
 * control data.
576
 *
577
 * <h3>Design Issues</h3>
578
 *
579
 * The intention is that PerformWarp() would analyze the setup in terms
580
 * of the datatype, resampling type, and validity/density mask usage and
581
 * pick one of many specific implementations of the warping algorithm over
582
 * a continuum of optimization vs. generality.  At one end there will be a
583
 * reference general purpose implementation of the algorithm that supports
584
 * any data type (working internally in double precision complex), all three
585
 * resampling types, and any or all of the validity/density masks.  At the
586
 * other end would be highly optimized algorithms for common cases like
587
 * nearest neighbour resampling on GDT_Byte data with no masks.
588
 *
589
 * The full set of optimized versions has not been decided but we should
590
 * expect to have at least:
591
 *  - One for each resampling algorithm for 8bit data with no masks.
592
 *  - One for each resampling algorithm for float data with no masks.
593
 *  - One for each resampling algorithm for float data with any/all masks
594
 *    (essentially the generic case for just float data).
595
 *  - One for each resampling algorithm for 8bit data with support for
596
 *    input validity masks (per band or per pixel).  This handles the common
597
 *    case of nodata masking.
598
 *  - One for each resampling algorithm for float data with support for
599
 *    input validity masks (per band or per pixel).  This handles the common
600
 *    case of nodata masking.
601
 *
602
 * Some of the specializations would operate on all bands in one pass
603
 * (especially the ones without masking would do this), while others might
604
 * process each band individually to reduce code complexity.
605
 *
606
 * <h3>Masking Semantics</h3>
607
 *
608
 * A detailed explanation of the semantics of the validity and density masks,
609
 * and their effects on resampling kernels is needed here.
610
 */
611
612
/************************************************************************/
613
/*                     GDALWarpKernel Data Members                      */
614
/************************************************************************/
615
616
/**
617
 * \var GDALResampleAlg GDALWarpKernel::eResample;
618
 *
619
 * Resampling algorithm.
620
 *
621
 * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
622
 * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
623
 * GRA_Mode or GRA_Sum.
624
 *
625
 * This field is required. GRA_NearestNeighbour may be used as a default
626
 * value.
627
 */
628
629
/**
630
 * \var GDALDataType GDALWarpKernel::eWorkingDataType;
631
 *
632
 * Working pixel data type.
633
 *
634
 * The datatype of pixels in the source image (papabySrcImage) and
635
 * destination image (papabyDstImage) buffers.  Note that operations on
636
 * some data types (such as GDT_Byte) may be much better optimized than other
637
 * less common cases.
638
 *
639
 * This field is required.  It may not be GDT_Unknown.
640
 */
641
642
/**
643
 * \var int GDALWarpKernel::nBands;
644
 *
645
 * Number of bands.
646
 *
647
 * The number of bands (layers) of imagery being warped.  Determines the
648
 * number of entries in the papabySrcImage, papanBandSrcValid,
649
 * and papabyDstImage arrays.
650
 *
651
 * This field is required.
652
 */
653
654
/**
655
 * \var int GDALWarpKernel::nSrcXSize;
656
 *
657
 * Source image width in pixels.
658
 *
659
 * This field is required.
660
 */
661
662
/**
663
 * \var int GDALWarpKernel::nSrcYSize;
664
 *
665
 * Source image height in pixels.
666
 *
667
 * This field is required.
668
 */
669
670
/**
671
 * \var double GDALWarpKernel::dfSrcXExtraSize;
672
 *
673
 * Number of pixels included in nSrcXSize that are present on the edges of
674
 * the area of interest to take into account the width of the kernel.
675
 *
676
 * This field is required.
677
 */
678
679
/**
680
 * \var double GDALWarpKernel::dfSrcYExtraSize;
681
 *
682
 * Number of pixels included in nSrcYSize that are present on the edges of
683
 * the area of interest to take into account the height of the kernel.
684
 *
685
 * This field is required.
686
 */
687
688
/**
689
 * \var GByte **GDALWarpKernel::papabySrcImage;
690
 *
691
 * Array of source image band data.
692
 *
693
 * This is an array of pointers (of size GDALWarpKernel::nBands)
694
 * to image data.  Each individual band of image data is organized as a single
695
 * block of image data in left to right, then bottom to top order.  The actual
696
 * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
697
 *
698
 * To access the pixel value for the (x=3, y=4) pixel (zero based) of
699
 * the second band with eWorkingDataType set to GDT_Float32 use code like
700
 * this:
701
 *
702
 * \code
703
 *   float dfPixelValue;
704
 *   int   nBand = 2-1;  // Band indexes are zero based.
705
 *   int   nPixel = 3; // Zero based.
706
 *   int   nLine = 4;  // Zero based.
707
 *
708
 *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
709
 *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
710
 *   assert( nBand >= 0 && nBand < poKern->nBands );
711
 *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
712
 *                                  [nPixel + nLine * poKern->nSrcXSize];
713
 * \endcode
714
 *
715
 * This field is required.
716
 */
717
718
/**
719
 * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
720
 *
721
 * Per band validity mask for source pixels.
722
 *
723
 * Array of pixel validity mask layers for each source band.   Each of
724
 * the mask layers is the same size (in pixels) as the source image with
725
 * one bit per pixel.  Note that it is legal (and common) for this to be
726
 * NULL indicating that none of the pixels are invalidated, or for some
727
 * band validity masks to be NULL in which case all pixels of the band are
728
 * valid.  The following code can be used to test the validity of a particular
729
 * pixel.
730
 *
731
 * \code
732
 *   int   bIsValid = TRUE;
733
 *   int   nBand = 2-1;  // Band indexes are zero based.
734
 *   int   nPixel = 3; // Zero based.
735
 *   int   nLine = 4;  // Zero based.
736
 *
737
 *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
738
 *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
739
 *   assert( nBand >= 0 && nBand < poKern->nBands );
740
 *
741
 *   if( poKern->papanBandSrcValid != NULL
742
 *       && poKern->papanBandSrcValid[nBand] != NULL )
743
 *   {
744
 *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
745
 *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
746
 *
747
 *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
748
 *   }
749
 * \endcode
750
 */
751
752
/**
753
 * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
754
 *
755
 * Per pixel validity mask for source pixels.
756
 *
757
 * A single validity mask layer that applies to the pixels of all source
758
 * bands.  It is accessed similarly to papanBandSrcValid, but without the
759
 * extra level of band indirection.
760
 *
761
 * This pointer may be NULL indicating that all pixels are valid.
762
 *
763
 * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
764
 * the pixel isn't considered to be valid unless both arrays indicate it is
765
 * valid.
766
 */
767
768
/**
769
 * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
770
 *
771
 * Per pixel density mask for source pixels.
772
 *
773
 * A single density mask layer that applies to the pixels of all source
774
 * bands.  It contains values between 0.0 and 1.0 indicating the degree to
775
 * which this pixel should be allowed to contribute to the output result.
776
 *
777
 * This pointer may be NULL indicating that all pixels have a density of 1.0.
778
 *
779
 * The density for a pixel may be accessed like this:
780
 *
781
 * \code
782
 *   float fDensity = 1.0;
783
 *   int nPixel = 3;  // Zero based.
784
 *   int nLine = 4;   // Zero based.
785
 *
786
 *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
787
 *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
788
 *   if( poKern->pafUnifiedSrcDensity != NULL )
789
 *     fDensity = poKern->pafUnifiedSrcDensity
790
 *                                  [nPixel + nLine * poKern->nSrcXSize];
791
 * \endcode
792
 */
793
794
/**
795
 * \var int GDALWarpKernel::nDstXSize;
796
 *
797
 * Width of destination image in pixels.
798
 *
799
 * This field is required.
800
 */
801
802
/**
803
 * \var int GDALWarpKernel::nDstYSize;
804
 *
805
 * Height of destination image in pixels.
806
 *
807
 * This field is required.
808
 */
809
810
/**
811
 * \var GByte **GDALWarpKernel::papabyDstImage;
812
 *
813
 * Array of destination image band data.
814
 *
815
 * This is an array of pointers (of size GDALWarpKernel::nBands)
816
 * to image data.  Each individual band of image data is organized as a single
817
 * block of image data in left to right, then bottom to top order.  The actual
818
 * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
819
 *
820
 * To access the pixel value for the (x=3, y=4) pixel (zero based) of
821
 * the second band with eWorkingDataType set to GDT_Float32 use code like
822
 * this:
823
 *
824
 * \code
825
 *   float dfPixelValue;
826
 *   int   nBand = 2-1;  // Band indexes are zero based.
827
 *   int   nPixel = 3; // Zero based.
828
 *   int   nLine = 4;  // Zero based.
829
 *
830
 *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
831
 *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
832
 *   assert( nBand >= 0 && nBand < poKern->nBands );
833
 *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
834
 *                                  [nPixel + nLine * poKern->nDstXSize];
835
 * \endcode
836
 *
837
 * This field is required.
838
 */
839
840
/**
841
 * \var GUInt32 *GDALWarpKernel::panDstValid;
842
 *
843
 * Per pixel validity mask for destination pixels.
844
 *
845
 * A single validity mask layer that applies to the pixels of all destination
846
 * bands.  It is accessed similarly to panUnifiedSrcValid, but based
847
 * on the size of the destination image.
848
 *
849
 * This pointer may be NULL indicating that all pixels are valid.
850
 */
851
852
/**
853
 * \var float *GDALWarpKernel::pafDstDensity;
854
 *
855
 * Per pixel density mask for destination pixels.
856
 *
857
 * A single density mask layer that applies to the pixels of all destination
858
 * bands.  It contains values between 0.0 and 1.0.
859
 *
860
 * This pointer may be NULL indicating that all pixels have a density of 1.0.
861
 *
862
 * The density for a pixel may be accessed like this:
863
 *
864
 * \code
865
 *   float fDensity = 1.0;
866
 *   int   nPixel = 3; // Zero based.
867
 *   int   nLine = 4;  // Zero based.
868
 *
869
 *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
870
 *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
871
 *   if( poKern->pafDstDensity != NULL )
872
 *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
873
 * \endcode
874
 */
875
876
/**
877
 * \var int GDALWarpKernel::nSrcXOff;
878
 *
879
 * X offset to source pixel coordinates for transformation.
880
 *
881
 * See pfnTransformer.
882
 *
883
 * This field is required.
884
 */
885
886
/**
887
 * \var int GDALWarpKernel::nSrcYOff;
888
 *
889
 * Y offset to source pixel coordinates for transformation.
890
 *
891
 * See pfnTransformer.
892
 *
893
 * This field is required.
894
 */
895
896
/**
897
 * \var int GDALWarpKernel::nDstXOff;
898
 *
899
 * X offset to destination pixel coordinates for transformation.
900
 *
901
 * See pfnTransformer.
902
 *
903
 * This field is required.
904
 */
905
906
/**
907
 * \var int GDALWarpKernel::nDstYOff;
908
 *
909
 * Y offset to destination pixel coordinates for transformation.
910
 *
911
 * See pfnTransformer.
912
 *
913
 * This field is required.
914
 */
915
916
/**
917
 * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
918
 *
919
 * Source/destination location transformer.
920
 *
921
 * The function to call to transform coordinates between source image
922
 * pixel/line coordinates and destination image pixel/line coordinates.
923
 * See GDALTransformerFunc() for details of the semantics of this function.
924
 *
925
 * The GDALWarpKernel algorithm will only ever use this transformer in
926
 * "destination to source" mode (bDstToSrc=TRUE), and will always pass
927
 * partial or complete scanlines of points in the destination image as
928
 * input.  This means, among other things, that it is safe to use the
929
 * approximating transform GDALApproxTransform() as the transformation
930
 * function.
931
 *
932
 * Source and destination images may be subsets of a larger overall image.
933
 * The transformation algorithms will expect and return pixel/line coordinates
934
 * in terms of this larger image, so coordinates need to be offset by
935
 * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
936
 * passing to pfnTransformer, and after return from it.
937
 *
938
 * The GDALWarpKernel::pTransformerArg value will be passed as the callback
939
 * data to this function when it is called.
940
 *
941
 * This field is required.
942
 */
943
944
/**
945
 * \var void *GDALWarpKernel::pTransformerArg;
946
 *
947
 * Callback data for pfnTransformer.
948
 *
949
 * This field may be NULL if not required for the pfnTransformer being used.
950
 */
951
952
/**
953
 * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
954
 *
955
 * The function to call to report progress of the algorithm, and to check
956
 * for a requested termination of the operation.  It operates according to
957
 * GDALProgressFunc() semantics.
958
 *
959
 * Generally speaking the progress function will be invoked for each
960
 * scanline of the destination buffer that has been processed.
961
 *
962
 * This field may be NULL (internally set to GDALDummyProgress()).
963
 */
964
965
/**
966
 * \var void *GDALWarpKernel::pProgress;
967
 *
968
 * Callback data for pfnProgress.
969
 *
970
 * This field may be NULL if not required for the pfnProgress being used.
971
 */
972
973
/************************************************************************/
974
/*                           GDALWarpKernel()                           */
975
/************************************************************************/
976
977
// Default constructor: every pointer member starts out null, every size,
// offset and radius at zero, both scales at 1.0, and the progress callback
// points at GDALDummyProgress.
GDALWarpKernel::GDALWarpKernel()
    : // Warp options and resampling configuration.
      papszWarpOptions(nullptr),
      eResample(GRA_NearestNeighbour),
      eWorkingDataType(GDT_Unknown),
      nBands(0),
      // Source image description and masks.
      nSrcXSize(0),
      nSrcYSize(0),
      dfSrcXExtraSize(0.0),
      dfSrcYExtraSize(0.0),
      papabySrcImage(nullptr),
      papanBandSrcValid(nullptr),
      panUnifiedSrcValid(nullptr),
      pafUnifiedSrcDensity(nullptr),
      // Destination image description and masks.
      nDstXSize(0),
      nDstYSize(0),
      papabyDstImage(nullptr),
      panDstValid(nullptr),
      pafDstDensity(nullptr),
      // Resampling scales and filter window, recomputed by PerformWarp().
      dfXScale(1.0),
      dfYScale(1.0),
      dfXFilter(0.0),
      dfYFilter(0.0),
      nXRadius(0),
      nYRadius(0),
      nFiltInitX(0),
      nFiltInitY(0),
      // Offsets of the source and destination windows.
      nSrcXOff(0),
      nSrcYOff(0),
      nDstXOff(0),
      nDstYOff(0),
      // Coordinate transformer and progress reporting.
      pfnTransformer(nullptr),
      pTransformerArg(nullptr),
      pfnProgress(GDALDummyProgress),
      pProgress(nullptr),
      dfProgressBase(0.0),
      dfProgressScale(1.0),
      // Miscellaneous state.
      padfDstNoDataReal(nullptr),
      psThreadData(nullptr),
      eTieStrategy(GWKTS_First)
{
}
993
994
/************************************************************************/
995
/*                          ~GDALWarpKernel()                           */
996
/************************************************************************/
997
998
// The destructor has no work of its own to do.
GDALWarpKernel::~GDALWarpKernel() = default;
1001
1002
/************************************************************************/
1003
/*                            PerformWarp()                             */
1004
/************************************************************************/
1005
1006
/**
1007
 * \fn CPLErr GDALWarpKernel::PerformWarp();
1008
 *
1009
 * This method performs the warp described in the GDALWarpKernel.
1010
 *
1011
 * @return CE_None on success or CE_Failure if an error occurs.
1012
 */
1013
1014
CPLErr GDALWarpKernel::PerformWarp()
1015
1016
0
{
1017
0
    const CPLErr eErr = Validate();
1018
1019
0
    if (eErr != CE_None)
1020
0
        return eErr;
1021
1022
    // See #2445 and #3079.
1023
0
    if (nSrcXSize <= 0 || nSrcYSize <= 0)
1024
0
    {
1025
0
        if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1026
0
        {
1027
0
            CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1028
0
            return CE_Failure;
1029
0
        }
1030
0
        return CE_None;
1031
0
    }
1032
1033
    /* -------------------------------------------------------------------- */
1034
    /*      Pre-calculate resampling scales and window sizes for filtering. */
1035
    /* -------------------------------------------------------------------- */
1036
1037
0
    dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
1038
0
    dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
1039
0
    if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
1040
0
        dfXScale = 1.0;
1041
0
    if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
1042
0
        dfYScale = 1.0;
1043
0
    if (dfXScale < 1.0)
1044
0
    {
1045
0
        double dfXReciprocalScale = 1.0 / dfXScale;
1046
0
        const int nXReciprocalScale =
1047
0
            static_cast<int>(dfXReciprocalScale + 0.5);
1048
0
        if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
1049
0
            dfXScale = 1.0 / nXReciprocalScale;
1050
0
    }
1051
0
    if (dfYScale < 1.0)
1052
0
    {
1053
0
        double dfYReciprocalScale = 1.0 / dfYScale;
1054
0
        const int nYReciprocalScale =
1055
0
            static_cast<int>(dfYReciprocalScale + 0.5);
1056
0
        if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
1057
0
            dfYScale = 1.0 / nYReciprocalScale;
1058
0
    }
1059
1060
    // XSCALE and YSCALE undocumented for now. Can help in some cases.
1061
    // Best would probably be a per-pixel scale computation.
1062
0
    const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1063
0
    if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
1064
0
        dfXScale = CPLAtof(pszXScale);
1065
0
    const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1066
0
    if (pszYScale != nullptr)
1067
0
        dfYScale = CPLAtof(pszYScale);
1068
1069
    // If the xscale is significantly lower than the yscale, this is highly
1070
    // suspicious of a situation of wrapping a very large virtual file in
1071
    // geographic coordinates with left and right parts being close to the
1072
    // antimeridian. In that situation, the xscale computed by the above method
1073
    // is completely wrong. Prefer doing an average of a few sample points
1074
    // instead
1075
0
    if ((dfYScale / dfXScale > 100 ||
1076
0
         (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
1077
0
    {
1078
        // Sample points along a grid
1079
0
        const int nPointsX = std::min(10, nDstXSize);
1080
0
        const int nPointsY = std::min(10, nDstYSize);
1081
0
        const int nPoints = 3 * nPointsX * nPointsY;
1082
0
        std::vector<double> padfX;
1083
0
        std::vector<double> padfY;
1084
0
        std::vector<double> padfZ(nPoints);
1085
0
        std::vector<int> pabSuccess(nPoints);
1086
0
        for (int iY = 0; iY < nPointsY; iY++)
1087
0
        {
1088
0
            for (int iX = 0; iX < nPointsX; iX++)
1089
0
            {
1090
0
                const double dfX =
1091
0
                    nPointsX == 1
1092
0
                        ? 0.0
1093
0
                        : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
1094
0
                const double dfY =
1095
0
                    nPointsY == 1
1096
0
                        ? 0.0
1097
0
                        : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
1098
1099
                // Reproject each destination sample point and its neighbours
1100
                // at (x+1,y) and (x,y+1), so as to get the local scale.
1101
0
                padfX.push_back(dfX);
1102
0
                padfY.push_back(dfY);
1103
1104
0
                padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
1105
0
                padfY.push_back(dfY);
1106
1107
0
                padfX.push_back(dfX);
1108
0
                padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
1109
0
            }
1110
0
        }
1111
0
        pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
1112
0
                       &padfZ[0], &pabSuccess[0]);
1113
1114
        // Compute the xscale at each sampling point
1115
0
        std::vector<double> adfXScales;
1116
0
        for (int i = 0; i < nPoints; i += 3)
1117
0
        {
1118
0
            if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
1119
0
            {
1120
0
                const double dfPointXScale =
1121
0
                    1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
1122
0
                                   std::abs(padfX[i + 2] - padfX[i]));
1123
0
                adfXScales.push_back(dfPointXScale);
1124
0
            }
1125
0
        }
1126
1127
        // Sort by increasing xcale
1128
0
        std::sort(adfXScales.begin(), adfXScales.end());
1129
1130
0
        if (!adfXScales.empty())
1131
0
        {
1132
            // Compute the average of scales, but eliminate outliers small
1133
            // scales, if some samples are just along the discontinuity.
1134
0
            const double dfMaxPointXScale = adfXScales.back();
1135
0
            double dfSumPointXScale = 0;
1136
0
            int nCountPointScale = 0;
1137
0
            for (double dfPointXScale : adfXScales)
1138
0
            {
1139
0
                if (dfPointXScale > dfMaxPointXScale / 10)
1140
0
                {
1141
0
                    dfSumPointXScale += dfPointXScale;
1142
0
                    nCountPointScale++;
1143
0
                }
1144
0
            }
1145
0
            if (nCountPointScale > 0)  // should always be true
1146
0
            {
1147
0
                const double dfXScaleFromSampling =
1148
0
                    dfSumPointXScale / nCountPointScale;
1149
#if DEBUG_VERBOSE
1150
                CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
1151
                         dfXScaleFromSampling);
1152
#endif
1153
0
                dfXScale = dfXScaleFromSampling;
1154
0
            }
1155
0
        }
1156
0
    }
1157
1158
#if DEBUG_VERBOSE
1159
    CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1160
#endif
1161
1162
0
    const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
1163
1164
    // Safety check for callers that would use GDALWarpKernel without using
1165
    // GDALWarpOperation.
1166
0
    if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1167
0
         ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1168
0
          !bUse4SamplesFormula)) &&
1169
0
        atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1170
0
            WARP_EXTRA_ELTS)
1171
0
    {
1172
0
        CPLError(CE_Failure, CPLE_AppDefined,
1173
0
                 "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1174
0
                 "their end. "
1175
0
                 "See GDALWarpKernel class definition. If this condition is "
1176
0
                 "fulfilled, define a EXTRA_ELTS=%d warp options",
1177
0
                 WARP_EXTRA_ELTS);
1178
0
        return CE_Failure;
1179
0
    }
1180
1181
0
    dfXFilter = anGWKFilterRadius[eResample];
1182
0
    dfYFilter = anGWKFilterRadius[eResample];
1183
1184
0
    nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1185
0
                              : static_cast<int>(dfXFilter);
1186
0
    nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1187
0
                              : static_cast<int>(dfYFilter);
1188
1189
    // Filter window offset depends on the parity of the kernel radius.
1190
0
    nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1191
0
    nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1192
1193
0
    bApplyVerticalShift =
1194
0
        CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1195
0
    dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1196
0
        papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1197
1198
    /* -------------------------------------------------------------------- */
1199
    /*      Set up resampling functions.                                    */
1200
    /* -------------------------------------------------------------------- */
1201
0
    if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1202
0
        return GWKGeneralCase(this);
1203
1204
0
    const bool bNoMasksOrDstDensityOnly =
1205
0
        papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1206
0
        pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1207
1208
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
1209
0
        bNoMasksOrDstDensityOnly)
1210
0
        return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1211
1212
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
1213
0
        bNoMasksOrDstDensityOnly)
1214
0
        return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1215
1216
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
1217
0
        bNoMasksOrDstDensityOnly)
1218
0
        return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1219
1220
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
1221
0
        bNoMasksOrDstDensityOnly)
1222
0
        return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1223
1224
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
1225
0
        return GWKNearestByte(this);
1226
1227
0
    if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1228
0
        eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1229
0
        return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1230
1231
0
    if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1232
0
        bNoMasksOrDstDensityOnly)
1233
0
        return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1234
1235
0
    if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1236
0
        bNoMasksOrDstDensityOnly)
1237
0
        return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1238
1239
0
    if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1240
0
        bNoMasksOrDstDensityOnly)
1241
0
        return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1242
1243
0
    if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1244
0
        bNoMasksOrDstDensityOnly)
1245
0
        return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1246
1247
0
    if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1248
0
        bNoMasksOrDstDensityOnly)
1249
0
        return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1250
1251
0
    if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1252
0
        bNoMasksOrDstDensityOnly)
1253
0
        return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1254
1255
0
    if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1256
0
        return GWKNearestShort(this);
1257
1258
0
    if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1259
0
        return GWKNearestUnsignedShort(this);
1260
1261
0
    if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1262
0
        bNoMasksOrDstDensityOnly)
1263
0
        return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1264
1265
0
    if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1266
0
        return GWKNearestFloat(this);
1267
1268
0
    if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1269
0
        bNoMasksOrDstDensityOnly)
1270
0
        return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1271
1272
0
    if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1273
0
        bNoMasksOrDstDensityOnly)
1274
0
        return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1275
1276
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1277
    if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1278
        bNoMasksOrDstDensityOnly)
1279
        return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1280
1281
    if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1282
        bNoMasksOrDstDensityOnly)
1283
        return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1284
#endif
1285
1286
0
    if (eResample == GRA_Average)
1287
0
        return GWKAverageOrMode(this);
1288
1289
0
    if (eResample == GRA_RMS)
1290
0
        return GWKAverageOrMode(this);
1291
1292
0
    if (eResample == GRA_Mode)
1293
0
        return GWKAverageOrMode(this);
1294
1295
0
    if (eResample == GRA_Max)
1296
0
        return GWKAverageOrMode(this);
1297
1298
0
    if (eResample == GRA_Min)
1299
0
        return GWKAverageOrMode(this);
1300
1301
0
    if (eResample == GRA_Med)
1302
0
        return GWKAverageOrMode(this);
1303
1304
0
    if (eResample == GRA_Q1)
1305
0
        return GWKAverageOrMode(this);
1306
1307
0
    if (eResample == GRA_Q3)
1308
0
        return GWKAverageOrMode(this);
1309
1310
0
    if (eResample == GRA_Sum)
1311
0
        return GWKSumPreserving(this);
1312
1313
0
    if (!GDALDataTypeIsComplex(eWorkingDataType))
1314
0
    {
1315
0
        return GWKRealCase(this);
1316
0
    }
1317
1318
0
    return GWKGeneralCase(this);
1319
0
}
1320
1321
/************************************************************************/
1322
/*                              Validate()                              */
1323
/************************************************************************/
1324
1325
/**
1326
 * \fn CPLErr GDALWarpKernel::Validate()
1327
 *
1328
 * Check the settings in the GDALWarpKernel, and issue a CPLError()
1329
 * (and return CE_Failure) if the configuration is considered to be
1330
 * invalid for some reason.
1331
 *
1332
 * This method will also do some standard defaulting such as setting
1333
 * pfnProgress to GDALDummyProgress() if it is NULL.
1334
 *
1335
 * @return CE_None on success or CE_Failure if an error is detected.
1336
 */
1337
1338
CPLErr GDALWarpKernel::Validate()
1339
1340
0
{
1341
0
    if (static_cast<size_t>(eResample) >=
1342
0
        (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1343
0
    {
1344
0
        CPLError(CE_Failure, CPLE_AppDefined,
1345
0
                 "Unsupported resampling method %d.",
1346
0
                 static_cast<int>(eResample));
1347
0
        return CE_Failure;
1348
0
    }
1349
1350
    // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1351
    // be ignored as contributing source pixels during resampling. Only taken into account by
1352
    // Average currently
1353
0
    const char *pszExcludedValues =
1354
0
        CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1355
0
    if (pszExcludedValues)
1356
0
    {
1357
0
        const CPLStringList aosTokens(
1358
0
            CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1359
0
        if ((aosTokens.size() % nBands) != 0)
1360
0
        {
1361
0
            CPLError(CE_Failure, CPLE_AppDefined,
1362
0
                     "EXCLUDED_VALUES should contain one or several tuples of "
1363
0
                     "%d values formatted like <R>,<G>,<B> or "
1364
0
                     "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1365
0
                     "tuples",
1366
0
                     nBands);
1367
0
            return CE_Failure;
1368
0
        }
1369
0
        std::vector<double> adfTuple;
1370
0
        for (int i = 0; i < aosTokens.size(); ++i)
1371
0
        {
1372
0
            adfTuple.push_back(CPLAtof(aosTokens[i]));
1373
0
            if (((i + 1) % nBands) == 0)
1374
0
            {
1375
0
                m_aadfExcludedValues.push_back(adfTuple);
1376
0
                adfTuple.clear();
1377
0
            }
1378
0
        }
1379
0
    }
1380
1381
0
    return CE_None;
1382
0
}
1383
1384
/************************************************************************/
1385
/*                         GWKOverlayDensity()                          */
1386
/*                                                                      */
1387
/*      Compute the final density for the destination pixel.  This      */
1388
/*      is a function of the overlay density (passed in) and the        */
1389
/*      original density.                                               */
1390
/************************************************************************/
1391
1392
static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1393
                              double dfDensity)
1394
0
{
1395
0
    if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1396
0
        return;
1397
1398
0
    poWK->pafDstDensity[iDstOffset] = static_cast<float>(
1399
0
        1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
1400
0
}
1401
1402
/************************************************************************/
1403
/*                          GWKRoundValueT()                            */
1404
/************************************************************************/
1405
1406
// Rounding helper selected on the signedness of the target type T.
// Only the two partial specializations below define eval().
template <class T, bool is_signed> struct sGWKRoundValueT
{
    static T eval(double);
};

// Signed targets: take the floor of (value + 0.5), then convert to T.
template <class T> struct sGWKRoundValueT<T, true> /* signed */
{
    static T eval(double dfInput)
    {
        const double dfRounded = floor(dfInput + 0.5);
        return static_cast<T>(dfRounded);
    }
};

// Unsigned targets: add 0.5 and let the conversion to T truncate.
template <class T> struct sGWKRoundValueT<T, false> /* unsigned */
{
    static T eval(double dfInput)
    {
        const double dfShifted = dfInput + 0.5;
        return static_cast<T>(dfShifted);
    }
};
1426
1427
template <class T> static T GWKRoundValueT(double dfValue)
1428
0
{
1429
0
    return sGWKRoundValueT<T, cpl::NumericLimits<T>::is_signed>::eval(dfValue);
1430
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned char GWKRoundValueT<unsigned char>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:short GWKRoundValueT<short>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned short GWKRoundValueT<unsigned short>(double)
1431
1432
// float target: no rounding is applied, the value is only narrowed.
template <> float GWKRoundValueT<float>(double dfValue)
{
    const float fNarrowed = static_cast<float>(dfValue);
    return fNarrowed;
}

#ifdef notused
// double target: the value is returned unchanged.
template <> double GWKRoundValueT<double>(double dfValue)
{
    return dfValue;
}
#endif
1443
1444
/************************************************************************/
1445
/*                            GWKClampValueT()                          */
1446
/************************************************************************/
1447
1448
// Convert dfValue to T: values below the minimum of T give that minimum,
// values above the maximum of T give that maximum, anything else is rounded
// with GWKRoundValueT<T>().
template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
{
    using Limits = cpl::NumericLimits<T>;

    if (dfValue < Limits::min())
    {
        return Limits::min();
    }
    if (dfValue > Limits::max())
    {
        return Limits::max();
    }
    return GWKRoundValueT<T>(dfValue);
}
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned char GWKClampValueT<unsigned char>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:short GWKClampValueT<short>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned short GWKClampValueT<unsigned short>(double)
1457
1458
// float target: no clamping is applied, the value is only narrowed.
template <> float GWKClampValueT<float>(double dfValue)
{
    const float fNarrowed = static_cast<float>(dfValue);
    return fNarrowed;
}

#ifdef notused
// double target: the value is returned unchanged.
template <> double GWKClampValueT<double>(double dfValue)
{
    return dfValue;
}
#endif
1469
1470
/************************************************************************/
1471
/*                             AvoidNoData()                            */
1472
/************************************************************************/
1473
1474
template <class T>
1475
inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1476
                        GPtrDiff_t iDstOffset)
1477
0
{
1478
0
    GByte *pabyDst = poWK->papabyDstImage[iBand];
1479
0
    T *pDst = reinterpret_cast<T *>(pabyDst);
1480
1481
0
    if (poWK->padfDstNoDataReal != nullptr &&
1482
0
        poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1483
0
    {
1484
        if constexpr (cpl::NumericLimits<T>::is_integer)
1485
0
        {
1486
0
            if (pDst[iDstOffset] ==
1487
0
                static_cast<T>(cpl::NumericLimits<T>::lowest()))
1488
0
            {
1489
0
                pDst[iDstOffset] =
1490
0
                    static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1491
0
            }
1492
0
            else
1493
0
                pDst[iDstOffset]--;
1494
        }
1495
        else
1496
0
        {
1497
0
            if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1498
0
            {
1499
0
                using std::nextafter;
1500
0
                pDst[iDstOffset] =
1501
0
                    nextafter(pDst[iDstOffset], static_cast<T>(0));
1502
0
            }
1503
0
            else
1504
0
            {
1505
0
                using std::nextafter;
1506
0
                pDst[iDstOffset] =
1507
0
                    nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1508
0
            }
1509
0
        }
1510
1511
0
        if (!poWK->bWarnedAboutDstNoDataReplacement)
1512
0
        {
1513
0
            const_cast<GDALWarpKernel *>(poWK)
1514
0
                ->bWarnedAboutDstNoDataReplacement = true;
1515
0
            CPLError(CE_Warning, CPLE_AppDefined,
1516
0
                     "Value %g in the source dataset has been changed to %g "
1517
0
                     "in the destination dataset to avoid being treated as "
1518
0
                     "NoData. To avoid this, select a different NoData value "
1519
0
                     "for the destination dataset.",
1520
0
                     poWK->padfDstNoDataReal[iBand],
1521
0
                     static_cast<double>(pDst[iDstOffset]));
1522
0
        }
1523
0
    }
1524
0
}
Unexecuted instantiation: void AvoidNoData<unsigned char>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<signed char>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<short>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<unsigned short>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<unsigned int>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<int>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<unsigned long>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<long>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<cpl::Float16>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<float>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<double>(GDALWarpKernel const*, int, long long)
1525
1526
/************************************************************************/
1527
/*                         GWKSetPixelValueRealT()                      */
1528
/************************************************************************/
1529
1530
template <class T>
1531
static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1532
                                  GPtrDiff_t iDstOffset, double dfDensity,
1533
                                  T value)
1534
0
{
1535
0
    T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1536
1537
    /* -------------------------------------------------------------------- */
1538
    /*      If the source density is less than 100% we need to fetch the    */
1539
    /*      existing destination value, and mix it with the source to       */
1540
    /*      get the new "to apply" value.  Also compute composite           */
1541
    /*      density.                                                        */
1542
    /*                                                                      */
1543
    /*      We avoid mixing if density is very near one or risk mixing      */
1544
    /*      in very extreme nodata values and causing odd results (#1610)   */
1545
    /* -------------------------------------------------------------------- */
1546
0
    if (dfDensity < 0.9999)
1547
0
    {
1548
0
        if (dfDensity < 0.0001)
1549
0
            return true;
1550
1551
0
        double dfDstDensity = 1.0;
1552
1553
0
        if (poWK->pafDstDensity != nullptr)
1554
0
            dfDstDensity = poWK->pafDstDensity[iDstOffset];
1555
0
        else if (poWK->panDstValid != nullptr &&
1556
0
                 !CPLMaskGet(poWK->panDstValid, iDstOffset))
1557
0
            dfDstDensity = 0.0;
1558
1559
        // It seems like we also ought to be testing panDstValid[] here!
1560
1561
0
        const double dfDstReal = pDst[iDstOffset];
1562
1563
        // The destination density is really only relative to the portion
1564
        // not occluded by the overlay.
1565
0
        const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1566
1567
0
        const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
1568
0
                              (dfDensity + dfDstInfluence);
1569
1570
        /* --------------------------------------------------------------------
1571
         */
1572
        /*      Actually apply the destination value. */
1573
        /*                                                                      */
1574
        /*      Avoid using the destination nodata value for integer datatypes
1575
         */
1576
        /*      if by chance it is equal to the computed pixel value. */
1577
        /* --------------------------------------------------------------------
1578
         */
1579
0
        pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1580
0
    }
1581
0
    else
1582
0
    {
1583
0
        pDst[iDstOffset] = value;
1584
0
    }
1585
1586
0
    AvoidNoData<T>(poWK, iBand, iDstOffset);
1587
1588
0
    return true;
1589
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<unsigned char>(GDALWarpKernel const*, int, long long, double, unsigned char)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<short>(GDALWarpKernel const*, int, long long, double, short)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<unsigned short>(GDALWarpKernel const*, int, long long, double, unsigned short)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<float>(GDALWarpKernel const*, int, long long, double, float)
1590
1591
/************************************************************************/
1592
/*                       ClampRoundAndAvoidNoData()                     */
1593
/************************************************************************/
1594
1595
template <class T>
1596
inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1597
                                     GPtrDiff_t iDstOffset, double dfReal)
1598
0
{
1599
0
    GByte *pabyDst = poWK->papabyDstImage[iBand];
1600
0
    T *pDst = reinterpret_cast<T *>(pabyDst);
1601
1602
    if constexpr (cpl::NumericLimits<T>::is_integer)
1603
0
    {
1604
0
        using std::floor;
1605
0
        if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1606
0
            pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1607
0
        else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1608
0
            pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1609
        else if constexpr (cpl::NumericLimits<T>::is_signed)
1610
0
            pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1611
        else
1612
0
            pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1613
    }
1614
    else
1615
0
    {
1616
0
        pDst[iDstOffset] = static_cast<T>(dfReal);
1617
0
    }
1618
1619
0
    AvoidNoData<T>(poWK, iBand, iDstOffset);
1620
0
}
Unexecuted instantiation: void ClampRoundAndAvoidNoData<unsigned char>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<signed char>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<short>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<unsigned short>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<unsigned int>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<int>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<unsigned long>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<long>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<cpl::Float16>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<float>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<double>(GDALWarpKernel const*, int, long long, double)
1621
1622
/************************************************************************/
1623
/*                          GWKSetPixelValue()                          */
1624
/************************************************************************/
1625
1626
static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1627
                             GPtrDiff_t iDstOffset, double dfDensity,
1628
                             double dfReal, double dfImag)
1629
1630
0
{
1631
0
    GByte *pabyDst = poWK->papabyDstImage[iBand];
1632
1633
    /* -------------------------------------------------------------------- */
1634
    /*      If the source density is less than 100% we need to fetch the    */
1635
    /*      existing destination value, and mix it with the source to       */
1636
    /*      get the new "to apply" value.  Also compute composite           */
1637
    /*      density.                                                        */
1638
    /*                                                                      */
1639
    /*      We avoid mixing if density is very near one or risk mixing      */
1640
    /*      in very extreme nodata values and causing odd results (#1610)   */
1641
    /* -------------------------------------------------------------------- */
1642
0
    if (dfDensity < 0.9999)
1643
0
    {
1644
0
        if (dfDensity < 0.0001)
1645
0
            return true;
1646
1647
0
        double dfDstDensity = 1.0;
1648
0
        if (poWK->pafDstDensity != nullptr)
1649
0
            dfDstDensity = poWK->pafDstDensity[iDstOffset];
1650
0
        else if (poWK->panDstValid != nullptr &&
1651
0
                 !CPLMaskGet(poWK->panDstValid, iDstOffset))
1652
0
            dfDstDensity = 0.0;
1653
1654
0
        double dfDstReal = 0.0;
1655
0
        double dfDstImag = 0.0;
1656
        // It seems like we also ought to be testing panDstValid[] here!
1657
1658
        // TODO(schwehr): Factor out this repreated type of set.
1659
0
        switch (poWK->eWorkingDataType)
1660
0
        {
1661
0
            case GDT_Byte:
1662
0
                dfDstReal = pabyDst[iDstOffset];
1663
0
                dfDstImag = 0.0;
1664
0
                break;
1665
1666
0
            case GDT_Int8:
1667
0
                dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1668
0
                dfDstImag = 0.0;
1669
0
                break;
1670
1671
0
            case GDT_Int16:
1672
0
                dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1673
0
                dfDstImag = 0.0;
1674
0
                break;
1675
1676
0
            case GDT_UInt16:
1677
0
                dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1678
0
                dfDstImag = 0.0;
1679
0
                break;
1680
1681
0
            case GDT_Int32:
1682
0
                dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1683
0
                dfDstImag = 0.0;
1684
0
                break;
1685
1686
0
            case GDT_UInt32:
1687
0
                dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1688
0
                dfDstImag = 0.0;
1689
0
                break;
1690
1691
0
            case GDT_Int64:
1692
0
                dfDstReal = static_cast<double>(
1693
0
                    reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1694
0
                dfDstImag = 0.0;
1695
0
                break;
1696
1697
0
            case GDT_UInt64:
1698
0
                dfDstReal = static_cast<double>(
1699
0
                    reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1700
0
                dfDstImag = 0.0;
1701
0
                break;
1702
1703
0
            case GDT_Float16:
1704
0
                dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1705
0
                dfDstImag = 0.0;
1706
0
                break;
1707
1708
0
            case GDT_Float32:
1709
0
                dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
1710
0
                dfDstImag = 0.0;
1711
0
                break;
1712
1713
0
            case GDT_Float64:
1714
0
                dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1715
0
                dfDstImag = 0.0;
1716
0
                break;
1717
1718
0
            case GDT_CInt16:
1719
0
                dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1720
0
                dfDstImag =
1721
0
                    reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1722
0
                break;
1723
1724
0
            case GDT_CInt32:
1725
0
                dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1726
0
                dfDstImag =
1727
0
                    reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1728
0
                break;
1729
1730
0
            case GDT_CFloat16:
1731
0
                dfDstReal =
1732
0
                    reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1733
0
                dfDstImag =
1734
0
                    reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1735
0
                break;
1736
1737
0
            case GDT_CFloat32:
1738
0
                dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
1739
0
                dfDstImag =
1740
0
                    reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
1741
0
                break;
1742
1743
0
            case GDT_CFloat64:
1744
0
                dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1745
0
                dfDstImag =
1746
0
                    reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1747
0
                break;
1748
1749
0
            case GDT_Unknown:
1750
0
            case GDT_TypeCount:
1751
0
                CPLAssert(false);
1752
0
                return false;
1753
0
        }
1754
1755
        // The destination density is really only relative to the portion
1756
        // not occluded by the overlay.
1757
0
        const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1758
1759
0
        dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1760
0
                 (dfDensity + dfDstInfluence);
1761
1762
0
        dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
1763
0
                 (dfDensity + dfDstInfluence);
1764
0
    }
1765
1766
    /* -------------------------------------------------------------------- */
1767
    /*      Actually apply the destination value.                           */
1768
    /*                                                                      */
1769
    /*      Avoid using the destination nodata value for integer datatypes  */
1770
    /*      if by chance it is equal to the computed pixel value.           */
1771
    /* -------------------------------------------------------------------- */
1772
1773
0
    switch (poWK->eWorkingDataType)
1774
0
    {
1775
0
        case GDT_Byte:
1776
0
            ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
1777
0
            break;
1778
1779
0
        case GDT_Int8:
1780
0
            ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
1781
0
            break;
1782
1783
0
        case GDT_Int16:
1784
0
            ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
1785
0
            break;
1786
1787
0
        case GDT_UInt16:
1788
0
            ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
1789
0
            break;
1790
1791
0
        case GDT_UInt32:
1792
0
            ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
1793
0
            break;
1794
1795
0
        case GDT_Int32:
1796
0
            ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
1797
0
            break;
1798
1799
0
        case GDT_UInt64:
1800
0
            ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
1801
0
                                                    dfReal);
1802
0
            break;
1803
1804
0
        case GDT_Int64:
1805
0
            ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
1806
0
                                                   dfReal);
1807
0
            break;
1808
1809
0
        case GDT_Float16:
1810
0
            ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
1811
0
            break;
1812
1813
0
        case GDT_Float32:
1814
0
            ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
1815
0
            break;
1816
1817
0
        case GDT_Float64:
1818
0
            ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
1819
0
            break;
1820
1821
0
        case GDT_CInt16:
1822
0
        {
1823
0
            typedef GInt16 T;
1824
0
            if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1825
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1826
0
                    cpl::NumericLimits<T>::min();
1827
0
            else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1828
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1829
0
                    cpl::NumericLimits<T>::max();
1830
0
            else
1831
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1832
0
                    static_cast<T>(floor(dfReal + 0.5));
1833
0
            if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1834
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1835
0
                    cpl::NumericLimits<T>::min();
1836
0
            else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1837
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1838
0
                    cpl::NumericLimits<T>::max();
1839
0
            else
1840
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1841
0
                    static_cast<T>(floor(dfImag + 0.5));
1842
0
            break;
1843
0
        }
1844
1845
0
        case GDT_CInt32:
1846
0
        {
1847
0
            typedef GInt32 T;
1848
0
            if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1849
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1850
0
                    cpl::NumericLimits<T>::min();
1851
0
            else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1852
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1853
0
                    cpl::NumericLimits<T>::max();
1854
0
            else
1855
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1856
0
                    static_cast<T>(floor(dfReal + 0.5));
1857
0
            if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1858
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1859
0
                    cpl::NumericLimits<T>::min();
1860
0
            else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1861
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1862
0
                    cpl::NumericLimits<T>::max();
1863
0
            else
1864
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1865
0
                    static_cast<T>(floor(dfImag + 0.5));
1866
0
            break;
1867
0
        }
1868
1869
0
        case GDT_CFloat16:
1870
0
            reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
1871
0
                static_cast<GFloat16>(dfReal);
1872
0
            reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
1873
0
                static_cast<GFloat16>(dfImag);
1874
0
            break;
1875
1876
0
        case GDT_CFloat32:
1877
0
            reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
1878
0
                static_cast<float>(dfReal);
1879
0
            reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
1880
0
                static_cast<float>(dfImag);
1881
0
            break;
1882
1883
0
        case GDT_CFloat64:
1884
0
            reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
1885
0
            reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
1886
0
            break;
1887
1888
0
        case GDT_Unknown:
1889
0
        case GDT_TypeCount:
1890
0
            return false;
1891
0
    }
1892
1893
0
    return true;
1894
0
}
1895
1896
/************************************************************************/
1897
/*                       GWKSetPixelValueReal()                         */
1898
/************************************************************************/
1899
1900
static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
1901
                                 GPtrDiff_t iDstOffset, double dfDensity,
1902
                                 double dfReal)
1903
1904
0
{
1905
0
    GByte *pabyDst = poWK->papabyDstImage[iBand];
1906
1907
    /* -------------------------------------------------------------------- */
1908
    /*      If the source density is less than 100% we need to fetch the    */
1909
    /*      existing destination value, and mix it with the source to       */
1910
    /*      get the new "to apply" value.  Also compute composite           */
1911
    /*      density.                                                        */
1912
    /*                                                                      */
1913
    /*      We avoid mixing if density is very near one or risk mixing      */
1914
    /*      in very extreme nodata values and causing odd results (#1610)   */
1915
    /* -------------------------------------------------------------------- */
1916
0
    if (dfDensity < 0.9999)
1917
0
    {
1918
0
        if (dfDensity < 0.0001)
1919
0
            return true;
1920
1921
0
        double dfDstReal = 0.0;
1922
0
        double dfDstDensity = 1.0;
1923
1924
0
        if (poWK->pafDstDensity != nullptr)
1925
0
            dfDstDensity = poWK->pafDstDensity[iDstOffset];
1926
0
        else if (poWK->panDstValid != nullptr &&
1927
0
                 !CPLMaskGet(poWK->panDstValid, iDstOffset))
1928
0
            dfDstDensity = 0.0;
1929
1930
        // It seems like we also ought to be testing panDstValid[] here!
1931
1932
0
        switch (poWK->eWorkingDataType)
1933
0
        {
1934
0
            case GDT_Byte:
1935
0
                dfDstReal = pabyDst[iDstOffset];
1936
0
                break;
1937
1938
0
            case GDT_Int8:
1939
0
                dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1940
0
                break;
1941
1942
0
            case GDT_Int16:
1943
0
                dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1944
0
                break;
1945
1946
0
            case GDT_UInt16:
1947
0
                dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1948
0
                break;
1949
1950
0
            case GDT_Int32:
1951
0
                dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1952
0
                break;
1953
1954
0
            case GDT_UInt32:
1955
0
                dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1956
0
                break;
1957
1958
0
            case GDT_Int64:
1959
0
                dfDstReal = static_cast<double>(
1960
0
                    reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1961
0
                break;
1962
1963
0
            case GDT_UInt64:
1964
0
                dfDstReal = static_cast<double>(
1965
0
                    reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1966
0
                break;
1967
1968
0
            case GDT_Float16:
1969
0
                dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1970
0
                break;
1971
1972
0
            case GDT_Float32:
1973
0
                dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
1974
0
                break;
1975
1976
0
            case GDT_Float64:
1977
0
                dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1978
0
                break;
1979
1980
0
            case GDT_CInt16:
1981
0
            case GDT_CInt32:
1982
0
            case GDT_CFloat16:
1983
0
            case GDT_CFloat32:
1984
0
            case GDT_CFloat64:
1985
0
            case GDT_Unknown:
1986
0
            case GDT_TypeCount:
1987
0
                CPLAssert(false);
1988
0
                return false;
1989
0
        }
1990
1991
        // The destination density is really only relative to the portion
1992
        // not occluded by the overlay.
1993
0
        const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1994
1995
0
        dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1996
0
                 (dfDensity + dfDstInfluence);
1997
0
    }
1998
1999
    /* -------------------------------------------------------------------- */
2000
    /*      Actually apply the destination value.                           */
2001
    /*                                                                      */
2002
    /*      Avoid using the destination nodata value for integer datatypes  */
2003
    /*      if by chance it is equal to the computed pixel value.           */
2004
    /* -------------------------------------------------------------------- */
2005
2006
0
    switch (poWK->eWorkingDataType)
2007
0
    {
2008
0
        case GDT_Byte:
2009
0
            ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
2010
0
            break;
2011
2012
0
        case GDT_Int8:
2013
0
            ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
2014
0
            break;
2015
2016
0
        case GDT_Int16:
2017
0
            ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
2018
0
            break;
2019
2020
0
        case GDT_UInt16:
2021
0
            ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
2022
0
            break;
2023
2024
0
        case GDT_UInt32:
2025
0
            ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
2026
0
            break;
2027
2028
0
        case GDT_Int32:
2029
0
            ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
2030
0
            break;
2031
2032
0
        case GDT_UInt64:
2033
0
            ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
2034
0
                                                    dfReal);
2035
0
            break;
2036
2037
0
        case GDT_Int64:
2038
0
            ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
2039
0
                                                   dfReal);
2040
0
            break;
2041
2042
0
        case GDT_Float16:
2043
0
            ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
2044
0
            break;
2045
2046
0
        case GDT_Float32:
2047
0
            ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
2048
0
            break;
2049
2050
0
        case GDT_Float64:
2051
0
            ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
2052
0
            break;
2053
2054
0
        case GDT_CInt16:
2055
0
        case GDT_CInt32:
2056
0
        case GDT_CFloat16:
2057
0
        case GDT_CFloat32:
2058
0
        case GDT_CFloat64:
2059
0
            return false;
2060
2061
0
        case GDT_Unknown:
2062
0
        case GDT_TypeCount:
2063
0
            CPLAssert(false);
2064
0
            return false;
2065
0
    }
2066
2067
0
    return true;
2068
0
}
2069
2070
/************************************************************************/
2071
/*                          GWKGetPixelValue()                          */
2072
/************************************************************************/
2073
2074
/* It is assumed that panUnifiedSrcValid has been checked before */
2075
2076
static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2077
                             GPtrDiff_t iSrcOffset, double *pdfDensity,
2078
                             double *pdfReal, double *pdfImag)
2079
2080
0
{
2081
0
    GByte *pabySrc = poWK->papabySrcImage[iBand];
2082
2083
0
    if (poWK->papanBandSrcValid != nullptr &&
2084
0
        poWK->papanBandSrcValid[iBand] != nullptr &&
2085
0
        !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2086
0
    {
2087
0
        *pdfDensity = 0.0;
2088
0
        return false;
2089
0
    }
2090
2091
0
    *pdfReal = 0.0;
2092
0
    *pdfImag = 0.0;
2093
2094
    // TODO(schwehr): Fix casting.
2095
0
    switch (poWK->eWorkingDataType)
2096
0
    {
2097
0
        case GDT_Byte:
2098
0
            *pdfReal = pabySrc[iSrcOffset];
2099
0
            *pdfImag = 0.0;
2100
0
            break;
2101
2102
0
        case GDT_Int8:
2103
0
            *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2104
0
            *pdfImag = 0.0;
2105
0
            break;
2106
2107
0
        case GDT_Int16:
2108
0
            *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2109
0
            *pdfImag = 0.0;
2110
0
            break;
2111
2112
0
        case GDT_UInt16:
2113
0
            *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2114
0
            *pdfImag = 0.0;
2115
0
            break;
2116
2117
0
        case GDT_Int32:
2118
0
            *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2119
0
            *pdfImag = 0.0;
2120
0
            break;
2121
2122
0
        case GDT_UInt32:
2123
0
            *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2124
0
            *pdfImag = 0.0;
2125
0
            break;
2126
2127
0
        case GDT_Int64:
2128
0
            *pdfReal = static_cast<double>(
2129
0
                reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2130
0
            *pdfImag = 0.0;
2131
0
            break;
2132
2133
0
        case GDT_UInt64:
2134
0
            *pdfReal = static_cast<double>(
2135
0
                reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2136
0
            *pdfImag = 0.0;
2137
0
            break;
2138
2139
0
        case GDT_Float16:
2140
0
            *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2141
0
            *pdfImag = 0.0;
2142
0
            break;
2143
2144
0
        case GDT_Float32:
2145
0
            *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2146
0
            *pdfImag = 0.0;
2147
0
            break;
2148
2149
0
        case GDT_Float64:
2150
0
            *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2151
0
            *pdfImag = 0.0;
2152
0
            break;
2153
2154
0
        case GDT_CInt16:
2155
0
            *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2156
0
            *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2157
0
            break;
2158
2159
0
        case GDT_CInt32:
2160
0
            *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2161
0
            *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2162
0
            break;
2163
2164
0
        case GDT_CFloat16:
2165
0
            *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2166
0
            *pdfImag =
2167
0
                reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2168
0
            break;
2169
2170
0
        case GDT_CFloat32:
2171
0
            *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
2172
0
            *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
2173
0
            break;
2174
2175
0
        case GDT_CFloat64:
2176
0
            *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2177
0
            *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2178
0
            break;
2179
2180
0
        case GDT_Unknown:
2181
0
        case GDT_TypeCount:
2182
0
            CPLAssert(false);
2183
0
            *pdfDensity = 0.0;
2184
0
            return false;
2185
0
    }
2186
2187
0
    if (poWK->pafUnifiedSrcDensity != nullptr)
2188
0
        *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2189
0
    else
2190
0
        *pdfDensity = 1.0;
2191
2192
0
    return *pdfDensity != 0.0;
2193
0
}
2194
2195
/************************************************************************/
2196
/*                       GWKGetPixelValueReal()                         */
2197
/************************************************************************/
2198
2199
static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2200
                                 GPtrDiff_t iSrcOffset, double *pdfDensity,
2201
                                 double *pdfReal)
2202
2203
0
{
2204
0
    GByte *pabySrc = poWK->papabySrcImage[iBand];
2205
2206
0
    if (poWK->papanBandSrcValid != nullptr &&
2207
0
        poWK->papanBandSrcValid[iBand] != nullptr &&
2208
0
        !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2209
0
    {
2210
0
        *pdfDensity = 0.0;
2211
0
        return false;
2212
0
    }
2213
2214
0
    switch (poWK->eWorkingDataType)
2215
0
    {
2216
0
        case GDT_Byte:
2217
0
            *pdfReal = pabySrc[iSrcOffset];
2218
0
            break;
2219
2220
0
        case GDT_Int8:
2221
0
            *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2222
0
            break;
2223
2224
0
        case GDT_Int16:
2225
0
            *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2226
0
            break;
2227
2228
0
        case GDT_UInt16:
2229
0
            *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2230
0
            break;
2231
2232
0
        case GDT_Int32:
2233
0
            *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2234
0
            break;
2235
2236
0
        case GDT_UInt32:
2237
0
            *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2238
0
            break;
2239
2240
0
        case GDT_Int64:
2241
0
            *pdfReal = static_cast<double>(
2242
0
                reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2243
0
            break;
2244
2245
0
        case GDT_UInt64:
2246
0
            *pdfReal = static_cast<double>(
2247
0
                reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2248
0
            break;
2249
2250
0
        case GDT_Float16:
2251
0
            *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2252
0
            break;
2253
2254
0
        case GDT_Float32:
2255
0
            *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2256
0
            break;
2257
2258
0
        case GDT_Float64:
2259
0
            *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2260
0
            break;
2261
2262
0
        case GDT_CInt16:
2263
0
        case GDT_CInt32:
2264
0
        case GDT_CFloat16:
2265
0
        case GDT_CFloat32:
2266
0
        case GDT_CFloat64:
2267
0
        case GDT_Unknown:
2268
0
        case GDT_TypeCount:
2269
0
            CPLAssert(false);
2270
0
            return false;
2271
0
    }
2272
2273
0
    if (poWK->pafUnifiedSrcDensity != nullptr)
2274
0
        *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2275
0
    else
2276
0
        *pdfDensity = 1.0;
2277
2278
0
    return *pdfDensity != 0.0;
2279
0
}
2280
2281
/************************************************************************/
2282
/*                          GWKGetPixelRow()                            */
2283
/************************************************************************/
2284
2285
/* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2286
/* data-types. */
2287
2288
static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2289
                           GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2290
                           double *padfDensity, double adfReal[],
2291
                           double *padfImag)
2292
0
{
2293
    // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2294
0
    const int nSrcLen = nHalfSrcLen * 2;
2295
0
    bool bHasValid = false;
2296
2297
0
    if (padfDensity != nullptr)
2298
0
    {
2299
        // Init the density.
2300
0
        for (int i = 0; i < nSrcLen; i += 2)
2301
0
        {
2302
0
            padfDensity[i] = 1.0;
2303
0
            padfDensity[i + 1] = 1.0;
2304
0
        }
2305
2306
0
        if (poWK->panUnifiedSrcValid != nullptr)
2307
0
        {
2308
0
            for (int i = 0; i < nSrcLen; i += 2)
2309
0
            {
2310
0
                if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2311
0
                    bHasValid = true;
2312
0
                else
2313
0
                    padfDensity[i] = 0.0;
2314
2315
0
                if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2316
0
                    bHasValid = true;
2317
0
                else
2318
0
                    padfDensity[i + 1] = 0.0;
2319
0
            }
2320
2321
            // Reset or fail as needed.
2322
0
            if (bHasValid)
2323
0
                bHasValid = false;
2324
0
            else
2325
0
                return false;
2326
0
        }
2327
2328
0
        if (poWK->papanBandSrcValid != nullptr &&
2329
0
            poWK->papanBandSrcValid[iBand] != nullptr)
2330
0
        {
2331
0
            for (int i = 0; i < nSrcLen; i += 2)
2332
0
            {
2333
0
                if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2334
0
                    bHasValid = true;
2335
0
                else
2336
0
                    padfDensity[i] = 0.0;
2337
2338
0
                if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2339
0
                               iSrcOffset + i + 1))
2340
0
                    bHasValid = true;
2341
0
                else
2342
0
                    padfDensity[i + 1] = 0.0;
2343
0
            }
2344
2345
            // Reset or fail as needed.
2346
0
            if (bHasValid)
2347
0
                bHasValid = false;
2348
0
            else
2349
0
                return false;
2350
0
        }
2351
0
    }
2352
2353
    // TODO(schwehr): Fix casting.
2354
    // Fetch data.
2355
0
    switch (poWK->eWorkingDataType)
2356
0
    {
2357
0
        case GDT_Byte:
2358
0
        {
2359
0
            GByte *pSrc =
2360
0
                reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2361
0
            pSrc += iSrcOffset;
2362
0
            for (int i = 0; i < nSrcLen; i += 2)
2363
0
            {
2364
0
                adfReal[i] = pSrc[i];
2365
0
                adfReal[i + 1] = pSrc[i + 1];
2366
0
            }
2367
0
            break;
2368
0
        }
2369
2370
0
        case GDT_Int8:
2371
0
        {
2372
0
            GInt8 *pSrc =
2373
0
                reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2374
0
            pSrc += iSrcOffset;
2375
0
            for (int i = 0; i < nSrcLen; i += 2)
2376
0
            {
2377
0
                adfReal[i] = pSrc[i];
2378
0
                adfReal[i + 1] = pSrc[i + 1];
2379
0
            }
2380
0
            break;
2381
0
        }
2382
2383
0
        case GDT_Int16:
2384
0
        {
2385
0
            GInt16 *pSrc =
2386
0
                reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2387
0
            pSrc += iSrcOffset;
2388
0
            for (int i = 0; i < nSrcLen; i += 2)
2389
0
            {
2390
0
                adfReal[i] = pSrc[i];
2391
0
                adfReal[i + 1] = pSrc[i + 1];
2392
0
            }
2393
0
            break;
2394
0
        }
2395
2396
0
        case GDT_UInt16:
2397
0
        {
2398
0
            GUInt16 *pSrc =
2399
0
                reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2400
0
            pSrc += iSrcOffset;
2401
0
            for (int i = 0; i < nSrcLen; i += 2)
2402
0
            {
2403
0
                adfReal[i] = pSrc[i];
2404
0
                adfReal[i + 1] = pSrc[i + 1];
2405
0
            }
2406
0
            break;
2407
0
        }
2408
2409
0
        case GDT_Int32:
2410
0
        {
2411
0
            GInt32 *pSrc =
2412
0
                reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2413
0
            pSrc += iSrcOffset;
2414
0
            for (int i = 0; i < nSrcLen; i += 2)
2415
0
            {
2416
0
                adfReal[i] = pSrc[i];
2417
0
                adfReal[i + 1] = pSrc[i + 1];
2418
0
            }
2419
0
            break;
2420
0
        }
2421
2422
0
        case GDT_UInt32:
2423
0
        {
2424
0
            GUInt32 *pSrc =
2425
0
                reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2426
0
            pSrc += iSrcOffset;
2427
0
            for (int i = 0; i < nSrcLen; i += 2)
2428
0
            {
2429
0
                adfReal[i] = pSrc[i];
2430
0
                adfReal[i + 1] = pSrc[i + 1];
2431
0
            }
2432
0
            break;
2433
0
        }
2434
2435
0
        case GDT_Int64:
2436
0
        {
2437
0
            auto pSrc =
2438
0
                reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2439
0
            pSrc += iSrcOffset;
2440
0
            for (int i = 0; i < nSrcLen; i += 2)
2441
0
            {
2442
0
                adfReal[i] = static_cast<double>(pSrc[i]);
2443
0
                adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2444
0
            }
2445
0
            break;
2446
0
        }
2447
2448
0
        case GDT_UInt64:
2449
0
        {
2450
0
            auto pSrc =
2451
0
                reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2452
0
            pSrc += iSrcOffset;
2453
0
            for (int i = 0; i < nSrcLen; i += 2)
2454
0
            {
2455
0
                adfReal[i] = static_cast<double>(pSrc[i]);
2456
0
                adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2457
0
            }
2458
0
            break;
2459
0
        }
2460
2461
0
        case GDT_Float16:
2462
0
        {
2463
0
            GFloat16 *pSrc =
2464
0
                reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2465
0
            pSrc += iSrcOffset;
2466
0
            for (int i = 0; i < nSrcLen; i += 2)
2467
0
            {
2468
0
                adfReal[i] = pSrc[i];
2469
0
                adfReal[i + 1] = pSrc[i + 1];
2470
0
            }
2471
0
            break;
2472
0
        }
2473
2474
0
        case GDT_Float32:
2475
0
        {
2476
0
            float *pSrc =
2477
0
                reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2478
0
            pSrc += iSrcOffset;
2479
0
            for (int i = 0; i < nSrcLen; i += 2)
2480
0
            {
2481
0
                adfReal[i] = pSrc[i];
2482
0
                adfReal[i + 1] = pSrc[i + 1];
2483
0
            }
2484
0
            break;
2485
0
        }
2486
2487
0
        case GDT_Float64:
2488
0
        {
2489
0
            double *pSrc =
2490
0
                reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2491
0
            pSrc += iSrcOffset;
2492
0
            for (int i = 0; i < nSrcLen; i += 2)
2493
0
            {
2494
0
                adfReal[i] = pSrc[i];
2495
0
                adfReal[i + 1] = pSrc[i + 1];
2496
0
            }
2497
0
            break;
2498
0
        }
2499
2500
0
        case GDT_CInt16:
2501
0
        {
2502
0
            GInt16 *pSrc =
2503
0
                reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2504
0
            pSrc += 2 * iSrcOffset;
2505
0
            for (int i = 0; i < nSrcLen; i += 2)
2506
0
            {
2507
0
                adfReal[i] = pSrc[2 * i];
2508
0
                padfImag[i] = pSrc[2 * i + 1];
2509
2510
0
                adfReal[i + 1] = pSrc[2 * i + 2];
2511
0
                padfImag[i + 1] = pSrc[2 * i + 3];
2512
0
            }
2513
0
            break;
2514
0
        }
2515
2516
0
        case GDT_CInt32:
2517
0
        {
2518
0
            GInt32 *pSrc =
2519
0
                reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2520
0
            pSrc += 2 * iSrcOffset;
2521
0
            for (int i = 0; i < nSrcLen; i += 2)
2522
0
            {
2523
0
                adfReal[i] = pSrc[2 * i];
2524
0
                padfImag[i] = pSrc[2 * i + 1];
2525
2526
0
                adfReal[i + 1] = pSrc[2 * i + 2];
2527
0
                padfImag[i + 1] = pSrc[2 * i + 3];
2528
0
            }
2529
0
            break;
2530
0
        }
2531
2532
0
        case GDT_CFloat16:
2533
0
        {
2534
0
            GFloat16 *pSrc =
2535
0
                reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2536
0
            pSrc += 2 * iSrcOffset;
2537
0
            for (int i = 0; i < nSrcLen; i += 2)
2538
0
            {
2539
0
                adfReal[i] = pSrc[2 * i];
2540
0
                padfImag[i] = pSrc[2 * i + 1];
2541
2542
0
                adfReal[i + 1] = pSrc[2 * i + 2];
2543
0
                padfImag[i + 1] = pSrc[2 * i + 3];
2544
0
            }
2545
0
            break;
2546
0
        }
2547
2548
0
        case GDT_CFloat32:
2549
0
        {
2550
0
            float *pSrc =
2551
0
                reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2552
0
            pSrc += 2 * iSrcOffset;
2553
0
            for (int i = 0; i < nSrcLen; i += 2)
2554
0
            {
2555
0
                adfReal[i] = pSrc[2 * i];
2556
0
                padfImag[i] = pSrc[2 * i + 1];
2557
2558
0
                adfReal[i + 1] = pSrc[2 * i + 2];
2559
0
                padfImag[i + 1] = pSrc[2 * i + 3];
2560
0
            }
2561
0
            break;
2562
0
        }
2563
2564
0
        case GDT_CFloat64:
2565
0
        {
2566
0
            double *pSrc =
2567
0
                reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2568
0
            pSrc += 2 * iSrcOffset;
2569
0
            for (int i = 0; i < nSrcLen; i += 2)
2570
0
            {
2571
0
                adfReal[i] = pSrc[2 * i];
2572
0
                padfImag[i] = pSrc[2 * i + 1];
2573
2574
0
                adfReal[i + 1] = pSrc[2 * i + 2];
2575
0
                padfImag[i + 1] = pSrc[2 * i + 3];
2576
0
            }
2577
0
            break;
2578
0
        }
2579
2580
0
        case GDT_Unknown:
2581
0
        case GDT_TypeCount:
2582
0
            CPLAssert(false);
2583
0
            if (padfDensity)
2584
0
                memset(padfDensity, 0, nSrcLen * sizeof(double));
2585
0
            return false;
2586
0
    }
2587
2588
0
    if (padfDensity == nullptr)
2589
0
        return true;
2590
2591
0
    if (poWK->pafUnifiedSrcDensity == nullptr)
2592
0
    {
2593
0
        for (int i = 0; i < nSrcLen; i += 2)
2594
0
        {
2595
            // Take into account earlier calcs.
2596
0
            if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2597
0
            {
2598
0
                padfDensity[i] = 1.0;
2599
0
                bHasValid = true;
2600
0
            }
2601
2602
0
            if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2603
0
            {
2604
0
                padfDensity[i + 1] = 1.0;
2605
0
                bHasValid = true;
2606
0
            }
2607
0
        }
2608
0
    }
2609
0
    else
2610
0
    {
2611
0
        for (int i = 0; i < nSrcLen; i += 2)
2612
0
        {
2613
0
            if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2614
0
                padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
2615
0
            if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2616
0
                bHasValid = true;
2617
2618
0
            if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2619
0
                padfDensity[i + 1] =
2620
0
                    poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
2621
0
            if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2622
0
                bHasValid = true;
2623
0
        }
2624
0
    }
2625
2626
0
    return bHasValid;
2627
0
}
2628
2629
/************************************************************************/
2630
/*                          GWKGetPixelT()                              */
2631
/************************************************************************/
2632
2633
template <class T>
2634
static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2635
                         GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2636
2637
0
{
2638
0
    T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2639
2640
0
    if ((poWK->panUnifiedSrcValid != nullptr &&
2641
0
         !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2642
0
        (poWK->papanBandSrcValid != nullptr &&
2643
0
         poWK->papanBandSrcValid[iBand] != nullptr &&
2644
0
         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2645
0
    {
2646
0
        *pdfDensity = 0.0;
2647
0
        return false;
2648
0
    }
2649
2650
0
    *pValue = pSrc[iSrcOffset];
2651
2652
0
    if (poWK->pafUnifiedSrcDensity == nullptr)
2653
0
        *pdfDensity = 1.0;
2654
0
    else
2655
0
        *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2656
2657
0
    return *pdfDensity != 0.0;
2658
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<unsigned char>(GDALWarpKernel const*, int, long long, double*, unsigned char*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<short>(GDALWarpKernel const*, int, long long, double*, short*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<unsigned short>(GDALWarpKernel const*, int, long long, double*, unsigned short*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<float>(GDALWarpKernel const*, int, long long, double*, float*)
2659
2660
/************************************************************************/
2661
/*                        GWKBilinearResample()                         */
2662
/*     Set of bilinear interpolators                                    */
2663
/************************************************************************/
2664
2665
static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2666
                                       double dfSrcX, double dfSrcY,
2667
                                       double *pdfDensity, double *pdfReal,
2668
                                       double *pdfImag)
2669
2670
0
{
2671
    // Save as local variables to avoid following pointers.
2672
0
    const int nSrcXSize = poWK->nSrcXSize;
2673
0
    const int nSrcYSize = poWK->nSrcYSize;
2674
2675
0
    int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2676
0
    int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2677
0
    double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2678
0
    double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2679
0
    bool bShifted = false;
2680
2681
0
    if (iSrcX == -1)
2682
0
    {
2683
0
        iSrcX = 0;
2684
0
        dfRatioX = 1;
2685
0
    }
2686
0
    if (iSrcY == -1)
2687
0
    {
2688
0
        iSrcY = 0;
2689
0
        dfRatioY = 1;
2690
0
    }
2691
0
    GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2692
2693
    // Shift so we don't overrun the array.
2694
0
    if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2695
0
        static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2696
0
            iSrcOffset + nSrcXSize + 1)
2697
0
    {
2698
0
        bShifted = true;
2699
0
        --iSrcOffset;
2700
0
    }
2701
2702
0
    double adfDensity[2] = {0.0, 0.0};
2703
0
    double adfReal[2] = {0.0, 0.0};
2704
0
    double adfImag[2] = {0.0, 0.0};
2705
0
    double dfAccumulatorReal = 0.0;
2706
0
    double dfAccumulatorImag = 0.0;
2707
0
    double dfAccumulatorDensity = 0.0;
2708
0
    double dfAccumulatorDivisor = 0.0;
2709
2710
0
    const GPtrDiff_t nSrcPixels =
2711
0
        static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2712
    // Get pixel row.
2713
0
    if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2714
0
        iSrcOffset < nSrcPixels &&
2715
0
        GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2716
0
                       adfImag))
2717
0
    {
2718
0
        double dfMult1 = dfRatioX * dfRatioY;
2719
0
        double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2720
2721
        // Shifting corrected.
2722
0
        if (bShifted)
2723
0
        {
2724
0
            adfReal[0] = adfReal[1];
2725
0
            adfImag[0] = adfImag[1];
2726
0
            adfDensity[0] = adfDensity[1];
2727
0
        }
2728
2729
        // Upper Left Pixel.
2730
0
        if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2731
0
            adfDensity[0] > SRC_DENSITY_THRESHOLD)
2732
0
        {
2733
0
            dfAccumulatorDivisor += dfMult1;
2734
2735
0
            dfAccumulatorReal += adfReal[0] * dfMult1;
2736
0
            dfAccumulatorImag += adfImag[0] * dfMult1;
2737
0
            dfAccumulatorDensity += adfDensity[0] * dfMult1;
2738
0
        }
2739
2740
        // Upper Right Pixel.
2741
0
        if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2742
0
            adfDensity[1] > SRC_DENSITY_THRESHOLD)
2743
0
        {
2744
0
            dfAccumulatorDivisor += dfMult2;
2745
2746
0
            dfAccumulatorReal += adfReal[1] * dfMult2;
2747
0
            dfAccumulatorImag += adfImag[1] * dfMult2;
2748
0
            dfAccumulatorDensity += adfDensity[1] * dfMult2;
2749
0
        }
2750
0
    }
2751
2752
    // Get pixel row.
2753
0
    if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
2754
0
        iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
2755
0
        GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
2756
0
                       adfReal, adfImag))
2757
0
    {
2758
0
        double dfMult1 = dfRatioX * (1.0 - dfRatioY);
2759
0
        double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2760
2761
        // Shifting corrected
2762
0
        if (bShifted)
2763
0
        {
2764
0
            adfReal[0] = adfReal[1];
2765
0
            adfImag[0] = adfImag[1];
2766
0
            adfDensity[0] = adfDensity[1];
2767
0
        }
2768
2769
        // Lower Left Pixel
2770
0
        if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2771
0
            adfDensity[0] > SRC_DENSITY_THRESHOLD)
2772
0
        {
2773
0
            dfAccumulatorDivisor += dfMult1;
2774
2775
0
            dfAccumulatorReal += adfReal[0] * dfMult1;
2776
0
            dfAccumulatorImag += adfImag[0] * dfMult1;
2777
0
            dfAccumulatorDensity += adfDensity[0] * dfMult1;
2778
0
        }
2779
2780
        // Lower Right Pixel.
2781
0
        if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2782
0
            adfDensity[1] > SRC_DENSITY_THRESHOLD)
2783
0
        {
2784
0
            dfAccumulatorDivisor += dfMult2;
2785
2786
0
            dfAccumulatorReal += adfReal[1] * dfMult2;
2787
0
            dfAccumulatorImag += adfImag[1] * dfMult2;
2788
0
            dfAccumulatorDensity += adfDensity[1] * dfMult2;
2789
0
        }
2790
0
    }
2791
2792
    /* -------------------------------------------------------------------- */
2793
    /*      Return result.                                                  */
2794
    /* -------------------------------------------------------------------- */
2795
0
    if (dfAccumulatorDivisor == 1.0)
2796
0
    {
2797
0
        *pdfReal = dfAccumulatorReal;
2798
0
        *pdfImag = dfAccumulatorImag;
2799
0
        *pdfDensity = dfAccumulatorDensity;
2800
0
        return false;
2801
0
    }
2802
0
    else if (dfAccumulatorDivisor < 0.00001)
2803
0
    {
2804
0
        *pdfReal = 0.0;
2805
0
        *pdfImag = 0.0;
2806
0
        *pdfDensity = 0.0;
2807
0
        return false;
2808
0
    }
2809
0
    else
2810
0
    {
2811
0
        *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
2812
0
        *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
2813
0
        *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
2814
0
        return true;
2815
0
    }
2816
0
}
2817
2818
template <class T>
2819
static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
2820
                                               int iBand, double dfSrcX,
2821
                                               double dfSrcY, T *pValue)
2822
2823
0
{
2824
2825
0
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2826
0
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2827
0
    GPtrDiff_t iSrcOffset =
2828
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2829
0
    const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2830
0
    const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2831
2832
0
    const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2833
2834
0
    if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2835
0
        iSrcY + 1 < poWK->nSrcYSize)
2836
0
    {
2837
0
        const double dfAccumulator =
2838
0
            (pSrc[iSrcOffset] * dfRatioX +
2839
0
             pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
2840
0
                dfRatioY +
2841
0
            (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
2842
0
             pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
2843
0
                (1.0 - dfRatioY);
2844
2845
0
        *pValue = GWKRoundValueT<T>(dfAccumulator);
2846
2847
0
        return true;
2848
0
    }
2849
2850
0
    double dfAccumulatorDivisor = 0.0;
2851
0
    double dfAccumulator = 0.0;
2852
2853
    // Upper Left Pixel.
2854
0
    if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
2855
0
        iSrcY < poWK->nSrcYSize)
2856
0
    {
2857
0
        const double dfMult = dfRatioX * dfRatioY;
2858
2859
0
        dfAccumulatorDivisor += dfMult;
2860
2861
0
        dfAccumulator += pSrc[iSrcOffset] * dfMult;
2862
0
    }
2863
2864
    // Upper Right Pixel.
2865
0
    if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2866
0
        iSrcY < poWK->nSrcYSize)
2867
0
    {
2868
0
        const double dfMult = (1.0 - dfRatioX) * dfRatioY;
2869
2870
0
        dfAccumulatorDivisor += dfMult;
2871
2872
0
        dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
2873
0
    }
2874
2875
    // Lower Right Pixel.
2876
0
    if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2877
0
        iSrcY + 1 < poWK->nSrcYSize)
2878
0
    {
2879
0
        const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2880
2881
0
        dfAccumulatorDivisor += dfMult;
2882
2883
0
        dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
2884
0
    }
2885
2886
    // Lower Left Pixel.
2887
0
    if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2888
0
        iSrcY + 1 < poWK->nSrcYSize)
2889
0
    {
2890
0
        const double dfMult = dfRatioX * (1.0 - dfRatioY);
2891
2892
0
        dfAccumulatorDivisor += dfMult;
2893
2894
0
        dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
2895
0
    }
2896
2897
    /* -------------------------------------------------------------------- */
2898
    /*      Return result.                                                  */
2899
    /* -------------------------------------------------------------------- */
2900
0
    double dfValue = 0.0;
2901
2902
0
    if (dfAccumulatorDivisor < 0.00001)
2903
0
    {
2904
0
        *pValue = 0;
2905
0
        return false;
2906
0
    }
2907
0
    else if (dfAccumulatorDivisor == 1.0)
2908
0
    {
2909
0
        dfValue = dfAccumulator;
2910
0
    }
2911
0
    else
2912
0
    {
2913
0
        dfValue = dfAccumulator / dfAccumulatorDivisor;
2914
0
    }
2915
2916
0
    *pValue = GWKRoundValueT<T>(dfValue);
2917
2918
0
    return true;
2919
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKBilinearResampleNoMasks4SampleT<unsigned char>(GDALWarpKernel const*, int, double, double, unsigned char*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKBilinearResampleNoMasks4SampleT<float>(GDALWarpKernel const*, int, double, double, float*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKBilinearResampleNoMasks4SampleT<short>(GDALWarpKernel const*, int, double, double, short*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKBilinearResampleNoMasks4SampleT<unsigned short>(GDALWarpKernel const*, int, double, double, unsigned short*)
2920
2921
/************************************************************************/
2922
/*                        GWKCubicResample()                            */
2923
/*     Set of bicubic interpolators using cubic convolution.            */
2924
/************************************************************************/
2925
2926
// http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
2927
// or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
2928
// http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
2929
2930
/**
 * One-dimensional cubic convolution of four equally spaced samples.
 *
 * @param distance1  fractional position between f1 and f2.
 * @param distance2  caller-supplied factor multiplying the quadratic term
 *                   (presumably distance1 squared - confirm at call sites).
 * @param distance3  caller-supplied factor multiplying the cubic term
 *                   (presumably distance1 cubed - confirm at call sites).
 * @param f0         sample before f1.
 * @param f1         sample at position 0.
 * @param f2         sample at position 1.
 * @param f3         sample after f2.
 * @return interpolated value; equals f1 when all three distances are 0.
 */
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T linearTerm = distance1 * (f2 - f0);
    const T quadraticTerm = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T cubicTerm = distance3 * (3 * (f1 - f2) + f3 - f0);

    return (f1 + T(0.5) * (linearTerm + quadraticTerm + cubicTerm));
}
2938
2939
/************************************************************************/
2940
/*                       GWKCubicComputeWeights()                       */
2941
/************************************************************************/
2942
2943
// The four weights sum to 1, hence:
// adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
2944
2945
/**
 * Compute the four cubic convolution weights for a fractional offset x.
 *
 * @param x       offset between the second and third sample; x = 0 yields
 *                {0, 1, 0, 0} and x = 1 yields {0, 0, 1, 0}.
 * @param coeffs  output: the four weights, in sample order.
 */
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    // Shared sub-expressions: x/2, x*x/2 and 3x.
    const T xOver2 = T(0.5) * x;
    const T xSquaredOver2 = xOver2 * x;
    const T xTimes3 = T(3.0) * x;

    coeffs[3] = xSquaredOver2 * (-1 + x);
    coeffs[2] = xOver2 * (1 + x * (4 - xTimes3));
    coeffs[1] = 1 + xSquaredOver2 * (-5 + xTimes3);
    coeffs[0] = xOver2 * (-1 + x * (2 - x));
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKCubicComputeWeights<double>(double, double*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKCubicComputeWeights<float>(float, float*)
2957
2958
// TODO(schwehr): Use an inline function.
2959
// Dot product of two 4-element sequences (anything indexable with [0]..[3]).
// Each argument is expanded four times, so pass side-effect-free expressions.
#define CONVOL4(v1, v2)                                                        \
    ((v1)[0] * (v2)[0] +                                                       \
     (v1)[1] * (v2)[1] +                                                       \
     (v1)[2] * (v2)[2] +                                                       \
     (v1)[3] * (v2)[3])
2962
2963
#if 0
// NOTE: everything up to the matching #endif is compiled out. It keeps an
// alternative formulation in which the weights are left unscaled and the
// common 0.5 * x factor (dfHalfX) is applied once inside CONVOL4_Optim2MAX.
2964
// Optimal (in theory...) for max 2 convolutions: 14 multiplications
2965
// instead of 17.
2966
// TODO(schwehr): Use an inline function.
2967
#define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
2968
    {                                                                          \
2969
        const double dfX = dfX_;                                               \
2970
        dfHalfX = 0.5 * dfX;                                                   \
2971
        const double dfThreeX = 3.0 * dfX;                                     \
2972
        const double dfXMinus1 = dfX - 1;                                      \
2973
                                                                               \
2974
        adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
2975
        adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
2976
        /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
2977
        adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
2978
        /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
2979
        adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
2980
    }
2981
2982
// TODO(schwehr): Use an inline function.
2983
#define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
2984
    ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
2985
                           (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
2986
#endif
2987
2988
static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
2989
                                    double dfSrcX, double dfSrcY,
2990
                                    double *pdfDensity, double *pdfReal,
2991
                                    double *pdfImag)
2992
2993
0
{
2994
0
    const int iSrcX = static_cast<int>(dfSrcX - 0.5);
2995
0
    const int iSrcY = static_cast<int>(dfSrcY - 0.5);
2996
0
    GPtrDiff_t iSrcOffset =
2997
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2998
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
2999
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3000
0
    double adfDensity[4] = {};
3001
0
    double adfReal[4] = {};
3002
0
    double adfImag[4] = {};
3003
3004
    // Get the bilinear interpolation at the image borders.
3005
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3006
0
        iSrcY + 2 >= poWK->nSrcYSize)
3007
0
        return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3008
0
                                          pdfDensity, pdfReal, pdfImag);
3009
3010
0
    double adfValueDens[4] = {};
3011
0
    double adfValueReal[4] = {};
3012
0
    double adfValueImag[4] = {};
3013
3014
0
    double adfCoeffsX[4] = {};
3015
0
    GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3016
3017
0
    for (GPtrDiff_t i = -1; i < 3; i++)
3018
0
    {
3019
0
        if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3020
0
                            2, adfDensity, adfReal, adfImag) ||
3021
0
            adfDensity[0] < SRC_DENSITY_THRESHOLD ||
3022
0
            adfDensity[1] < SRC_DENSITY_THRESHOLD ||
3023
0
            adfDensity[2] < SRC_DENSITY_THRESHOLD ||
3024
0
            adfDensity[3] < SRC_DENSITY_THRESHOLD)
3025
0
        {
3026
0
            return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3027
0
                                              pdfDensity, pdfReal, pdfImag);
3028
0
        }
3029
3030
0
        adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3031
0
        adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3032
0
        adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3033
0
    }
3034
3035
    /* -------------------------------------------------------------------- */
3036
    /*      For now, if we have any pixels missing in the kernel area,      */
3037
    /*      we fallback on using bilinear interpolation.  Ideally we        */
3038
    /*      should do "weight adjustment" of our results similarly to       */
3039
    /*      what is done for the cubic spline and lanc. interpolators.      */
3040
    /* -------------------------------------------------------------------- */
3041
3042
0
    double adfCoeffsY[4] = {};
3043
0
    GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3044
3045
0
    *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3046
0
    *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3047
0
    *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3048
3049
0
    return true;
3050
0
}
3051
3052
#ifdef USE_SSE2
3053
3054
/************************************************************************/
3055
/*                           XMMLoad4Values()                           */
3056
/*                                                                      */
3057
/*  Load 4 packed byte or uint16, cast them to float and put them in a  */
3058
/*  m128 register.                                                      */
3059
/************************************************************************/
3060
3061
// Reads 4 consecutive bytes at ptr and returns them, zero-extended and
// converted to float, in the 4 lanes of an __m128.
static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
{
    // Gather the 4 bytes into one 32-bit scalar (memcpy: ptr has no
    // alignment guarantee), then move it to the low lane of a register.
    unsigned int nPacked;
    memcpy(&nPacked, ptr, 4);
    __m128i xmmInts = _mm_cvtsi32_si128(nPacked);

    // Zero extend 4 packed unsigned 8-bit integers to packed 32-bit
    // integers.
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    xmmInts = _mm_cvtepu8_epi32(xmmInts);
#else
    // Two interleaves with zero: 8 -> 16 bits, then 16 -> 32 bits.
    xmmInts = _mm_unpacklo_epi8(xmmInts, _mm_setzero_si128());
    xmmInts = _mm_unpacklo_epi16(xmmInts, _mm_setzero_si128());
#endif

    return _mm_cvtepi32_ps(xmmInts);
}
3076
3077
// Reads 4 consecutive 16-bit unsigned values at ptr and returns them,
// zero-extended and converted to float, in the 4 lanes of an __m128.
static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
{
    // Gather the 8 bytes into one 64-bit scalar (memcpy: ptr has no
    // alignment guarantee), then move it to the low half of a register.
    GUInt64 nPacked;
    memcpy(&nPacked, ptr, 8);
    __m128i xmmInts = _mm_cvtsi64_si128(nPacked);

    // Zero extend 4 packed unsigned 16-bit integers to packed 32-bit
    // integers.
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    xmmInts = _mm_cvtepu16_epi32(xmmInts);
#else
    // Interleave with zero: 16 -> 32 bits.
    xmmInts = _mm_unpacklo_epi16(xmmInts, _mm_setzero_si128());
#endif

    return _mm_cvtepi32_ps(xmmInts);
}
3091
3092
/************************************************************************/
3093
/*                           XMMHorizontalAdd()                         */
3094
/*                                                                      */
3095
/*  Return the sum of the 4 floating points of the register.            */
3096
/************************************************************************/
3097
3098
#if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3099
static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3100
{
3101
    __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
3102
    __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
3103
    shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
3104
    sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
3105
    return _mm_cvtss_f32(sums);
3106
}
3107
#else
3108
static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3109
0
{
3110
0
    __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
3111
0
    __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
3112
0
    shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
3113
0
    sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
3114
0
    return _mm_cvtss_f32(sums);
3115
0
}
3116
#endif
3117
3118
#endif  // define USE_SSE2
3119
3120
/************************************************************************/
3121
/*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
3122
/************************************************************************/
3123
3124
// Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
3125
// because there are a few assumptions about those types.
3126
// We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3127
// perf benefit.
3128
3129
template <class T>
3130
static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3131
    const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3132
    double *pdfDensity, double *pdfReal)
3133
0
{
3134
0
    const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3135
0
    const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3136
0
    const GPtrDiff_t iSrcOffset =
3137
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3138
3139
    // Get the bilinear interpolation at the image borders.
3140
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3141
0
        iSrcY + 2 >= poWK->nSrcYSize)
3142
0
    {
3143
0
        double adfImagIgnored[4] = {};
3144
0
        return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3145
0
                                          pdfDensity, pdfReal, adfImagIgnored);
3146
0
    }
3147
3148
#if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3149
    const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3150
    const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3151
3152
    // TODO(schwehr): Explain the magic numbers.
3153
    float afTemp[4 + 4 + 4 + 1];
3154
    float *pafAligned =
3155
        reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3156
    float *pafCoeffs = pafAligned;
3157
    float *pafDensity = pafAligned + 4;
3158
    float *pafValue = pafAligned + 8;
3159
3160
    const float fHalfDeltaX = 0.5f * fDeltaX;
3161
    const float fThreeDeltaX = 3.0f * fDeltaX;
3162
    const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3163
3164
    pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3165
    pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3166
    pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3167
    pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3168
    __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3169
    const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
3170
3171
    __m128 xmmMaskLowDensity = _mm_setzero_ps();
3172
    for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3173
         i++, iOffset += poWK->nSrcXSize)
3174
    {
3175
        const __m128 xmmDensity =
3176
            _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3177
        xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3178
                                      _mm_cmplt_ps(xmmDensity, xmmThreshold));
3179
        pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3180
3181
        const __m128 xmmValues =
3182
            XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3183
        pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3184
    }
3185
    if (_mm_movemask_ps(xmmMaskLowDensity))
3186
    {
3187
        double adfImagIgnored[4] = {};
3188
        return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3189
                                          pdfDensity, pdfReal, adfImagIgnored);
3190
    }
3191
3192
    const float fHalfDeltaY = 0.5f * fDeltaY;
3193
    const float fThreeDeltaY = 3.0f * fDeltaY;
3194
    const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3195
3196
    pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3197
    pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3198
    pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3199
    pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3200
3201
    xmmCoeffs = _mm_load_ps(pafCoeffs);
3202
3203
    const __m128 xmmDensity = _mm_load_ps(pafDensity);
3204
    const __m128 xmmValue = _mm_load_ps(pafValue);
3205
    *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3206
    *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3207
3208
    // We did all above computations on float32 whereas the general case is
3209
    // float64. Not sure if one is fundamentally more correct than the other
3210
    // one, but we want our optimization to give the same result as the
3211
    // general case as much as possible, so if the resulting value is
3212
    // close to some_int_value + 0.5, redo the computation with the general
3213
    // case.
3214
    // Note: If other types than Byte or UInt16, will need changes.
3215
    if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3216
        return true;
3217
3218
#endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3219
3220
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3221
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3222
3223
0
    double adfValueDens[4] = {};
3224
0
    double adfValueReal[4] = {};
3225
3226
0
    double adfCoeffsX[4] = {};
3227
0
    GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3228
3229
0
    double adfCoeffsY[4] = {};
3230
0
    GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3231
3232
0
    for (GPtrDiff_t i = -1; i < 3; i++)
3233
0
    {
3234
0
        const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3235
0
#if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3236
0
        if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
3237
0
            poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
3238
0
            poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
3239
0
            poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
3240
0
        {
3241
0
            double adfImagIgnored[4] = {};
3242
0
            return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3243
0
                                              pdfDensity, pdfReal,
3244
0
                                              adfImagIgnored);
3245
0
        }
3246
0
#endif
3247
3248
0
        adfValueDens[i + 1] =
3249
0
            CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3250
3251
0
        adfValueReal[i + 1] = CONVOL4(
3252
0
            adfCoeffsX,
3253
0
            reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3254
0
    }
3255
3256
0
    *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3257
0
    *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3258
3259
0
    return true;
3260
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleSrcMaskIsDensity4SampleRealT<unsigned char>(GDALWarpKernel const*, int, double, double, double*, double*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleSrcMaskIsDensity4SampleRealT<unsigned short>(GDALWarpKernel const*, int, double, double, double*, double*)
3261
3262
/************************************************************************/
3263
/*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
3264
/*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
3265
/************************************************************************/
3266
3267
static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3268
    const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3269
    double *pdfDensity, double *pdfReal)
3270
3271
0
{
3272
0
    const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3273
0
    const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3274
0
    const GPtrDiff_t iSrcOffset =
3275
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3276
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3277
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3278
3279
    // Get the bilinear interpolation at the image borders.
3280
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3281
0
        iSrcY + 2 >= poWK->nSrcYSize)
3282
0
    {
3283
0
        double adfImagIgnored[4] = {};
3284
0
        return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3285
0
                                          pdfDensity, pdfReal, adfImagIgnored);
3286
0
    }
3287
3288
0
    double adfCoeffsX[4] = {};
3289
0
    GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3290
3291
0
    double adfCoeffsY[4] = {};
3292
0
    GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3293
3294
0
    double adfValueDens[4] = {};
3295
0
    double adfValueReal[4] = {};
3296
0
    double adfDensity[4] = {};
3297
0
    double adfReal[4] = {};
3298
0
    double adfImagIgnored[4] = {};
3299
3300
0
    for (GPtrDiff_t i = -1; i < 3; i++)
3301
0
    {
3302
0
        if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3303
0
                            2, adfDensity, adfReal, adfImagIgnored) ||
3304
0
            adfDensity[0] < SRC_DENSITY_THRESHOLD ||
3305
0
            adfDensity[1] < SRC_DENSITY_THRESHOLD ||
3306
0
            adfDensity[2] < SRC_DENSITY_THRESHOLD ||
3307
0
            adfDensity[3] < SRC_DENSITY_THRESHOLD)
3308
0
        {
3309
0
            return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3310
0
                                              pdfDensity, pdfReal,
3311
0
                                              adfImagIgnored);
3312
0
        }
3313
3314
0
        adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3315
0
        adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3316
0
    }
3317
3318
0
    *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3319
0
    *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3320
3321
0
    return true;
3322
0
}
3323
3324
template <class T>
3325
static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3326
                                            int iBand, double dfSrcX,
3327
                                            double dfSrcY, T *pValue)
3328
3329
0
{
3330
0
    const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3331
0
    const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3332
0
    const GPtrDiff_t iSrcOffset =
3333
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3334
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3335
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3336
0
    const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3337
0
    const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3338
3339
    // Get the bilinear interpolation at the image borders.
3340
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3341
0
        iSrcY + 2 >= poWK->nSrcYSize)
3342
0
        return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3343
0
                                                  pValue);
3344
3345
0
    double adfCoeffs[4] = {};
3346
0
    GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3347
3348
0
    double adfValue[4] = {};
3349
3350
0
    for (GPtrDiff_t i = -1; i < 3; i++)
3351
0
    {
3352
0
        const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3353
3354
0
        adfValue[i + 1] = CONVOL4(
3355
0
            adfCoeffs,
3356
0
            reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3357
0
    }
3358
3359
0
    const double dfValue =
3360
0
        CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3361
0
                         adfValue[1], adfValue[2], adfValue[3]);
3362
3363
0
    *pValue = GWKClampValueT<T>(dfValue);
3364
3365
0
    return true;
3366
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleNoMasks4SampleT<unsigned char>(GDALWarpKernel const*, int, double, double, unsigned char*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleNoMasks4SampleT<float>(GDALWarpKernel const*, int, double, double, float*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleNoMasks4SampleT<short>(GDALWarpKernel const*, int, double, double, short*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleNoMasks4SampleT<unsigned short>(GDALWarpKernel const*, int, double, double, unsigned short*)
3367
3368
/************************************************************************/
3369
/*                          GWKLanczosSinc()                            */
3370
/************************************************************************/
3371
3372
/*
3373
 * Lanczos windowed sinc interpolation kernel with radius r.
3374
 *        /
3375
 *        | sinc(x) * sinc(x/r), if |x| < r
3376
 * L(x) = | 1, if x = 0                     ,
3377
 *        | 0, otherwise
3378
 *        \
3379
 *
3380
 * where sinc(x) = sin(PI * x) / (PI * x).
3381
 */
3382
3383
// Evaluates sinc(x) * sinc(x / 3), with sinc(t) = sin(PI * t) / (PI * t),
// i.e. the Lanczos kernel for a radius hard-coded to 3. No test on |x| is
// made here: the value of the formula is returned for any non-zero x.
static double GWKLanczosSinc(double dfX)
{
    // Value at the origin; also avoids the 0 / 0 of the general formula.
    if (dfX == 0.0)
        return 1.0;

    const double dfPiX = M_PI * dfX;
    const double dfPiXOver3 = dfPiX / 3;

    // Given that sin(3a) = 3 sin(a) - 4 sin^3(a), with a = PI * x / 3:
    //   sin(PI * x) * sin(PI * x / 3) = (3 - 4 sin^2(a)) * sin^2(a)
    // so a single call to sin() is enough.
    const double dfSinThird = sin(dfPiXOver3);
    const double dfSinThirdSq = dfSinThird * dfSinThird;
    const double dfNumerator = (3 - 4 * dfSinThirdSq) * dfSinThirdSq;

    // (PI * x) * (PI * x / 3)
    const double dfDenominator = dfPiX * dfPiXOver3;

    return dfNumerator / dfDenominator;
}
3399
3400
// In-place, 4-element version of the radius-3 Lanczos formula
// sinc(x) * sinc(x / 3): each padfValues[i] is replaced by the kernel value
// at that abscissa. Returns the sum of the four resulting values.
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (int iVal = 0; iVal < 4; ++iVal)
    {
        const double dfX = padfValues[iVal];
        if (dfX == 0.0)
        {
            // Value at the origin; avoids the 0 / 0 of the general formula.
            padfValues[iVal] = 1.0;
            continue;
        }

        const double dfPiX = M_PI * dfX;
        const double dfPiXOver3 = dfPiX / 3;

        // Given that sin(3a) = 3 sin(a) - 4 sin^3(a), with a = PI * x / 3:
        //   sin(PI * x) * sin(PI * x / 3) = (3 - 4 sin^2(a)) * sin^2(a)
        const double dfSinThird = sin(dfPiXOver3);
        const double dfSinThirdSq = dfSinThird * dfSinThird;
        const double dfNumerator = (3 - 4 * dfSinThirdSq) * dfSinThirdSq;

        // (PI * x) * (PI * x / 3)
        const double dfDenominator = dfPiX * dfPiXOver3;

        padfValues[iVal] = dfNumerator / dfDenominator;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3424
3425
/************************************************************************/
3426
/*                           GWKBilinear()                              */
3427
/************************************************************************/
3428
3429
// Triangle ("tent") kernel: 1 - |x| when |x| <= 1, and 0 otherwise.
static double GWKBilinear(double dfX)
{
    const double dfDistance = fabs(dfX);
    return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
}
3437
3438
// In-place, 4-element version of the triangle kernel: each padfValues[i] is
// replaced by 1 - |x| when |x| <= 1 and by 0 otherwise. Returns the sum of
// the four resulting weights.
static double GWKBilinear4Values(double *padfValues)
{
    for (int iVal = 0; iVal < 4; ++iVal)
    {
        const double dfDistance = fabs(padfValues[iVal]);
        padfValues[iVal] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3462
3463
/************************************************************************/
3464
/*                            GWKCubic()                                */
3465
/************************************************************************/
3466
3467
static double GWKCubic(double dfX)
3468
0
{
3469
0
    return CubicKernel(dfX);
3470
0
}
3471
3472
// In-place, 4-element evaluation of a piecewise cubic kernel. For each
// abscissa x in padfValues:
//   |x| <= 1 :  x^2 * ( 1.5 * |x| - 2.5) + 1
//   |x| <= 2 :  x^2 * (-0.5 * |x| + 2.5) - 4 * |x| + 2
//   otherwise:  0
// Each padfValues[i] is replaced by its weight; the sum of the four weights
// is returned.
static double GWKCubic4Values(double *padfValues)
{
    for (int iVal = 0; iVal < 4; ++iVal)
    {
        const double dfX = padfValues[iVal];
        const double dfAbsX = fabs(dfX);
        const double dfX2 = dfX * dfX;

        double dfWeight = 0.0;
        if (dfAbsX <= 1.0)
        {
            dfWeight = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        }
        else if (dfAbsX <= 2.0)
        {
            dfWeight = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
        }

        padfValues[iVal] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3513
3514
/************************************************************************/
3515
/*                           GWKBSpline()                               */
3516
/************************************************************************/
3517
3518
// https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3519
// Equation 8 with (B,C)=(1,0)
3520
// 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
3521
// 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
3522
3523
// Cubic B-spline kernel written as a sum of truncated cubes:
//   (x+2)+^3 - 4 (x+1)+^3 + 6 (x)+^3 - 4 (x-1)+^3
// where (t)+ is t when t > 0 and 0 otherwise. The common factor 1/6 is NOT
// applied, so for instance the value at x = 0 is 4.
static double GWKBSpline(double x)
{
    const double dfXPlus2 = x + 2.0;
    const double dfXPlus1 = x + 1.0;
    const double dfXMinus1 = x - 1.0;

    // Computed unconditionally, ahead of the tests, since it is needed in
    // most cases.
    const double dfXPlus2Cubed = dfXPlus2 * dfXPlus2 * dfXPlus2;

    // Each term can only be non-zero if the previous one is, hence the
    // nesting: an inner test is evaluated only when it is needed.
    if (!(dfXPlus2 > 0.0))
        return 0.0;

    double dfLowerTerms = 0.0;
    if (dfXPlus1 > 0.0)
    {
        double dfInnerTerms = 0.0;
        if (x > 0.0)
        {
            dfInnerTerms =
                ((dfXMinus1 > 0.0) ? -4.0 * dfXMinus1 * dfXMinus1 * dfXMinus1
                                   : 0.0) +
                6.0 * x * x * x;
        }
        dfLowerTerms = dfInnerTerms + -4.0 * dfXPlus1 * dfXPlus1 * dfXPlus1;
    }

    return dfLowerTerms + dfXPlus2Cubed;  // * 0.166666666666666666666
}
3546
3547
// In-place, 4-element version of the cubic B-spline kernel written as a sum
// of truncated cubes:
//   (x+2)+^3 - 4 (x+1)+^3 + 6 (x)+^3 - 4 (x-1)+^3
// where (t)+ is t when t > 0 and 0 otherwise. The common factor 1/6 is NOT
// applied. Each padfValues[i] is replaced by its weight; the sum of the four
// weights is returned.
static double GWKBSpline4Values(double *padfValues)
{
    for (int iVal = 0; iVal < 4; ++iVal)
    {
        const double x = padfValues[iVal];
        const double dfXPlus2 = x + 2.0;
        const double dfXPlus1 = x + 1.0;
        const double dfXMinus1 = x - 1.0;

        // Computed unconditionally, ahead of the tests, since it is needed
        // in most cases.
        const double dfXPlus2Cubed = dfXPlus2 * dfXPlus2 * dfXPlus2;

        // Each term can only be non-zero if the previous one is, hence the
        // nesting: an inner test is evaluated only when it is needed.
        double dfWeight = 0.0;
        if (dfXPlus2 > 0.0)
        {
            double dfLowerTerms = 0.0;
            if (dfXPlus1 > 0.0)
            {
                double dfInnerTerms = 0.0;
                if (x > 0.0)
                {
                    dfInnerTerms =
                        ((dfXMinus1 > 0.0)
                             ? -4.0 * dfXMinus1 * dfXMinus1 * dfXMinus1
                             : 0.0) +
                        6.0 * x * x * x;
                }
                dfLowerTerms =
                    dfInnerTerms + -4.0 * dfXPlus1 * dfXPlus1 * dfXPlus1;
            }
            dfWeight = dfLowerTerms + dfXPlus2Cubed;
        }

        padfValues[iVal] = dfWeight;  // * 0.166666666666666666666
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3576
/************************************************************************/
3577
/*                       GWKResampleWrkStruct                           */
3578
/************************************************************************/
3579
3580
typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3581
3582
typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3583
                                   double dfSrcX, double dfSrcY,
3584
                                   double *pdfDensity, double *pdfReal,
3585
                                   double *pdfImag,
3586
                                   GWKResampleWrkStruct *psWrkStruct);
3587
3588
struct _GWKResampleWrkStruct
3589
{
3590
    pfnGWKResampleType pfnGWKResample;
3591
3592
    // Space for saved X weights.
3593
    double *padfWeightsX;
3594
    bool *pabCalcX;
3595
3596
    double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
3597
    int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
3598
    int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
3599
    double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
3600
    double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
3601
    double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
3602
    double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
3603
    double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
3604
    double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
3605
    double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
3606
    double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
3607
    double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
3608
    double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
3609
3610
    // Space for saving a row of pixels.
3611
    double *padfRowDensity;
3612
    double *padfRowReal;
3613
    double *padfRowImag;
3614
};
3615
3616
/************************************************************************/
3617
/*                    GWKResampleCreateWrkStruct()                      */
3618
/************************************************************************/
3619
3620
static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3621
                        double dfSrcY, double *pdfDensity, double *pdfReal,
3622
                        double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3623
3624
static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3625
                                        double dfSrcX, double dfSrcY,
3626
                                        double *pdfDensity, double *pdfReal,
3627
                                        double *pdfImag,
3628
                                        GWKResampleWrkStruct *psWrkStruct);
3629
3630
// Allocates and initializes the working structure used by GWKResample() and
// GWKResampleOptimizedLanczos() for the kernel described by poWK.
//
// The scratch buffers are sized from poWK->nXRadius / poWK->nYRadius. For
// GRA_Lanczos the optimized implementation is selected and, for each axis
// whose scale is below 1, the cosine and sine of PI * scale and of
// PI * scale / 3 are precomputed. The result must be released with
// GWKResampleDeleteWrkStruct().
static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
{
    const int nKernelWidth = (poWK->nXRadius + 1) * 2;
    const int nKernelHeight = (poWK->nYRadius + 1) * 2;

    // Zero-initialized, so every member not set below starts at 0 / null.
    GWKResampleWrkStruct *psWrk = static_cast<GWKResampleWrkStruct *>(
        CPLCalloc(1, sizeof(GWKResampleWrkStruct)));

    // Alloc space for saved X weights. Note that pabCalcX is not cleared.
    psWrk->padfWeightsX =
        static_cast<double *>(CPLCalloc(nKernelWidth, sizeof(double)));
    psWrk->pabCalcX =
        static_cast<bool *>(CPLMalloc(nKernelWidth * sizeof(bool)));

    psWrk->padfWeightsY =
        static_cast<double *>(CPLCalloc(nKernelHeight, sizeof(double)));

    // Initial "last position" values for the Lanczos cache.
    // NOTE(review): -10 is presumably chosen so that it never matches a
    // real position on the first call - confirm against
    // GWKResampleOptimizedLanczos().
    psWrk->iLastSrcX = -10;
    psWrk->iLastSrcY = -10;
    psWrk->dfLastDeltaX = -10;
    psWrk->dfLastDeltaY = -10;

    // Alloc space for saving a row of pixels. The density row is only
    // allocated when the kernel carries some kind of source mask.
    const bool bHasSrcMask = poWK->pafUnifiedSrcDensity != nullptr ||
                             poWK->panUnifiedSrcValid != nullptr ||
                             poWK->papanBandSrcValid != nullptr;
    psWrk->padfRowDensity =
        bHasSrcMask
            ? static_cast<double *>(CPLCalloc(nKernelWidth, sizeof(double)))
            : nullptr;
    psWrk->padfRowReal =
        static_cast<double *>(CPLCalloc(nKernelWidth, sizeof(double)));
    psWrk->padfRowImag =
        static_cast<double *>(CPLCalloc(nKernelWidth, sizeof(double)));

    if (poWK->eResample != GRA_Lanczos)
    {
        psWrk->pfnGWKResample = GWKResample;
        return psWrk;
    }

    psWrk->pfnGWKResample = GWKResampleOptimizedLanczos;

    // "Naive" version of what follows, for each axis:
    //   cos(M_PI * scale) and sin(M_PI * scale)
    // but given that cos(3x) = 4 cos^3(x) - 3 cos(x), and that the sine is
    // non-negative for x between 0 and M_PI, a single call to cos() plus
    // square roots is enough.
    if (poWK->dfXScale < 1)
    {
        const double dfCosThird = cos(M_PI / 3 * poWK->dfXScale);
        psWrk->dfCosPiXScaleOver3 = dfCosThird;
        psWrk->dfSinPiXScaleOver3 = sqrt(1 - dfCosThird * dfCosThird);

        const double dfCosFull = (4 * dfCosThird * dfCosThird - 3) * dfCosThird;
        psWrk->dfCosPiXScale = dfCosFull;
        psWrk->dfSinPiXScale = sqrt(1 - dfCosFull * dfCosFull);
    }

    if (poWK->dfYScale < 1)
    {
        const double dfCosThird = cos(M_PI / 3 * poWK->dfYScale);
        psWrk->dfCosPiYScaleOver3 = dfCosThird;
        psWrk->dfSinPiYScaleOver3 = sqrt(1 - dfCosThird * dfCosThird);

        const double dfCosFull = (4 * dfCosThird * dfCosThird - 3) * dfCosThird;
        psWrk->dfCosPiYScale = dfCosFull;
        psWrk->dfSinPiYScale = sqrt(1 - dfCosFull * dfCosFull);
    }

    return psWrk;
}
3713
3714
/************************************************************************/
3715
/*                    GWKResampleDeleteWrkStruct()                      */
3716
/************************************************************************/
3717
3718
/**
 * Release a resampling work structure and every scratch buffer it owns.
 *
 * @param psWrkStruct work structure to destroy; it is dereferenced
 *                    unconditionally, so it must not be null.
 */
static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
{
    // Scratch buffers, released in the order listed here; the structure
    // itself goes last because the table reads its members.
    void *const apOwnedBuffers[] = {
        psWrkStruct->padfWeightsX,   psWrkStruct->padfWeightsY,
        psWrkStruct->pabCalcX,       psWrkStruct->padfRowDensity,
        psWrkStruct->padfRowReal,    psWrkStruct->padfRowImag};

    for (void *pBuffer : apOwnedBuffers)
        CPLFree(pBuffer);

    CPLFree(psWrkStruct);
}
3728
3729
/************************************************************************/
3730
/*                           GWKResample()                              */
3731
/************************************************************************/
3732
3733
static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3734
                        double dfSrcY, double *pdfDensity, double *pdfReal,
3735
                        double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
3736
3737
0
{
3738
    // Save as local variables to avoid following pointers in loops.
3739
0
    const int nSrcXSize = poWK->nSrcXSize;
3740
0
    const int nSrcYSize = poWK->nSrcYSize;
3741
3742
0
    double dfAccumulatorReal = 0.0;
3743
0
    double dfAccumulatorImag = 0.0;
3744
0
    double dfAccumulatorDensity = 0.0;
3745
0
    double dfAccumulatorWeight = 0.0;
3746
0
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3747
0
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3748
0
    const GPtrDiff_t iSrcOffset =
3749
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3750
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3751
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3752
3753
0
    const double dfXScale = poWK->dfXScale;
3754
0
    const double dfYScale = poWK->dfYScale;
3755
3756
0
    const int nXDist = (poWK->nXRadius + 1) * 2;
3757
3758
    // Space for saved X weights.
3759
0
    double *padfWeightsX = psWrkStruct->padfWeightsX;
3760
0
    bool *pabCalcX = psWrkStruct->pabCalcX;
3761
3762
    // Space for saving a row of pixels.
3763
0
    double *padfRowDensity = psWrkStruct->padfRowDensity;
3764
0
    double *padfRowReal = psWrkStruct->padfRowReal;
3765
0
    double *padfRowImag = psWrkStruct->padfRowImag;
3766
3767
    // Mark as needing calculation (don't calculate the weights yet,
3768
    // because a mask may render it unnecessary).
3769
0
    memset(pabCalcX, false, nXDist * sizeof(bool));
3770
3771
0
    FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3772
0
    CPLAssert(pfnGetWeight);
3773
3774
    // Skip sampling over edge of image.
3775
0
    int j = poWK->nFiltInitY;
3776
0
    int jMax = poWK->nYRadius;
3777
0
    if (iSrcY + j < 0)
3778
0
        j = -iSrcY;
3779
0
    if (iSrcY + jMax >= nSrcYSize)
3780
0
        jMax = nSrcYSize - iSrcY - 1;
3781
3782
0
    int iMin = poWK->nFiltInitX;
3783
0
    int iMax = poWK->nXRadius;
3784
0
    if (iSrcX + iMin < 0)
3785
0
        iMin = -iSrcX;
3786
0
    if (iSrcX + iMax >= nSrcXSize)
3787
0
        iMax = nSrcXSize - iSrcX - 1;
3788
3789
0
    const int bXScaleBelow1 = (dfXScale < 1.0);
3790
0
    const int bYScaleBelow1 = (dfYScale < 1.0);
3791
3792
0
    GPtrDiff_t iRowOffset =
3793
0
        iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
3794
3795
    // Loop over pixel rows in the kernel.
3796
0
    for (; j <= jMax; ++j)
3797
0
    {
3798
0
        iRowOffset += nSrcXSize;
3799
3800
        // Get pixel values.
3801
        // We can potentially read extra elements after the "normal" end of the
3802
        // source arrays, but the contract of papabySrcImage[iBand],
3803
        // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3804
        // is to have WARP_EXTRA_ELTS reserved at their end.
3805
0
        if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
3806
0
                            padfRowDensity, padfRowReal, padfRowImag))
3807
0
            continue;
3808
3809
        // Calculate the Y weight.
3810
0
        double dfWeight1 = (bYScaleBelow1)
3811
0
                               ? pfnGetWeight((j - dfDeltaY) * dfYScale)
3812
0
                               : pfnGetWeight(j - dfDeltaY);
3813
3814
        // Iterate over pixels in row.
3815
0
        double dfAccumulatorRealLocal = 0.0;
3816
0
        double dfAccumulatorImagLocal = 0.0;
3817
0
        double dfAccumulatorDensityLocal = 0.0;
3818
0
        double dfAccumulatorWeightLocal = 0.0;
3819
3820
0
        for (int i = iMin; i <= iMax; ++i)
3821
0
        {
3822
            // Skip sampling if pixel has zero density.
3823
0
            if (padfRowDensity != nullptr &&
3824
0
                padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
3825
0
                continue;
3826
3827
0
            double dfWeight2 = 0.0;
3828
3829
            // Make or use a cached set of weights for this row.
3830
0
            if (pabCalcX[i - iMin])
3831
0
            {
3832
                // Use saved weight value instead of recomputing it.
3833
0
                dfWeight2 = padfWeightsX[i - iMin];
3834
0
            }
3835
0
            else
3836
0
            {
3837
                // Calculate & save the X weight.
3838
0
                padfWeightsX[i - iMin] = dfWeight2 =
3839
0
                    (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
3840
0
                                    : pfnGetWeight(i - dfDeltaX);
3841
3842
0
                pabCalcX[i - iMin] = true;
3843
0
            }
3844
3845
            // Accumulate!
3846
0
            dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
3847
0
            dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
3848
0
            if (padfRowDensity != nullptr)
3849
0
                dfAccumulatorDensityLocal +=
3850
0
                    padfRowDensity[i - iMin] * dfWeight2;
3851
0
            dfAccumulatorWeightLocal += dfWeight2;
3852
0
        }
3853
3854
0
        dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
3855
0
        dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
3856
0
        dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
3857
0
        dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
3858
0
    }
3859
3860
0
    if (dfAccumulatorWeight < 0.000001 ||
3861
0
        (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
3862
0
    {
3863
0
        *pdfDensity = 0.0;
3864
0
        return false;
3865
0
    }
3866
3867
    // Calculate the output taking into account weighting.
3868
0
    if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
3869
0
    {
3870
0
        *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
3871
0
        *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
3872
0
        if (padfRowDensity != nullptr)
3873
0
            *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
3874
0
        else
3875
0
            *pdfDensity = 1.0;
3876
0
    }
3877
0
    else
3878
0
    {
3879
0
        *pdfReal = dfAccumulatorReal;
3880
0
        *pdfImag = dfAccumulatorImag;
3881
0
        if (padfRowDensity != nullptr)
3882
0
            *pdfDensity = dfAccumulatorDensity;
3883
0
        else
3884
0
            *pdfDensity = 1.0;
3885
0
    }
3886
3887
0
    return true;
3888
0
}
3889
3890
/************************************************************************/
3891
/*                      GWKResampleOptimizedLanczos()                   */
3892
/************************************************************************/
3893
3894
static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3895
                                        double dfSrcX, double dfSrcY,
3896
                                        double *pdfDensity, double *pdfReal,
3897
                                        double *pdfImag,
3898
                                        GWKResampleWrkStruct *psWrkStruct)
3899
3900
0
{
3901
    // Save as local variables to avoid following pointers in loops.
3902
0
    const int nSrcXSize = poWK->nSrcXSize;
3903
0
    const int nSrcYSize = poWK->nSrcYSize;
3904
3905
0
    double dfAccumulatorReal = 0.0;
3906
0
    double dfAccumulatorImag = 0.0;
3907
0
    double dfAccumulatorDensity = 0.0;
3908
0
    double dfAccumulatorWeight = 0.0;
3909
0
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3910
0
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3911
0
    const GPtrDiff_t iSrcOffset =
3912
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3913
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3914
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3915
3916
0
    const double dfXScale = poWK->dfXScale;
3917
0
    const double dfYScale = poWK->dfYScale;
3918
3919
    // Space for saved X weights.
3920
0
    double *const padfWeightsXShifted =
3921
0
        psWrkStruct->padfWeightsX - poWK->nFiltInitX;
3922
0
    double *const padfWeightsYShifted =
3923
0
        psWrkStruct->padfWeightsY - poWK->nFiltInitY;
3924
3925
    // Space for saving a row of pixels.
3926
0
    double *const padfRowDensity = psWrkStruct->padfRowDensity;
3927
0
    double *const padfRowReal = psWrkStruct->padfRowReal;
3928
0
    double *const padfRowImag = psWrkStruct->padfRowImag;
3929
3930
    // Skip sampling over edge of image.
3931
0
    int jMin = poWK->nFiltInitY;
3932
0
    int jMax = poWK->nYRadius;
3933
0
    if (iSrcY + jMin < 0)
3934
0
        jMin = -iSrcY;
3935
0
    if (iSrcY + jMax >= nSrcYSize)
3936
0
        jMax = nSrcYSize - iSrcY - 1;
3937
3938
0
    int iMin = poWK->nFiltInitX;
3939
0
    int iMax = poWK->nXRadius;
3940
0
    if (iSrcX + iMin < 0)
3941
0
        iMin = -iSrcX;
3942
0
    if (iSrcX + iMax >= nSrcXSize)
3943
0
        iMax = nSrcXSize - iSrcX - 1;
3944
3945
0
    if (dfXScale < 1.0)
3946
0
    {
3947
0
        while ((iMin - dfDeltaX) * dfXScale < -3.0)
3948
0
            iMin++;
3949
0
        while ((iMax - dfDeltaX) * dfXScale > 3.0)
3950
0
            iMax--;
3951
3952
        // clang-format off
3953
        /*
3954
        Naive version:
3955
        for (int i = iMin; i <= iMax; ++i)
3956
        {
3957
            psWrkStruct->padfWeightsXShifted[i] =
3958
                GWKLanczosSinc((i - dfDeltaX) * dfXScale);
3959
        }
3960
3961
        but given that:
3962
3963
        GWKLanczosSinc(x):
3964
            if (dfX == 0.0)
3965
                return 1.0;
3966
3967
            const double dfPIX = M_PI * dfX;
3968
            const double dfPIXoverR = dfPIX / 3;
3969
            const double dfPIX2overR = dfPIX * dfPIXoverR;
3970
            return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
3971
3972
        and
3973
            sin (a + b) = sin a cos b + cos a sin b.
3974
            cos (a + b) = cos a cos b - sin a sin b.
3975
3976
        we can skip any sin() computation within the loop
3977
        */
3978
        // clang-format on
3979
3980
0
        if (iSrcX != psWrkStruct->iLastSrcX ||
3981
0
            dfDeltaX != psWrkStruct->dfLastDeltaX)
3982
0
        {
3983
0
            double dfX = (iMin - dfDeltaX) * dfXScale;
3984
3985
0
            double dfPIXover3 = M_PI / 3 * dfX;
3986
0
            double dfCosOver3 = cos(dfPIXover3);
3987
0
            double dfSinOver3 = sin(dfPIXover3);
3988
3989
            // "Naive":
3990
            // double dfSin = sin( M_PI * dfX );
3991
            // double dfCos = cos( M_PI * dfX );
3992
            // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
3993
0
            double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
3994
0
            double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
3995
3996
0
            const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
3997
0
            const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
3998
0
            const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
3999
0
            const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4000
0
            constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4001
0
            padfWeightsXShifted[iMin] =
4002
0
                dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4003
0
            for (int i = iMin + 1; i <= iMax; ++i)
4004
0
            {
4005
0
                dfX += dfXScale;
4006
0
                const double dfNewSin =
4007
0
                    dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4008
0
                const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4009
0
                                             dfCosOver3 * dfSinPiXScaleOver3;
4010
0
                padfWeightsXShifted[i] =
4011
0
                    dfX == 0
4012
0
                        ? 1.0
4013
0
                        : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4014
0
                const double dfNewCos =
4015
0
                    dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4016
0
                const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4017
0
                                             dfSinOver3 * dfSinPiXScaleOver3;
4018
0
                dfSin = dfNewSin;
4019
0
                dfCos = dfNewCos;
4020
0
                dfSinOver3 = dfNewSinOver3;
4021
0
                dfCosOver3 = dfNewCosOver3;
4022
0
            }
4023
4024
0
            psWrkStruct->iLastSrcX = iSrcX;
4025
0
            psWrkStruct->dfLastDeltaX = dfDeltaX;
4026
0
        }
4027
0
    }
4028
0
    else
4029
0
    {
4030
0
        while (iMin - dfDeltaX < -3.0)
4031
0
            iMin++;
4032
0
        while (iMax - dfDeltaX > 3.0)
4033
0
            iMax--;
4034
4035
0
        if (iSrcX != psWrkStruct->iLastSrcX ||
4036
0
            dfDeltaX != psWrkStruct->dfLastDeltaX)
4037
0
        {
4038
            // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4039
            // following trigonometric formulas.
4040
4041
            // TODO(schwehr): Move this somewhere where it can be rendered at
4042
            // LaTeX.
4043
            // clang-format off
4044
            // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4045
            //                            cos(M_PI * dfBase) * sin(M_PI * k)
4046
            // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4047
            // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4048
            // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4049
4050
            // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4051
            //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4052
            // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4053
            // clang-format on
4054
4055
0
            const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4056
0
            const double dfSin2PIDeltaXOver3 =
4057
0
                dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4058
            // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4059
0
            const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4060
0
            const double dfSinPIDeltaX =
4061
0
                (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4062
0
            const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4063
0
            const double dfInvPI2Over3xSinPIDeltaX =
4064
0
                dfInvPI2Over3 * dfSinPIDeltaX;
4065
0
            const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4066
0
                -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4067
0
            const double dfSinPIOver3 = 0.8660254037844386;
4068
0
            const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4069
0
                dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4070
0
            const double padfCst[] = {
4071
0
                dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4072
0
                dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4073
0
                    dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4074
0
                dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4075
0
                    dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4076
4077
0
            for (int i = iMin; i <= iMax; ++i)
4078
0
            {
4079
0
                const double dfX = i - dfDeltaX;
4080
0
                if (dfX == 0.0)
4081
0
                    padfWeightsXShifted[i] = 1.0;
4082
0
                else
4083
0
                    padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4084
#if DEBUG_VERBOSE
4085
                    // TODO(schwehr): AlmostEqual.
4086
                    // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4087
                    //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4088
#endif
4089
0
            }
4090
4091
0
            psWrkStruct->iLastSrcX = iSrcX;
4092
0
            psWrkStruct->dfLastDeltaX = dfDeltaX;
4093
0
        }
4094
0
    }
4095
4096
0
    if (dfYScale < 1.0)
4097
0
    {
4098
0
        while ((jMin - dfDeltaY) * dfYScale < -3.0)
4099
0
            jMin++;
4100
0
        while ((jMax - dfDeltaY) * dfYScale > 3.0)
4101
0
            jMax--;
4102
4103
        // clang-format off
4104
        /*
4105
        Naive version:
4106
        for (int j = jMin; j <= jMax; ++j)
4107
        {
4108
            padfWeightsYShifted[j] =
4109
                GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4110
        }
4111
        */
4112
        // clang-format on
4113
4114
0
        if (iSrcY != psWrkStruct->iLastSrcY ||
4115
0
            dfDeltaY != psWrkStruct->dfLastDeltaY)
4116
0
        {
4117
0
            double dfY = (jMin - dfDeltaY) * dfYScale;
4118
4119
0
            double dfPIYover3 = M_PI / 3 * dfY;
4120
0
            double dfCosOver3 = cos(dfPIYover3);
4121
0
            double dfSinOver3 = sin(dfPIYover3);
4122
4123
            // "Naive":
4124
            // double dfSin = sin( M_PI * dfY );
4125
            // double dfCos = cos( M_PI * dfY );
4126
            // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4127
0
            double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4128
0
            double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4129
4130
0
            const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4131
0
            const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4132
0
            const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4133
0
            const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4134
0
            constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4135
0
            padfWeightsYShifted[jMin] =
4136
0
                dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4137
0
            for (int j = jMin + 1; j <= jMax; ++j)
4138
0
            {
4139
0
                dfY += dfYScale;
4140
0
                const double dfNewSin =
4141
0
                    dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4142
0
                const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4143
0
                                             dfCosOver3 * dfSinPiYScaleOver3;
4144
0
                padfWeightsYShifted[j] =
4145
0
                    dfY == 0
4146
0
                        ? 1.0
4147
0
                        : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4148
0
                const double dfNewCos =
4149
0
                    dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4150
0
                const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4151
0
                                             dfSinOver3 * dfSinPiYScaleOver3;
4152
0
                dfSin = dfNewSin;
4153
0
                dfCos = dfNewCos;
4154
0
                dfSinOver3 = dfNewSinOver3;
4155
0
                dfCosOver3 = dfNewCosOver3;
4156
0
            }
4157
4158
0
            psWrkStruct->iLastSrcY = iSrcY;
4159
0
            psWrkStruct->dfLastDeltaY = dfDeltaY;
4160
0
        }
4161
0
    }
4162
0
    else
4163
0
    {
4164
0
        while (jMin - dfDeltaY < -3.0)
4165
0
            jMin++;
4166
0
        while (jMax - dfDeltaY > 3.0)
4167
0
            jMax--;
4168
4169
0
        if (iSrcY != psWrkStruct->iLastSrcY ||
4170
0
            dfDeltaY != psWrkStruct->dfLastDeltaY)
4171
0
        {
4172
0
            const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4173
0
            const double dfSin2PIDeltaYOver3 =
4174
0
                dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4175
            // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4176
0
            const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4177
0
            const double dfSinPIDeltaY =
4178
0
                (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4179
0
            const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4180
0
            const double dfInvPI2Over3xSinPIDeltaY =
4181
0
                dfInvPI2Over3 * dfSinPIDeltaY;
4182
0
            const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4183
0
                -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4184
0
            const double dfSinPIOver3 = 0.8660254037844386;
4185
0
            const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4186
0
                dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4187
0
            const double padfCst[] = {
4188
0
                dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4189
0
                dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4190
0
                    dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4191
0
                dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4192
0
                    dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4193
4194
0
            for (int j = jMin; j <= jMax; ++j)
4195
0
            {
4196
0
                const double dfY = j - dfDeltaY;
4197
0
                if (dfY == 0.0)
4198
0
                    padfWeightsYShifted[j] = 1.0;
4199
0
                else
4200
0
                    padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4201
#if DEBUG_VERBOSE
4202
                    // TODO(schwehr): AlmostEqual.
4203
                    // CPLAssert(fabs(padfWeightsYShifted[j] -
4204
                    //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4205
#endif
4206
0
            }
4207
4208
0
            psWrkStruct->iLastSrcY = iSrcY;
4209
0
            psWrkStruct->dfLastDeltaY = dfDeltaY;
4210
0
        }
4211
0
    }
4212
4213
    // If we have no density information, we can simply compute the
4214
    // accumulated weight.
4215
0
    if (padfRowDensity == nullptr)
4216
0
    {
4217
0
        double dfRowAccWeight = 0.0;
4218
0
        for (int i = iMin; i <= iMax; ++i)
4219
0
        {
4220
0
            dfRowAccWeight += padfWeightsXShifted[i];
4221
0
        }
4222
0
        double dfColAccWeight = 0.0;
4223
0
        for (int j = jMin; j <= jMax; ++j)
4224
0
        {
4225
0
            dfColAccWeight += padfWeightsYShifted[j];
4226
0
        }
4227
0
        dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4228
0
    }
4229
4230
    // Loop over pixel rows in the kernel.
4231
4232
0
    if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
4233
0
        !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4234
0
        !padfRowDensity)
4235
0
    {
4236
        // Optimization for Byte case without any masking/alpha
4237
4238
0
        if (dfAccumulatorWeight < 0.000001)
4239
0
        {
4240
0
            *pdfDensity = 0.0;
4241
0
            return false;
4242
0
        }
4243
4244
0
        const GByte *pSrc =
4245
0
            reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4246
0
        pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4247
4248
0
#if defined(USE_SSE2)
4249
0
        if (iMax - iMin + 1 == 6)
4250
0
        {
4251
            // This is just an optimized version of the general case in
4252
            // the else clause.
4253
4254
0
            pSrc += iMin;
4255
0
            int j = jMin;
4256
0
            const auto fourXWeights =
4257
0
                XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4258
4259
            // Process 2 lines at the same time.
4260
0
            for (; j < jMax; j += 2)
4261
0
            {
4262
0
                const XMMReg4Double v_acc =
4263
0
                    XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4264
0
                const XMMReg4Double v_acc2 =
4265
0
                    XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4266
0
                const double dfRowAcc = v_acc.GetHorizSum();
4267
0
                const double dfRowAccEnd =
4268
0
                    pSrc[4] * padfWeightsXShifted[iMin + 4] +
4269
0
                    pSrc[5] * padfWeightsXShifted[iMin + 5];
4270
0
                dfAccumulatorReal +=
4271
0
                    (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4272
0
                const double dfRowAcc2 = v_acc2.GetHorizSum();
4273
0
                const double dfRowAcc2End =
4274
0
                    pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4275
0
                    pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4276
0
                dfAccumulatorReal +=
4277
0
                    (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4278
0
                pSrc += 2 * nSrcXSize;
4279
0
            }
4280
0
            if (j == jMax)
4281
0
            {
4282
                // Process last line if there's an odd number of them.
4283
4284
0
                const XMMReg4Double v_acc =
4285
0
                    XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4286
0
                const double dfRowAcc = v_acc.GetHorizSum();
4287
0
                const double dfRowAccEnd =
4288
0
                    pSrc[4] * padfWeightsXShifted[iMin + 4] +
4289
0
                    pSrc[5] * padfWeightsXShifted[iMin + 5];
4290
0
                dfAccumulatorReal +=
4291
0
                    (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4292
0
            }
4293
0
        }
4294
0
        else
4295
0
#endif
4296
0
        {
4297
0
            for (int j = jMin; j <= jMax; ++j)
4298
0
            {
4299
0
                int i = iMin;
4300
0
                double dfRowAcc1 = 0.0;
4301
0
                double dfRowAcc2 = 0.0;
4302
                // A bit of loop unrolling
4303
0
                for (; i < iMax; i += 2)
4304
0
                {
4305
0
                    dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4306
0
                    dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4307
0
                }
4308
0
                if (i == iMax)
4309
0
                {
4310
                    // Process last column if there's an odd number of them.
4311
0
                    dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4312
0
                }
4313
4314
0
                dfAccumulatorReal +=
4315
0
                    (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4316
0
                pSrc += nSrcXSize;
4317
0
            }
4318
0
        }
4319
4320
        // Calculate the output taking into account weighting.
4321
0
        if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4322
0
        {
4323
0
            const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4324
0
            *pdfReal = dfAccumulatorReal * dfInvAcc;
4325
0
            *pdfDensity = 1.0;
4326
0
        }
4327
0
        else
4328
0
        {
4329
0
            *pdfReal = dfAccumulatorReal;
4330
0
            *pdfDensity = 1.0;
4331
0
        }
4332
4333
0
        return true;
4334
0
    }
4335
4336
0
    GPtrDiff_t iRowOffset =
4337
0
        iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4338
4339
0
    int nCountValid = 0;
4340
0
    const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4341
4342
0
    for (int j = jMin; j <= jMax; ++j)
4343
0
    {
4344
0
        iRowOffset += nSrcXSize;
4345
4346
        // Get pixel values.
4347
        // We can potentially read extra elements after the "normal" end of the
4348
        // source arrays, but the contract of papabySrcImage[iBand],
4349
        // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4350
        // is to have WARP_EXTRA_ELTS reserved at their end.
4351
0
        if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4352
0
                            padfRowDensity, padfRowReal, padfRowImag))
4353
0
            continue;
4354
4355
0
        const double dfWeight1 = padfWeightsYShifted[j];
4356
4357
        // Iterate over pixels in row.
4358
0
        if (padfRowDensity != nullptr)
4359
0
        {
4360
0
            for (int i = iMin; i <= iMax; ++i)
4361
0
            {
4362
                // Skip sampling if pixel has zero density.
4363
0
                if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
4364
0
                    continue;
4365
4366
0
                nCountValid++;
4367
4368
                //  Use a cached set of weights for this row.
4369
0
                const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4370
4371
                // Accumulate!
4372
0
                dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4373
0
                dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4374
0
                dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4375
0
                dfAccumulatorWeight += dfWeight2;
4376
0
            }
4377
0
        }
4378
0
        else if (bIsNonComplex)
4379
0
        {
4380
0
            double dfRowAccReal = 0.0;
4381
0
            for (int i = iMin; i <= iMax; ++i)
4382
0
            {
4383
0
                const double dfWeight2 = padfWeightsXShifted[i];
4384
4385
                // Accumulate!
4386
0
                dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4387
0
            }
4388
4389
0
            dfAccumulatorReal += dfRowAccReal * dfWeight1;
4390
0
        }
4391
0
        else
4392
0
        {
4393
0
            double dfRowAccReal = 0.0;
4394
0
            double dfRowAccImag = 0.0;
4395
0
            for (int i = iMin; i <= iMax; ++i)
4396
0
            {
4397
0
                const double dfWeight2 = padfWeightsXShifted[i];
4398
4399
                // Accumulate!
4400
0
                dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4401
0
                dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4402
0
            }
4403
4404
0
            dfAccumulatorReal += dfRowAccReal * dfWeight1;
4405
0
            dfAccumulatorImag += dfRowAccImag * dfWeight1;
4406
0
        }
4407
0
    }
4408
4409
0
    if (dfAccumulatorWeight < 0.000001 ||
4410
0
        (padfRowDensity != nullptr &&
4411
0
         (dfAccumulatorDensity < 0.000001 ||
4412
0
          nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4413
0
    {
4414
0
        *pdfDensity = 0.0;
4415
0
        return false;
4416
0
    }
4417
4418
    // Calculate the output taking into account weighting.
4419
0
    if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4420
0
    {
4421
0
        const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4422
0
        *pdfReal = dfAccumulatorReal * dfInvAcc;
4423
0
        *pdfImag = dfAccumulatorImag * dfInvAcc;
4424
0
        if (padfRowDensity != nullptr)
4425
0
            *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4426
0
        else
4427
0
            *pdfDensity = 1.0;
4428
0
    }
4429
0
    else
4430
0
    {
4431
0
        *pdfReal = dfAccumulatorReal;
4432
0
        *pdfImag = dfAccumulatorImag;
4433
0
        if (padfRowDensity != nullptr)
4434
0
            *pdfDensity = dfAccumulatorDensity;
4435
0
        else
4436
0
            *pdfDensity = 1.0;
4437
0
    }
4438
4439
0
    return true;
4440
0
}
4441
4442
/************************************************************************/
4443
/*                        GWKComputeWeights()                           */
4444
/************************************************************************/
4445
4446
// Fills the per-column and per-row weight tables of a separable resampling
// kernel, and stores in dfInvWeights the inverse of the product of the two
// weight sums.
//
// eResample selects the filter functions from apfGWKFilter and
// apfGWKFilter4Values. [iMin, iMax] and [jMin, jMax] are the inclusive ranges
// of column and row offsets; the filter argument for an offset k is
// (k - dfDelta) * dfScale along the matching axis. padfWeightsHorizontal
// receives iMax - iMin + 1 values and padfWeightsVertical jMax - jMin + 1
// values, both written from index 0.
static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
                              double dfDeltaX, double dfXScale, int jMin,
                              int jMax, double dfDeltaY, double dfYScale,
                              double *padfWeightsHorizontal,
                              double *padfWeightsVertical, double &dfInvWeights)
{
    const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    CPLAssert(pfnGetWeight);
    const FilterFunc4ValuesType pfnGetWeight4Values =
        apfGWKFilter4Values[eResample];
    CPLAssert(pfnGetWeight4Values);

    // Computes the weights of one axis and returns their sum.
    const auto FillAxis =
        [pfnGetWeight, pfnGetWeight4Values](int nFirst, int nLast,
                                            double dfDelta, double dfScale,
                                            double *padfWeights) -> double
    {
        // Not zero, but as close as possible to it, to avoid a potential
        // division by zero when the two sums are multiplied and inverted.
        double dfSum = cpl::NumericLimits<double>::min();

        int nOffset = nFirst;
        double *padfOut = padfWeights;

        // Groups of 4 offsets: store the 4 filter arguments, then hand them
        // to the 4-values filter (presumably it overwrites them in place with
        // the weights and returns their sum -- confirm against the filter
        // implementations).
        while (nOffset + 2 < nLast)
        {
            padfOut[0] = (nOffset - dfDelta) * dfScale;
            padfOut[1] = padfOut[0] + dfScale;
            padfOut[2] = padfOut[1] + dfScale;
            padfOut[3] = padfOut[2] + dfScale;
            dfSum += pfnGetWeight4Values(padfOut);
            nOffset += 4;
            padfOut += 4;
        }

        // Remaining offsets, one at a time.
        for (; nOffset <= nLast; ++nOffset, ++padfOut)
        {
            const double dfWeight = pfnGetWeight((nOffset - dfDelta) * dfScale);
            *padfOut = dfWeight;
            dfSum += dfWeight;
        }
        return dfSum;
    };

    const double dfSumHorizontal =
        FillAxis(iMin, iMax, dfDeltaX, dfXScale, padfWeightsHorizontal);
    const double dfSumVertical =
        FillAxis(jMin, jMax, dfDeltaY, dfYScale, padfWeightsVertical);

    dfInvWeights = 1. / (dfSumHorizontal * dfSumVertical);
}
4506
4507
/************************************************************************/
4508
/*                        GWKResampleNoMasksT()                         */
4509
/************************************************************************/
4510
4511
template <class T>
4512
static bool
4513
GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4514
                    double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4515
                    double *padfWeightsVertical, double &dfInvWeights)
4516
4517
{
4518
    // Commonly used; save locally.
4519
    const int nSrcXSize = poWK->nSrcXSize;
4520
    const int nSrcYSize = poWK->nSrcYSize;
4521
4522
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4523
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4524
    const GPtrDiff_t iSrcOffset =
4525
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4526
4527
    const int nXRadius = poWK->nXRadius;
4528
    const int nYRadius = poWK->nYRadius;
4529
4530
    // Politely refuse to process invalid coordinates or obscenely small image.
4531
    if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4532
        nYRadius > nSrcYSize)
4533
        return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4534
                                                  pValue);
4535
4536
    T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4537
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4538
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4539
4540
    const double dfXScale = std::min(poWK->dfXScale, 1.0);
4541
    const double dfYScale = std::min(poWK->dfYScale, 1.0);
4542
4543
    int iMin = 1 - nXRadius;
4544
    if (iSrcX + iMin < 0)
4545
        iMin = -iSrcX;
4546
    int iMax = nXRadius;
4547
    if (iSrcX + iMax >= nSrcXSize - 1)
4548
        iMax = nSrcXSize - 1 - iSrcX;
4549
4550
    int jMin = 1 - nYRadius;
4551
    if (iSrcY + jMin < 0)
4552
        jMin = -iSrcY;
4553
    int jMax = nYRadius;
4554
    if (iSrcY + jMax >= nSrcYSize - 1)
4555
        jMax = nSrcYSize - 1 - iSrcY;
4556
4557
    if (iBand == 0)
4558
    {
4559
        GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4560
                          jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4561
                          padfWeightsVertical, dfInvWeights);
4562
    }
4563
4564
    // Loop over all rows in the kernel.
4565
    double dfAccumulator = 0.0;
4566
    for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4567
    {
4568
        const GPtrDiff_t iSampJ =
4569
            iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4570
4571
        // Loop over all pixels in the row.
4572
        double dfAccumulatorLocal = 0.0;
4573
        double dfAccumulatorLocal2 = 0.0;
4574
        int iC = 0;
4575
        int i = iMin;
4576
        // Process by chunk of 4 cols.
4577
        for (; i + 2 < iMax; i += 4, iC += 4)
4578
        {
4579
            // Retrieve the pixel & accumulate.
4580
            dfAccumulatorLocal +=
4581
                pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4582
            dfAccumulatorLocal +=
4583
                pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4584
            dfAccumulatorLocal2 +=
4585
                pSrcBand[i + 2 + iSampJ] * padfWeightsHorizontal[iC + 2];
4586
            dfAccumulatorLocal2 +=
4587
                pSrcBand[i + 3 + iSampJ] * padfWeightsHorizontal[iC + 3];
4588
        }
4589
        dfAccumulatorLocal += dfAccumulatorLocal2;
4590
        if (i < iMax)
4591
        {
4592
            dfAccumulatorLocal +=
4593
                pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4594
            dfAccumulatorLocal +=
4595
                pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4596
            i += 2;
4597
            iC += 2;
4598
        }
4599
        if (i == iMax)
4600
        {
4601
            dfAccumulatorLocal +=
4602
                pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4603
        }
4604
4605
        dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4606
    }
4607
4608
    *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4609
4610
    return true;
4611
}
4612
4613
/* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4614
/* Could possibly be used too on 32bit, but we would need to check at runtime */
4615
#if defined(USE_SSE2)
4616
4617
/************************************************************************/
4618
/*                    GWKResampleNoMasks_SSE2_T()                       */
4619
/************************************************************************/
4620
4621
template <class T>
4622
static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4623
                                      double dfSrcX, double dfSrcY, T *pValue,
4624
                                      double *padfWeightsHorizontal,
4625
                                      double *padfWeightsVertical,
4626
                                      double &dfInvWeights)
4627
0
{
4628
    // Commonly used; save locally.
4629
0
    const int nSrcXSize = poWK->nSrcXSize;
4630
0
    const int nSrcYSize = poWK->nSrcYSize;
4631
4632
0
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4633
0
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4634
0
    const GPtrDiff_t iSrcOffset =
4635
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4636
0
    const int nXRadius = poWK->nXRadius;
4637
0
    const int nYRadius = poWK->nYRadius;
4638
4639
    // Politely refuse to process invalid coordinates or obscenely small image.
4640
0
    if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4641
0
        nYRadius > nSrcYSize)
4642
0
        return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4643
0
                                                  pValue);
4644
4645
0
    const T *pSrcBand =
4646
0
        reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4647
4648
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4649
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4650
0
    const double dfXScale = std::min(poWK->dfXScale, 1.0);
4651
0
    const double dfYScale = std::min(poWK->dfYScale, 1.0);
4652
4653
0
    int iMin = 1 - nXRadius;
4654
0
    if (iSrcX + iMin < 0)
4655
0
        iMin = -iSrcX;
4656
0
    int iMax = nXRadius;
4657
0
    if (iSrcX + iMax >= nSrcXSize - 1)
4658
0
        iMax = nSrcXSize - 1 - iSrcX;
4659
4660
0
    int jMin = 1 - nYRadius;
4661
0
    if (iSrcY + jMin < 0)
4662
0
        jMin = -iSrcY;
4663
0
    int jMax = nYRadius;
4664
0
    if (iSrcY + jMax >= nSrcYSize - 1)
4665
0
        jMax = nSrcYSize - 1 - iSrcY;
4666
4667
0
    if (iBand == 0)
4668
0
    {
4669
0
        GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4670
0
                          jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4671
0
                          padfWeightsVertical, dfInvWeights);
4672
0
    }
4673
4674
0
    GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4675
    // Process by chunk of 4 rows.
4676
0
    int jC = 0;
4677
0
    int j = jMin;
4678
0
    double dfAccumulator = 0.0;
4679
0
    for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4680
0
    {
4681
        // Loop over all pixels in the row.
4682
0
        int iC = 0;
4683
0
        int i = iMin;
4684
        // Process by chunk of 4 cols.
4685
0
        XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4686
0
        XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4687
0
        XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4688
0
        XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4689
0
        for (; i + 2 < iMax; i += 4, iC += 4)
4690
0
        {
4691
            // Retrieve the pixel & accumulate.
4692
0
            XMMReg4Double v_pixels_1 =
4693
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4694
0
            XMMReg4Double v_pixels_2 =
4695
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4696
0
            XMMReg4Double v_pixels_3 =
4697
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4698
0
            XMMReg4Double v_pixels_4 =
4699
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4700
4701
0
            XMMReg4Double v_padfWeight =
4702
0
                XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4703
4704
0
            v_acc_1 += v_pixels_1 * v_padfWeight;
4705
0
            v_acc_2 += v_pixels_2 * v_padfWeight;
4706
0
            v_acc_3 += v_pixels_3 * v_padfWeight;
4707
0
            v_acc_4 += v_pixels_4 * v_padfWeight;
4708
0
        }
4709
4710
0
        if (i < iMax)
4711
0
        {
4712
0
            XMMReg2Double v_pixels_1 =
4713
0
                XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4714
0
            XMMReg2Double v_pixels_2 =
4715
0
                XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4716
0
            XMMReg2Double v_pixels_3 =
4717
0
                XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4718
0
            XMMReg2Double v_pixels_4 =
4719
0
                XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4720
4721
0
            XMMReg2Double v_padfWeight =
4722
0
                XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4723
4724
0
            v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
4725
0
            v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
4726
0
            v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
4727
0
            v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
4728
4729
0
            i += 2;
4730
0
            iC += 2;
4731
0
        }
4732
4733
0
        double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
4734
0
        double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
4735
0
        double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
4736
0
        double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
4737
4738
0
        if (i == iMax)
4739
0
        {
4740
0
            dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
4741
0
                                    padfWeightsHorizontal[iC];
4742
0
            dfAccumulatorLocal_2 +=
4743
0
                static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
4744
0
                padfWeightsHorizontal[iC];
4745
0
            dfAccumulatorLocal_3 +=
4746
0
                static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
4747
0
                padfWeightsHorizontal[iC];
4748
0
            dfAccumulatorLocal_4 +=
4749
0
                static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
4750
0
                padfWeightsHorizontal[iC];
4751
0
        }
4752
4753
0
        dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
4754
0
        dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
4755
0
        dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
4756
0
        dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
4757
0
    }
4758
0
    for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
4759
0
    {
4760
        // Loop over all pixels in the row.
4761
0
        int iC = 0;
4762
0
        int i = iMin;
4763
        // Process by chunk of 4 cols.
4764
0
        XMMReg4Double v_acc = XMMReg4Double::Zero();
4765
0
        for (; i + 2 < iMax; i += 4, iC += 4)
4766
0
        {
4767
            // Retrieve the pixel & accumulate.
4768
0
            XMMReg4Double v_pixels =
4769
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4770
0
            XMMReg4Double v_padfWeight =
4771
0
                XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4772
4773
0
            v_acc += v_pixels * v_padfWeight;
4774
0
        }
4775
4776
0
        double dfAccumulatorLocal = v_acc.GetHorizSum();
4777
4778
0
        if (i < iMax)
4779
0
        {
4780
0
            dfAccumulatorLocal +=
4781
0
                pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4782
0
            dfAccumulatorLocal +=
4783
0
                pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4784
0
            i += 2;
4785
0
            iC += 2;
4786
0
        }
4787
0
        if (i == iMax)
4788
0
        {
4789
0
            dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
4790
0
                                  padfWeightsHorizontal[iC];
4791
0
        }
4792
4793
0
        dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4794
0
    }
4795
4796
0
    *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4797
4798
0
    return true;
4799
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKResampleNoMasks_SSE2_T<unsigned char>(GDALWarpKernel const*, int, double, double, unsigned char*, double*, double*, double&)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKResampleNoMasks_SSE2_T<float>(GDALWarpKernel const*, int, double, double, float*, double*, double*, double&)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKResampleNoMasks_SSE2_T<short>(GDALWarpKernel const*, int, double, double, short*, double*, double*, double&)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKResampleNoMasks_SSE2_T<unsigned short>(GDALWarpKernel const*, int, double, double, unsigned short*, double*, double*, double&)
4800
4801
/************************************************************************/
4802
/*                     GWKResampleNoMasksT<GByte>()                     */
4803
/************************************************************************/
4804
4805
template <>
4806
bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
4807
                                double dfSrcX, double dfSrcY, GByte *pValue,
4808
                                double *padfWeightsHorizontal,
4809
                                double *padfWeightsVertical,
4810
                                double &dfInvWeights)
4811
0
{
4812
0
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4813
0
                                     padfWeightsHorizontal, padfWeightsVertical,
4814
0
                                     dfInvWeights);
4815
0
}
4816
4817
/************************************************************************/
4818
/*                     GWKResampleNoMasksT<GInt16>()                    */
4819
/************************************************************************/
4820
4821
template <>
4822
bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
4823
                                 double dfSrcX, double dfSrcY, GInt16 *pValue,
4824
                                 double *padfWeightsHorizontal,
4825
                                 double *padfWeightsVertical,
4826
                                 double &dfInvWeights)
4827
0
{
4828
0
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4829
0
                                     padfWeightsHorizontal, padfWeightsVertical,
4830
0
                                     dfInvWeights);
4831
0
}
4832
4833
/************************************************************************/
4834
/*                     GWKResampleNoMasksT<GUInt16>()                   */
4835
/************************************************************************/
4836
4837
template <>
4838
bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
4839
                                  double dfSrcX, double dfSrcY, GUInt16 *pValue,
4840
                                  double *padfWeightsHorizontal,
4841
                                  double *padfWeightsVertical,
4842
                                  double &dfInvWeights)
4843
0
{
4844
0
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4845
0
                                     padfWeightsHorizontal, padfWeightsVertical,
4846
0
                                     dfInvWeights);
4847
0
}
4848
4849
/************************************************************************/
4850
/*                     GWKResampleNoMasksT<float>()                     */
4851
/************************************************************************/
4852
4853
template <>
4854
bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
4855
                                double dfSrcX, double dfSrcY, float *pValue,
4856
                                double *padfWeightsHorizontal,
4857
                                double *padfWeightsVertical,
4858
                                double &dfInvWeights)
4859
0
{
4860
0
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4861
0
                                     padfWeightsHorizontal, padfWeightsVertical,
4862
0
                                     dfInvWeights);
4863
0
}
4864
4865
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
4866
4867
/************************************************************************/
4868
/*                     GWKResampleNoMasksT<double>()                    */
4869
/************************************************************************/
4870
4871
template <>
4872
bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
4873
                                 double dfSrcX, double dfSrcY, double *pValue,
4874
                                 double *padfWeightsHorizontal,
4875
                                 double *padfWeightsVertical,
4876
                                 double &dfInvWeights)
4877
{
4878
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4879
                                     padfWeightsHorizontal, padfWeightsVertical,
4880
                                     dfInvWeights);
4881
}
4882
4883
#endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
4884
4885
#endif /* defined(USE_SSE2) */
4886
4887
/************************************************************************/
4888
/*                     GWKRoundSourceCoordinates()                      */
4889
/************************************************************************/
4890
4891
static void GWKRoundSourceCoordinates(
4892
    int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
4893
    double dfSrcCoordPrecision, double dfErrorThreshold,
4894
    GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
4895
    double dfDstY)
4896
0
{
4897
0
    double dfPct = 0.8;
4898
0
    if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
4899
0
    {
4900
0
        dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
4901
0
    }
4902
0
    const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
4903
4904
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
4905
0
    {
4906
0
        const double dfXBefore = padfX[iDstX];
4907
0
        const double dfYBefore = padfY[iDstX];
4908
0
        padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4909
0
                       dfSrcCoordPrecision;
4910
0
        padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4911
0
                       dfSrcCoordPrecision;
4912
4913
        // If we are in an uncertainty zone, go to non-approximated
4914
        // transformation.
4915
        // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
4916
        // be at least 10 times greater than the approximation error.
4917
0
        if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
4918
0
            fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
4919
0
        {
4920
0
            padfX[iDstX] = iDstX + dfDstXOff;
4921
0
            padfY[iDstX] = dfDstY;
4922
0
            padfZ[iDstX] = 0.0;
4923
0
            pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
4924
0
                           padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
4925
0
            padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4926
0
                           dfSrcCoordPrecision;
4927
0
            padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4928
0
                           dfSrcCoordPrecision;
4929
0
        }
4930
0
    }
4931
0
}
4932
4933
/************************************************************************/
4934
/*                     GWKCheckAndComputeSrcOffsets()                   */
4935
/************************************************************************/
4936
// Validates the source coordinate computed for destination pixel
// (_iDstX, _iDstY) and, when it falls inside the source window, stores the
// matching pixel offset (iSrcX + iSrcY * _nSrcXSize) in iSrcOffset.
//
// _padfX / _padfY / _pabSuccess are indexed by _iDstX. When the coordinate is
// outside the source window by less than one pixel, the point is transformed
// again on its own through the kernel transformer and the checks are run a
// second time; this overwrites _padfX[_iDstX], _padfY[_iDstX] and
// _pabSuccess[_iDstX].
//
// Returns false when the transformation failed, when a coordinate is NaN, or
// when the point is outside the source window; iSrcOffset is then left
// untouched.
static CPL_INLINE bool
GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
                             int _iDstY, double *_padfX, double *_padfY,
                             int _nSrcXSize, int _nSrcYSize,
                             GPtrDiff_t &iSrcOffset)
{
    const GDALWarpKernel *_poWK = psJob->poWK;
    for (int iTry = 0; iTry < 2; ++iTry)
    {
        if (iTry == 1)
        {
            // If the source coordinate is slightly outside of the source raster
            // retry to transform it alone, so that the exact coordinate
            // transformer is used.

            _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
            _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
            double dfZ = 0;
            _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
                                  _padfX + _iDstX, _padfY + _iDstX, &dfZ,
                                  _pabSuccess + _iDstX);
        }
        if (!_pabSuccess[_iDstX])
            return false;

        // If this happens this is likely the symptom of a bug somewhere.
        if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
        {
            // Emit the debug message only once per process. std::call_once
            // is used instead of a plain static bool so that concurrent calls
            // (this helper works on per-job data, presumably from several
            // worker threads) do not race on the flag.
            static std::once_flag oNanCoordReported;
            std::call_once(oNanCoordReported,
                           [_iDstX]()
                           {
                               CPLDebug("WARP",
                                        "GWKCheckAndComputeSrcOffsets(): "
                                        "NaN coordinate found on point %d.",
                                        _iDstX);
                           });
            return false;
        }

        /* ---------------------------------------------------------------- */
        /*      Figure out what pixel we want in our source raster, and skip */
        /*      further processing if it is well off the source image.       */
        /* ---------------------------------------------------------------- */
        /* We test against the value before casting to avoid the */
        /* problem of asymmetric truncation effects around zero.  That is */
        /* -0.5 will be 0 when cast to an int. */
        if (_padfX[_iDstX] < _poWK->nSrcXOff)
        {
            // Less than one pixel off the left edge: retry with the exact
            // transformer (first pass only).
            if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
                continue;
            return false;
        }

        if (_padfY[_iDstX] < _poWK->nSrcYOff)
        {
            // Less than one pixel off the top edge: retry with the exact
            // transformer (first pass only).
            if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
                continue;
            return false;
        }

        // Check for potential overflow when casting from float to int, (if
        // operating outside natural projection area, padfX/Y can be a very huge
        // positive number before doing the actual conversion), as such cast is
        // undefined behavior that can trigger exception with some compilers
        // (see #6753)
        if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
        {
            // Less than one pixel off the right edge: retry with the exact
            // transformer (first pass only).
            if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
                continue;
            return false;
        }
        if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
        {
            // Less than one pixel off the bottom edge: retry with the exact
            // transformer (first pass only).
            if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
                continue;
            return false;
        }

        break;
    }

    int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
    int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
    // A coordinate exactly on the far edge maps to the last pixel.
    if (iSrcX == _nSrcXSize)
        iSrcX--;
    if (iSrcY == _nSrcYSize)
        iSrcY--;

    // Those checks should normally be OK given the previous ones.
    CPLAssert(iSrcX >= 0);
    CPLAssert(iSrcY >= 0);
    CPLAssert(iSrcX < _nSrcXSize);
    CPLAssert(iSrcY < _nSrcYSize);

    iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;

    return true;
}
5049
5050
/************************************************************************/
5051
/*                   GWKOneSourceCornerFailsToReproject()               */
5052
/************************************************************************/
5053
5054
// Runs each of the 4 corners of the source window through the kernel
// transformer (called with FALSE as its second argument) and returns true as
// soon as one of them is reported as failing, false if all 4 succeed.
static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
{
    GDALWarpKernel *poWK = psJob->poWK;

    // Corners are visited in the order (0,0), (1,0), (0,1), (1,1), where 1
    // means "offset by the full window size along that axis".
    for (int iCorner = 0; iCorner < 4; ++iCorner)
    {
        const int iX = iCorner & 1;
        const int iY = iCorner >> 1;

        double dfX = poWK->nSrcXOff + iX * poWK->nSrcXSize;
        double dfY = poWK->nSrcYOff + iY * poWK->nSrcYSize;
        double dfZ = 0;
        int bOK = FALSE;
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfX, &dfY,
                             &dfZ, &bOK);
        if (!bOK)
            return true;
    }
    return false;
}
5073
5074
/************************************************************************/
5075
/*                       GWKAdjustSrcOffsetOnEdge()                     */
5076
/************************************************************************/
5077
5078
static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5079
                                     GPtrDiff_t &iSrcOffset)
5080
0
{
5081
0
    GDALWarpKernel *poWK = psJob->poWK;
5082
0
    const int nSrcXSize = poWK->nSrcXSize;
5083
0
    const int nSrcYSize = poWK->nSrcYSize;
5084
5085
    // Check if the computed source position slightly altered
5086
    // fails to reproject. If so, then we are at the edge of
5087
    // the validity area, and it is worth checking neighbour
5088
    // source pixels for validity.
5089
0
    int nSuccess = FALSE;
5090
0
    {
5091
0
        double dfXTmp =
5092
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5093
0
        double dfYTmp =
5094
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5095
0
        double dfZTmp = 0;
5096
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5097
0
                             &dfZTmp, &nSuccess);
5098
0
    }
5099
0
    if (nSuccess)
5100
0
    {
5101
0
        double dfXTmp =
5102
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5103
0
        double dfYTmp =
5104
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5105
0
        double dfZTmp = 0;
5106
0
        nSuccess = FALSE;
5107
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5108
0
                             &dfZTmp, &nSuccess);
5109
0
    }
5110
0
    if (nSuccess)
5111
0
    {
5112
0
        double dfXTmp =
5113
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5114
0
        double dfYTmp =
5115
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5116
0
        double dfZTmp = 0;
5117
0
        nSuccess = FALSE;
5118
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5119
0
                             &dfZTmp, &nSuccess);
5120
0
    }
5121
5122
0
    if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5123
0
        CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5124
0
    {
5125
0
        iSrcOffset++;
5126
0
        return true;
5127
0
    }
5128
0
    else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5129
0
             CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5130
0
    {
5131
0
        iSrcOffset += nSrcXSize;
5132
0
        return true;
5133
0
    }
5134
0
    else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5135
0
             CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5136
0
    {
5137
0
        iSrcOffset--;
5138
0
        return true;
5139
0
    }
5140
0
    else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5141
0
             CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5142
0
    {
5143
0
        iSrcOffset -= nSrcXSize;
5144
0
        return true;
5145
0
    }
5146
5147
0
    return false;
5148
0
}
5149
5150
/************************************************************************/
5151
/*                 GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()          */
5152
/************************************************************************/
5153
5154
static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5155
                                                      GPtrDiff_t &iSrcOffset)
5156
0
{
5157
0
    GDALWarpKernel *poWK = psJob->poWK;
5158
0
    const int nSrcXSize = poWK->nSrcXSize;
5159
0
    const int nSrcYSize = poWK->nSrcYSize;
5160
5161
    // Check if the computed source position slightly altered
5162
    // fails to reproject. If so, then we are at the edge of
5163
    // the validity area, and it is worth checking neighbour
5164
    // source pixels for validity.
5165
0
    int nSuccess = FALSE;
5166
0
    {
5167
0
        double dfXTmp =
5168
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5169
0
        double dfYTmp =
5170
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5171
0
        double dfZTmp = 0;
5172
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5173
0
                             &dfZTmp, &nSuccess);
5174
0
    }
5175
0
    if (nSuccess)
5176
0
    {
5177
0
        double dfXTmp =
5178
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5179
0
        double dfYTmp =
5180
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5181
0
        double dfZTmp = 0;
5182
0
        nSuccess = FALSE;
5183
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5184
0
                             &dfZTmp, &nSuccess);
5185
0
    }
5186
0
    if (nSuccess)
5187
0
    {
5188
0
        double dfXTmp =
5189
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5190
0
        double dfYTmp =
5191
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5192
0
        double dfZTmp = 0;
5193
0
        nSuccess = FALSE;
5194
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5195
0
                             &dfZTmp, &nSuccess);
5196
0
    }
5197
5198
0
    if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5199
0
        poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
5200
0
    {
5201
0
        iSrcOffset++;
5202
0
        return true;
5203
0
    }
5204
0
    else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5205
0
             poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5206
0
                 SRC_DENSITY_THRESHOLD)
5207
0
    {
5208
0
        iSrcOffset += nSrcXSize;
5209
0
        return true;
5210
0
    }
5211
0
    else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5212
0
             poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5213
0
                 SRC_DENSITY_THRESHOLD)
5214
0
    {
5215
0
        iSrcOffset--;
5216
0
        return true;
5217
0
    }
5218
0
    else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5219
0
             poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5220
0
                 SRC_DENSITY_THRESHOLD)
5221
0
    {
5222
0
        iSrcOffset -= nSrcXSize;
5223
0
        return true;
5224
0
    }
5225
5226
0
    return false;
5227
0
}
5228
5229
/************************************************************************/
5230
/*                           GWKGeneralCase()                           */
5231
/*                                                                      */
5232
/*      This is the most general case.  It attempts to handle all       */
5233
/*      possible features with relatively little concern for            */
5234
/*      efficiency.                                                     */
5235
/************************************************************************/
5236
5237
static void GWKGeneralCaseThread(void *pData)
5238
0
{
5239
0
    GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5240
0
    GDALWarpKernel *poWK = psJob->poWK;
5241
0
    const int iYMin = psJob->iYMin;
5242
0
    const int iYMax = psJob->iYMax;
5243
0
    const double dfMultFactorVerticalShiftPipeline =
5244
0
        poWK->bApplyVerticalShift
5245
0
            ? CPLAtof(CSLFetchNameValueDef(
5246
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5247
0
                  "1.0"))
5248
0
            : 0.0;
5249
5250
0
    int nDstXSize = poWK->nDstXSize;
5251
0
    int nSrcXSize = poWK->nSrcXSize;
5252
0
    int nSrcYSize = poWK->nSrcYSize;
5253
5254
    /* -------------------------------------------------------------------- */
5255
    /*      Allocate x,y,z coordinate arrays for transformation ... one     */
5256
    /*      scanlines worth of positions.                                   */
5257
    /* -------------------------------------------------------------------- */
5258
    // For x, 2 *, because we cache the precomputed values at the end.
5259
0
    double *padfX =
5260
0
        static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5261
0
    double *padfY =
5262
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5263
0
    double *padfZ =
5264
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5265
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5266
5267
0
    const bool bUse4SamplesFormula =
5268
0
        poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5269
5270
0
    GWKResampleWrkStruct *psWrkStruct = nullptr;
5271
0
    if (poWK->eResample != GRA_NearestNeighbour)
5272
0
    {
5273
0
        psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5274
0
    }
5275
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5276
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5277
0
    const double dfErrorThreshold = CPLAtof(
5278
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5279
5280
0
    const bool bOneSourceCornerFailsToReproject =
5281
0
        GWKOneSourceCornerFailsToReproject(psJob);
5282
5283
    // Precompute values.
5284
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5285
0
        padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5286
5287
    /* ==================================================================== */
5288
    /*      Loop over output lines.                                         */
5289
    /* ==================================================================== */
5290
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5291
0
    {
5292
        /* --------------------------------------------------------------------
5293
         */
5294
        /*      Setup points to transform to source image space. */
5295
        /* --------------------------------------------------------------------
5296
         */
5297
0
        memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5298
0
        const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5299
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5300
0
            padfY[iDstX] = dfY;
5301
0
        memset(padfZ, 0, sizeof(double) * nDstXSize);
5302
5303
        /* --------------------------------------------------------------------
5304
         */
5305
        /*      Transform the points from destination pixel/line coordinates */
5306
        /*      to source pixel/line coordinates. */
5307
        /* --------------------------------------------------------------------
5308
         */
5309
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5310
0
                             padfY, padfZ, pabSuccess);
5311
0
        if (dfSrcCoordPrecision > 0.0)
5312
0
        {
5313
0
            GWKRoundSourceCoordinates(
5314
0
                nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5315
0
                dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5316
0
                0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5317
0
        }
5318
5319
        /* ====================================================================
5320
         */
5321
        /*      Loop over pixels in output scanline. */
5322
        /* ====================================================================
5323
         */
5324
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5325
0
        {
5326
0
            GPtrDiff_t iSrcOffset = 0;
5327
0
            if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5328
0
                                              padfX, padfY, nSrcXSize,
5329
0
                                              nSrcYSize, iSrcOffset))
5330
0
                continue;
5331
5332
            /* --------------------------------------------------------------------
5333
             */
5334
            /*      Do not try to apply transparent/invalid source pixels to the
5335
             */
5336
            /*      destination.  This currently ignores the multi-pixel input
5337
             */
5338
            /*      of bilinear and cubic resamples. */
5339
            /* --------------------------------------------------------------------
5340
             */
5341
0
            double dfDensity = 1.0;
5342
5343
0
            if (poWK->pafUnifiedSrcDensity != nullptr)
5344
0
            {
5345
0
                dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5346
0
                if (dfDensity < SRC_DENSITY_THRESHOLD)
5347
0
                {
5348
0
                    if (!bOneSourceCornerFailsToReproject)
5349
0
                    {
5350
0
                        continue;
5351
0
                    }
5352
0
                    else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5353
0
                                 psJob, iSrcOffset))
5354
0
                    {
5355
0
                        dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5356
0
                    }
5357
0
                    else
5358
0
                    {
5359
0
                        continue;
5360
0
                    }
5361
0
                }
5362
0
            }
5363
5364
0
            if (poWK->panUnifiedSrcValid != nullptr &&
5365
0
                !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5366
0
            {
5367
0
                if (!bOneSourceCornerFailsToReproject)
5368
0
                {
5369
0
                    continue;
5370
0
                }
5371
0
                else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5372
0
                {
5373
0
                    continue;
5374
0
                }
5375
0
            }
5376
5377
            /* ====================================================================
5378
             */
5379
            /*      Loop processing each band. */
5380
            /* ====================================================================
5381
             */
5382
0
            bool bHasFoundDensity = false;
5383
5384
0
            const GPtrDiff_t iDstOffset =
5385
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5386
0
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
5387
0
            {
5388
0
                double dfBandDensity = 0.0;
5389
0
                double dfValueReal = 0.0;
5390
0
                double dfValueImag = 0.0;
5391
5392
                /* --------------------------------------------------------------------
5393
                 */
5394
                /*      Collect the source value. */
5395
                /* --------------------------------------------------------------------
5396
                 */
5397
0
                if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5398
0
                    nSrcYSize == 1)
5399
0
                {
5400
                    // FALSE is returned if dfBandDensity == 0, which is
5401
                    // checked below.
5402
0
                    CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5403
0
                        poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5404
0
                        &dfValueImag));
5405
0
                }
5406
0
                else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5407
0
                {
5408
0
                    GWKBilinearResample4Sample(
5409
0
                        poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5410
0
                        padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5411
0
                        &dfValueReal, &dfValueImag);
5412
0
                }
5413
0
                else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5414
0
                {
5415
0
                    GWKCubicResample4Sample(
5416
0
                        poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5417
0
                        padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5418
0
                        &dfValueReal, &dfValueImag);
5419
0
                }
5420
0
                else
5421
0
#ifdef DEBUG
5422
                    // Only useful for clang static analyzer.
5423
0
                    if (psWrkStruct != nullptr)
5424
0
#endif
5425
0
                    {
5426
0
                        psWrkStruct->pfnGWKResample(
5427
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5428
0
                            padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5429
0
                            &dfValueReal, &dfValueImag, psWrkStruct);
5430
0
                    }
5431
5432
                // If we didn't find any valid inputs skip to next band.
5433
0
                if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5434
0
                    continue;
5435
5436
0
                if (poWK->bApplyVerticalShift)
5437
0
                {
5438
0
                    if (!std::isfinite(padfZ[iDstX]))
5439
0
                        continue;
5440
                    // Subtract padfZ[] since the coordinate transformation is
5441
                    // from target to source
5442
0
                    dfValueReal =
5443
0
                        dfValueReal * poWK->dfMultFactorVerticalShift -
5444
0
                        padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5445
0
                }
5446
5447
0
                bHasFoundDensity = true;
5448
5449
                /* --------------------------------------------------------------------
5450
                 */
5451
                /*      We have a computed value from the source.  Now apply it
5452
                 * to      */
5453
                /*      the destination pixel. */
5454
                /* --------------------------------------------------------------------
5455
                 */
5456
0
                GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5457
0
                                 dfValueReal, dfValueImag);
5458
0
            }
5459
5460
0
            if (!bHasFoundDensity)
5461
0
                continue;
5462
5463
            /* --------------------------------------------------------------------
5464
             */
5465
            /*      Update destination density/validity masks. */
5466
            /* --------------------------------------------------------------------
5467
             */
5468
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5469
5470
0
            if (poWK->panDstValid != nullptr)
5471
0
            {
5472
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
5473
0
            }
5474
0
        } /* Next iDstX */
5475
5476
        /* --------------------------------------------------------------------
5477
         */
5478
        /*      Report progress to the user, and optionally cancel out. */
5479
        /* --------------------------------------------------------------------
5480
         */
5481
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5482
0
            break;
5483
0
    }
5484
5485
    /* -------------------------------------------------------------------- */
5486
    /*      Cleanup and return.                                             */
5487
    /* -------------------------------------------------------------------- */
5488
0
    CPLFree(padfX);
5489
0
    CPLFree(padfY);
5490
0
    CPLFree(padfZ);
5491
0
    CPLFree(pabSuccess);
5492
0
    if (psWrkStruct)
5493
0
        GWKResampleDeleteWrkStruct(psWrkStruct);
5494
0
}
5495
5496
// Entry point of the general-case warper: hands GWKGeneralCaseThread to
// GWKRun() under the label "GWKGeneralCase" and returns GWKRun()'s result.
static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
{
    const CPLErr eErr = GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    return eErr;
}
5500
5501
/************************************************************************/
5502
/*                            GWKRealCase()                             */
5503
/*                                                                      */
5504
/*      General case for non-complex data types.                        */
5505
/************************************************************************/
5506
5507
static void GWKRealCaseThread(void *pData)
5508
5509
0
{
5510
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5511
0
    GDALWarpKernel *poWK = psJob->poWK;
5512
0
    const int iYMin = psJob->iYMin;
5513
0
    const int iYMax = psJob->iYMax;
5514
5515
0
    const int nDstXSize = poWK->nDstXSize;
5516
0
    const int nSrcXSize = poWK->nSrcXSize;
5517
0
    const int nSrcYSize = poWK->nSrcYSize;
5518
0
    const double dfMultFactorVerticalShiftPipeline =
5519
0
        poWK->bApplyVerticalShift
5520
0
            ? CPLAtof(CSLFetchNameValueDef(
5521
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5522
0
                  "1.0"))
5523
0
            : 0.0;
5524
5525
    /* -------------------------------------------------------------------- */
5526
    /*      Allocate x,y,z coordinate arrays for transformation ... one     */
5527
    /*      scanlines worth of positions.                                   */
5528
    /* -------------------------------------------------------------------- */
5529
5530
    // For x, 2 *, because we cache the precomputed values at the end.
5531
0
    double *padfX =
5532
0
        static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5533
0
    double *padfY =
5534
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5535
0
    double *padfZ =
5536
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5537
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5538
5539
0
    const bool bUse4SamplesFormula =
5540
0
        poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5541
5542
0
    GWKResampleWrkStruct *psWrkStruct = nullptr;
5543
0
    if (poWK->eResample != GRA_NearestNeighbour)
5544
0
    {
5545
0
        psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5546
0
    }
5547
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5548
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5549
0
    const double dfErrorThreshold = CPLAtof(
5550
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5551
5552
0
    const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5553
0
                                   poWK->papanBandSrcValid == nullptr &&
5554
0
                                   poWK->pafUnifiedSrcDensity != nullptr;
5555
5556
0
    const bool bOneSourceCornerFailsToReproject =
5557
0
        GWKOneSourceCornerFailsToReproject(psJob);
5558
5559
    // Precompute values.
5560
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5561
0
        padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5562
5563
    /* ==================================================================== */
5564
    /*      Loop over output lines.                                         */
5565
    /* ==================================================================== */
5566
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5567
0
    {
5568
        /* --------------------------------------------------------------------
5569
         */
5570
        /*      Setup points to transform to source image space. */
5571
        /* --------------------------------------------------------------------
5572
         */
5573
0
        memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5574
0
        const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5575
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5576
0
            padfY[iDstX] = dfY;
5577
0
        memset(padfZ, 0, sizeof(double) * nDstXSize);
5578
5579
        /* --------------------------------------------------------------------
5580
         */
5581
        /*      Transform the points from destination pixel/line coordinates */
5582
        /*      to source pixel/line coordinates. */
5583
        /* --------------------------------------------------------------------
5584
         */
5585
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5586
0
                             padfY, padfZ, pabSuccess);
5587
0
        if (dfSrcCoordPrecision > 0.0)
5588
0
        {
5589
0
            GWKRoundSourceCoordinates(
5590
0
                nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5591
0
                dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5592
0
                0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5593
0
        }
5594
5595
        /* ====================================================================
5596
         */
5597
        /*      Loop over pixels in output scanline. */
5598
        /* ====================================================================
5599
         */
5600
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5601
0
        {
5602
0
            GPtrDiff_t iSrcOffset = 0;
5603
0
            if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5604
0
                                              padfX, padfY, nSrcXSize,
5605
0
                                              nSrcYSize, iSrcOffset))
5606
0
                continue;
5607
5608
            /* --------------------------------------------------------------------
5609
             */
5610
            /*      Do not try to apply transparent/invalid source pixels to the
5611
             */
5612
            /*      destination.  This currently ignores the multi-pixel input
5613
             */
5614
            /*      of bilinear and cubic resamples. */
5615
            /* --------------------------------------------------------------------
5616
             */
5617
0
            double dfDensity = 1.0;
5618
5619
0
            if (poWK->pafUnifiedSrcDensity != nullptr)
5620
0
            {
5621
0
                dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5622
0
                if (dfDensity < SRC_DENSITY_THRESHOLD)
5623
0
                {
5624
0
                    if (!bOneSourceCornerFailsToReproject)
5625
0
                    {
5626
0
                        continue;
5627
0
                    }
5628
0
                    else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5629
0
                                 psJob, iSrcOffset))
5630
0
                    {
5631
0
                        dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5632
0
                    }
5633
0
                    else
5634
0
                    {
5635
0
                        continue;
5636
0
                    }
5637
0
                }
5638
0
            }
5639
5640
0
            if (poWK->panUnifiedSrcValid != nullptr &&
5641
0
                !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5642
0
            {
5643
0
                if (!bOneSourceCornerFailsToReproject)
5644
0
                {
5645
0
                    continue;
5646
0
                }
5647
0
                else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5648
0
                {
5649
0
                    continue;
5650
0
                }
5651
0
            }
5652
5653
            /* ====================================================================
5654
             */
5655
            /*      Loop processing each band. */
5656
            /* ====================================================================
5657
             */
5658
0
            bool bHasFoundDensity = false;
5659
5660
0
            const GPtrDiff_t iDstOffset =
5661
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5662
0
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
5663
0
            {
5664
0
                double dfBandDensity = 0.0;
5665
0
                double dfValueReal = 0.0;
5666
5667
                /* --------------------------------------------------------------------
5668
                 */
5669
                /*      Collect the source value. */
5670
                /* --------------------------------------------------------------------
5671
                 */
5672
0
                if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5673
0
                    nSrcYSize == 1)
5674
0
                {
5675
                    // FALSE is returned if dfBandDensity == 0, which is
5676
                    // checked below.
5677
0
                    CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5678
0
                        poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5679
0
                }
5680
0
                else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5681
0
                {
5682
0
                    double dfValueImagIgnored = 0.0;
5683
0
                    GWKBilinearResample4Sample(
5684
0
                        poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5685
0
                        padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5686
0
                        &dfValueReal, &dfValueImagIgnored);
5687
0
                }
5688
0
                else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5689
0
                {
5690
0
                    if (bSrcMaskIsDensity)
5691
0
                    {
5692
0
                        if (poWK->eWorkingDataType == GDT_Byte)
5693
0
                        {
5694
0
                            GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
5695
0
                                poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5696
0
                                padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5697
0
                                &dfValueReal);
5698
0
                        }
5699
0
                        else if (poWK->eWorkingDataType == GDT_UInt16)
5700
0
                        {
5701
0
                            GWKCubicResampleSrcMaskIsDensity4SampleRealT<
5702
0
                                GUInt16>(poWK, iBand,
5703
0
                                         padfX[iDstX] - poWK->nSrcXOff,
5704
0
                                         padfY[iDstX] - poWK->nSrcYOff,
5705
0
                                         &dfBandDensity, &dfValueReal);
5706
0
                        }
5707
0
                        else
5708
0
                        {
5709
0
                            GWKCubicResampleSrcMaskIsDensity4SampleReal(
5710
0
                                poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5711
0
                                padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5712
0
                                &dfValueReal);
5713
0
                        }
5714
0
                    }
5715
0
                    else
5716
0
                    {
5717
0
                        double dfValueImagIgnored = 0.0;
5718
0
                        GWKCubicResample4Sample(
5719
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5720
0
                            padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5721
0
                            &dfValueReal, &dfValueImagIgnored);
5722
0
                    }
5723
0
                }
5724
0
                else
5725
0
#ifdef DEBUG
5726
                    // Only useful for clang static analyzer.
5727
0
                    if (psWrkStruct != nullptr)
5728
0
#endif
5729
0
                    {
5730
0
                        double dfValueImagIgnored = 0.0;
5731
0
                        psWrkStruct->pfnGWKResample(
5732
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5733
0
                            padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5734
0
                            &dfValueReal, &dfValueImagIgnored, psWrkStruct);
5735
0
                    }
5736
5737
                // If we didn't find any valid inputs skip to next band.
5738
0
                if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5739
0
                    continue;
5740
5741
0
                if (poWK->bApplyVerticalShift)
5742
0
                {
5743
0
                    if (!std::isfinite(padfZ[iDstX]))
5744
0
                        continue;
5745
                    // Subtract padfZ[] since the coordinate transformation is
5746
                    // from target to source
5747
0
                    dfValueReal =
5748
0
                        dfValueReal * poWK->dfMultFactorVerticalShift -
5749
0
                        padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5750
0
                }
5751
5752
0
                bHasFoundDensity = true;
5753
5754
                /* --------------------------------------------------------------------
5755
                 */
5756
                /*      We have a computed value from the source.  Now apply it
5757
                 * to      */
5758
                /*      the destination pixel. */
5759
                /* --------------------------------------------------------------------
5760
                 */
5761
0
                GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
5762
0
                                     dfValueReal);
5763
0
            }
5764
5765
0
            if (!bHasFoundDensity)
5766
0
                continue;
5767
5768
            /* --------------------------------------------------------------------
5769
             */
5770
            /*      Update destination density/validity masks. */
5771
            /* --------------------------------------------------------------------
5772
             */
5773
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5774
5775
0
            if (poWK->panDstValid != nullptr)
5776
0
            {
5777
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
5778
0
            }
5779
0
        }  // Next iDstX.
5780
5781
        /* --------------------------------------------------------------------
5782
         */
5783
        /*      Report progress to the user, and optionally cancel out. */
5784
        /* --------------------------------------------------------------------
5785
         */
5786
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5787
0
            break;
5788
0
    }
5789
5790
    /* -------------------------------------------------------------------- */
5791
    /*      Cleanup and return.                                             */
5792
    /* -------------------------------------------------------------------- */
5793
0
    CPLFree(padfX);
5794
0
    CPLFree(padfY);
5795
0
    CPLFree(padfZ);
5796
0
    CPLFree(pabSuccess);
5797
0
    if (psWrkStruct)
5798
0
        GWKResampleDeleteWrkStruct(psWrkStruct);
5799
0
}
5800
5801
// Entry point of the warper for non-complex data types: hands
// GWKRealCaseThread to GWKRun() under the label "GWKRealCase" and returns
// GWKRun()'s result.
static CPLErr GWKRealCase(GDALWarpKernel *poWK)
{
    const CPLErr eErr = GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    return eErr;
}
5805
5806
/************************************************************************/
5807
/*                 GWKCubicResampleNoMasks4MultiBandT()                 */
5808
/************************************************************************/
5809
5810
/* We restrict to 64bit processors because they are guaranteed to have SSE2 */
5811
/* and enough SSE registers */
5812
#if defined(USE_SSE2)
5813
5814
static inline float Convolute4x4(const __m128 row0, const __m128 row1,
5815
                                 const __m128 row2, const __m128 row3,
5816
                                 const __m128 weightsXY0,
5817
                                 const __m128 weightsXY1,
5818
                                 const __m128 weightsXY2,
5819
                                 const __m128 weightsXY3)
5820
0
{
5821
0
    return XMMHorizontalAdd(_mm_add_ps(
5822
0
        _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
5823
0
        _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
5824
0
                   _mm_mul_ps(row3, weightsXY3))));
5825
0
}
5826
5827
template <class T>
5828
static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
5829
                                               double dfSrcX, double dfSrcY,
5830
                                               const GPtrDiff_t iDstOffset)
5831
0
{
5832
0
    const double dfSrcXShifted = dfSrcX - 0.5;
5833
0
    const int iSrcX = static_cast<int>(dfSrcXShifted);
5834
0
    const double dfSrcYShifted = dfSrcY - 0.5;
5835
0
    const int iSrcY = static_cast<int>(dfSrcYShifted);
5836
0
    const GPtrDiff_t iSrcOffset =
5837
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
5838
5839
    // Get the bilinear interpolation at the image borders.
5840
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
5841
0
        iSrcY + 2 >= poWK->nSrcYSize)
5842
0
    {
5843
0
        for (int iBand = 0; iBand < poWK->nBands; iBand++)
5844
0
        {
5845
0
            T value;
5846
0
            GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
5847
0
                                               &value);
5848
0
            reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
5849
0
                value;
5850
0
        }
5851
0
    }
5852
0
    else
5853
0
    {
5854
0
        const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
5855
0
        const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
5856
5857
0
        float afCoeffsX[4];
5858
0
        float afCoeffsY[4];
5859
0
        GWKCubicComputeWeights(fDeltaX, afCoeffsX);
5860
0
        GWKCubicComputeWeights(fDeltaY, afCoeffsY);
5861
0
        const auto weightsX = _mm_loadu_ps(afCoeffsX);
5862
0
        const auto weightsXY0 =
5863
0
            _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
5864
0
        const auto weightsXY1 =
5865
0
            _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
5866
0
        const auto weightsXY2 =
5867
0
            _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
5868
0
        const auto weightsXY3 =
5869
0
            _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
5870
5871
0
        const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
5872
5873
0
        int iBand = 0;
5874
        // Process 2 bands at a time
5875
0
        for (; iBand + 1 < poWK->nBands; iBand += 2)
5876
0
        {
5877
0
            const T *CPL_RESTRICT pBand0 =
5878
0
                reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5879
0
            const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
5880
0
            const auto row1_0 =
5881
0
                XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
5882
0
            const auto row2_0 =
5883
0
                XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
5884
0
            const auto row3_0 =
5885
0
                XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
5886
5887
0
            const T *CPL_RESTRICT pBand1 =
5888
0
                reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
5889
0
            const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
5890
0
            const auto row1_1 =
5891
0
                XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
5892
0
            const auto row2_1 =
5893
0
                XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
5894
0
            const auto row3_1 =
5895
0
                XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
5896
5897
0
            const float fValue_0 =
5898
0
                Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
5899
0
                             weightsXY1, weightsXY2, weightsXY3);
5900
5901
0
            const float fValue_1 =
5902
0
                Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
5903
0
                             weightsXY1, weightsXY2, weightsXY3);
5904
5905
0
            T *CPL_RESTRICT pDstBand0 =
5906
0
                reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
5907
0
            pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
5908
5909
0
            T *CPL_RESTRICT pDstBand1 =
5910
0
                reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
5911
0
            pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
5912
0
        }
5913
0
        if (iBand < poWK->nBands)
5914
0
        {
5915
0
            const T *CPL_RESTRICT pBand0 =
5916
0
                reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5917
0
            const auto row0 = XMMLoad4Values(pBand0 + iOffset);
5918
0
            const auto row1 =
5919
0
                XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
5920
0
            const auto row2 =
5921
0
                XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
5922
0
            const auto row3 =
5923
0
                XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
5924
5925
0
            const float fValue =
5926
0
                Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
5927
0
                             weightsXY2, weightsXY3);
5928
5929
0
            T *CPL_RESTRICT pDstBand =
5930
0
                reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
5931
0
            pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
5932
0
        }
5933
0
    }
5934
5935
0
    if (poWK->pafDstDensity)
5936
0
        poWK->pafDstDensity[iDstOffset] = 1.0f;
5937
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKCubicResampleNoMasks4MultiBandT<unsigned char>(GDALWarpKernel const*, double, double, long long)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKCubicResampleNoMasks4MultiBandT<unsigned short>(GDALWarpKernel const*, double, double, long long)
5938
5939
#endif  // defined(USE_SSE2)
5940
5941
/************************************************************************/
5942
/*                GWKResampleNoMasksOrDstDensityOnlyThreadInternal()    */
5943
/************************************************************************/
5944
5945
template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
5946
static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
5947
5948
0
{
5949
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5950
0
    GDALWarpKernel *poWK = psJob->poWK;
5951
0
    const int iYMin = psJob->iYMin;
5952
0
    const int iYMax = psJob->iYMax;
5953
0
    const double dfMultFactorVerticalShiftPipeline =
5954
0
        poWK->bApplyVerticalShift
5955
0
            ? CPLAtof(CSLFetchNameValueDef(
5956
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5957
0
                  "1.0"))
5958
0
            : 0.0;
5959
5960
0
    const int nDstXSize = poWK->nDstXSize;
5961
0
    const int nSrcXSize = poWK->nSrcXSize;
5962
0
    const int nSrcYSize = poWK->nSrcYSize;
5963
5964
    /* -------------------------------------------------------------------- */
5965
    /*      Allocate x,y,z coordinate arrays for transformation ... one     */
5966
    /*      scanlines worth of positions.                                   */
5967
    /* -------------------------------------------------------------------- */
5968
5969
    // For x, 2 *, because we cache the precomputed values at the end.
5970
0
    double *padfX =
5971
0
        static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5972
0
    double *padfY =
5973
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5974
0
    double *padfZ =
5975
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5976
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5977
5978
0
    const int nXRadius = poWK->nXRadius;
5979
0
    double *padfWeightsX =
5980
0
        static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
5981
0
    double *padfWeightsY = static_cast<double *>(
5982
0
        CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
5983
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5984
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5985
0
    const double dfErrorThreshold = CPLAtof(
5986
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5987
5988
    // Precompute values.
5989
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5990
0
        padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5991
5992
    /* ==================================================================== */
5993
    /*      Loop over output lines.                                         */
5994
    /* ==================================================================== */
5995
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5996
0
    {
5997
        /* --------------------------------------------------------------------
5998
         */
5999
        /*      Setup points to transform to source image space. */
6000
        /* --------------------------------------------------------------------
6001
         */
6002
0
        memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6003
0
        const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6004
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6005
0
            padfY[iDstX] = dfY;
6006
0
        memset(padfZ, 0, sizeof(double) * nDstXSize);
6007
6008
        /* --------------------------------------------------------------------
6009
         */
6010
        /*      Transform the points from destination pixel/line coordinates */
6011
        /*      to source pixel/line coordinates. */
6012
        /* --------------------------------------------------------------------
6013
         */
6014
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6015
0
                             padfY, padfZ, pabSuccess);
6016
0
        if (dfSrcCoordPrecision > 0.0)
6017
0
        {
6018
0
            GWKRoundSourceCoordinates(
6019
0
                nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6020
0
                dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6021
0
                0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6022
0
        }
6023
6024
        /* ====================================================================
6025
         */
6026
        /*      Loop over pixels in output scanline. */
6027
        /* ====================================================================
6028
         */
6029
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6030
0
        {
6031
0
            GPtrDiff_t iSrcOffset = 0;
6032
0
            if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6033
0
                                              padfX, padfY, nSrcXSize,
6034
0
                                              nSrcYSize, iSrcOffset))
6035
0
                continue;
6036
6037
            /* ====================================================================
6038
             */
6039
            /*      Loop processing each band. */
6040
            /* ====================================================================
6041
             */
6042
0
            const GPtrDiff_t iDstOffset =
6043
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6044
6045
0
#if defined(USE_SSE2)
6046
            if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6047
                          (std::is_same<T, GByte>::value ||
6048
                           std::is_same<T, GUInt16>::value))
6049
0
            {
6050
0
                if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6051
0
                {
6052
0
                    GWKCubicResampleNoMasks4MultiBandT<T>(
6053
0
                        poWK, padfX[iDstX] - poWK->nSrcXOff,
6054
0
                        padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6055
6056
0
                    continue;
6057
0
                }
6058
0
            }
6059
0
#endif  // defined(USE_SSE2)
6060
6061
0
            [[maybe_unused]] double dfInvWeights = 0;
6062
0
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
6063
0
            {
6064
0
                T value = 0;
6065
                if constexpr (eResample == GRA_NearestNeighbour)
6066
0
                {
6067
0
                    value = reinterpret_cast<T *>(
6068
0
                        poWK->papabySrcImage[iBand])[iSrcOffset];
6069
                }
6070
                else if constexpr (bUse4SamplesFormula)
6071
0
                {
6072
                    if constexpr (eResample == GRA_Bilinear)
6073
0
                        GWKBilinearResampleNoMasks4SampleT(
6074
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6075
                            padfY[iDstX] - poWK->nSrcYOff, &value);
6076
                    else
6077
0
                        GWKCubicResampleNoMasks4SampleT(
6078
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6079
0
                            padfY[iDstX] - poWK->nSrcYOff, &value);
6080
                }
6081
                else
6082
0
                {
6083
0
                    GWKResampleNoMasksT(
6084
0
                        poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6085
0
                        padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6086
0
                        padfWeightsY, dfInvWeights);
6087
0
                }
6088
6089
0
                if (poWK->bApplyVerticalShift)
6090
0
                {
6091
0
                    if (!std::isfinite(padfZ[iDstX]))
6092
0
                        continue;
6093
                    // Subtract padfZ[] since the coordinate transformation is
6094
                    // from target to source
6095
0
                    value = GWKClampValueT<T>(
6096
0
                        value * poWK->dfMultFactorVerticalShift -
6097
0
                        padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6098
0
                }
6099
6100
0
                if (poWK->pafDstDensity)
6101
0
                    poWK->pafDstDensity[iDstOffset] = 1.0f;
6102
6103
0
                reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6104
0
                    value;
6105
0
            }
6106
0
        }
6107
6108
        /* --------------------------------------------------------------------
6109
         */
6110
        /*      Report progress to the user, and optionally cancel out. */
6111
        /* --------------------------------------------------------------------
6112
         */
6113
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6114
0
            break;
6115
0
    }
6116
6117
    /* -------------------------------------------------------------------- */
6118
    /*      Cleanup and return.                                             */
6119
    /* -------------------------------------------------------------------- */
6120
0
    CPLFree(padfX);
6121
0
    CPLFree(padfY);
6122
0
    CPLFree(padfZ);
6123
0
    CPLFree(pabSuccess);
6124
0
    CPLFree(padfWeightsX);
6125
0
    CPLFree(padfWeightsY);
6126
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)0, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)1, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)1, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)2, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)2, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)2, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)2, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)3, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)0, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)1, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)1, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)1, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)1, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)1, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)1, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)2, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)2, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)2, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)2, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)3, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)3, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)0, 0>(void*)
6127
6128
template <class T, GDALResampleAlg eResample>
6129
static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6130
0
{
6131
0
    GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6132
0
        pData);
6133
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<unsigned char, (GDALResampleAlg)0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<unsigned char, (GDALResampleAlg)3>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<short, (GDALResampleAlg)0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<short, (GDALResampleAlg)3>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<unsigned short, (GDALResampleAlg)3>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<float, (GDALResampleAlg)0>(void*)
6134
6135
template <class T, GDALResampleAlg eResample>
6136
static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6137
6138
0
{
6139
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6140
0
    GDALWarpKernel *poWK = psJob->poWK;
6141
0
    static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6142
0
    const bool bUse4SamplesFormula =
6143
0
        poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
6144
0
    if (bUse4SamplesFormula)
6145
0
        GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6146
0
            pData);
6147
0
    else
6148
0
        GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6149
0
            pData);
6150
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<unsigned char, (GDALResampleAlg)1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<unsigned char, (GDALResampleAlg)2>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, (GDALResampleAlg)2>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<short, (GDALResampleAlg)1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<unsigned short, (GDALResampleAlg)1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, (GDALResampleAlg)1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<short, (GDALResampleAlg)2>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<unsigned short, (GDALResampleAlg)2>(void*)
6151
6152
// Runs the GByte / nearest neighbour instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThread through GWKRun().
static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>;
    return GWKRun(poWK, "GWKNearestNoMasksOrDstDensityOnlyByte", pfnWorker);
}
6158
6159
// Runs the GByte / bilinear instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun().
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
                                                           GRA_Bilinear>;
    return GWKRun(poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte", pfnWorker);
}
6166
6167
// Runs the GByte / cubic instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun().
static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>;
    return GWKRun(poWK, "GWKCubicNoMasksOrDstDensityOnlyByte", pfnWorker);
}
6173
6174
// Runs the float / cubic instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun().
static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>;
    return GWKRun(poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat", pfnWorker);
}
6180
6181
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6182
6183
// Runs the double / cubic instantiation of
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread through GWKRun().
static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>;
    return GWKRun(poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble", pfnWorker);
}
6189
#endif
6190
6191
// Runs the GByte / cubic spline instantiation of
// GWKResampleNoMasksOrDstDensityOnlyThread through GWKRun().
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>;
    return GWKRun(poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
                  pfnWorker);
}
6197
6198
/************************************************************************/
6199
/*                          GWKNearestByte()                            */
6200
/*                                                                      */
6201
/*      Case for 8bit input data with nearest neighbour resampling      */
6202
/*      using valid flags. Should be as fast as possible for this       */
6203
/*      particular transformation type.                                 */
6204
/************************************************************************/
6205
6206
template <class T> static void GWKNearestThread(void *pData)
6207
6208
0
{
6209
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6210
0
    GDALWarpKernel *poWK = psJob->poWK;
6211
0
    const int iYMin = psJob->iYMin;
6212
0
    const int iYMax = psJob->iYMax;
6213
0
    const double dfMultFactorVerticalShiftPipeline =
6214
0
        poWK->bApplyVerticalShift
6215
0
            ? CPLAtof(CSLFetchNameValueDef(
6216
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6217
0
                  "1.0"))
6218
0
            : 0.0;
6219
6220
0
    const int nDstXSize = poWK->nDstXSize;
6221
0
    const int nSrcXSize = poWK->nSrcXSize;
6222
0
    const int nSrcYSize = poWK->nSrcYSize;
6223
6224
    /* -------------------------------------------------------------------- */
6225
    /*      Allocate x,y,z coordinate arrays for transformation ... one     */
6226
    /*      scanlines worth of positions.                                   */
6227
    /* -------------------------------------------------------------------- */
6228
6229
    // For x, 2 *, because we cache the precomputed values at the end.
6230
0
    double *padfX =
6231
0
        static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6232
0
    double *padfY =
6233
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6234
0
    double *padfZ =
6235
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6236
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6237
6238
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6239
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6240
0
    const double dfErrorThreshold = CPLAtof(
6241
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6242
6243
0
    const bool bOneSourceCornerFailsToReproject =
6244
0
        GWKOneSourceCornerFailsToReproject(psJob);
6245
6246
    // Precompute values.
6247
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6248
0
        padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6249
6250
    /* ==================================================================== */
6251
    /*      Loop over output lines.                                         */
6252
    /* ==================================================================== */
6253
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6254
0
    {
6255
6256
        /* --------------------------------------------------------------------
6257
         */
6258
        /*      Setup points to transform to source image space. */
6259
        /* --------------------------------------------------------------------
6260
         */
6261
0
        memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6262
0
        const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6263
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6264
0
            padfY[iDstX] = dfY;
6265
0
        memset(padfZ, 0, sizeof(double) * nDstXSize);
6266
6267
        /* --------------------------------------------------------------------
6268
         */
6269
        /*      Transform the points from destination pixel/line coordinates */
6270
        /*      to source pixel/line coordinates. */
6271
        /* --------------------------------------------------------------------
6272
         */
6273
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6274
0
                             padfY, padfZ, pabSuccess);
6275
0
        if (dfSrcCoordPrecision > 0.0)
6276
0
        {
6277
0
            GWKRoundSourceCoordinates(
6278
0
                nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6279
0
                dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6280
0
                0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6281
0
        }
6282
        /* ====================================================================
6283
         */
6284
        /*      Loop over pixels in output scanline. */
6285
        /* ====================================================================
6286
         */
6287
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6288
0
        {
6289
0
            GPtrDiff_t iSrcOffset = 0;
6290
0
            if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6291
0
                                              padfX, padfY, nSrcXSize,
6292
0
                                              nSrcYSize, iSrcOffset))
6293
0
                continue;
6294
6295
            /* --------------------------------------------------------------------
6296
             */
6297
            /*      Do not try to apply invalid source pixels to the dest. */
6298
            /* --------------------------------------------------------------------
6299
             */
6300
0
            if (poWK->panUnifiedSrcValid != nullptr &&
6301
0
                !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6302
0
            {
6303
0
                if (!bOneSourceCornerFailsToReproject)
6304
0
                {
6305
0
                    continue;
6306
0
                }
6307
0
                else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6308
0
                {
6309
0
                    continue;
6310
0
                }
6311
0
            }
6312
6313
            /* --------------------------------------------------------------------
6314
             */
6315
            /*      Do not try to apply transparent source pixels to the
6316
             * destination.*/
6317
            /* --------------------------------------------------------------------
6318
             */
6319
0
            double dfDensity = 1.0;
6320
6321
0
            if (poWK->pafUnifiedSrcDensity != nullptr)
6322
0
            {
6323
0
                dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
6324
0
                if (dfDensity < SRC_DENSITY_THRESHOLD)
6325
0
                    continue;
6326
0
            }
6327
6328
            /* ====================================================================
6329
             */
6330
            /*      Loop processing each band. */
6331
            /* ====================================================================
6332
             */
6333
6334
0
            const GPtrDiff_t iDstOffset =
6335
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6336
6337
0
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
6338
0
            {
6339
0
                T value = 0;
6340
0
                double dfBandDensity = 0.0;
6341
6342
                /* --------------------------------------------------------------------
6343
                 */
6344
                /*      Collect the source value. */
6345
                /* --------------------------------------------------------------------
6346
                 */
6347
0
                if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6348
0
                                 &value))
6349
0
                {
6350
6351
0
                    if (poWK->bApplyVerticalShift)
6352
0
                    {
6353
0
                        if (!std::isfinite(padfZ[iDstX]))
6354
0
                            continue;
6355
                        // Subtract padfZ[] since the coordinate transformation
6356
                        // is from target to source
6357
0
                        value = GWKClampValueT<T>(
6358
0
                            value * poWK->dfMultFactorVerticalShift -
6359
0
                            padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6360
0
                    }
6361
6362
0
                    GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6363
0
                                          dfBandDensity, value);
6364
0
                }
6365
0
            }
6366
6367
            /* --------------------------------------------------------------------
6368
             */
6369
            /*      Mark this pixel valid/opaque in the output. */
6370
            /* --------------------------------------------------------------------
6371
             */
6372
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6373
6374
0
            if (poWK->panDstValid != nullptr)
6375
0
            {
6376
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
6377
0
            }
6378
0
        } /* Next iDstX */
6379
6380
        /* --------------------------------------------------------------------
6381
         */
6382
        /*      Report progress to the user, and optionally cancel out. */
6383
        /* --------------------------------------------------------------------
6384
         */
6385
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6386
0
            break;
6387
0
    }
6388
6389
    /* -------------------------------------------------------------------- */
6390
    /*      Cleanup and return.                                             */
6391
    /* -------------------------------------------------------------------- */
6392
0
    CPLFree(padfX);
6393
0
    CPLFree(padfY);
6394
0
    CPLFree(padfZ);
6395
0
    CPLFree(pabSuccess);
6396
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKNearestThread<unsigned char>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKNearestThread<short>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKNearestThread<unsigned short>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKNearestThread<float>(void*)
6397
6398
// Entry point that runs the GWKNearestThread worker instantiated for GByte.
// Hands poWK, the label "GWKNearestByte" and the worker function to GWKRun()
// and returns GWKRun()'s CPLErr unchanged.
// NOTE(review): GWKRun() is defined outside this excerpt; how it schedules
// the worker is not visible here.
static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6399
0
{
6400
0
    return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6401
0
}
6402
6403
// Entry point for GRA_NearestNeighbour resampling of GInt16 data using the
// GWKResampleNoMasksOrDstDensityOnlyThread worker.
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6404
0
{
6405
0
    return GWKRun(
6406
0
        poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6407
0
        GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6408
0
}
6409
6410
// Entry point for GRA_Bilinear resampling of GInt16 data using the
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread worker.
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6411
0
{
6412
0
    return GWKRun(
6413
0
        poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6414
0
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6415
0
                                                           GRA_Bilinear>);
6416
0
}
6417
6418
// Entry point for GRA_Bilinear resampling of GUInt16 data using the
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread worker.
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6419
0
{
6420
0
    return GWKRun(
6421
0
        poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6422
0
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6423
0
                                                           GRA_Bilinear>);
6424
0
}
6425
6426
// Entry point for GRA_Bilinear resampling of float data using the
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread worker.
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6427
0
{
6428
0
    return GWKRun(
6429
0
        poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6430
0
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6431
0
                                                           GRA_Bilinear>);
6432
0
}
6433
6434
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6435
6436
// Entry point for GRA_Bilinear resampling of double data using the
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread worker.
// Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined (see the
// enclosing #ifdef).
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6437
{
6438
    return GWKRun(
6439
        poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6440
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6441
                                                           GRA_Bilinear>);
6442
}
6443
#endif
6444
6445
// Entry point for GRA_Cubic resampling of GInt16 data using the
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread worker.
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6446
0
{
6447
0
    return GWKRun(
6448
0
        poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6449
0
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6450
0
}
6451
6452
// Entry point for GRA_Cubic resampling of GUInt16 data using the
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread worker.
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6453
0
{
6454
0
    return GWKRun(
6455
0
        poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6456
0
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6457
0
}
6458
6459
// Entry point for GRA_CubicSpline resampling of GInt16 data. Unlike the
// bilinear/cubic wrappers, this one uses the
// GWKResampleNoMasksOrDstDensityOnlyThread worker (not the Has4Sample one).
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6460
0
{
6461
0
    return GWKRun(
6462
0
        poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6463
0
        GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6464
0
}
6465
6466
// Entry point for GRA_CubicSpline resampling of GUInt16 data. Unlike the
// bilinear/cubic wrappers, this one uses the
// GWKResampleNoMasksOrDstDensityOnlyThread worker (not the Has4Sample one).
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6467
0
{
6468
0
    return GWKRun(
6469
0
        poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6470
0
        GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6471
0
}
6472
6473
// Entry point that runs the GWKNearestThread worker instantiated for GInt16.
// Passes poWK, the label "GWKNearestShort" and the worker to GWKRun() and
// returns its CPLErr unchanged.
static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6474
0
{
6475
0
    return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6476
0
}
6477
6478
// Entry point that runs the GWKNearestThread worker instantiated for GUInt16.
// Passes poWK, the label "GWKNearestUnsignedShort" and the worker to GWKRun()
// and returns its CPLErr unchanged.
static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6479
0
{
6480
0
    return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6481
0
}
6482
6483
// Entry point for GRA_NearestNeighbour resampling of float data using the
// GWKResampleNoMasksOrDstDensityOnlyThread worker.
// Passes poWK, the label string and the worker to GWKRun() and returns its
// CPLErr unchanged.
static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6484
0
{
6485
0
    return GWKRun(
6486
0
        poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6487
0
        GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6488
0
}
6489
6490
// Entry point that runs the GWKNearestThread worker instantiated for float.
// Passes poWK, the label "GWKNearestFloat" and the worker to GWKRun() and
// returns its CPLErr unchanged.
static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6491
0
{
6492
0
    return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6493
0
}
6494
6495
/************************************************************************/
6496
/*                           GWKAverageOrMode()                         */
6497
/*                                                                      */
6498
/************************************************************************/
6499
6500
// Forward declaration: the worker is defined after the wrapper below, which
// needs its address.
static void GWKAverageOrModeThread(void *pData);
6501
6502
// Entry point for the worker that handles GRA_Average, GRA_RMS, GRA_Mode,
// GRA_Max, GRA_Min, GRA_Med, GRA_Q1 and GRA_Q3 (the worker picks the
// algorithm itself from poWK->eResample).
// Passes poWK, the label "GWKAverageOrMode" and the worker to GWKRun() and
// returns its CPLErr unchanged.
static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6503
0
{
6504
0
    return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6505
0
}
6506
6507
// Overall logic based on GWKGeneralCaseThread().
6508
static void GWKAverageOrModeThread(void *pData)
6509
0
{
6510
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6511
0
    GDALWarpKernel *poWK = psJob->poWK;
6512
0
    const int iYMin = psJob->iYMin;
6513
0
    const int iYMax = psJob->iYMax;
6514
0
    const double dfMultFactorVerticalShiftPipeline =
6515
0
        poWK->bApplyVerticalShift
6516
0
            ? CPLAtof(CSLFetchNameValueDef(
6517
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6518
0
                  "1.0"))
6519
0
            : 0.0;
6520
6521
0
    const int nDstXSize = poWK->nDstXSize;
6522
0
    const int nSrcXSize = poWK->nSrcXSize;
6523
0
    const int nSrcYSize = poWK->nSrcYSize;
6524
6525
    /* -------------------------------------------------------------------- */
6526
    /*      Find out which algorithm to use (small optim.)                  */
6527
    /* -------------------------------------------------------------------- */
6528
0
    int nAlgo = 0;
6529
6530
    // Only used for GRA_Mode
6531
0
    float *pafRealVals = nullptr;
6532
0
    float *pafCounts = nullptr;
6533
0
    int nBins = 0;
6534
0
    int nBinsOffset = 0;
6535
0
    const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
6536
6537
    // Only used with nAlgo == GWKAOM_Quant (GRA_Med, GRA_Q1, GRA_Q3).
6538
0
    float quant = 0.5;
6539
6540
    // To control array allocation only when data type is complex
6541
0
    const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
6542
6543
0
    if (poWK->eResample == GRA_Average)
6544
0
    {
6545
0
        nAlgo = GWKAOM_Average;
6546
0
    }
6547
0
    else if (poWK->eResample == GRA_RMS)
6548
0
    {
6549
0
        nAlgo = GWKAOM_RMS;
6550
0
    }
6551
0
    else if (poWK->eResample == GRA_Mode)
6552
0
    {
6553
        // TODO check color table count > 256.
6554
0
        if (poWK->eWorkingDataType == GDT_Byte ||
6555
0
            poWK->eWorkingDataType == GDT_UInt16 ||
6556
0
            poWK->eWorkingDataType == GDT_Int16)
6557
0
        {
6558
0
            nAlgo = GWKAOM_Imode;
6559
6560
            // In the case of a paletted or non-paletted byte band,
6561
            // Input values are between 0 and 255.
6562
0
            if (poWK->eWorkingDataType == GDT_Byte)
6563
0
            {
6564
0
                nBins = 256;
6565
0
            }
6566
            // In the case of Int8, input values are between -128 and 127.
6567
0
            else if (poWK->eWorkingDataType == GDT_Int8)
6568
0
            {
6569
0
                nBins = 256;
6570
0
                nBinsOffset = 128;
6571
0
            }
6572
            // In the case of Int16, input values are between -32768 and 32767.
6573
0
            else if (poWK->eWorkingDataType == GDT_Int16)
6574
0
            {
6575
0
                nBins = 65536;
6576
0
                nBinsOffset = 32768;
6577
0
            }
6578
            // In the case of UInt16, input values are between 0 and 65535.
6579
0
            else if (poWK->eWorkingDataType == GDT_UInt16)
6580
0
            {
6581
0
                nBins = 65536;
6582
0
            }
6583
0
            pafCounts =
6584
0
                static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
6585
0
            if (pafCounts == nullptr)
6586
0
                return;
6587
0
        }
6588
0
        else
6589
0
        {
6590
0
            nAlgo = GWKAOM_Fmode;
6591
6592
0
            if (nSrcXSize > 0 && nSrcYSize > 0)
6593
0
            {
6594
0
                pafRealVals = static_cast<float *>(
6595
0
                    VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6596
0
                pafCounts = static_cast<float *>(
6597
0
                    VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6598
0
                if (pafRealVals == nullptr || pafCounts == nullptr)
6599
0
                {
6600
0
                    VSIFree(pafRealVals);
6601
0
                    VSIFree(pafCounts);
6602
0
                    return;
6603
0
                }
6604
0
            }
6605
0
        }
6606
0
    }
6607
0
    else if (poWK->eResample == GRA_Max)
6608
0
    {
6609
0
        nAlgo = GWKAOM_Max;
6610
0
    }
6611
0
    else if (poWK->eResample == GRA_Min)
6612
0
    {
6613
0
        nAlgo = GWKAOM_Min;
6614
0
    }
6615
0
    else if (poWK->eResample == GRA_Med)
6616
0
    {
6617
0
        nAlgo = GWKAOM_Quant;
6618
0
        quant = 0.5;
6619
0
    }
6620
0
    else if (poWK->eResample == GRA_Q1)
6621
0
    {
6622
0
        nAlgo = GWKAOM_Quant;
6623
0
        quant = 0.25;
6624
0
    }
6625
0
    else if (poWK->eResample == GRA_Q3)
6626
0
    {
6627
0
        nAlgo = GWKAOM_Quant;
6628
0
        quant = 0.75;
6629
0
    }
6630
#ifdef disabled
6631
    else if (poWK->eResample == GRA_Sum)
6632
    {
6633
        nAlgo = GWKAOM_Sum;
6634
    }
6635
#endif
6636
0
    else
6637
0
    {
6638
        // Other resample algorithms not permitted here.
6639
0
        CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
6640
0
                         "illegal resample");
6641
0
        return;
6642
0
    }
6643
6644
0
    CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
6645
0
             nAlgo);
6646
6647
    /* -------------------------------------------------------------------- */
6648
    /*      Allocate x,y,z coordinate arrays for transformation ... two     */
6649
    /*      scanlines worth of positions.                                   */
6650
    /* -------------------------------------------------------------------- */
6651
6652
0
    double *padfX =
6653
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6654
0
    double *padfY =
6655
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6656
0
    double *padfZ =
6657
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6658
0
    double *padfX2 =
6659
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6660
0
    double *padfY2 =
6661
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6662
0
    double *padfZ2 =
6663
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6664
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6665
0
    int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6666
6667
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6668
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6669
0
    const double dfErrorThreshold = CPLAtof(
6670
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6671
6672
0
    const double dfExcludedValuesThreshold =
6673
0
        CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
6674
0
                                     "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
6675
0
        100.0;
6676
0
    const double dfNodataValuesThreshold =
6677
0
        CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
6678
0
                                     "NODATA_VALUES_PCT_THRESHOLD", "100")) /
6679
0
        100.0;
6680
6681
0
    const int nXMargin =
6682
0
        2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
6683
0
    const int nYMargin =
6684
0
        2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
6685
6686
    /* ==================================================================== */
6687
    /*      Loop over output lines.                                         */
6688
    /* ==================================================================== */
6689
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6690
0
    {
6691
6692
        /* --------------------------------------------------------------------
6693
         */
6694
        /*      Setup points to transform to source image space. */
6695
        /* --------------------------------------------------------------------
6696
         */
6697
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6698
0
        {
6699
0
            padfX[iDstX] = iDstX + poWK->nDstXOff;
6700
0
            padfY[iDstX] = iDstY + poWK->nDstYOff;
6701
0
            padfZ[iDstX] = 0.0;
6702
0
            padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6703
0
            padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6704
0
            padfZ2[iDstX] = 0.0;
6705
0
        }
6706
6707
        /* --------------------------------------------------------------------
6708
         */
6709
        /*      Transform the points from destination pixel/line coordinates */
6710
        /*      to source pixel/line coordinates. */
6711
        /* --------------------------------------------------------------------
6712
         */
6713
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6714
0
                             padfY, padfZ, pabSuccess);
6715
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6716
0
                             padfY2, padfZ2, pabSuccess2);
6717
6718
0
        if (dfSrcCoordPrecision > 0.0)
6719
0
        {
6720
0
            GWKRoundSourceCoordinates(
6721
0
                nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6722
0
                dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6723
0
                poWK->nDstXOff, iDstY + poWK->nDstYOff);
6724
0
            GWKRoundSourceCoordinates(
6725
0
                nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
6726
0
                dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
6727
0
                psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
6728
0
                iDstY + 1.0 + poWK->nDstYOff);
6729
0
        }
6730
6731
        /* ====================================================================
6732
         */
6733
        /*      Loop over pixels in output scanline. */
6734
        /* ====================================================================
6735
         */
6736
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6737
0
        {
6738
0
            GPtrDiff_t iSrcOffset = 0;
6739
0
            double dfDensity = 1.0;
6740
0
            bool bHasFoundDensity = false;
6741
6742
0
            if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
6743
0
                continue;
6744
6745
            // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
6746
            // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
6747
0
            if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6748
0
                  padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6749
0
                  padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6750
0
                  padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6751
0
                  padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6752
0
                  padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6753
0
                  padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
6754
0
                  padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
6755
0
            {
6756
0
                continue;
6757
0
            }
6758
6759
0
            const GPtrDiff_t iDstOffset =
6760
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6761
6762
            // Compute corners in source crs.
6763
6764
            // The transformation might not have preserved ordering of
6765
            // coordinates so do the necessary swapping (#5433).
6766
            // NOTE: this is really an approximative fix. To do something
6767
            // more precise we would for example need to compute the
6768
            // transformation of coordinates in the
6769
            // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
6770
            // coordinates, and take the bounding box of the resulting source
6771
            // coordinates.
6772
6773
0
            if (padfX[iDstX] > padfX2[iDstX])
6774
0
                std::swap(padfX[iDstX], padfX2[iDstX]);
6775
6776
            // Detect situations where the target pixel is close to the
6777
            // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
6778
            // close to the left-most and right-most columns of the source
6779
            // raster. The 2 value below was experimentally determined to
6780
            // avoid false-positives and false-negatives.
6781
            // Addresses https://github.com/OSGeo/gdal/issues/6478
6782
0
            bool bWrapOverX = false;
6783
0
            const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
6784
0
            if (poWK->nSrcXOff == 0 &&
6785
0
                padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
6786
0
                (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
6787
0
                    nThresholdWrapOverX)
6788
0
            {
6789
                // Check there is a discontinuity by checking at mid-pixel.
6790
                // NOTE: all this remains fragile. To confidently
6791
                // detect antimeridian warping we should probably try to access
6792
                // georeferenced coordinates, and not rely only on tests on
6793
                // image space coordinates. But accessing georeferenced
6794
                // coordinates from here is not trivial, and we would for example
6795
                // have to handle both geographic, Mercator, etc.
6796
                // Let's hope this heuristic is good enough for now.
6797
0
                double x = iDstX + 0.5 + poWK->nDstXOff;
6798
0
                double y = iDstY + poWK->nDstYOff;
6799
0
                double z = 0;
6800
0
                int bSuccess = FALSE;
6801
0
                poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y,
6802
0
                                     &z, &bSuccess);
6803
0
                if (bSuccess && x < padfX[iDstX])
6804
0
                {
6805
0
                    bWrapOverX = true;
6806
0
                    std::swap(padfX[iDstX], padfX2[iDstX]);
6807
0
                    padfX2[iDstX] += nSrcXSize;
6808
0
                }
6809
0
            }
6810
6811
0
            const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
6812
0
            const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
6813
0
            constexpr double EPS = 1e-10;
6814
            // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
6815
0
            if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
6816
0
                continue;
6817
0
            int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
6818
0
            int iSrcXMax = static_cast<int>(
6819
0
                std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
6820
0
            if (!bWrapOverX)
6821
0
                iSrcXMax = std::min(iSrcXMax, nSrcXSize);
6822
0
            if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
6823
0
                iSrcXMax++;
6824
6825
0
            if (padfY[iDstX] > padfY2[iDstX])
6826
0
                std::swap(padfY[iDstX], padfY2[iDstX]);
6827
0
            const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
6828
0
            const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
6829
            // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
6830
0
            if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
6831
0
                continue;
6832
0
            int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
6833
0
            int iSrcYMax =
6834
0
                std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
6835
0
            if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
6836
0
                iSrcYMax++;
6837
6838
0
#define COMPUTE_WEIGHT_Y(iSrcY)                                                \
6839
0
    ((iSrcY == iSrcYMin)                                                       \
6840
0
         ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
6841
0
     : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                       \
6842
0
                               : 1.0)
6843
6844
0
#define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
6845
0
    ((iSrcX == iSrcXMin)       ? ((iSrcXMin + 1 == iSrcXMax)                   \
6846
0
                                      ? dfWeightY                              \
6847
0
                                      : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
6848
0
     : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax))         \
6849
0
                               : dfWeightY)
6850
6851
0
            bool bDone = false;
6852
6853
            // Special Average mode where we process all bands together,
6854
            // to avoid averaging tuples that match an entry of m_aadfExcludedValues
6855
0
            if (nAlgo == GWKAOM_Average &&
6856
0
                (!poWK->m_aadfExcludedValues.empty() ||
6857
0
                 dfNodataValuesThreshold < 1 - EPS) &&
6858
0
                !poWK->bApplyVerticalShift && !bIsComplex)
6859
0
            {
6860
0
                double dfTotalWeightInvalid = 0.0;
6861
0
                double dfTotalWeightExcluded = 0.0;
6862
0
                double dfTotalWeightRegular = 0.0;
6863
0
                std::vector<double> adfValueReal(poWK->nBands, 0);
6864
0
                std::vector<double> adfValueAveraged(poWK->nBands, 0);
6865
0
                std::vector<int> anCountExcludedValues(
6866
0
                    poWK->m_aadfExcludedValues.size(), 0);
6867
6868
0
                for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
6869
0
                {
6870
0
                    const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
6871
0
                    iSrcOffset =
6872
0
                        iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
6873
0
                    for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
6874
0
                         iSrcX++, iSrcOffset++)
6875
0
                    {
6876
0
                        if (bWrapOverX)
6877
0
                            iSrcOffset =
6878
0
                                (iSrcX % nSrcXSize) +
6879
0
                                static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
6880
6881
0
                        const double dfWeight =
6882
0
                            COMPUTE_WEIGHT(iSrcX, dfWeightY);
6883
0
                        if (dfWeight <= 0)
6884
0
                            continue;
6885
6886
0
                        if (poWK->panUnifiedSrcValid != nullptr &&
6887
0
                            !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6888
0
                        {
6889
0
                            dfTotalWeightInvalid += dfWeight;
6890
0
                            continue;
6891
0
                        }
6892
6893
0
                        bool bAllValid = true;
6894
0
                        for (int iBand = 0; iBand < poWK->nBands; iBand++)
6895
0
                        {
6896
0
                            double dfBandDensity = 0;
6897
0
                            double dfValueImagTmp = 0;
6898
0
                            if (!(GWKGetPixelValue(
6899
0
                                      poWK, iBand, iSrcOffset, &dfBandDensity,
6900
0
                                      &adfValueReal[iBand], &dfValueImagTmp) &&
6901
0
                                  dfBandDensity > BAND_DENSITY_THRESHOLD))
6902
0
                            {
6903
0
                                bAllValid = false;
6904
0
                                break;
6905
0
                            }
6906
0
                        }
6907
6908
0
                        if (!bAllValid)
6909
0
                        {
6910
0
                            dfTotalWeightInvalid += dfWeight;
6911
0
                            continue;
6912
0
                        }
6913
6914
0
                        bool bExcludedValueFound = false;
6915
0
                        for (size_t i = 0;
6916
0
                             i < poWK->m_aadfExcludedValues.size(); ++i)
6917
0
                        {
6918
0
                            if (poWK->m_aadfExcludedValues[i] == adfValueReal)
6919
0
                            {
6920
0
                                bExcludedValueFound = true;
6921
0
                                ++anCountExcludedValues[i];
6922
0
                                dfTotalWeightExcluded += dfWeight;
6923
0
                                break;
6924
0
                            }
6925
0
                        }
6926
0
                        if (!bExcludedValueFound)
6927
0
                        {
6928
                            // Weighted incremental algorithm mean
6929
                            // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
6930
0
                            dfTotalWeightRegular += dfWeight;
6931
0
                            for (int iBand = 0; iBand < poWK->nBands; iBand++)
6932
0
                            {
6933
0
                                adfValueAveraged[iBand] +=
6934
0
                                    (dfWeight / dfTotalWeightRegular) *
6935
0
                                    (adfValueReal[iBand] -
6936
0
                                     adfValueAveraged[iBand]);
6937
0
                            }
6938
0
                        }
6939
0
                    }
6940
0
                }
6941
6942
0
                const double dfTotalWeight = dfTotalWeightInvalid +
6943
0
                                             dfTotalWeightExcluded +
6944
0
                                             dfTotalWeightRegular;
6945
0
                if (dfTotalWeightInvalid > 0 &&
6946
0
                    dfTotalWeightInvalid >=
6947
0
                        dfNodataValuesThreshold * dfTotalWeight)
6948
0
                {
6949
                    // Do nothing. Leave bHasFoundDensity set to false.
6950
0
                }
6951
0
                else if (dfTotalWeightExcluded > 0 &&
6952
0
                         dfTotalWeightExcluded >=
6953
0
                             dfExcludedValuesThreshold * dfTotalWeight)
6954
0
                {
6955
                    // Find the most represented excluded value tuple
6956
0
                    size_t iExcludedValue = 0;
6957
0
                    int nExcludedValueCount = 0;
6958
0
                    for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
6959
0
                         ++i)
6960
0
                    {
6961
0
                        if (anCountExcludedValues[i] > nExcludedValueCount)
6962
0
                        {
6963
0
                            iExcludedValue = i;
6964
0
                            nExcludedValueCount = anCountExcludedValues[i];
6965
0
                        }
6966
0
                    }
6967
6968
0
                    bHasFoundDensity = true;
6969
6970
0
                    for (int iBand = 0; iBand < poWK->nBands; iBand++)
6971
0
                    {
6972
0
                        GWKSetPixelValue(
6973
0
                            poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
6974
0
                            poWK->m_aadfExcludedValues[iExcludedValue][iBand],
6975
0
                            0);
6976
0
                    }
6977
0
                }
6978
0
                else if (dfTotalWeightRegular > 0)
6979
0
                {
6980
0
                    bHasFoundDensity = true;
6981
6982
0
                    for (int iBand = 0; iBand < poWK->nBands; iBand++)
6983
0
                    {
6984
0
                        GWKSetPixelValue(poWK, iBand, iDstOffset,
6985
0
                                         /* dfBandDensity = */ 1.0,
6986
0
                                         adfValueAveraged[iBand], 0);
6987
0
                    }
6988
0
                }
6989
6990
                // Skip below loop on bands
6991
0
                bDone = true;
6992
0
            }
6993
6994
            /* ====================================================================
6995
             */
6996
            /*      Loop processing each band. */
6997
            /* ====================================================================
6998
             */
6999
7000
0
            for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7001
0
            {
7002
0
                double dfBandDensity = 0.0;
7003
0
                double dfValueReal = 0.0;
7004
0
                double dfValueImag = 0.0;
7005
0
                double dfValueRealTmp = 0.0;
7006
0
                double dfValueImagTmp = 0.0;
7007
7008
                /* --------------------------------------------------------------------
7009
                 */
7010
                /*      Collect the source value. */
7011
                /* --------------------------------------------------------------------
7012
                 */
7013
7014
                // Loop over source lines and pixels - 3 possible algorithms.
7015
7016
                // poWK->eResample == GRA_Average.
7017
0
                if (nAlgo == GWKAOM_Average)
7018
0
                {
7019
0
                    double dfTotalWeight = 0.0;
7020
7021
                    // This code adapted from GDALDownsampleChunk32R_AverageT()
7022
                    // in gcore/overview.cpp.
7023
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7024
0
                    {
7025
0
                        const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7026
0
                        iSrcOffset = iSrcXMin +
7027
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7028
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7029
0
                             iSrcX++, iSrcOffset++)
7030
0
                        {
7031
0
                            if (bWrapOverX)
7032
0
                                iSrcOffset =
7033
0
                                    (iSrcX % nSrcXSize) +
7034
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7035
7036
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7037
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7038
0
                                            iSrcOffset))
7039
0
                            {
7040
0
                                continue;
7041
0
                            }
7042
7043
0
                            if (GWKGetPixelValue(
7044
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7045
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7046
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7047
0
                            {
7048
0
                                const double dfWeight =
7049
0
                                    COMPUTE_WEIGHT(iSrcX, dfWeightY);
7050
0
                                if (dfWeight > 0)
7051
0
                                {
7052
                                    // Weighted incremental algorithm mean
7053
                                    // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7054
0
                                    dfTotalWeight += dfWeight;
7055
0
                                    dfValueReal +=
7056
0
                                        (dfWeight / dfTotalWeight) *
7057
0
                                        (dfValueRealTmp - dfValueReal);
7058
0
                                    if (bIsComplex)
7059
0
                                    {
7060
0
                                        dfValueImag +=
7061
0
                                            (dfWeight / dfTotalWeight) *
7062
0
                                            (dfValueImagTmp - dfValueImag);
7063
0
                                    }
7064
0
                                }
7065
0
                            }
7066
0
                        }
7067
0
                    }
7068
7069
0
                    if (dfTotalWeight > 0)
7070
0
                    {
7071
0
                        if (poWK->bApplyVerticalShift)
7072
0
                        {
7073
0
                            if (!std::isfinite(padfZ[iDstX]))
7074
0
                                continue;
7075
                            // Subtract padfZ[] since the coordinate
7076
                            // transformation is from target to source
7077
0
                            dfValueReal =
7078
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7079
0
                                padfZ[iDstX] *
7080
0
                                    dfMultFactorVerticalShiftPipeline;
7081
0
                        }
7082
7083
0
                        dfBandDensity = 1;
7084
0
                        bHasFoundDensity = true;
7085
0
                    }
7086
0
                }  // GRA_Average.
7087
                // poWK->eResample == GRA_RMS.
7088
0
                if (nAlgo == GWKAOM_RMS)
7089
0
                {
7090
0
                    double dfTotalReal = 0.0;
7091
0
                    double dfTotalImag = 0.0;
7092
0
                    double dfTotalWeight = 0.0;
7093
                    // This code adapted from GDALDownsampleChunk32R_AverageT()
7094
                    // in gcore/overview.cpp.
7095
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7096
0
                    {
7097
0
                        const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7098
0
                        iSrcOffset = iSrcXMin +
7099
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7100
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7101
0
                             iSrcX++, iSrcOffset++)
7102
0
                        {
7103
0
                            if (bWrapOverX)
7104
0
                                iSrcOffset =
7105
0
                                    (iSrcX % nSrcXSize) +
7106
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7107
7108
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7109
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7110
0
                                            iSrcOffset))
7111
0
                            {
7112
0
                                continue;
7113
0
                            }
7114
7115
0
                            if (GWKGetPixelValue(
7116
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7117
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7118
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7119
0
                            {
7120
0
                                const double dfWeight =
7121
0
                                    COMPUTE_WEIGHT(iSrcX, dfWeightY);
7122
0
                                dfTotalWeight += dfWeight;
7123
0
                                dfTotalReal +=
7124
0
                                    dfValueRealTmp * dfValueRealTmp * dfWeight;
7125
0
                                if (bIsComplex)
7126
0
                                    dfTotalImag += dfValueImagTmp *
7127
0
                                                   dfValueImagTmp * dfWeight;
7128
0
                            }
7129
0
                        }
7130
0
                    }
7131
7132
0
                    if (dfTotalWeight > 0)
7133
0
                    {
7134
0
                        dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
7135
7136
0
                        if (poWK->bApplyVerticalShift)
7137
0
                        {
7138
0
                            if (!std::isfinite(padfZ[iDstX]))
7139
0
                                continue;
7140
                            // Subtract padfZ[] since the coordinate
7141
                            // transformation is from target to source
7142
0
                            dfValueReal =
7143
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7144
0
                                padfZ[iDstX] *
7145
0
                                    dfMultFactorVerticalShiftPipeline;
7146
0
                        }
7147
7148
0
                        if (bIsComplex)
7149
0
                            dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
7150
7151
0
                        dfBandDensity = 1;
7152
0
                        bHasFoundDensity = true;
7153
0
                    }
7154
0
                }  // GRA_RMS.
7155
#ifdef disabled
7156
                else if (nAlgo == GWKAOM_Sum)
7157
                // poWK->eResample == GRA_Sum
7158
                {
7159
                    double dfTotalReal = 0.0;
7160
                    double dfTotalImag = 0.0;
7161
                    bool bFoundValid = false;
7162
7163
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7164
                    {
7165
                        const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7166
                        iSrcOffset = iSrcXMin +
7167
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7168
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7169
                             iSrcX++, iSrcOffset++)
7170
                        {
7171
                            if (bWrapOverX)
7172
                                iSrcOffset =
7173
                                    (iSrcX % nSrcXSize) +
7174
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7175
7176
                            if (poWK->panUnifiedSrcValid != nullptr &&
7177
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7178
                                            iSrcOffset))
7179
                            {
7180
                                continue;
7181
                            }
7182
7183
                            if (GWKGetPixelValue(
7184
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7185
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7186
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7187
                            {
7188
                                const double dfWeight =
7189
                                    COMPUTE_WEIGHT(iSrcX, dfWeightY);
7190
                                bFoundValid = true;
7191
                                dfTotalReal += dfValueRealTmp * dfWeight;
7192
                                if (bIsComplex)
7193
                                {
7194
                                    dfTotalImag += dfValueImagTmp * dfWeight;
7195
                                }
7196
                            }
7197
                        }
7198
                    }
7199
7200
                    if (bFoundValid)
7201
                    {
7202
                        dfValueReal = dfTotalReal;
7203
7204
                        if (poWK->bApplyVerticalShift)
7205
                        {
7206
                            if (!std::isfinite(padfZ[iDstX]))
7207
                                continue;
7208
                            // Subtract padfZ[] since the coordinate
7209
                            // transformation is from target to source
7210
                            dfValueReal =
7211
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7212
                                padfZ[iDstX] *
7213
                                    dfMultFactorVerticalShiftPipeline;
7214
                        }
7215
7216
                        if (bIsComplex)
7217
                        {
7218
                            dfValueImag = dfTotalImag;
7219
                        }
7220
                        dfBandDensity = 1;
7221
                        bHasFoundDensity = true;
7222
                    }
7223
                }  // GRA_Sum.
7224
#endif
7225
0
                else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
7226
                // poWK->eResample == GRA_Mode
7227
0
                {
7228
                    // This code adapted from GDALDownsampleChunk32R_Mode() in
7229
                    // gcore/overview.cpp.
7230
0
                    if (nAlgo == GWKAOM_Fmode)  // int32 or float.
7231
0
                    {
7232
                        // Does it make sense to run a
7233
                        // majority filter on floating point data? But, here it
7234
                        // is for the sake of compatibility. It won't look
7235
                        // right on RGB images by the nature of the filter.
7236
0
                        nBins = 0;
7237
0
                        int iModeIndex = -1;
7238
7239
0
                        for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7240
0
                        {
7241
0
                            const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7242
0
                            iSrcOffset =
7243
0
                                iSrcXMin +
7244
0
                                static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7245
0
                            for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7246
0
                                 iSrcX++, iSrcOffset++)
7247
0
                            {
7248
0
                                if (bWrapOverX)
7249
0
                                    iSrcOffset =
7250
0
                                        (iSrcX % nSrcXSize) +
7251
0
                                        static_cast<GPtrDiff_t>(iSrcY) *
7252
0
                                            nSrcXSize;
7253
7254
0
                                if (poWK->panUnifiedSrcValid != nullptr &&
7255
0
                                    !CPLMaskGet(poWK->panUnifiedSrcValid,
7256
0
                                                iSrcOffset))
7257
0
                                    continue;
7258
7259
0
                                if (GWKGetPixelValue(
7260
0
                                        poWK, iBand, iSrcOffset, &dfBandDensity,
7261
0
                                        &dfValueRealTmp, &dfValueImagTmp) &&
7262
0
                                    dfBandDensity > BAND_DENSITY_THRESHOLD)
7263
0
                                {
7264
0
                                    const float fVal =
7265
0
                                        static_cast<float>(dfValueRealTmp);
7266
0
                                    const double dfWeight =
7267
0
                                        COMPUTE_WEIGHT(iSrcX, dfWeightY);
7268
7269
                                    // Check array for existing entry.
7270
0
                                    int i = 0;
7271
0
                                    for (i = 0; i < nBins; ++i)
7272
0
                                    {
7273
0
                                        if (pafRealVals[i] == fVal)
7274
0
                                        {
7275
7276
0
                                            pafCounts[i] +=
7277
0
                                                static_cast<float>(dfWeight);
7278
0
                                            bool bValIsMaxCount =
7279
0
                                                (pafCounts[i] >
7280
0
                                                 pafCounts[iModeIndex]);
7281
7282
0
                                            if (!bValIsMaxCount &&
7283
0
                                                pafCounts[i] ==
7284
0
                                                    pafCounts[iModeIndex])
7285
0
                                            {
7286
0
                                                switch (eTieStrategy)
7287
0
                                                {
7288
0
                                                    case GWKTS_First:
7289
0
                                                        break;
7290
0
                                                    case GWKTS_Min:
7291
0
                                                        bValIsMaxCount =
7292
0
                                                            fVal <
7293
0
                                                            pafRealVals
7294
0
                                                                [iModeIndex];
7295
0
                                                        break;
7296
0
                                                    case GWKTS_Max:
7297
0
                                                        bValIsMaxCount =
7298
0
                                                            fVal >
7299
0
                                                            pafRealVals
7300
0
                                                                [iModeIndex];
7301
0
                                                        break;
7302
0
                                                }
7303
0
                                            }
7304
7305
0
                                            if (bValIsMaxCount)
7306
0
                                            {
7307
0
                                                iModeIndex = i;
7308
0
                                            }
7309
7310
0
                                            break;
7311
0
                                        }
7312
0
                                    }
7313
7314
                                    // Add to arr if entry not already there.
7315
0
                                    if (i == nBins)
7316
0
                                    {
7317
0
                                        pafRealVals[i] = fVal;
7318
0
                                        pafCounts[i] =
7319
0
                                            static_cast<float>(dfWeight);
7320
7321
0
                                        if (iModeIndex < 0)
7322
0
                                            iModeIndex = i;
7323
7324
0
                                        ++nBins;
7325
0
                                    }
7326
0
                                }
7327
0
                            }
7328
0
                        }
7329
7330
0
                        if (iModeIndex != -1)
7331
0
                        {
7332
0
                            dfValueReal = pafRealVals[iModeIndex];
7333
7334
0
                            if (poWK->bApplyVerticalShift)
7335
0
                            {
7336
0
                                if (!std::isfinite(padfZ[iDstX]))
7337
0
                                    continue;
7338
                                // Subtract padfZ[] since the coordinate
7339
                                // transformation is from target to source
7340
0
                                dfValueReal =
7341
0
                                    dfValueReal *
7342
0
                                        poWK->dfMultFactorVerticalShift -
7343
0
                                    padfZ[iDstX] *
7344
0
                                        dfMultFactorVerticalShiftPipeline;
7345
0
                            }
7346
7347
0
                            dfBandDensity = 1;
7348
0
                            bHasFoundDensity = true;
7349
0
                        }
7350
0
                    }
7351
0
                    else  // byte or int16.
7352
0
                    {
7353
0
                        float fMaxCount = 0.0f;
7354
0
                        int nMode = -1;
7355
0
                        bool bHasSourceValues = false;
7356
7357
0
                        memset(pafCounts, 0, nBins * sizeof(float));
7358
7359
0
                        for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7360
0
                        {
7361
0
                            const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7362
0
                            iSrcOffset =
7363
0
                                iSrcXMin +
7364
0
                                static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7365
0
                            for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7366
0
                                 iSrcX++, iSrcOffset++)
7367
0
                            {
7368
0
                                if (bWrapOverX)
7369
0
                                    iSrcOffset =
7370
0
                                        (iSrcX % nSrcXSize) +
7371
0
                                        static_cast<GPtrDiff_t>(iSrcY) *
7372
0
                                            nSrcXSize;
7373
7374
0
                                if (poWK->panUnifiedSrcValid != nullptr &&
7375
0
                                    !CPLMaskGet(poWK->panUnifiedSrcValid,
7376
0
                                                iSrcOffset))
7377
0
                                    continue;
7378
7379
0
                                if (GWKGetPixelValue(
7380
0
                                        poWK, iBand, iSrcOffset, &dfBandDensity,
7381
0
                                        &dfValueRealTmp, &dfValueImagTmp) &&
7382
0
                                    dfBandDensity > BAND_DENSITY_THRESHOLD)
7383
0
                                {
7384
0
                                    bHasSourceValues = true;
7385
0
                                    const int nVal =
7386
0
                                        static_cast<int>(dfValueRealTmp);
7387
0
                                    const int iBin = nVal + nBinsOffset;
7388
0
                                    const double dfWeight =
7389
0
                                        COMPUTE_WEIGHT(iSrcX, dfWeightY);
7390
7391
                                    // Sum the density.
7392
0
                                    pafCounts[iBin] +=
7393
0
                                        static_cast<float>(dfWeight);
7394
                                    // Is it the most common value so far?
7395
0
                                    bool bUpdateMode =
7396
0
                                        pafCounts[iBin] > fMaxCount;
7397
0
                                    if (!bUpdateMode &&
7398
0
                                        pafCounts[iBin] == fMaxCount)
7399
0
                                    {
7400
0
                                        switch (eTieStrategy)
7401
0
                                        {
7402
0
                                            case GWKTS_First:
7403
0
                                                break;
7404
0
                                            case GWKTS_Min:
7405
0
                                                bUpdateMode = nVal < nMode;
7406
0
                                                break;
7407
0
                                            case GWKTS_Max:
7408
0
                                                bUpdateMode = nVal > nMode;
7409
0
                                                break;
7410
0
                                        }
7411
0
                                    }
7412
0
                                    if (bUpdateMode)
7413
0
                                    {
7414
0
                                        nMode = nVal;
7415
0
                                        fMaxCount = pafCounts[iBin];
7416
0
                                    }
7417
0
                                }
7418
0
                            }
7419
0
                        }
7420
7421
0
                        if (bHasSourceValues)
7422
0
                        {
7423
0
                            dfValueReal = nMode;
7424
7425
0
                            if (poWK->bApplyVerticalShift)
7426
0
                            {
7427
0
                                if (!std::isfinite(padfZ[iDstX]))
7428
0
                                    continue;
7429
                                // Subtract padfZ[] since the coordinate
7430
                                // transformation is from target to source
7431
0
                                dfValueReal =
7432
0
                                    dfValueReal *
7433
0
                                        poWK->dfMultFactorVerticalShift -
7434
0
                                    padfZ[iDstX] *
7435
0
                                        dfMultFactorVerticalShiftPipeline;
7436
0
                            }
7437
7438
0
                            dfBandDensity = 1;
7439
0
                            bHasFoundDensity = true;
7440
0
                        }
7441
0
                    }
7442
0
                }  // GRA_Mode.
7443
0
                else if (nAlgo == GWKAOM_Max)
7444
                // poWK->eResample == GRA_Max.
7445
0
                {
7446
0
                    bool bFoundValid = false;
7447
0
                    double dfTotalReal = cpl::NumericLimits<double>::lowest();
7448
                    // This code adapted from nAlgo 1 method, GRA_Average.
7449
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7450
0
                    {
7451
0
                        iSrcOffset = iSrcXMin +
7452
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7453
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7454
0
                             iSrcX++, iSrcOffset++)
7455
0
                        {
7456
0
                            if (bWrapOverX)
7457
0
                                iSrcOffset =
7458
0
                                    (iSrcX % nSrcXSize) +
7459
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7460
7461
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7462
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7463
0
                                            iSrcOffset))
7464
0
                            {
7465
0
                                continue;
7466
0
                            }
7467
7468
                            // Returns pixel value if it is not no data.
7469
0
                            if (GWKGetPixelValue(
7470
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7471
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7472
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7473
0
                            {
7474
0
                                bFoundValid = true;
7475
0
                                if (dfTotalReal < dfValueRealTmp)
7476
0
                                {
7477
0
                                    dfTotalReal = dfValueRealTmp;
7478
0
                                }
7479
0
                            }
7480
0
                        }
7481
0
                    }
7482
7483
0
                    if (bFoundValid)
7484
0
                    {
7485
0
                        dfValueReal = dfTotalReal;
7486
7487
0
                        if (poWK->bApplyVerticalShift)
7488
0
                        {
7489
0
                            if (!std::isfinite(padfZ[iDstX]))
7490
0
                                continue;
7491
                            // Subtract padfZ[] since the coordinate
7492
                            // transformation is from target to source
7493
0
                            dfValueReal =
7494
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7495
0
                                padfZ[iDstX] *
7496
0
                                    dfMultFactorVerticalShiftPipeline;
7497
0
                        }
7498
7499
0
                        dfBandDensity = 1;
7500
0
                        bHasFoundDensity = true;
7501
0
                    }
7502
0
                }  // GRA_Max.
7503
0
                else if (nAlgo == GWKAOM_Min)
7504
                // poWK->eResample == GRA_Min.
7505
0
                {
7506
0
                    bool bFoundValid = false;
7507
0
                    double dfTotalReal = cpl::NumericLimits<double>::max();
7508
                    // This code adapted from nAlgo 1 method, GRA_Average.
7509
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7510
0
                    {
7511
0
                        iSrcOffset = iSrcXMin +
7512
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7513
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7514
0
                             iSrcX++, iSrcOffset++)
7515
0
                        {
7516
0
                            if (bWrapOverX)
7517
0
                                iSrcOffset =
7518
0
                                    (iSrcX % nSrcXSize) +
7519
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7520
7521
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7522
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7523
0
                                            iSrcOffset))
7524
0
                            {
7525
0
                                continue;
7526
0
                            }
7527
7528
                            // Returns pixel value if it is not no data.
7529
0
                            if (GWKGetPixelValue(
7530
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7531
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7532
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7533
0
                            {
7534
0
                                bFoundValid = true;
7535
0
                                if (dfTotalReal > dfValueRealTmp)
7536
0
                                {
7537
0
                                    dfTotalReal = dfValueRealTmp;
7538
0
                                }
7539
0
                            }
7540
0
                        }
7541
0
                    }
7542
7543
0
                    if (bFoundValid)
7544
0
                    {
7545
0
                        dfValueReal = dfTotalReal;
7546
7547
0
                        if (poWK->bApplyVerticalShift)
7548
0
                        {
7549
0
                            if (!std::isfinite(padfZ[iDstX]))
7550
0
                                continue;
7551
                            // Subtract padfZ[] since the coordinate
7552
                            // transformation is from target to source
7553
0
                            dfValueReal =
7554
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7555
0
                                padfZ[iDstX] *
7556
0
                                    dfMultFactorVerticalShiftPipeline;
7557
0
                        }
7558
7559
0
                        dfBandDensity = 1;
7560
0
                        bHasFoundDensity = true;
7561
0
                    }
7562
0
                }  // GRA_Min.
7563
0
                else if (nAlgo == GWKAOM_Quant)
7564
                // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
7565
0
                {
7566
0
                    bool bFoundValid = false;
7567
0
                    std::vector<double> dfRealValuesTmp;
7568
7569
                    // This code adapted from nAlgo 1 method, GRA_Average.
7570
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7571
0
                    {
7572
0
                        iSrcOffset = iSrcXMin +
7573
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7574
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7575
0
                             iSrcX++, iSrcOffset++)
7576
0
                        {
7577
0
                            if (bWrapOverX)
7578
0
                                iSrcOffset =
7579
0
                                    (iSrcX % nSrcXSize) +
7580
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7581
7582
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7583
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7584
0
                                            iSrcOffset))
7585
0
                            {
7586
0
                                continue;
7587
0
                            }
7588
7589
                            // Returns pixel value if it is not no data.
7590
0
                            if (GWKGetPixelValue(
7591
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7592
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7593
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7594
0
                            {
7595
0
                                bFoundValid = true;
7596
0
                                dfRealValuesTmp.push_back(dfValueRealTmp);
7597
0
                            }
7598
0
                        }
7599
0
                    }
7600
7601
0
                    if (bFoundValid)
7602
0
                    {
7603
0
                        std::sort(dfRealValuesTmp.begin(),
7604
0
                                  dfRealValuesTmp.end());
7605
0
                        int quantIdx = static_cast<int>(
7606
0
                            std::ceil(quant * dfRealValuesTmp.size() - 1));
7607
0
                        dfValueReal = dfRealValuesTmp[quantIdx];
7608
7609
0
                        if (poWK->bApplyVerticalShift)
7610
0
                        {
7611
0
                            if (!std::isfinite(padfZ[iDstX]))
7612
0
                                continue;
7613
                            // Subtract padfZ[] since the coordinate
7614
                            // transformation is from target to source
7615
0
                            dfValueReal =
7616
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7617
0
                                padfZ[iDstX] *
7618
0
                                    dfMultFactorVerticalShiftPipeline;
7619
0
                        }
7620
7621
0
                        dfBandDensity = 1;
7622
0
                        bHasFoundDensity = true;
7623
0
                        dfRealValuesTmp.clear();
7624
0
                    }
7625
0
                }  // Quantile.
7626
7627
                /* --------------------------------------------------------------------
7628
                 */
7629
                /*      We have a computed value from the source.  Now apply it
7630
                 * to      */
7631
                /*      the destination pixel. */
7632
                /* --------------------------------------------------------------------
7633
                 */
7634
0
                if (bHasFoundDensity)
7635
0
                {
7636
                    // TODO: Should we compute dfBandDensity in fct of
7637
                    // nCount/nCount2, or use as a threshold to set the dest
7638
                    // value?
7639
                    // dfBandDensity = (float) nCount / nCount2;
7640
                    // if( (float) nCount / nCount2 > 0.1 )
7641
                    // or fix gdalwarp crop_to_cutline to crop partially
7642
                    // overlapping pixels.
7643
0
                    GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7644
0
                                     dfValueReal, dfValueImag);
7645
0
                }
7646
0
            }
7647
7648
0
            if (!bHasFoundDensity)
7649
0
                continue;
7650
7651
            /* --------------------------------------------------------------------
7652
             */
7653
            /*      Update destination density/validity masks. */
7654
            /* --------------------------------------------------------------------
7655
             */
7656
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7657
7658
0
            if (poWK->panDstValid != nullptr)
7659
0
            {
7660
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
7661
0
            }
7662
0
        } /* Next iDstX */
7663
7664
        /* --------------------------------------------------------------------
7665
         */
7666
        /*      Report progress to the user, and optionally cancel out. */
7667
        /* --------------------------------------------------------------------
7668
         */
7669
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7670
0
            break;
7671
0
    }
7672
7673
    /* -------------------------------------------------------------------- */
7674
    /*      Cleanup and return.                                             */
7675
    /* -------------------------------------------------------------------- */
7676
0
    CPLFree(padfX);
7677
0
    CPLFree(padfY);
7678
0
    CPLFree(padfZ);
7679
0
    CPLFree(padfX2);
7680
0
    CPLFree(padfY2);
7681
0
    CPLFree(padfZ2);
7682
0
    CPLFree(pabSuccess);
7683
0
    CPLFree(pabSuccess2);
7684
0
    VSIFree(pafCounts);
7685
0
    VSIFree(pafRealVals);
7686
0
}
7687
7688
/************************************************************************/
7689
/*                         getOrientation()                             */
7690
/************************************************************************/
7691
7692
typedef std::pair<double, double> XYPair;

// Classifies the turn made when walking p1 -> p2 -> p3.
//
// The sign of the cross product of the edge (p1,p2) with the edge (p2,p3)
// is evaluated. Return value:
//    1 if (p1,p2,p3) is clockwise oriented,
//   -1 if it is counter-clockwise oriented,
//    0 if the three points are colinear (|cross product| below 1e-20).
static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
{
    // Components of the two consecutive edges.
    const double dfEdge12X = p2.first - p1.first;
    const double dfEdge12Y = p2.second - p1.second;
    const double dfEdge23X = p3.first - p2.first;
    const double dfEdge23Y = p3.second - p2.second;

    const double dfCross = dfEdge12Y * dfEdge23X - dfEdge12X * dfEdge23Y;

    // Absolute tolerance under which the points are considered aligned.
    if (std::abs(dfCross) < 1e-20)
        return 0;

    return (dfCross > 0) ? 1 : -1;
}
7713
7714
/************************************************************************/
7715
/*                          isConvex()                                  */
7716
/************************************************************************/
7717
7718
typedef std::vector<XYPair> XYPoly;
7719
7720
// poly must be closed
7721
static bool isConvex(const XYPoly &poly)
7722
0
{
7723
0
    const size_t n = poly.size();
7724
0
    size_t i = 0;
7725
0
    int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
7726
0
    ++i;
7727
0
    for (; i < n - 2; ++i)
7728
0
    {
7729
0
        const int orientation =
7730
0
            getOrientation(poly[i], poly[i + 1], poly[i + 2]);
7731
0
        if (orientation != 0)
7732
0
        {
7733
0
            if (last_orientation == 0)
7734
0
                last_orientation = orientation;
7735
0
            else if (orientation != last_orientation)
7736
0
                return false;
7737
0
        }
7738
0
    }
7739
0
    return true;
7740
0
}
7741
7742
/************************************************************************/
7743
/*                     pointIntersectsConvexPoly()                      */
7744
/************************************************************************/
7745
7746
// Returns whether xy intersects poly, that must be closed and convex.
7747
static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
7748
0
{
7749
0
    const size_t n = poly.size();
7750
0
    double dx1 = xy.first - poly[0].first;
7751
0
    double dy1 = xy.second - poly[0].second;
7752
0
    double dx2 = poly[1].first - poly[0].first;
7753
0
    double dy2 = poly[1].second - poly[0].second;
7754
0
    double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
7755
7756
    // Check if the point remains on the same side (left/right) of all edges
7757
0
    for (size_t i = 2; i < n; i++)
7758
0
    {
7759
0
        dx1 = xy.first - poly[i - 1].first;
7760
0
        dy1 = xy.second - poly[i - 1].second;
7761
7762
0
        dx2 = poly[i].first - poly[i - 1].first;
7763
0
        dy2 = poly[i].second - poly[i - 1].second;
7764
7765
0
        double crossProduct = dx1 * dy2 - dx2 * dy1;
7766
0
        if (std::abs(prevCrossProduct) < 1e-20)
7767
0
            prevCrossProduct = crossProduct;
7768
0
        else if (prevCrossProduct * crossProduct < 0)
7769
0
            return false;
7770
0
    }
7771
7772
0
    return true;
7773
0
}
7774
7775
/************************************************************************/
7776
/*                     getIntersection()                                */
7777
/************************************************************************/
7778
7779
/* Returns intersection of [p1,p2] with [p3,p4], if
7780
 * it is a single point, and the 2 segments are not colinear.
7781
 */
7782
static bool getIntersection(const XYPair &p1, const XYPair &p2,
7783
                            const XYPair &p3, const XYPair &p4, XYPair &xy)
7784
0
{
7785
0
    const double x1 = p1.first;
7786
0
    const double y1 = p1.second;
7787
0
    const double x2 = p2.first;
7788
0
    const double y2 = p2.second;
7789
0
    const double x3 = p3.first;
7790
0
    const double y3 = p3.second;
7791
0
    const double x4 = p4.first;
7792
0
    const double y4 = p4.second;
7793
0
    const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
7794
0
    const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
7795
0
    if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
7796
0
        return false;
7797
7798
0
    const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
7799
0
    if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
7800
0
        return false;
7801
7802
0
    const double t = t_num / denom;
7803
0
    xy.first = x1 + t * (x2 - x1);
7804
0
    xy.second = y1 + t * (y2 - y1);
7805
0
    return true;
7806
0
}
7807
7808
/************************************************************************/
7809
/*                     getConvexPolyIntersection()                      */
7810
/************************************************************************/
7811
7812
// poly1 and poly2 must be closed and convex.
7813
// The returned intersection will not necessary be closed.
7814
static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
7815
                                      XYPoly &intersection)
7816
0
{
7817
0
    intersection.clear();
7818
7819
    // Add all points of poly1 inside poly2
7820
0
    for (size_t i = 0; i < poly1.size() - 1; ++i)
7821
0
    {
7822
0
        if (pointIntersectsConvexPoly(poly1[i], poly2))
7823
0
            intersection.push_back(poly1[i]);
7824
0
    }
7825
0
    if (intersection.size() == poly1.size() - 1)
7826
0
    {
7827
        // poly1 is inside poly2
7828
0
        return;
7829
0
    }
7830
7831
    // Add all points of poly2 inside poly1
7832
0
    for (size_t i = 0; i < poly2.size() - 1; ++i)
7833
0
    {
7834
0
        if (pointIntersectsConvexPoly(poly2[i], poly1))
7835
0
            intersection.push_back(poly2[i]);
7836
0
    }
7837
7838
    // Compute the intersection of all edges of both polygons
7839
0
    XYPair xy;
7840
0
    for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
7841
0
    {
7842
0
        for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
7843
0
        {
7844
0
            if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
7845
0
                                poly2[i2 + 1], xy))
7846
0
            {
7847
0
                intersection.push_back(xy);
7848
0
            }
7849
0
        }
7850
0
    }
7851
7852
0
    if (intersection.empty())
7853
0
        return;
7854
7855
    // Find lowest-left point in intersection set
7856
0
    double lowest_x = cpl::NumericLimits<double>::max();
7857
0
    double lowest_y = cpl::NumericLimits<double>::max();
7858
0
    for (const auto &pair : intersection)
7859
0
    {
7860
0
        const double x = pair.first;
7861
0
        const double y = pair.second;
7862
0
        if (y < lowest_y || (y == lowest_y && x < lowest_x))
7863
0
        {
7864
0
            lowest_x = x;
7865
0
            lowest_y = y;
7866
0
        }
7867
0
    }
7868
7869
0
    const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
7870
0
    {
7871
0
        const double p1x_diff = p1.first - lowest_x;
7872
0
        const double p1y_diff = p1.second - lowest_y;
7873
0
        const double p2x_diff = p2.first - lowest_x;
7874
0
        const double p2y_diff = p2.second - lowest_y;
7875
0
        if (p2y_diff == 0.0 && p1y_diff == 0.0)
7876
0
        {
7877
0
            if (p1x_diff >= 0)
7878
0
            {
7879
0
                if (p2x_diff >= 0)
7880
0
                    return p1.first < p2.first;
7881
0
                return true;
7882
0
            }
7883
0
            else
7884
0
            {
7885
0
                if (p2x_diff >= 0)
7886
0
                    return false;
7887
0
                return p1.first < p2.first;
7888
0
            }
7889
0
        }
7890
7891
0
        if (p2x_diff == 0.0 && p1x_diff == 0.0)
7892
0
            return p1.second < p2.second;
7893
7894
0
        double tan_p1;
7895
0
        if (p1x_diff == 0.0)
7896
0
            tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
7897
0
        else
7898
0
            tan_p1 = p1y_diff / p1x_diff;
7899
7900
0
        double tan_p2;
7901
0
        if (p2x_diff == 0.0)
7902
0
            tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
7903
0
        else
7904
0
            tan_p2 = p2y_diff / p2x_diff;
7905
7906
0
        if (tan_p1 >= 0)
7907
0
        {
7908
0
            if (tan_p2 >= 0)
7909
0
                return tan_p1 < tan_p2;
7910
0
            else
7911
0
                return true;
7912
0
        }
7913
0
        else
7914
0
        {
7915
0
            if (tan_p2 >= 0)
7916
0
                return false;
7917
0
            else
7918
0
                return tan_p1 < tan_p2;
7919
0
        }
7920
0
    };
7921
7922
    // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
7923
    // hull
7924
0
    std::sort(intersection.begin(), intersection.end(), sortFunc);
7925
7926
    // Remove duplicated points
7927
0
    size_t j = 1;
7928
0
    for (size_t i = 1; i < intersection.size(); ++i)
7929
0
    {
7930
0
        if (intersection[i] != intersection[i - 1])
7931
0
        {
7932
0
            if (j < i)
7933
0
                intersection[j] = intersection[i];
7934
0
            ++j;
7935
0
        }
7936
0
    }
7937
0
    intersection.resize(j);
7938
0
}
7939
7940
/************************************************************************/
7941
/*                            getArea()                                 */
7942
/************************************************************************/
7943
7944
// poly may or may not be closed.
7945
static double getArea(const XYPoly &poly)
7946
0
{
7947
    // CPLAssert(poly.size() >= 2);
7948
0
    const size_t nPointCount = poly.size();
7949
0
    double dfAreaSum =
7950
0
        poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
7951
7952
0
    for (size_t i = 1; i < nPointCount - 1; i++)
7953
0
    {
7954
0
        dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
7955
0
    }
7956
7957
0
    dfAreaSum += poly[nPointCount - 1].first *
7958
0
                 (poly[0].second - poly[nPointCount - 2].second);
7959
7960
0
    return 0.5 * std::fabs(dfAreaSum);
7961
0
}
7962
7963
/************************************************************************/
7964
/*                           GWKSumPreserving()                         */
7965
/************************************************************************/
7966
7967
static void GWKSumPreservingThread(void *pData);
7968
7969
static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
7970
0
{
7971
0
    return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
7972
0
}
7973
7974
static void GWKSumPreservingThread(void *pData)
7975
0
{
7976
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7977
0
    GDALWarpKernel *poWK = psJob->poWK;
7978
0
    const int iYMin = psJob->iYMin;
7979
0
    const int iYMax = psJob->iYMax;
7980
0
    const bool bIsAffineNoRotation =
7981
0
        GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
7982
0
                                        poWK->pTransformerArg) &&
7983
        // for debug/testing purposes
7984
0
        CPLTestBool(
7985
0
            CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
7986
7987
0
    const int nDstXSize = poWK->nDstXSize;
7988
0
    const int nSrcXSize = poWK->nSrcXSize;
7989
0
    const int nSrcYSize = poWK->nSrcYSize;
7990
7991
0
    std::vector<double> adfX0(nSrcXSize + 1);
7992
0
    std::vector<double> adfY0(nSrcXSize + 1);
7993
0
    std::vector<double> adfZ0(nSrcXSize + 1);
7994
0
    std::vector<double> adfX1(nSrcXSize + 1);
7995
0
    std::vector<double> adfY1(nSrcXSize + 1);
7996
0
    std::vector<double> adfZ1(nSrcXSize + 1);
7997
0
    std::vector<int> abSuccess0(nSrcXSize + 1);
7998
0
    std::vector<int> abSuccess1(nSrcXSize + 1);
7999
8000
0
    CPLRectObj sGlobalBounds;
8001
0
    sGlobalBounds.minx = -2 * poWK->dfXScale;
8002
0
    sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8003
0
    sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8004
0
    sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8005
0
    CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8006
8007
0
    struct SourcePixel
8008
0
    {
8009
0
        int iSrcX;
8010
0
        int iSrcY;
8011
8012
        // Coordinates of source pixel in target pixel coordinates
8013
0
        double dfDstX0;
8014
0
        double dfDstY0;
8015
0
        double dfDstX1;
8016
0
        double dfDstY1;
8017
0
        double dfDstX2;
8018
0
        double dfDstY2;
8019
0
        double dfDstX3;
8020
0
        double dfDstY3;
8021
8022
        // Source pixel total area (might be larger than the one described
8023
        // by above coordinates, if the pixel was crossing the antimeridian
8024
        // and split)
8025
0
        double dfArea;
8026
0
    };
8027
8028
0
    std::vector<SourcePixel> sourcePixels;
8029
8030
0
    XYPoly discontinuityLeft(5);
8031
0
    XYPoly discontinuityRight(5);
8032
8033
    /* ==================================================================== */
8034
    /*      First pass: transform the 4 corners of each potential           */
8035
    /*      contributing source pixel to target pixel coordinates.          */
8036
    /* ==================================================================== */
8037
8038
    // Special case for top line
8039
0
    {
8040
0
        int iY = 0;
8041
0
        for (int iX = 0; iX <= nSrcXSize; ++iX)
8042
0
        {
8043
0
            adfX1[iX] = iX + poWK->nSrcXOff;
8044
0
            adfY1[iX] = iY + poWK->nSrcYOff;
8045
0
            adfZ1[iX] = 0;
8046
0
        }
8047
8048
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8049
0
                             adfX1.data(), adfY1.data(), adfZ1.data(),
8050
0
                             abSuccess1.data());
8051
8052
0
        for (int iX = 0; iX <= nSrcXSize; ++iX)
8053
0
        {
8054
0
            if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8055
0
                abSuccess1[iX] = FALSE;
8056
0
            else
8057
0
            {
8058
0
                adfX1[iX] -= poWK->nDstXOff;
8059
0
                adfY1[iX] -= poWK->nDstYOff;
8060
0
            }
8061
0
        }
8062
0
    }
8063
8064
0
    const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8065
0
    {
8066
0
        return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8067
0
                       dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8068
0
                   ? 1
8069
0
                   : -1;
8070
0
    };
8071
8072
0
    const auto FindDiscontinuity =
8073
0
        [poWK, psJob, getInsideXSign](
8074
0
            double dfXLeft, double dfXRight, double dfY,
8075
0
            int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8076
0
            double &dfXMidReprojectedRight, double &dfYMidReprojected)
8077
0
    {
8078
0
        for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8079
0
        {
8080
0
            double dfXMid = (dfXLeft + dfXRight) / 2;
8081
0
            double dfXMidReprojected = dfXMid;
8082
0
            dfYMidReprojected = dfY;
8083
0
            double dfZ = 0;
8084
0
            int nSuccess = 0;
8085
0
            poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8086
0
                                 &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8087
0
                                 &nSuccess);
8088
0
            if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8089
0
            {
8090
0
                dfXRight = dfXMid;
8091
0
                dfXMidReprojectedRight = dfXMidReprojected;
8092
0
            }
8093
0
            else
8094
0
            {
8095
0
                dfXLeft = dfXMid;
8096
0
                dfXMidReprojectedLeft = dfXMidReprojected;
8097
0
            }
8098
0
        }
8099
0
    };
8100
8101
0
    for (int iY = 0; iY < nSrcYSize; ++iY)
8102
0
    {
8103
0
        std::swap(adfX0, adfX1);
8104
0
        std::swap(adfY0, adfY1);
8105
0
        std::swap(adfZ0, adfZ1);
8106
0
        std::swap(abSuccess0, abSuccess1);
8107
8108
0
        for (int iX = 0; iX <= nSrcXSize; ++iX)
8109
0
        {
8110
0
            adfX1[iX] = iX + poWK->nSrcXOff;
8111
0
            adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8112
0
            adfZ1[iX] = 0;
8113
0
        }
8114
8115
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8116
0
                             adfX1.data(), adfY1.data(), adfZ1.data(),
8117
0
                             abSuccess1.data());
8118
8119
0
        for (int iX = 0; iX <= nSrcXSize; ++iX)
8120
0
        {
8121
0
            if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8122
0
                abSuccess1[iX] = FALSE;
8123
0
            else
8124
0
            {
8125
0
                adfX1[iX] -= poWK->nDstXOff;
8126
0
                adfY1[iX] -= poWK->nDstYOff;
8127
0
            }
8128
0
        }
8129
8130
0
        for (int iX = 0; iX < nSrcXSize; ++iX)
8131
0
        {
8132
0
            if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8133
0
                abSuccess1[iX + 1])
8134
0
            {
8135
                /* --------------------------------------------------------------------
8136
                 */
8137
                /*      Do not try to apply transparent source pixels to the
8138
                 * destination.*/
8139
                /* --------------------------------------------------------------------
8140
                 */
8141
0
                const auto iSrcOffset =
8142
0
                    iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8143
0
                if (poWK->panUnifiedSrcValid != nullptr &&
8144
0
                    !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8145
0
                {
8146
0
                    continue;
8147
0
                }
8148
8149
0
                if (poWK->pafUnifiedSrcDensity != nullptr)
8150
0
                {
8151
0
                    if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8152
0
                        SRC_DENSITY_THRESHOLD)
8153
0
                        continue;
8154
0
                }
8155
8156
0
                SourcePixel sp;
8157
0
                sp.dfArea = 0;
8158
0
                sp.dfDstX0 = adfX0[iX];
8159
0
                sp.dfDstY0 = adfY0[iX];
8160
0
                sp.dfDstX1 = adfX0[iX + 1];
8161
0
                sp.dfDstY1 = adfY0[iX + 1];
8162
0
                sp.dfDstX2 = adfX1[iX + 1];
8163
0
                sp.dfDstY2 = adfY1[iX + 1];
8164
0
                sp.dfDstX3 = adfX1[iX];
8165
0
                sp.dfDstY3 = adfY1[iX];
8166
8167
                // Detect pixel that likely cross the anti-meridian and
8168
                // introduce a discontinuity when reprojected.
8169
8170
0
                if (getInsideXSign(adfX0[iX]) !=
8171
0
                        getInsideXSign(adfX0[iX + 1]) &&
8172
0
                    getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8173
0
                    getInsideXSign(adfX0[iX + 1]) ==
8174
0
                        getInsideXSign(adfX1[iX + 1]) &&
8175
0
                    (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8176
0
                        0)
8177
0
                {
8178
0
                    double dfXMidReprojectedLeftTop = 0;
8179
0
                    double dfXMidReprojectedRightTop = 0;
8180
0
                    double dfYMidReprojectedTop = 0;
8181
0
                    FindDiscontinuity(
8182
0
                        iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8183
0
                        iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8184
0
                        dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8185
0
                        dfYMidReprojectedTop);
8186
0
                    double dfXMidReprojectedLeftBottom = 0;
8187
0
                    double dfXMidReprojectedRightBottom = 0;
8188
0
                    double dfYMidReprojectedBottom = 0;
8189
0
                    FindDiscontinuity(
8190
0
                        iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8191
0
                        iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8192
0
                        dfXMidReprojectedLeftBottom,
8193
0
                        dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8194
8195
0
                    discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8196
0
                    discontinuityLeft[1] =
8197
0
                        XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8198
0
                    discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8199
0
                                                  dfYMidReprojectedBottom);
8200
0
                    discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8201
0
                    discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8202
8203
0
                    discontinuityRight[0] =
8204
0
                        XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8205
0
                    discontinuityRight[1] =
8206
0
                        XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8207
0
                    discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8208
0
                                                   dfYMidReprojectedBottom);
8209
0
                    discontinuityRight[3] =
8210
0
                        XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8211
0
                    discontinuityRight[4] =
8212
0
                        XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8213
8214
0
                    sp.dfArea = getArea(discontinuityLeft) +
8215
0
                                getArea(discontinuityRight);
8216
0
                    if (getInsideXSign(adfX0[iX]) >= 1)
8217
0
                    {
8218
0
                        sp.dfDstX1 = dfXMidReprojectedLeftTop;
8219
0
                        sp.dfDstY1 = dfYMidReprojectedTop;
8220
0
                        sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8221
0
                        sp.dfDstY2 = dfYMidReprojectedBottom;
8222
0
                    }
8223
0
                    else
8224
0
                    {
8225
0
                        sp.dfDstX0 = dfXMidReprojectedRightTop;
8226
0
                        sp.dfDstY0 = dfYMidReprojectedTop;
8227
0
                        sp.dfDstX3 = dfXMidReprojectedRightBottom;
8228
0
                        sp.dfDstY3 = dfYMidReprojectedBottom;
8229
0
                    }
8230
0
                }
8231
8232
                // Bounding box of source pixel (expressed in target pixel
8233
                // coordinates)
8234
0
                CPLRectObj sRect;
8235
0
                sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8236
0
                                      std::min(sp.dfDstX2, sp.dfDstX3));
8237
0
                sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8238
0
                                      std::min(sp.dfDstY2, sp.dfDstY3));
8239
0
                sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8240
0
                                      std::max(sp.dfDstX2, sp.dfDstX3));
8241
0
                sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8242
0
                                      std::max(sp.dfDstY2, sp.dfDstY3));
8243
0
                if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8244
0
                      sRect.miny < iYMax && sRect.maxy > iYMin))
8245
0
                {
8246
0
                    continue;
8247
0
                }
8248
8249
0
                sp.iSrcX = iX;
8250
0
                sp.iSrcY = iY;
8251
8252
0
                if (!bIsAffineNoRotation)
8253
0
                {
8254
                    // Check polygon validity (no self-crossing)
8255
0
                    XYPair xy;
8256
0
                    if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8257
0
                                        XYPair(sp.dfDstX1, sp.dfDstY1),
8258
0
                                        XYPair(sp.dfDstX2, sp.dfDstY2),
8259
0
                                        XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8260
0
                        getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8261
0
                                        XYPair(sp.dfDstX2, sp.dfDstY2),
8262
0
                                        XYPair(sp.dfDstX0, sp.dfDstY0),
8263
0
                                        XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8264
0
                    {
8265
0
                        continue;
8266
0
                    }
8267
0
                }
8268
8269
0
                CPLQuadTreeInsertWithBounds(
8270
0
                    hQuadTree,
8271
0
                    reinterpret_cast<void *>(
8272
0
                        static_cast<uintptr_t>(sourcePixels.size())),
8273
0
                    &sRect);
8274
8275
0
                sourcePixels.push_back(sp);
8276
0
            }
8277
0
        }
8278
0
    }
8279
8280
0
    std::vector<double> adfRealValue(poWK->nBands);
8281
0
    std::vector<double> adfImagValue(poWK->nBands);
8282
0
    std::vector<double> adfBandDensity(poWK->nBands);
8283
0
    std::vector<double> adfWeight(poWK->nBands);
8284
8285
#ifdef CHECK_SUM_WITH_GEOS
8286
    auto hGEOSContext = OGRGeometry::createGEOSContext();
8287
    auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8288
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8289
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8290
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8291
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8292
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8293
    auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8294
    auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8295
8296
    auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8297
    auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8298
    auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8299
#endif
8300
8301
0
    const XYPoly xy1{
8302
0
        {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8303
0
    XYPoly xy2(5);
8304
0
    XYPoly xy2_triangle(4);
8305
0
    XYPoly intersection;
8306
8307
    /* ==================================================================== */
8308
    /*      Loop over output lines.                                         */
8309
    /* ==================================================================== */
8310
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
8311
0
    {
8312
0
        CPLRectObj sRect;
8313
0
        sRect.miny = iDstY;
8314
0
        sRect.maxy = iDstY + 1;
8315
8316
        /* ====================================================================
8317
         */
8318
        /*      Loop over pixels in output scanline. */
8319
        /* ====================================================================
8320
         */
8321
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
8322
0
        {
8323
0
            sRect.minx = iDstX;
8324
0
            sRect.maxx = iDstX + 1;
8325
0
            int nSourcePixels = 0;
8326
0
            void **pahSourcePixel =
8327
0
                CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
8328
0
            if (nSourcePixels == 0)
8329
0
            {
8330
0
                CPLFree(pahSourcePixel);
8331
0
                continue;
8332
0
            }
8333
8334
0
            std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
8335
0
            std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
8336
0
            std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
8337
0
            std::fill(adfWeight.begin(), adfWeight.end(), 0);
8338
0
            double dfDensity = 0;
8339
0
            double dfTotalWeight = 0;
8340
8341
            /* ====================================================================
8342
             */
8343
            /*          Iterate over each contributing source pixel to add its
8344
             */
8345
            /*          value weighed by the ratio of the area of its
8346
             * intersection  */
8347
            /*          with the target pixel divided by the area of the source
8348
             */
8349
            /*          pixel. */
8350
            /* ====================================================================
8351
             */
8352
0
            for (int i = 0; i < nSourcePixels; ++i)
8353
0
            {
8354
0
                const int iSourcePixel = static_cast<int>(
8355
0
                    reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
8356
0
                auto &sp = sourcePixels[iSourcePixel];
8357
8358
0
                double dfWeight = 0.0;
8359
0
                if (bIsAffineNoRotation)
8360
0
                {
8361
                    // Optimization since the source pixel is a rectangle in
8362
                    // target pixel coordinates
8363
0
                    double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
8364
0
                    double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
8365
0
                    double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
8366
0
                    double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
8367
0
                    double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
8368
0
                    double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
8369
0
                    double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
8370
0
                    double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
8371
0
                    dfWeight =
8372
0
                        ((dfIntersMaxX - dfIntersMinX) *
8373
0
                         (dfIntersMaxY - dfIntersMinY)) /
8374
0
                        ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
8375
0
                }
8376
0
                else
8377
0
                {
8378
                    // Compute the polygon of the source pixel in target pixel
8379
                    // coordinates, and shifted to the target pixel (unit square
8380
                    // coordinates)
8381
8382
0
                    xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8383
0
                    xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
8384
0
                    xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
8385
0
                    xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
8386
0
                    xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8387
8388
0
                    if (isConvex(xy2))
8389
0
                    {
8390
0
                        getConvexPolyIntersection(xy1, xy2, intersection);
8391
0
                        if (intersection.size() >= 3)
8392
0
                        {
8393
0
                            dfWeight = getArea(intersection);
8394
0
                        }
8395
0
                    }
8396
0
                    else
8397
0
                    {
8398
                        // Split xy2 into 2 triangles.
8399
0
                        xy2_triangle[0] = xy2[0];
8400
0
                        xy2_triangle[1] = xy2[1];
8401
0
                        xy2_triangle[2] = xy2[2];
8402
0
                        xy2_triangle[3] = xy2[0];
8403
0
                        getConvexPolyIntersection(xy1, xy2_triangle,
8404
0
                                                  intersection);
8405
0
                        if (intersection.size() >= 3)
8406
0
                        {
8407
0
                            dfWeight = getArea(intersection);
8408
0
                        }
8409
8410
0
                        xy2_triangle[1] = xy2[2];
8411
0
                        xy2_triangle[2] = xy2[3];
8412
0
                        getConvexPolyIntersection(xy1, xy2_triangle,
8413
0
                                                  intersection);
8414
0
                        if (intersection.size() >= 3)
8415
0
                        {
8416
0
                            dfWeight += getArea(intersection);
8417
0
                        }
8418
0
                    }
8419
0
                    if (dfWeight > 0.0)
8420
0
                    {
8421
0
                        if (sp.dfArea == 0)
8422
0
                            sp.dfArea = getArea(xy2);
8423
0
                        dfWeight /= sp.dfArea;
8424
0
                    }
8425
8426
#ifdef CHECK_SUM_WITH_GEOS
8427
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
8428
                                         sp.dfDstX0 - iDstX,
8429
                                         sp.dfDstY0 - iDstY);
8430
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
8431
                                         sp.dfDstX1 - iDstX,
8432
                                         sp.dfDstY1 - iDstY);
8433
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
8434
                                         sp.dfDstX2 - iDstX,
8435
                                         sp.dfDstY2 - iDstY);
8436
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
8437
                                         sp.dfDstX3 - iDstX,
8438
                                         sp.dfDstY3 - iDstY);
8439
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
8440
                                         sp.dfDstX0 - iDstX,
8441
                                         sp.dfDstY0 - iDstY);
8442
8443
                    double dfWeightGEOS = 0.0;
8444
                    auto hIntersection =
8445
                        GEOSIntersection_r(hGEOSContext, hP1, hP2);
8446
                    if (hIntersection)
8447
                    {
8448
                        double dfIntersArea = 0.0;
8449
                        if (GEOSArea_r(hGEOSContext, hIntersection,
8450
                                       &dfIntersArea) &&
8451
                            dfIntersArea > 0)
8452
                        {
8453
                            double dfSourceArea = 0.0;
8454
                            if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
8455
                            {
8456
                                dfWeightGEOS = dfIntersArea / dfSourceArea;
8457
                            }
8458
                        }
8459
                        GEOSGeom_destroy_r(hGEOSContext, hIntersection);
8460
                    }
8461
                    if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
8462
                    {
8463
                        /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
8464
                                        dfWeight, dfWeightGEOS);
8465
                        printf("xy2: ");  // ok
8466
                        for (const auto &xy : xy2)
8467
                            printf("[%f, %f], ", xy.first, xy.second);  // ok
8468
                        printf("\n");                                   // ok
8469
                        printf("intersection: ");                       // ok
8470
                        for (const auto &xy : intersection)
8471
                            printf("[%f, %f], ", xy.first, xy.second);  // ok
8472
                        printf("\n");                                   // ok
8473
                    }
8474
#endif
8475
0
                }
8476
0
                if (dfWeight > 0.0)
8477
0
                {
8478
0
                    const GPtrDiff_t iSrcOffset =
8479
0
                        sp.iSrcX +
8480
0
                        static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
8481
0
                    dfTotalWeight += dfWeight;
8482
8483
0
                    if (poWK->pafUnifiedSrcDensity != nullptr)
8484
0
                    {
8485
0
                        dfDensity +=
8486
0
                            dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
8487
0
                    }
8488
0
                    else
8489
0
                    {
8490
0
                        dfDensity += dfWeight;
8491
0
                    }
8492
8493
0
                    for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8494
0
                    {
8495
                        // Returns pixel value if it is not no data.
8496
0
                        double dfBandDensity;
8497
0
                        double dfRealValue;
8498
0
                        double dfImagValue;
8499
0
                        if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
8500
0
                                               &dfBandDensity, &dfRealValue,
8501
0
                                               &dfImagValue) &&
8502
0
                              dfBandDensity > BAND_DENSITY_THRESHOLD))
8503
0
                        {
8504
0
                            continue;
8505
0
                        }
8506
8507
0
                        adfRealValue[iBand] += dfRealValue * dfWeight;
8508
0
                        adfImagValue[iBand] += dfImagValue * dfWeight;
8509
0
                        adfBandDensity[iBand] += dfBandDensity * dfWeight;
8510
0
                        adfWeight[iBand] += dfWeight;
8511
0
                    }
8512
0
                }
8513
0
            }
8514
8515
0
            CPLFree(pahSourcePixel);
8516
8517
            /* --------------------------------------------------------------------
8518
             */
8519
            /*          Update destination pixel value. */
8520
            /* --------------------------------------------------------------------
8521
             */
8522
0
            bool bHasFoundDensity = false;
8523
0
            const GPtrDiff_t iDstOffset =
8524
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
8525
0
            for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8526
0
            {
8527
0
                if (adfWeight[iBand] > 0)
8528
0
                {
8529
0
                    const double dfBandDensity =
8530
0
                        adfBandDensity[iBand] / adfWeight[iBand];
8531
0
                    if (dfBandDensity > BAND_DENSITY_THRESHOLD)
8532
0
                    {
8533
0
                        bHasFoundDensity = true;
8534
0
                        GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8535
0
                                         adfRealValue[iBand],
8536
0
                                         adfImagValue[iBand]);
8537
0
                    }
8538
0
                }
8539
0
            }
8540
8541
0
            if (!bHasFoundDensity)
8542
0
                continue;
8543
8544
            /* --------------------------------------------------------------------
8545
             */
8546
            /*          Update destination density/validity masks. */
8547
            /* --------------------------------------------------------------------
8548
             */
8549
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
8550
8551
0
            if (poWK->panDstValid != nullptr)
8552
0
            {
8553
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
8554
0
            }
8555
0
        }
8556
8557
        /* --------------------------------------------------------------------
8558
         */
8559
        /*      Report progress to the user, and optionally cancel out. */
8560
        /* --------------------------------------------------------------------
8561
         */
8562
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8563
0
            break;
8564
0
    }
8565
8566
#ifdef CHECK_SUM_WITH_GEOS
8567
    GEOSGeom_destroy_r(hGEOSContext, hP1);
8568
    GEOSGeom_destroy_r(hGEOSContext, hP2);
8569
    OGRGeometry::freeGEOSContext(hGEOSContext);
8570
#endif
8571
0
    CPLQuadTreeDestroy(hQuadTree);
8572
0
}