Coverage Report

Created: 2025-11-16 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/alg/gdalwarpkernel.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  High Performance Image Reprojector
4
 * Purpose:  Implementation of the GDALWarpKernel class.  Implements the actual
5
 *           image warping for a "chunk" of input and output imagery already
6
 *           loaded into memory.
7
 * Author:   Frank Warmerdam, warmerdam@pobox.com
8
 *
9
 ******************************************************************************
10
 * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11
 * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12
 *
13
 * SPDX-License-Identifier: MIT
14
 ****************************************************************************/
15
16
#include "cpl_port.h"
17
#include "gdalwarper.h"
18
19
#include <cfloat>
20
#include <cmath>
21
#include <cstddef>
22
#include <cstdlib>
23
#include <cstring>
24
25
#include <algorithm>
26
#include <limits>
27
#include <mutex>
28
#include <new>
29
#include <utility>
30
#include <vector>
31
32
#include "cpl_atomic_ops.h"
33
#include "cpl_conv.h"
34
#include "cpl_error.h"
35
#include "cpl_float.h"
36
#include "cpl_mask.h"
37
#include "cpl_multiproc.h"
38
#include "cpl_progress.h"
39
#include "cpl_string.h"
40
#include "cpl_vsi.h"
41
#include "cpl_worker_thread_pool.h"
42
#include "cpl_quad_tree.h"
43
#include "gdal.h"
44
#include "gdal_alg.h"
45
#include "gdal_alg_priv.h"
46
#include "gdal_thread_pool.h"
47
#include "gdalresamplingkernels.h"
48
49
// #define CHECK_SUM_WITH_GEOS
50
#ifdef CHECK_SUM_WITH_GEOS
51
#include "ogr_geometry.h"
52
#include "ogr_geos.h"
53
#endif
54
55
#ifdef USE_NEON_OPTIMIZATIONS
56
#include "include_sse2neon.h"
57
#define USE_SSE2
58
59
#include "gdalsse_priv.h"
60
61
// We restrict to 64bit processors because they are guaranteed to have SSE2.
62
// Could possibly be used too on 32bit, but we would need to check at runtime.
63
#elif defined(__x86_64) || defined(_M_X64)
64
#define USE_SSE2
65
66
#include "gdalsse_priv.h"
67
68
#if __SSE4_1__
69
#include <smmintrin.h>
70
#endif
71
72
#if __SSE3__
73
#include <pmmintrin.h>
74
#endif
75
76
#endif
77
78
constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
79
constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
80
constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
81
82
// #define INSTANTIATE_FLOAT64_SSE2_IMPL
83
84
/* Filter radius associated with each resampling algorithm.
 * The table is indexed directly by the GDALResampleAlg value (see
 * GWKGetFilterRadius()), so the entries must remain in enumeration order. */
static const int anGWKFilterRadius[] = {
    /* Nearest neighbour                 */ 0,
    /* Bilinear                          */ 1,
    /* Cubic Convolution (Catmull-Rom)   */ 2,
    /* Cubic B-Spline                    */ 2,
    /* Lanczos windowed sinc             */ 3,
    /* Average                           */ 0,
    /* Mode                              */ 0,
    /* Reserved GRA_Gauss=7              */ 0,
    /* Max                               */ 0,
    /* Min                               */ 0,
    /* Med                               */ 0,
    /* Q1                                */ 0,
    /* Q3                                */ 0,
    /* Sum                               */ 0,
    /* RMS                               */ 0,
};
101
102
static double GWKBilinear(double dfX);
103
static double GWKCubic(double dfX);
104
static double GWKBSpline(double dfX);
105
static double GWKLanczosSinc(double dfX);
106
107
/* One-argument filter function for each resampling algorithm, indexed by the
 * GDALResampleAlg value (see GWKGetFilterFunc()). Algorithms without such a
 * function have a null entry. */
static const FilterFuncType apfGWKFilter[] = {
    /* Nearest neighbour                 */ nullptr,
    /* Bilinear                          */ GWKBilinear,
    /* Cubic Convolution (Catmull-Rom)   */ GWKCubic,
    /* Cubic B-Spline                    */ GWKBSpline,
    /* Lanczos windowed sinc             */ GWKLanczosSinc,
    /* Average                           */ nullptr,
    /* Mode                              */ nullptr,
    /* Reserved GRA_Gauss=7              */ nullptr,
    /* Max                               */ nullptr,
    /* Min                               */ nullptr,
    /* Med                               */ nullptr,
    /* Q1                                */ nullptr,
    /* Q3                                */ nullptr,
    /* Sum                               */ nullptr,
    /* RMS                               */ nullptr,
};
124
125
// TODO(schwehr): Can we make these functions have a const * const arg?
126
static double GWKBilinear4Values(double *padfVals);
127
static double GWKCubic4Values(double *padfVals);
128
static double GWKBSpline4Values(double *padfVals);
129
static double GWKLanczosSinc4Values(double *padfVals);
130
131
/* Four-values-at-once filter function for each resampling algorithm, indexed
 * by the GDALResampleAlg value (see GWKGetFilterFunc4Values()). Algorithms
 * without such a function have a null entry. */
static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
    /* Nearest neighbour                 */ nullptr,
    /* Bilinear                          */ GWKBilinear4Values,
    /* Cubic Convolution (Catmull-Rom)   */ GWKCubic4Values,
    /* Cubic B-Spline                    */ GWKBSpline4Values,
    /* Lanczos windowed sinc             */ GWKLanczosSinc4Values,
    /* Average                           */ nullptr,
    /* Mode                              */ nullptr,
    /* Reserved GRA_Gauss=7              */ nullptr,
    /* Max                               */ nullptr,
    /* Min                               */ nullptr,
    /* Med                               */ nullptr,
    /* Q1                                */ nullptr,
    /* Q3                                */ nullptr,
    /* Sum                               */ nullptr,
    /* RMS                               */ nullptr,
};
148
149
/** Return the filter radius registered for a resampling algorithm.
 *
 * @param eResampleAlg resampling algorithm; used directly as an index into
 *                     anGWKFilterRadius, so it must be a valid enumeration
 *                     value (no range check is performed).
 * @return the corresponding entry of anGWKFilterRadius.
 */
int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
{
    // Compile-time guard: the table must have one entry per enumeration value.
    static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
                  "Bad size of anGWKFilterRadius");

    const int nRadius = anGWKFilterRadius[eResampleAlg];
    return nRadius;
}
155
156
/** Return the one-argument filter function of a resampling algorithm.
 *
 * @param eResampleAlg resampling algorithm; used directly as an index into
 *                     apfGWKFilter (no range check is performed).
 * @return the corresponding entry of apfGWKFilter, which is null for
 *         algorithms that have no such function.
 */
FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
{
    // Compile-time guard: the table must have one entry per enumeration value.
    static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
                  "Bad size of apfGWKFilter");

    const FilterFuncType pfnFilter = apfGWKFilter[eResampleAlg];
    return pfnFilter;
}
162
163
/** Return the four-values filter function of a resampling algorithm.
 *
 * @param eResampleAlg resampling algorithm; used directly as an index into
 *                     apfGWKFilter4Values (no range check is performed).
 * @return the corresponding entry of apfGWKFilter4Values, which is null for
 *         algorithms that have no such function.
 */
FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
{
    // Compile-time guard: the table must have one entry per enumeration value.
    static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
                  "Bad size of apfGWKFilter4Values");

    const FilterFunc4ValuesType pfnFilter4 = apfGWKFilter4Values[eResampleAlg];
    return pfnFilter4;
}
169
170
static CPLErr GWKGeneralCase(GDALWarpKernel *);
171
static CPLErr GWKRealCase(GDALWarpKernel *poWK);
172
static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174
static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
175
static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
176
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
177
static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
178
#endif
179
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
180
static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
181
static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
183
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
184
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
185
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
186
#endif
187
static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
189
static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
190
static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
191
static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
192
static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
193
static CPLErr GWKAverageOrMode(GDALWarpKernel *);
194
static CPLErr GWKSumPreserving(GDALWarpKernel *);
195
static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
196
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
198
199
/************************************************************************/
200
/*                           GWKJobStruct                               */
201
/************************************************************************/
202
203
struct GWKJobStruct
204
{
205
    std::mutex &mutex;
206
    std::condition_variable &cv;
207
    int counterSingleThreaded = 0;
208
    int &counter;
209
    bool &stopFlag;
210
    GDALWarpKernel *poWK = nullptr;
211
    int iYMin = 0;
212
    int iYMax = 0;
213
    int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
214
    void *pTransformerArg = nullptr;
215
    // used by GWKRun() to assign the proper pTransformerArg
216
    void (*pfnFunc)(void *) = nullptr;
217
218
    GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
219
                 int &counter_, bool &stopFlag_)
220
0
        : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
221
0
    {
222
0
    }
223
};
224
225
struct GWKThreadData
226
{
227
    std::unique_ptr<CPLJobQueue> poJobQueue{};
228
    std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
229
    int nMaxThreads{0};
230
    int counter{0};
231
    bool stopFlag{false};
232
    std::mutex mutex{};
233
    std::condition_variable cv{};
234
    bool bTransformerArgInputAssignedToThread{false};
235
    void *pTransformerArgInput{
236
        nullptr};  // owned by calling layer. Not to be destroyed
237
    std::map<GIntBig, void *> mapThreadToTransformerArg{};
238
    int nTotalThreadCountForThisRun = 0;
239
    int nCurThreadCountForThisRun = 0;
240
};
241
242
/************************************************************************/
243
/*                        GWKProgressThread()                           */
244
/************************************************************************/
245
246
// Return TRUE if the computation must be interrupted.
247
/* Progress callback used by jobs running in worker threads.
 * Increments the shared counter and samples the stop flag under the shared
 * mutex, then signals the condition variable once the mutex is released. */
static int GWKProgressThread(GWKJobStruct *psJob)
{
    std::unique_lock<std::mutex> oLock(psJob->mutex);
    psJob->counter++;
    const bool bMustStop = psJob->stopFlag;
    oLock.unlock();

    // Notify without holding the mutex.
    psJob->cv.notify_one();

    return bMustStop;
}
259
260
/************************************************************************/
261
/*                      GWKProgressMonoThread()                         */
262
/************************************************************************/
263
264
// Return TRUE if the computation must be interrupted.
265
/* Progress callback used when the kernel runs in the calling thread.
 * Increments the job's private counter, forwards the resulting completion
 * ratio to the kernel's progress function and, if that function asks to
 * stop, emits an error and raises the stop flag. */
static int GWKProgressMonoThread(GWKJobStruct *psJob)
{
    GDALWarpKernel *poWK = psJob->poWK;

    ++psJob->counterSingleThreaded;
    const double dfRatio =
        psJob->counterSingleThreaded / static_cast<double>(psJob->iYMax);
    const double dfComplete =
        poWK->dfProgressBase + poWK->dfProgressScale * dfRatio;

    if (poWK->pfnProgress(dfComplete, "", poWK->pProgress))
        return FALSE;

    CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
    psJob->stopFlag = true;
    return TRUE;
}
280
281
/************************************************************************/
282
/*                       GWKGenericMonoThread()                         */
283
/************************************************************************/
284
285
/* Run a worker function in the calling thread over all destination rows.
 *
 * A temporary GWKThreadData provides the state a GWKJobStruct must reference.
 * Returns CE_Failure if the stop flag was raised during the run, CE_None
 * otherwise. */
static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
                                   void (*pfnFunc)(void *pUserData))
{
    GWKThreadData oSharedState;

    // NOTE: the mutex is not used.
    GWKJobStruct oJob(oSharedState.mutex, oSharedState.cv,
                      oSharedState.counter, oSharedState.stopFlag);
    oJob.poWK = poWK;
    oJob.iYMin = 0;
    oJob.iYMax = poWK->nDstYSize;
    oJob.pfnProgress = GWKProgressMonoThread;
    oJob.pTransformerArg = poWK->pTransformerArg;
    oJob.counterSingleThreaded = oSharedState.counter;

    pfnFunc(&oJob);

    oSharedState.counter = oJob.counterSingleThreaded;

    if (oSharedState.stopFlag)
        return CE_Failure;
    return CE_None;
}
303
304
/************************************************************************/
305
/*                          GWKThreadsCreate()                          */
306
/************************************************************************/
307
308
void *GWKThreadsCreate(char **papszWarpOptions,
309
                       GDALTransformerFunc /* pfnTransformer */,
310
                       void *pTransformerArg)
311
0
{
312
0
    const char *pszWarpThreads =
313
0
        CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
314
0
    if (pszWarpThreads == nullptr)
315
0
        pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
316
317
0
    int nThreads = 0;
318
0
    if (EQUAL(pszWarpThreads, "ALL_CPUS"))
319
0
        nThreads = CPLGetNumCPUs();
320
0
    else
321
0
        nThreads = atoi(pszWarpThreads);
322
0
    if (nThreads <= 1)
323
0
        nThreads = 0;
324
0
    if (nThreads > 128)
325
0
        nThreads = 128;
326
327
0
    GWKThreadData *psThreadData = new GWKThreadData();
328
0
    auto poThreadPool =
329
0
        nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
330
0
    if (nThreads && poThreadPool)
331
0
    {
332
0
        psThreadData->nMaxThreads = nThreads;
333
0
        psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
334
0
            nThreads,
335
0
            GWKJobStruct(psThreadData->mutex, psThreadData->cv,
336
0
                         psThreadData->counter, psThreadData->stopFlag)));
337
338
0
        psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
339
0
        psThreadData->pTransformerArgInput = pTransformerArg;
340
0
    }
341
342
0
    return psThreadData;
343
0
}
344
345
/************************************************************************/
346
/*                             GWKThreadsEnd()                          */
347
/************************************************************************/
348
349
void GWKThreadsEnd(void *psThreadDataIn)
350
0
{
351
0
    if (psThreadDataIn == nullptr)
352
0
        return;
353
354
0
    GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
355
0
    if (psThreadData->poJobQueue)
356
0
    {
357
        // cppcheck-suppress constVariableReference
358
0
        for (auto &pair : psThreadData->mapThreadToTransformerArg)
359
0
        {
360
0
            CPLAssert(pair.second != psThreadData->pTransformerArgInput);
361
0
            GDALDestroyTransformer(pair.second);
362
0
        }
363
0
        psThreadData->poJobQueue.reset();
364
0
    }
365
0
    delete psThreadData;
366
0
}
367
368
/************************************************************************/
369
/*                         ThreadFuncAdapter()                          */
370
/************************************************************************/
371
372
static void ThreadFuncAdapter(void *pData)
373
0
{
374
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
375
0
    GWKThreadData *psThreadData =
376
0
        static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
377
378
    // Look if we have already a per-thread transformer
379
0
    void *pTransformerArg = nullptr;
380
0
    const GIntBig nThreadId = CPLGetPID();
381
382
0
    {
383
0
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
384
0
        ++psThreadData->nCurThreadCountForThisRun;
385
386
0
        auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
387
0
        if (oIter != psThreadData->mapThreadToTransformerArg.end())
388
0
        {
389
0
            pTransformerArg = oIter->second;
390
0
        }
391
0
        else if (!psThreadData->bTransformerArgInputAssignedToThread &&
392
0
                 psThreadData->nCurThreadCountForThisRun ==
393
0
                     psThreadData->nTotalThreadCountForThisRun)
394
0
        {
395
            // If we are the last thread to be started, temporarily borrow the
396
            // original transformer
397
0
            psThreadData->bTransformerArgInputAssignedToThread = true;
398
0
            pTransformerArg = psThreadData->pTransformerArgInput;
399
0
            psThreadData->mapThreadToTransformerArg[nThreadId] =
400
0
                pTransformerArg;
401
0
        }
402
403
0
        if (pTransformerArg == nullptr)
404
0
        {
405
0
            CPLAssert(psThreadData->pTransformerArgInput != nullptr);
406
0
            CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
407
0
        }
408
0
    }
409
410
    // If no transformer assigned to current thread, instantiate one
411
0
    if (pTransformerArg == nullptr)
412
0
    {
413
        // This somehow assumes that GDALCloneTransformer() is thread-safe
414
        // which should normally be the case.
415
0
        pTransformerArg =
416
0
            GDALCloneTransformer(psThreadData->pTransformerArgInput);
417
418
        // Lock for the stop flag and the transformer map.
419
0
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
420
0
        if (!pTransformerArg)
421
0
        {
422
0
            psJob->stopFlag = true;
423
0
            return;
424
0
        }
425
0
        psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
426
0
    }
427
428
0
    psJob->pTransformerArg = pTransformerArg;
429
0
    psJob->pfnFunc(pData);
430
431
    // Give back original transformer, if borrowed.
432
0
    {
433
0
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
434
0
        if (psThreadData->bTransformerArgInputAssignedToThread &&
435
0
            pTransformerArg == psThreadData->pTransformerArgInput)
436
0
        {
437
0
            psThreadData->mapThreadToTransformerArg.erase(
438
0
                psThreadData->mapThreadToTransformerArg.find(nThreadId));
439
0
            psThreadData->bTransformerArgInputAssignedToThread = false;
440
0
        }
441
0
    }
442
0
}
443
444
/************************************************************************/
445
/*                                GWKRun()                              */
446
/************************************************************************/
447
448
/* Execute a worker function over all destination rows of a kernel.
 *
 * Without a job queue in the kernel's thread data, the worker runs in the
 * calling thread through GWKGenericMonoThread(). Otherwise the rows are
 * split into contiguous ranges, one job per range is submitted to the queue,
 * and the calling thread relays progress to poWK->pfnProgress until all rows
 * are reported, the stop flag is raised, or the progress function asks to
 * stop.
 *
 * poWK        kernel to execute.
 * pszFuncName name of the caller, used in the debug message only.
 * pfnFunc     worker function; it receives a GWKJobStruct *.
 *
 * Returns CE_Failure if the user interrupted the operation or the stop flag
 * is set once all jobs have completed, CE_None otherwise. */
static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
                     void (*pfnFunc)(void *pUserData))

{
    const int nDstYSize = poWK->nDstYSize;

    CPLDebug("GDAL",
             "GDALWarpKernel()::%s() "
             "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
             pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
             poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
             poWK->nDstYSize);

    if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
    {
        CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
        return CE_Failure;
    }

    GWKThreadData *psThreadData =
        static_cast<GWKThreadData *>(poWK->psThreadData);
    if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
    {
        return GWKGenericMonoThread(poWK, pfnFunc);
    }

    // Never use more threads than half the number of rows.
    int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
    // Config option mostly useful for tests to be able to test multithreading
    // with small rasters
    const int nWarpChunkSize =
        atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
    if (nWarpChunkSize > 0)
    {
        GIntBig nChunks =
            static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
        if (nThreads > nChunks)
            nThreads = static_cast<int>(nChunks);
    }
    if (nThreads <= 0)
        nThreads = 1;

    CPLDebug("WARP", "Using %d threads", nThreads);

    auto &jobs = *psThreadData->threadJobs;
    CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
    // Fill-in job structures.
    for (int i = 0; i < nThreads; ++i)
    {
        auto &job = jobs[i];
        job.poWK = poWK;
        // 64-bit intermediate to avoid overflowing i * nDstYSize.
        job.iYMin =
            static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
        job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
                                     nThreads);
        if (poWK->pfnProgress != GDALDummyProgress)
            job.pfnProgress = GWKProgressThread;
        job.pfnFunc = pfnFunc;
    }

    {
        // The jobs need this mutex before doing anything, so none of them
        // can make progress (or notify) before the first wait() below
        // releases it.
        std::unique_lock<std::mutex> lock(psThreadData->mutex);

        psThreadData->nTotalThreadCountForThisRun = nThreads;
        // coverity[missing_lock]
        psThreadData->nCurThreadCountForThisRun = 0;
        // The counter is compared with this run's row count below: restart
        // it so that rows counted by a previous run are not carried over.
        psThreadData->counter = 0;

        // Start jobs.
        for (int i = 0; i < nThreads; ++i)
        {
            auto &job = jobs[i];
            psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
                                                static_cast<void *>(&job));
        }

        /* ---------------------------------------------------------------- */
        /*      Report progress.                                            */
        /* ---------------------------------------------------------------- */
        if (poWK->pfnProgress != GDALDummyProgress)
        {
            // Also leave the loop when the stop flag is raised: once it is,
            // the counter is not guaranteed to ever reach nDstYSize.
            while (!psThreadData->stopFlag &&
                   psThreadData->counter < nDstYSize)
            {
                psThreadData->cv.wait(lock);
                if (!poWK->pfnProgress(poWK->dfProgressBase +
                                           poWK->dfProgressScale *
                                               (psThreadData->counter /
                                                static_cast<double>(nDstYSize)),
                                       "", poWK->pProgress))
                {
                    CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
                    psThreadData->stopFlag = true;
                    break;
                }
            }
        }
    }

    /* -------------------------------------------------------------------- */
    /*      Wait for all jobs to complete.                                  */
    /* -------------------------------------------------------------------- */
    psThreadData->poJobQueue->WaitCompletion();

    // Sample the stop flag only now: a job may raise it at any time until it
    // completes, in particular after the progress loop above has ended (or
    // was skipped because no progress function is installed).
    bool bStopFlag;
    {
        std::lock_guard<std::mutex> lock(psThreadData->mutex);
        bStopFlag = psThreadData->stopFlag;
    }

    return bStopFlag ? CE_Failure : CE_None;
}
556
557
/************************************************************************/
558
/* ==================================================================== */
559
/*                            GDALWarpKernel                            */
560
/* ==================================================================== */
561
/************************************************************************/
562
563
/**
564
 * \class GDALWarpKernel "gdalwarper.h"
565
 *
566
 * Low level image warping class.
567
 *
568
 * This class is responsible for low level image warping for one
569
 * "chunk" of imagery.  The class is essentially a structure with all
570
 * data members public - primarily so that new special-case functions
571
 * can be added without changing the class declaration.
572
 *
573
 * Applications are normally intended to interact with warping facilities
574
 * through the GDALWarpOperation class, though the GDALWarpKernel can in
575
 * theory be used directly if great care is taken in setting up the
576
 * control data.
577
 *
578
 * <h3>Design Issues</h3>
579
 *
580
 * The intention is that PerformWarp() would analyze the setup in terms
581
 * of the datatype, resampling type, and validity/density mask usage and
582
 * pick one of many specific implementations of the warping algorithm over
583
 * a continuum of optimization vs. generality.  At one end there will be a
584
 * reference general purpose implementation of the algorithm that supports
585
 * any data type (working internally in double precision complex), all three
586
 * resampling types, and any or all of the validity/density masks.  At the
587
 * other end would be highly optimized algorithms for common cases like
588
 * nearest neighbour resampling on GDT_Byte data with no masks.
589
 *
590
 * The full set of optimized versions have not been decided but we should
591
 * expect to have at least:
592
 *  - One for each resampling algorithm for 8bit data with no masks.
593
 *  - One for each resampling algorithm for float data with no masks.
594
 *  - One for each resampling algorithm for float data with any/all masks
595
 *    (essentially the generic case for just float data).
596
 *  - One for each resampling algorithm for 8bit data with support for
597
 *    input validity masks (per band or per pixel).  This handles the common
598
 *    case of nodata masking.
599
 *  - One for each resampling algorithm for float data with support for
600
 *    input validity masks (per band or per pixel).  This handles the common
601
 *    case of nodata masking.
602
 *
603
 * Some of the specializations would operate on all bands in one pass
604
 * (especially the ones without masking would do this), while others might
605
 * process each band individually to reduce code complexity.
606
 *
607
 * <h3>Masking Semantics</h3>
608
 *
609
 * A detailed explanation of the semantics of the validity and density masks,
610
 * and their effects on resampling kernels is needed here.
611
 */
612
613
/************************************************************************/
614
/*                     GDALWarpKernel Data Members                      */
615
/************************************************************************/
616
617
/**
618
 * \var GDALResampleAlg GDALWarpKernel::eResample;
619
 *
620
 * Resampling algorithm.
621
 *
622
 * The resampling algorithm to use.  One of GRA_NearestNeighbour, GRA_Bilinear,
623
 * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
624
 * GRA_Mode or GRA_Sum.
625
 *
626
 * This field is required. GRA_NearestNeighbour may be used as a default
627
 * value.
628
 */
629
630
/**
631
 * \var GDALDataType GDALWarpKernel::eWorkingDataType;
632
 *
633
 * Working pixel data type.
634
 *
635
 * The datatype of pixels in the source image (papabySrcImage) and
636
 * destination image (papabyDstImage) buffers.  Note that operations on
637
 * some data types (such as GDT_Byte) may be much better optimized than other
638
 * less common cases.
639
 *
640
 * This field is required.  It may not be GDT_Unknown.
641
 */
642
643
/**
644
 * \var int GDALWarpKernel::nBands;
645
 *
646
 * Number of bands.
647
 *
648
 * The number of bands (layers) of imagery being warped.  Determines the
649
 * number of entries in the papabySrcImage, papanBandSrcValid,
650
 * and papabyDstImage arrays.
651
 *
652
 * This field is required.
653
 */
654
655
/**
656
 * \var int GDALWarpKernel::nSrcXSize;
657
 *
658
 * Source image width in pixels.
659
 *
660
 * This field is required.
661
 */
662
663
/**
664
 * \var int GDALWarpKernel::nSrcYSize;
665
 *
666
 * Source image height in pixels.
667
 *
668
 * This field is required.
669
 */
670
671
/**
672
 * \var double GDALWarpKernel::dfSrcXExtraSize;
673
 *
674
 * Number of pixels included in nSrcXSize that are present on the edges of
675
 * the area of interest to take into account the width of the kernel.
676
 *
677
 * This field is required.
678
 */
679
680
/**
681
 * \var double GDALWarpKernel::dfSrcYExtraSize;
682
 *
683
 * Number of pixels included in nSrcYSize that are present on the edges of
684
 * the area of interest to take into account the height of the kernel.
685
 *
686
 * This field is required.
687
 */
688
689
/**
690
 * \var GByte **GDALWarpKernel::papabySrcImage;
691
 *
692
 * Array of source image band data.
693
 *
694
 * This is an array (of size GDALWarpKernel::nBands) of pointers
695
 * to image data.  Each individual band of image data is organized as a single
696
 * block of image data in left to right, then bottom to top order.  The actual
697
 * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
698
 *
699
 * To access the pixel value for the (x=3, y=4) pixel (zero based) of
700
 * the second band with eWorkingDataType set to GDT_Float32 use code like
701
 * this:
702
 *
703
 * \code
704
 *   float dfPixelValue;
705
 *   int   nBand = 2-1;  // Band indexes are zero based.
706
 *   int   nPixel = 3; // Zero based.
707
 *   int   nLine = 4;  // Zero based.
708
 *
709
 *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
710
 *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
711
 *   assert( nBand >= 0 && nBand < poKern->nBands );
712
 *   dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
713
 *                                  [nPixel + nLine * poKern->nSrcXSize];
714
 * \endcode
715
 *
716
 * This field is required.
717
 */
718
719
/**
720
 * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
721
 *
722
 * Per band validity mask for source pixels.
723
 *
724
 * Array of pixel validity mask layers for each source band.   Each of
725
 * the mask layers is the same size (in pixels) as the source image with
726
 * one bit per pixel.  Note that it is legal (and common) for this to be
727
 * NULL indicating that none of the pixels are invalidated, or for some
728
 * band validity masks to be NULL in which case all pixels of the band are
729
 * valid.  The following code can be used to test the validity of a particular
730
 * pixel.
731
 *
732
 * \code
733
 *   int   bIsValid = TRUE;
734
 *   int   nBand = 2-1;  // Band indexes are zero based.
735
 *   int   nPixel = 3; // Zero based.
736
 *   int   nLine = 4;  // Zero based.
737
 *
738
 *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
739
 *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
740
 *   assert( nBand >= 0 && nBand < poKern->nBands );
741
 *
742
 *   if( poKern->papanBandSrcValid != NULL
743
 *       && poKern->papanBandSrcValid[nBand] != NULL )
744
 *   {
745
 *       GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
746
 *       int    iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
747
 *
748
 *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
749
 *   }
750
 * \endcode
751
 */
752
753
/**
754
 * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
755
 *
756
 * Per pixel validity mask for source pixels.
757
 *
758
 * A single validity mask layer that applies to the pixels of all source
759
 * bands.  It is accessed similarly to papanBandSrcValid, but without the
760
 * extra level of band indirection.
761
 *
762
 * This pointer may be NULL indicating that all pixels are valid.
763
 *
764
 * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
765
 * the pixel isn't considered to be valid unless both arrays indicate it is
766
 * valid.
767
 */
768
769
/**
770
 * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
771
 *
772
 * Per pixel density mask for source pixels.
773
 *
774
 * A single density mask layer that applies to the pixels of all source
775
 * bands.  It contains values between 0.0 and 1.0 indicating the degree to
776
 * which this pixel should be allowed to contribute to the output result.
777
 *
778
 * This pointer may be NULL indicating that all pixels have a density of 1.0.
779
 *
780
 * The density for a pixel may be accessed like this:
781
 *
782
 * \code
783
 *   float fDensity = 1.0;
784
 *   int nPixel = 3;  // Zero based.
785
 *   int nLine = 4;   // Zero based.
786
 *
787
 *   assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
788
 *   assert( nLine >= 0 && nLine < poKern->nSrcYSize );
789
 *   if( poKern->pafUnifiedSrcDensity != NULL )
790
 *     fDensity = poKern->pafUnifiedSrcDensity
791
 *                                  [nPixel + nLine * poKern->nSrcXSize];
792
 * \endcode
793
 */
794
795
/**
796
 * \var int GDALWarpKernel::nDstXSize;
797
 *
798
 * Width of destination image in pixels.
799
 *
800
 * This field is required.
801
 */
802
803
/**
804
 * \var int GDALWarpKernel::nDstYSize;
805
 *
806
 * Height of destination image in pixels.
807
 *
808
 * This field is required.
809
 */
810
811
/**
812
 * \var GByte **GDALWarpKernel::papabyDstImage;
813
 *
814
 * Array of destination image band data.
815
 *
816
 * This is an array (of size GDALWarpKernel::nBands) of pointers
817
 * to image data.  Each individual band of image data is organized as a single
818
 * block of image data in left to right, then bottom to top order.  The actual
819
 * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
820
 *
821
 * To access the pixel value for the (x=3, y=4) pixel (zero based) of
822
 * the second band with eWorkingDataType set to GDT_Float32 use code like
823
 * this:
824
 *
825
 * \code
826
 *   float dfPixelValue;
827
 *   int   nBand = 2-1;  // Band indexes are zero based.
828
 *   int   nPixel = 3; // Zero based.
829
 *   int   nLine = 4;  // Zero based.
830
 *
831
 *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
832
 *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
833
 *   assert( nBand >= 0 && nBand < poKern->nBands );
834
 *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
835
 *                                  [nPixel + nLine * poKern->nDstXSize];
836
 * \endcode
837
 *
838
 * This field is required.
839
 */
840
841
/**
842
 * \var GUInt32 *GDALWarpKernel::panDstValid;
843
 *
844
 * Per pixel validity mask for destination pixels.
845
 *
846
 * A single validity mask layer that applies to the pixels of all destination
847
 * bands.  It is accessed similarly to panUnifiedSrcValid, but based
848
 * on the size of the destination image.
849
 *
850
 * This pointer may be NULL indicating that all pixels are valid.
851
 */
852
853
/**
854
 * \var float *GDALWarpKernel::pafDstDensity;
855
 *
856
 * Per pixel density mask for destination pixels.
857
 *
858
 * A single density mask layer that applies to the pixels of all destination
859
 * bands.  It contains values between 0.0 and 1.0.
860
 *
861
 * This pointer may be NULL indicating that all pixels have a density of 1.0.
862
 *
863
 * The density for a pixel may be accessed like this:
864
 *
865
 * \code
866
 *   float fDensity = 1.0;
867
 *   int   nPixel = 3; // Zero based.
868
 *   int   nLine = 4;  // Zero based.
869
 *
870
 *   assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
871
 *   assert( nLine >= 0 && nLine < poKern->nDstYSize );
872
 *   if( poKern->pafDstDensity != NULL )
873
 *     fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
874
 * \endcode
875
 */
876
877
/**
878
 * \var int GDALWarpKernel::nSrcXOff;
879
 *
880
 * X offset to source pixel coordinates for transformation.
881
 *
882
 * See pfnTransformer.
883
 *
884
 * This field is required.
885
 */
886
887
/**
888
 * \var int GDALWarpKernel::nSrcYOff;
889
 *
890
 * Y offset to source pixel coordinates for transformation.
891
 *
892
 * See pfnTransformer.
893
 *
894
 * This field is required.
895
 */
896
897
/**
898
 * \var int GDALWarpKernel::nDstXOff;
899
 *
900
 * X offset to destination pixel coordinates for transformation.
901
 *
902
 * See pfnTransformer.
903
 *
904
 * This field is required.
905
 */
906
907
/**
908
 * \var int GDALWarpKernel::nDstYOff;
909
 *
910
 * Y offset to destination pixel coordinates for transformation.
911
 *
912
 * See pfnTransformer.
913
 *
914
 * This field is required.
915
 */
916
917
/**
918
 * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
919
 *
920
 * Source/destination location transformer.
921
 *
922
 * The function to call to transform coordinates between source image
923
 * pixel/line coordinates and destination image pixel/line coordinates.
924
 * See GDALTransformerFunc() for details of the semantics of this function.
925
 *
926
 * The GDALWarpKernel algorithm will only ever use this transformer in
927
 * "destination to source" mode (bDstToSrc=TRUE), and will always pass
928
 * partial or complete scanlines of points in the destination image as
929
 * input.  This means, among other things, that it is safe to use the
930
 * approximating transform GDALApproxTransform() as the transformation
931
 * function.
932
 *
933
 * Source and destination images may be subsets of a larger overall image.
934
 * The transformation algorithms will expect and return pixel/line coordinates
935
 * in terms of this larger image, so coordinates need to be offset by
936
 * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
937
 * passing to pfnTransformer, and after return from it.
938
 *
939
 * The GDALWarpKernel::pTransformerArg value will be passed as the callback
940
 * data to this function when it is called.
941
 *
942
 * This field is required.
943
 */
944
945
/**
946
 * \var void *GDALWarpKernel::pTransformerArg;
947
 *
948
 * Callback data for pfnTransformer.
949
 *
950
 * This field may be NULL if not required for the pfnTransformer being used.
951
 */
952
953
/**
954
 * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
955
 *
956
 * The function to call to report progress of the algorithm, and to check
957
 * for a requested termination of the operation.  It operates according to
958
 * GDALProgressFunc() semantics.
959
 *
960
 * Generally speaking the progress function will be invoked for each
961
 * scanline of the destination buffer that has been processed.
962
 *
963
 * This field may be NULL (internally set to GDALDummyProgress()).
964
 */
965
966
/**
967
 * \var void *GDALWarpKernel::pProgress;
968
 *
969
 * Callback data for pfnProgress.
970
 *
971
 * This field may be NULL if not required for the pfnProgress being used.
972
 */
973
974
/************************************************************************/
975
/*                           GDALWarpKernel()                           */
976
/************************************************************************/
977
978
// Default constructor: every pointer member starts out null, every size and
// offset at zero, the scales at 1.0, the resampling method is nearest
// neighbour and the progress callback is GDALDummyProgress.
GDALWarpKernel::GDALWarpKernel()
    :  // Warp options and resampling configuration.
      papszWarpOptions(nullptr),
      eResample(GRA_NearestNeighbour),
      eWorkingDataType(GDT_Unknown),
      nBands(0),
      // Source window and buffers.
      nSrcXSize(0),
      nSrcYSize(0),
      dfSrcXExtraSize(0.0),
      dfSrcYExtraSize(0.0),
      papabySrcImage(nullptr),
      papanBandSrcValid(nullptr),
      panUnifiedSrcValid(nullptr),
      pafUnifiedSrcDensity(nullptr),
      // Destination window and buffers.
      nDstXSize(0),
      nDstYSize(0),
      papabyDstImage(nullptr),
      panDstValid(nullptr),
      pafDstDensity(nullptr),
      // Resampling scales and filter window (computed by PerformWarp()).
      dfXScale(1.0),
      dfYScale(1.0),
      dfXFilter(0.0),
      dfYFilter(0.0),
      nXRadius(0),
      nYRadius(0),
      nFiltInitX(0),
      nFiltInitY(0),
      // Offsets of the source and destination windows.
      nSrcXOff(0),
      nSrcYOff(0),
      nDstXOff(0),
      nDstYOff(0),
      // Coordinate transformer and progress reporting.
      pfnTransformer(nullptr),
      pTransformerArg(nullptr),
      pfnProgress(GDALDummyProgress),
      pProgress(nullptr),
      dfProgressBase(0.0),
      dfProgressScale(1.0),
      // Miscellaneous state.
      padfDstNoDataReal(nullptr),
      psThreadData(nullptr),
      eTieStrategy(GWKTS_First)
{
}
994
995
/************************************************************************/
996
/*                          ~GDALWarpKernel()                           */
997
/************************************************************************/
998
999
// Nothing is released explicitly here: the destructor body is empty, so the
// compiler-generated behaviour is sufficient.
GDALWarpKernel::~GDALWarpKernel() = default;
1002
1003
/************************************************************************/
1004
/*                            PerformWarp()                             */
1005
/************************************************************************/
1006
1007
/**
1008
 * \fn CPLErr GDALWarpKernel::PerformWarp();
1009
 *
1010
 * This method performs the warp described in the GDALWarpKernel.
1011
 *
1012
 * @return CE_None on success or CE_Failure if an error occurs.
1013
 */
1014
1015
CPLErr GDALWarpKernel::PerformWarp()
1016
1017
0
{
1018
0
    const CPLErr eErr = Validate();
1019
1020
0
    if (eErr != CE_None)
1021
0
        return eErr;
1022
1023
    // See #2445 and #3079.
1024
0
    if (nSrcXSize <= 0 || nSrcYSize <= 0)
1025
0
    {
1026
0
        if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1027
0
        {
1028
0
            CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1029
0
            return CE_Failure;
1030
0
        }
1031
0
        return CE_None;
1032
0
    }
1033
1034
    /* -------------------------------------------------------------------- */
1035
    /*      Pre-calculate resampling scales and window sizes for filtering. */
1036
    /* -------------------------------------------------------------------- */
1037
1038
0
    dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
1039
0
    dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
1040
0
    if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
1041
0
        dfXScale = 1.0;
1042
0
    if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
1043
0
        dfYScale = 1.0;
1044
0
    if (dfXScale < 1.0)
1045
0
    {
1046
0
        double dfXReciprocalScale = 1.0 / dfXScale;
1047
0
        const int nXReciprocalScale =
1048
0
            static_cast<int>(dfXReciprocalScale + 0.5);
1049
0
        if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
1050
0
            dfXScale = 1.0 / nXReciprocalScale;
1051
0
    }
1052
0
    if (dfYScale < 1.0)
1053
0
    {
1054
0
        double dfYReciprocalScale = 1.0 / dfYScale;
1055
0
        const int nYReciprocalScale =
1056
0
            static_cast<int>(dfYReciprocalScale + 0.5);
1057
0
        if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
1058
0
            dfYScale = 1.0 / nYReciprocalScale;
1059
0
    }
1060
1061
    // XSCALE and YSCALE undocumented for now. Can help in some cases.
1062
    // Best would probably be a per-pixel scale computation.
1063
0
    const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1064
0
    if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
1065
0
        dfXScale = CPLAtof(pszXScale);
1066
0
    const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1067
0
    if (pszYScale != nullptr)
1068
0
        dfYScale = CPLAtof(pszYScale);
1069
1070
    // If the xscale is significantly lower than the yscale, this is highly
1071
    // suspicious of a situation of wrapping a very large virtual file in
1072
    // geographic coordinates with left and right parts being close to the
1073
    // antimeridian. In that situation, the xscale computed by the above method
1074
    // is completely wrong. Prefer doing an average of a few sample points
1075
    // instead
1076
0
    if ((dfYScale / dfXScale > 100 ||
1077
0
         (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
1078
0
    {
1079
        // Sample points along a grid
1080
0
        const int nPointsX = std::min(10, nDstXSize);
1081
0
        const int nPointsY = std::min(10, nDstYSize);
1082
0
        const int nPoints = 3 * nPointsX * nPointsY;
1083
0
        std::vector<double> padfX;
1084
0
        std::vector<double> padfY;
1085
0
        std::vector<double> padfZ(nPoints);
1086
0
        std::vector<int> pabSuccess(nPoints);
1087
0
        for (int iY = 0; iY < nPointsY; iY++)
1088
0
        {
1089
0
            for (int iX = 0; iX < nPointsX; iX++)
1090
0
            {
1091
0
                const double dfX =
1092
0
                    nPointsX == 1
1093
0
                        ? 0.0
1094
0
                        : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
1095
0
                const double dfY =
1096
0
                    nPointsY == 1
1097
0
                        ? 0.0
1098
0
                        : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
1099
1100
                // Reproject each destination sample point and its neighbours
1101
                // at (x+1,y) and (x,y+1), so as to get the local scale.
1102
0
                padfX.push_back(dfX);
1103
0
                padfY.push_back(dfY);
1104
1105
0
                padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
1106
0
                padfY.push_back(dfY);
1107
1108
0
                padfX.push_back(dfX);
1109
0
                padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
1110
0
            }
1111
0
        }
1112
0
        pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
1113
0
                       &padfZ[0], &pabSuccess[0]);
1114
1115
        // Compute the xscale at each sampling point
1116
0
        std::vector<double> adfXScales;
1117
0
        for (int i = 0; i < nPoints; i += 3)
1118
0
        {
1119
0
            if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
1120
0
            {
1121
0
                const double dfPointXScale =
1122
0
                    1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
1123
0
                                   std::abs(padfX[i + 2] - padfX[i]));
1124
0
                adfXScales.push_back(dfPointXScale);
1125
0
            }
1126
0
        }
1127
1128
        // Sort by increasing xcale
1129
0
        std::sort(adfXScales.begin(), adfXScales.end());
1130
1131
0
        if (!adfXScales.empty())
1132
0
        {
1133
            // Compute the average of scales, but eliminate outliers small
1134
            // scales, if some samples are just along the discontinuity.
1135
0
            const double dfMaxPointXScale = adfXScales.back();
1136
0
            double dfSumPointXScale = 0;
1137
0
            int nCountPointScale = 0;
1138
0
            for (double dfPointXScale : adfXScales)
1139
0
            {
1140
0
                if (dfPointXScale > dfMaxPointXScale / 10)
1141
0
                {
1142
0
                    dfSumPointXScale += dfPointXScale;
1143
0
                    nCountPointScale++;
1144
0
                }
1145
0
            }
1146
0
            if (nCountPointScale > 0)  // should always be true
1147
0
            {
1148
0
                const double dfXScaleFromSampling =
1149
0
                    dfSumPointXScale / nCountPointScale;
1150
#if DEBUG_VERBOSE
1151
                CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
1152
                         dfXScaleFromSampling);
1153
#endif
1154
0
                dfXScale = dfXScaleFromSampling;
1155
0
            }
1156
0
        }
1157
0
    }
1158
1159
#if DEBUG_VERBOSE
1160
    CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1161
#endif
1162
1163
0
    const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
1164
1165
    // Safety check for callers that would use GDALWarpKernel without using
1166
    // GDALWarpOperation.
1167
0
    if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1168
0
         ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1169
0
          !bUse4SamplesFormula)) &&
1170
0
        atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1171
0
            WARP_EXTRA_ELTS)
1172
0
    {
1173
0
        CPLError(CE_Failure, CPLE_AppDefined,
1174
0
                 "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1175
0
                 "their end. "
1176
0
                 "See GDALWarpKernel class definition. If this condition is "
1177
0
                 "fulfilled, define a EXTRA_ELTS=%d warp options",
1178
0
                 WARP_EXTRA_ELTS);
1179
0
        return CE_Failure;
1180
0
    }
1181
1182
0
    dfXFilter = anGWKFilterRadius[eResample];
1183
0
    dfYFilter = anGWKFilterRadius[eResample];
1184
1185
0
    nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1186
0
                              : static_cast<int>(dfXFilter);
1187
0
    nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1188
0
                              : static_cast<int>(dfYFilter);
1189
1190
    // Filter window offset depends on the parity of the kernel radius.
1191
0
    nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1192
0
    nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1193
1194
0
    bApplyVerticalShift =
1195
0
        CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1196
0
    dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1197
0
        papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1198
1199
    /* -------------------------------------------------------------------- */
1200
    /*      Set up resampling functions.                                    */
1201
    /* -------------------------------------------------------------------- */
1202
0
    if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1203
0
        return GWKGeneralCase(this);
1204
1205
0
    const bool bNoMasksOrDstDensityOnly =
1206
0
        papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1207
0
        pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1208
1209
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
1210
0
        bNoMasksOrDstDensityOnly)
1211
0
        return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1212
1213
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
1214
0
        bNoMasksOrDstDensityOnly)
1215
0
        return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1216
1217
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
1218
0
        bNoMasksOrDstDensityOnly)
1219
0
        return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1220
1221
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
1222
0
        bNoMasksOrDstDensityOnly)
1223
0
        return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1224
1225
0
    if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
1226
0
        return GWKNearestByte(this);
1227
1228
0
    if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1229
0
        eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1230
0
        return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1231
1232
0
    if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1233
0
        bNoMasksOrDstDensityOnly)
1234
0
        return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1235
1236
0
    if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1237
0
        bNoMasksOrDstDensityOnly)
1238
0
        return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1239
1240
0
    if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1241
0
        bNoMasksOrDstDensityOnly)
1242
0
        return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1243
1244
0
    if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1245
0
        bNoMasksOrDstDensityOnly)
1246
0
        return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1247
1248
0
    if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1249
0
        bNoMasksOrDstDensityOnly)
1250
0
        return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1251
1252
0
    if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1253
0
        bNoMasksOrDstDensityOnly)
1254
0
        return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1255
1256
0
    if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1257
0
        return GWKNearestShort(this);
1258
1259
0
    if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
1260
0
        return GWKNearestUnsignedShort(this);
1261
1262
0
    if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1263
0
        bNoMasksOrDstDensityOnly)
1264
0
        return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1265
1266
0
    if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1267
0
        return GWKNearestFloat(this);
1268
1269
0
    if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1270
0
        bNoMasksOrDstDensityOnly)
1271
0
        return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1272
1273
0
    if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1274
0
        bNoMasksOrDstDensityOnly)
1275
0
        return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1276
1277
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1278
    if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1279
        bNoMasksOrDstDensityOnly)
1280
        return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1281
1282
    if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1283
        bNoMasksOrDstDensityOnly)
1284
        return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1285
#endif
1286
1287
0
    if (eResample == GRA_Average)
1288
0
        return GWKAverageOrMode(this);
1289
1290
0
    if (eResample == GRA_RMS)
1291
0
        return GWKAverageOrMode(this);
1292
1293
0
    if (eResample == GRA_Mode)
1294
0
        return GWKAverageOrMode(this);
1295
1296
0
    if (eResample == GRA_Max)
1297
0
        return GWKAverageOrMode(this);
1298
1299
0
    if (eResample == GRA_Min)
1300
0
        return GWKAverageOrMode(this);
1301
1302
0
    if (eResample == GRA_Med)
1303
0
        return GWKAverageOrMode(this);
1304
1305
0
    if (eResample == GRA_Q1)
1306
0
        return GWKAverageOrMode(this);
1307
1308
0
    if (eResample == GRA_Q3)
1309
0
        return GWKAverageOrMode(this);
1310
1311
0
    if (eResample == GRA_Sum)
1312
0
        return GWKSumPreserving(this);
1313
1314
0
    if (!GDALDataTypeIsComplex(eWorkingDataType))
1315
0
    {
1316
0
        return GWKRealCase(this);
1317
0
    }
1318
1319
0
    return GWKGeneralCase(this);
1320
0
}
1321
1322
/************************************************************************/
1323
/*                              Validate()                              */
1324
/************************************************************************/
1325
1326
/**
1327
 * \fn CPLErr GDALWarpKernel::Validate()
1328
 *
1329
 * Check the settings in the GDALWarpKernel, and issue a CPLError()
1330
 * (and return CE_Failure) if the configuration is considered to be
1331
 * invalid for some reason.
1332
 *
1333
 * This method will also do some standard defaulting such as setting
1334
 * pfnProgress to GDALDummyProgress() if it is NULL.
1335
 *
1336
 * @return CE_None on success or CE_Failure if an error is detected.
1337
 */
1338
1339
CPLErr GDALWarpKernel::Validate()
1340
1341
0
{
1342
0
    if (static_cast<size_t>(eResample) >=
1343
0
        (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1344
0
    {
1345
0
        CPLError(CE_Failure, CPLE_AppDefined,
1346
0
                 "Unsupported resampling method %d.",
1347
0
                 static_cast<int>(eResample));
1348
0
        return CE_Failure;
1349
0
    }
1350
1351
    // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1352
    // be ignored as contributing source pixels during resampling. Only taken into account by
1353
    // Average currently
1354
0
    const char *pszExcludedValues =
1355
0
        CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1356
0
    if (pszExcludedValues)
1357
0
    {
1358
0
        const CPLStringList aosTokens(
1359
0
            CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1360
0
        if ((aosTokens.size() % nBands) != 0)
1361
0
        {
1362
0
            CPLError(CE_Failure, CPLE_AppDefined,
1363
0
                     "EXCLUDED_VALUES should contain one or several tuples of "
1364
0
                     "%d values formatted like <R>,<G>,<B> or "
1365
0
                     "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1366
0
                     "tuples",
1367
0
                     nBands);
1368
0
            return CE_Failure;
1369
0
        }
1370
0
        std::vector<double> adfTuple;
1371
0
        for (int i = 0; i < aosTokens.size(); ++i)
1372
0
        {
1373
0
            adfTuple.push_back(CPLAtof(aosTokens[i]));
1374
0
            if (((i + 1) % nBands) == 0)
1375
0
            {
1376
0
                m_aadfExcludedValues.push_back(adfTuple);
1377
0
                adfTuple.clear();
1378
0
            }
1379
0
        }
1380
0
    }
1381
1382
0
    return CE_None;
1383
0
}
1384
1385
/************************************************************************/
1386
/*                         GWKOverlayDensity()                          */
1387
/*                                                                      */
1388
/*      Compute the final density for the destination pixel.  This      */
1389
/*      is a function of the overlay density (passed in) and the        */
1390
/*      original density.                                               */
1391
/************************************************************************/
1392
1393
static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1394
                              double dfDensity)
1395
0
{
1396
0
    if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1397
0
        return;
1398
1399
0
    poWK->pafDstDensity[iDstOffset] =
1400
0
        1.0f -
1401
0
        (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
1402
0
}
1403
1404
/************************************************************************/
1405
/*                          GWKRoundValueT()                            */
1406
/************************************************************************/
1407
1408
// Rounding helper converting a value of type U to type T. Only the partial
// specializations on is_signed provide an implementation of eval().
template <typename T, typename U, bool is_signed> struct sGWKRoundValueT
{
    static T eval(U);
};
1412
1413
// Specialization for signed target types: round to nearest by adding 0.5
// and taking the floor, which also handles negative values.
template <typename T, typename U> struct sGWKRoundValueT<T, U, true>
{
    static T eval(U inputValue)
    {
        return static_cast<T>(floor(inputValue + U(0.5)));
    }
};
1420
1421
// Specialization for unsigned target types: add 0.5 and let the conversion
// to T truncate.
template <typename T, typename U> struct sGWKRoundValueT<T, U, false>
{
    static T eval(U inputValue)
    {
        return static_cast<T>(inputValue + U(0.5));
    }
};
1428
1429
template <class T, class U> static T GWKRoundValueT(U value)
1430
0
{
1431
0
    return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
1432
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned char GWKRoundValueT<unsigned char, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned char GWKRoundValueT<unsigned char, float>(float)
Unexecuted instantiation: gdalwarpkernel.cpp:short GWKRoundValueT<short, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned short GWKRoundValueT<unsigned short, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned short GWKRoundValueT<unsigned short, float>(float)
Unexecuted instantiation: gdalwarpkernel.cpp:int GWKRoundValueT<int, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned int GWKRoundValueT<unsigned int, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:long GWKRoundValueT<long, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned long GWKRoundValueT<unsigned long, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:cpl::Float16 GWKRoundValueT<cpl::Float16, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:double GWKRoundValueT<double, double>(double)
1433
1434
// double -> float: no rounding to an integer is applied, the value is only
// converted to single precision.
template <> float GWKRoundValueT<float, double>(double dfValue)
{
    return static_cast<float>(dfValue);
}
1438
1439
#ifdef notused
// double -> double: identity. Compiled only when notused is defined.
template <> double GWKRoundValueT<double, double>(double dfValue)
{
    return dfValue;
}
#endif
1445
1446
/************************************************************************/
1447
/*                            GWKClampValueT()                          */
1448
/************************************************************************/
1449
1450
// Convert a value of type U to type T: values below
// cpl::NumericLimits<T>::min() or above cpl::NumericLimits<T>::max() are
// replaced by that bound, other values are rounded with GWKRoundValueT().
template <typename T, typename U> static CPL_INLINE T GWKClampValueT(U value)
{
    // Below the representable range: saturate to the lower bound.
    if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
    {
        return cpl::NumericLimits<T>::min();
    }

    // Above the representable range: saturate to the upper bound.
    if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
    {
        return cpl::NumericLimits<T>::max();
    }

    return GWKRoundValueT<T, U>(value);
}
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned char GWKClampValueT<unsigned char, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned char GWKClampValueT<unsigned char, float>(float)
Unexecuted instantiation: gdalwarpkernel.cpp:short GWKClampValueT<short, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned short GWKClampValueT<unsigned short, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned short GWKClampValueT<unsigned short, float>(float)
Unexecuted instantiation: gdalwarpkernel.cpp:int GWKClampValueT<int, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned int GWKClampValueT<unsigned int, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:long GWKClampValueT<long, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:unsigned long GWKClampValueT<unsigned long, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:cpl::Float16 GWKClampValueT<cpl::Float16, double>(double)
Unexecuted instantiation: gdalwarpkernel.cpp:double GWKClampValueT<double, double>(double)
1459
1460
// double -> float: no clamping or rounding is applied, the value is only
// converted to single precision.
template <> float GWKClampValueT<float, double>(double value)
{
    return static_cast<float>(value);
}
1464
1465
#ifdef notused
// double -> double: identity. Compiled only when notused is defined.
template <> double GWKClampValueT<double, double>(double value)
{
    return value;
}
#endif
1471
1472
/************************************************************************/
1473
/*                             AvoidNoData()                            */
1474
/************************************************************************/
1475
1476
template <class T>
1477
inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1478
                        GPtrDiff_t iDstOffset)
1479
0
{
1480
0
    GByte *pabyDst = poWK->papabyDstImage[iBand];
1481
0
    T *pDst = reinterpret_cast<T *>(pabyDst);
1482
1483
0
    if (poWK->padfDstNoDataReal != nullptr &&
1484
0
        poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1485
0
    {
1486
        if constexpr (cpl::NumericLimits<T>::is_integer)
1487
0
        {
1488
0
            if (pDst[iDstOffset] ==
1489
0
                static_cast<T>(cpl::NumericLimits<T>::lowest()))
1490
0
            {
1491
0
                pDst[iDstOffset] =
1492
0
                    static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1493
0
            }
1494
0
            else
1495
0
                pDst[iDstOffset]--;
1496
        }
1497
        else
1498
0
        {
1499
0
            if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1500
0
            {
1501
0
                using std::nextafter;
1502
0
                pDst[iDstOffset] =
1503
0
                    nextafter(pDst[iDstOffset], static_cast<T>(0));
1504
0
            }
1505
0
            else
1506
0
            {
1507
0
                using std::nextafter;
1508
0
                pDst[iDstOffset] =
1509
0
                    nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1510
0
            }
1511
0
        }
1512
1513
0
        if (!poWK->bWarnedAboutDstNoDataReplacement)
1514
0
        {
1515
0
            const_cast<GDALWarpKernel *>(poWK)
1516
0
                ->bWarnedAboutDstNoDataReplacement = true;
1517
0
            CPLError(CE_Warning, CPLE_AppDefined,
1518
0
                     "Value %g in the source dataset has been changed to %g "
1519
0
                     "in the destination dataset to avoid being treated as "
1520
0
                     "NoData. To avoid this, select a different NoData value "
1521
0
                     "for the destination dataset.",
1522
0
                     poWK->padfDstNoDataReal[iBand],
1523
0
                     static_cast<double>(pDst[iDstOffset]));
1524
0
        }
1525
0
    }
1526
0
}
Unexecuted instantiation: void AvoidNoData<unsigned char>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<signed char>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<short>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<unsigned short>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<unsigned int>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<int>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<unsigned long>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<long>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<cpl::Float16>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<float>(GDALWarpKernel const*, int, long long)
Unexecuted instantiation: void AvoidNoData<double>(GDALWarpKernel const*, int, long long)
1527
1528
/************************************************************************/
1529
/*                         GWKSetPixelValueRealT()                      */
1530
/************************************************************************/
1531
1532
// Write a real (non-complex) pixel of type T into destination band iBand at
// element index iDstOffset.
//
// dfDensity is the weight of the incoming value: below 0.0001 the destination
// is left untouched, from 0.9999 upward the value is stored as is, and in
// between it is mixed with the pixel already present in the destination.
// AvoidNoData<T>() is then called on the written pixel.  Returns true on
// every path.
template <class T>
1533
static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1534
                                  GPtrDiff_t iDstOffset, double dfDensity,
1535
                                  T value)
1536
0
{
1537
0
    T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1538
1539
    /* -------------------------------------------------------------------- */
1540
    /*      If the source density is less than 100% we need to fetch the    */
1541
    /*      existing destination value, and mix it with the source to       */
1542
    /*      get the new "to apply" value.  Also compute composite           */
1543
    /*      density.                                                        */
1544
    /*                                                                      */
1545
    /*      We avoid mixing if density is very near one or risk mixing      */
1546
    /*      in very extreme nodata values and causing odd results (#1610)   */
1547
    /* -------------------------------------------------------------------- */
1548
0
    if (dfDensity < 0.9999)
1549
0
    {
1550
0
        // Negligible source contribution: leave the destination untouched.
        if (dfDensity < 0.0001)
1551
0
            return true;
1552
1553
0
        // Weight of the existing destination pixel: taken from the density
        // buffer when there is one, otherwise 0 if the validity mask marks
        // the pixel invalid, and 1 in every other case.
        double dfDstDensity = 1.0;
1554
1555
0
        if (poWK->pafDstDensity != nullptr)
1556
0
            dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1557
0
        else if (poWK->panDstValid != nullptr &&
1558
0
                 !CPLMaskGet(poWK->panDstValid, iDstOffset))
1559
0
            dfDstDensity = 0.0;
1560
1561
        // Note: panDstValid[] is only consulted above when pafDstDensity is
        // null.
1562
1563
0
        const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
1564
1565
        // The destination density is really only relative to the portion
1566
        // not occluded by the overlay.
1567
0
        const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1568
1569
0
        // Weighted average of source and destination, normalised by the
        // total weight (at least 0.0001 here, so never zero).
        const double dfReal =
1570
0
            (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
1571
0
            (dfDensity + dfDstInfluence);
1572
1573
        /* --------------------------------------------------------------------
1574
         */
1575
        /*      Actually apply the destination value. */
1576
        /*                                                                      */
1577
        /*      Avoid using the destination nodata value for integer datatypes
1578
         */
1579
        /*      if by chance it is equal to the computed pixel value. */
1580
        /* --------------------------------------------------------------------
1581
         */
1582
0
        pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1583
0
    }
1584
0
    else
1585
0
    {
1586
0
        // Density is (nearly) one: store the source value without mixing.
        pDst[iDstOffset] = value;
1587
0
    }
1588
1589
0
    AvoidNoData<T>(poWK, iBand, iDstOffset);
1590
1591
0
    return true;
1592
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<unsigned char>(GDALWarpKernel const*, int, long long, double, unsigned char)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<short>(GDALWarpKernel const*, int, long long, double, short)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<unsigned short>(GDALWarpKernel const*, int, long long, double, unsigned short)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<float>(GDALWarpKernel const*, int, long long, double, float)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<int>(GDALWarpKernel const*, int, long long, double, int)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<unsigned int>(GDALWarpKernel const*, int, long long, double, unsigned int)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<long>(GDALWarpKernel const*, int, long long, double, long)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<unsigned long>(GDALWarpKernel const*, int, long long, double, unsigned long)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<cpl::Float16>(GDALWarpKernel const*, int, long long, double, cpl::Float16)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKSetPixelValueRealT<double>(GDALWarpKernel const*, int, long long, double, double)
1593
1594
/************************************************************************/
1595
/*                       ClampRoundAndAvoidNoData()                     */
1596
/************************************************************************/
1597
1598
// Convert dfReal to T and store it in destination band iBand at element index
// iDstOffset, then call AvoidNoData<T>() on that pixel.
//
// For integer T the value is clamped to [lowest(), max()] and rounded using
// dfReal + 0.5; for any other T it is converted with a plain static_cast.
template <class T>
1599
inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1600
                                     GPtrDiff_t iDstOffset, double dfReal)
1601
0
{
1602
0
    GByte *pabyDst = poWK->papabyDstImage[iBand];
1603
0
    T *pDst = reinterpret_cast<T *>(pabyDst);
1604
1605
    if constexpr (cpl::NumericLimits<T>::is_integer)
1606
0
    {
1607
0
        using std::floor;
1608
0
        // Saturate values outside the representable range of T.
        // NOTE(review): a NaN dfReal fails both range comparisons and reaches
        // one of the casts below - confirm callers never pass NaN here.
        if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1609
0
            pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1610
0
        else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1611
0
            pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1612
        // Signed types: floor(x + 0.5) so that negative values are rounded
        // rather than truncated toward zero by the cast.
        else if constexpr (cpl::NumericLimits<T>::is_signed)
1613
0
            pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1614
        // Unsigned types: the truncating cast of x + 0.5 is used directly.
        else
1615
0
            pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1616
    }
1617
    else
1618
0
    {
1619
0
        // Non-integer T: no clamping or rounding, just a conversion.
        pDst[iDstOffset] = static_cast<T>(dfReal);
1620
0
    }
1621
1622
0
    AvoidNoData<T>(poWK, iBand, iDstOffset);
1623
0
}
Unexecuted instantiation: void ClampRoundAndAvoidNoData<unsigned char>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<signed char>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<short>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<unsigned short>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<unsigned int>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<int>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<unsigned long>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<long>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<cpl::Float16>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<float>(GDALWarpKernel const*, int, long long, double)
Unexecuted instantiation: void ClampRoundAndAvoidNoData<double>(GDALWarpKernel const*, int, long long, double)
1624
1625
/************************************************************************/
1626
/*                          GWKSetPixelValue()                          */
1627
/************************************************************************/
1628
1629
// Store a pixel, given as a real part dfReal and an imaginary part dfImag,
// into destination band iBand at pixel index iDstOffset.  The element type
// is selected at run time from poWK->eWorkingDataType.
//
// dfDensity weights the incoming value: below 0.0001 nothing is written and
// true is returned; below 0.9999 the value is first mixed with the pixel
// already in the destination; otherwise it is stored unmixed.  Complex types
// keep their real and imaginary parts in consecutive elements, at
// iDstOffset * 2 and iDstOffset * 2 + 1; dfImag is not stored for real types.
// Returns false for GDT_Unknown / GDT_TypeCount, true otherwise.
static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1630
                             GPtrDiff_t iDstOffset, double dfDensity,
1631
                             double dfReal, double dfImag)
1632
1633
0
{
1634
0
    GByte *pabyDst = poWK->papabyDstImage[iBand];
1635
1636
    /* -------------------------------------------------------------------- */
1637
    /*      If the source density is less than 100% we need to fetch the    */
1638
    /*      existing destination value, and mix it with the source to       */
1639
    /*      get the new "to apply" value.  Also compute composite           */
1640
    /*      density.                                                        */
1641
    /*                                                                      */
1642
    /*      We avoid mixing if density is very near one or risk mixing      */
1643
    /*      in very extreme nodata values and causing odd results (#1610)   */
1644
    /* -------------------------------------------------------------------- */
1645
0
    if (dfDensity < 0.9999)
1646
0
    {
1647
0
        // Negligible source contribution: leave the destination untouched.
        if (dfDensity < 0.0001)
1648
0
            return true;
1649
1650
0
        // Weight of the existing destination pixel: taken from the density
        // buffer when there is one, otherwise 0 if the validity mask marks
        // the pixel invalid, and 1 in every other case.
        double dfDstDensity = 1.0;
1651
0
        if (poWK->pafDstDensity != nullptr)
1652
0
            dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1653
0
        else if (poWK->panDstValid != nullptr &&
1654
0
                 !CPLMaskGet(poWK->panDstValid, iDstOffset))
1655
0
            dfDstDensity = 0.0;
1656
1657
0
        double dfDstReal = 0.0;
1658
0
        double dfDstImag = 0.0;
1659
        // Note: panDstValid[] is only consulted above when pafDstDensity is
        // null.
1660
1661
        // TODO(schwehr): Factor out this repeated type of set.
1662
0
        // Fetch the current destination pixel as doubles.
        switch (poWK->eWorkingDataType)
1663
0
        {
1664
0
            case GDT_Byte:
1665
0
                dfDstReal = pabyDst[iDstOffset];
1666
0
                dfDstImag = 0.0;
1667
0
                break;
1668
1669
0
            case GDT_Int8:
1670
0
                dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1671
0
                dfDstImag = 0.0;
1672
0
                break;
1673
1674
0
            case GDT_Int16:
1675
0
                dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1676
0
                dfDstImag = 0.0;
1677
0
                break;
1678
1679
0
            case GDT_UInt16:
1680
0
                dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1681
0
                dfDstImag = 0.0;
1682
0
                break;
1683
1684
0
            case GDT_Int32:
1685
0
                dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1686
0
                dfDstImag = 0.0;
1687
0
                break;
1688
1689
0
            case GDT_UInt32:
1690
0
                dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1691
0
                dfDstImag = 0.0;
1692
0
                break;
1693
1694
0
            case GDT_Int64:
1695
0
                dfDstReal = static_cast<double>(
1696
0
                    reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1697
0
                dfDstImag = 0.0;
1698
0
                break;
1699
1700
0
            case GDT_UInt64:
1701
0
                dfDstReal = static_cast<double>(
1702
0
                    reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1703
0
                dfDstImag = 0.0;
1704
0
                break;
1705
1706
0
            case GDT_Float16:
1707
0
                dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1708
0
                dfDstImag = 0.0;
1709
0
                break;
1710
1711
0
            case GDT_Float32:
1712
0
                dfDstReal =
1713
0
                    double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1714
0
                dfDstImag = 0.0;
1715
0
                break;
1716
1717
0
            case GDT_Float64:
1718
0
                dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1719
0
                dfDstImag = 0.0;
1720
0
                break;
1721
1722
0
            // Complex types: real part at index 2 * offset, imaginary part
            // at 2 * offset + 1.
            case GDT_CInt16:
1723
0
                dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1724
0
                dfDstImag =
1725
0
                    reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1726
0
                break;
1727
1728
0
            case GDT_CInt32:
1729
0
                dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1730
0
                dfDstImag =
1731
0
                    reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1732
0
                break;
1733
1734
0
            case GDT_CFloat16:
1735
0
                dfDstReal =
1736
0
                    reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1737
0
                dfDstImag =
1738
0
                    reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1739
0
                break;
1740
1741
0
            case GDT_CFloat32:
1742
0
                dfDstReal =
1743
0
                    double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
1744
0
                dfDstImag = double(
1745
0
                    reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
1746
0
                break;
1747
1748
0
            case GDT_CFloat64:
1749
0
                dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1750
0
                dfDstImag =
1751
0
                    reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1752
0
                break;
1753
1754
0
            case GDT_Unknown:
1755
0
            case GDT_TypeCount:
1756
0
                CPLAssert(false);
1757
0
                return false;
1758
0
        }
1759
1760
        // The destination density is really only relative to the portion
1761
        // not occluded by the overlay.
1762
0
        const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1763
1764
0
        // Weighted average of source and destination, applied separately to
        // the real and imaginary parts and normalised by the total weight.
        dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1765
0
                 (dfDensity + dfDstInfluence);
1766
1767
0
        dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
1768
0
                 (dfDensity + dfDstInfluence);
1769
0
    }
1770
1771
    /* -------------------------------------------------------------------- */
1772
    /*      Actually apply the destination value.                           */
1773
    /*                                                                      */
1774
    /*      Avoid using the destination nodata value for integer datatypes  */
1775
    /*      if by chance it is equal to the computed pixel value.           */
1776
    /* -------------------------------------------------------------------- */
1777
1778
0
    switch (poWK->eWorkingDataType)
1779
0
    {
1780
0
        case GDT_Byte:
1781
0
            ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
1782
0
            break;
1783
1784
0
        case GDT_Int8:
1785
0
            ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
1786
0
            break;
1787
1788
0
        case GDT_Int16:
1789
0
            ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
1790
0
            break;
1791
1792
0
        case GDT_UInt16:
1793
0
            ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
1794
0
            break;
1795
1796
0
        case GDT_UInt32:
1797
0
            ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
1798
0
            break;
1799
1800
0
        case GDT_Int32:
1801
0
            ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
1802
0
            break;
1803
1804
0
        case GDT_UInt64:
1805
0
            ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
1806
0
                                                    dfReal);
1807
0
            break;
1808
1809
0
        case GDT_Int64:
1810
0
            ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
1811
0
                                                   dfReal);
1812
0
            break;
1813
1814
0
        case GDT_Float16:
1815
0
            ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
1816
0
            break;
1817
1818
0
        case GDT_Float32:
1819
0
            ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
1820
0
            break;
1821
1822
0
        case GDT_Float64:
1823
0
            ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
1824
0
            break;
1825
1826
0
        // Complex integer types are clamped and rounded inline, one component
        // at a time.  None of the complex cases below calls AvoidNoData().
        case GDT_CInt16:
1827
0
        {
1828
0
            typedef GInt16 T;
1829
0
            if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1830
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1831
0
                    cpl::NumericLimits<T>::min();
1832
0
            else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1833
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1834
0
                    cpl::NumericLimits<T>::max();
1835
0
            else
1836
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1837
0
                    static_cast<T>(floor(dfReal + 0.5));
1838
0
            if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1839
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1840
0
                    cpl::NumericLimits<T>::min();
1841
0
            else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1842
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1843
0
                    cpl::NumericLimits<T>::max();
1844
0
            else
1845
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1846
0
                    static_cast<T>(floor(dfImag + 0.5));
1847
0
            break;
1848
0
        }
1849
1850
0
        case GDT_CInt32:
1851
0
        {
1852
0
            typedef GInt32 T;
1853
0
            if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1854
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1855
0
                    cpl::NumericLimits<T>::min();
1856
0
            else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1857
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1858
0
                    cpl::NumericLimits<T>::max();
1859
0
            else
1860
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1861
0
                    static_cast<T>(floor(dfReal + 0.5));
1862
0
            if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1863
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1864
0
                    cpl::NumericLimits<T>::min();
1865
0
            else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1866
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1867
0
                    cpl::NumericLimits<T>::max();
1868
0
            else
1869
0
                reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1870
0
                    static_cast<T>(floor(dfImag + 0.5));
1871
0
            break;
1872
0
        }
1873
1874
0
        // Complex floating-point types: plain conversion of each component.
        case GDT_CFloat16:
1875
0
            reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
1876
0
                static_cast<GFloat16>(dfReal);
1877
0
            reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
1878
0
                static_cast<GFloat16>(dfImag);
1879
0
            break;
1880
1881
0
        case GDT_CFloat32:
1882
0
            reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
1883
0
                static_cast<float>(dfReal);
1884
0
            reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
1885
0
                static_cast<float>(dfImag);
1886
0
            break;
1887
1888
0
        case GDT_CFloat64:
1889
0
            reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
1890
0
            reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
1891
0
            break;
1892
1893
0
        case GDT_Unknown:
1894
0
        case GDT_TypeCount:
1895
0
            return false;
1896
0
    }
1897
1898
0
    return true;
1899
0
}
1900
1901
/************************************************************************/
1902
/*                       GWKSetPixelValueReal()                         */
1903
/************************************************************************/
1904
1905
// Store the real value dfReal into destination band iBand at pixel index
// iDstOffset.  The element type is selected at run time from
// poWK->eWorkingDataType.
//
// dfDensity weights the incoming value: below 0.0001 nothing is written and
// true is returned; below 0.9999 the value is first mixed with the pixel
// already in the destination; otherwise it is stored unmixed.  Complex
// working types, GDT_Unknown and GDT_TypeCount are not handled and make the
// function return false; every other type returns true.
static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
1906
                                 GPtrDiff_t iDstOffset, double dfDensity,
1907
                                 double dfReal)
1908
1909
0
{
1910
0
    GByte *pabyDst = poWK->papabyDstImage[iBand];
1911
1912
    /* -------------------------------------------------------------------- */
1913
    /*      If the source density is less than 100% we need to fetch the    */
1914
    /*      existing destination value, and mix it with the source to       */
1915
    /*      get the new "to apply" value.  Also compute composite           */
1916
    /*      density.                                                        */
1917
    /*                                                                      */
1918
    /*      We avoid mixing if density is very near one or risk mixing      */
1919
    /*      in very extreme nodata values and causing odd results (#1610)   */
1920
    /* -------------------------------------------------------------------- */
1921
0
    if (dfDensity < 0.9999)
1922
0
    {
1923
0
        // Negligible source contribution: leave the destination untouched.
        if (dfDensity < 0.0001)
1924
0
            return true;
1925
1926
0
        double dfDstReal = 0.0;
1927
0
        // Weight of the existing destination pixel: taken from the density
        // buffer when there is one, otherwise 0 if the validity mask marks
        // the pixel invalid, and 1 in every other case.
        double dfDstDensity = 1.0;
1928
1929
0
        if (poWK->pafDstDensity != nullptr)
1930
0
            dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1931
0
        else if (poWK->panDstValid != nullptr &&
1932
0
                 !CPLMaskGet(poWK->panDstValid, iDstOffset))
1933
0
            dfDstDensity = 0.0;
1934
1935
        // Note: panDstValid[] is only consulted above when pafDstDensity is
        // null.
1936
1937
0
        // Fetch the current destination pixel as a double.
        switch (poWK->eWorkingDataType)
1938
0
        {
1939
0
            case GDT_Byte:
1940
0
                dfDstReal = pabyDst[iDstOffset];
1941
0
                break;
1942
1943
0
            case GDT_Int8:
1944
0
                dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1945
0
                break;
1946
1947
0
            case GDT_Int16:
1948
0
                dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1949
0
                break;
1950
1951
0
            case GDT_UInt16:
1952
0
                dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1953
0
                break;
1954
1955
0
            case GDT_Int32:
1956
0
                dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1957
0
                break;
1958
1959
0
            case GDT_UInt32:
1960
0
                dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1961
0
                break;
1962
1963
0
            case GDT_Int64:
1964
0
                dfDstReal = static_cast<double>(
1965
0
                    reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1966
0
                break;
1967
1968
0
            case GDT_UInt64:
1969
0
                dfDstReal = static_cast<double>(
1970
0
                    reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1971
0
                break;
1972
1973
0
            case GDT_Float16:
1974
0
                dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1975
0
                break;
1976
1977
0
            case GDT_Float32:
1978
0
                dfDstReal =
1979
0
                    double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1980
0
                break;
1981
1982
0
            case GDT_Float64:
1983
0
                dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1984
0
                break;
1985
1986
0
            // Complex and invalid types are rejected by this real-only path.
            case GDT_CInt16:
1987
0
            case GDT_CInt32:
1988
0
            case GDT_CFloat16:
1989
0
            case GDT_CFloat32:
1990
0
            case GDT_CFloat64:
1991
0
            case GDT_Unknown:
1992
0
            case GDT_TypeCount:
1993
0
                CPLAssert(false);
1994
0
                return false;
1995
0
        }
1996
1997
        // The destination density is really only relative to the portion
1998
        // not occluded by the overlay.
1999
0
        const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2000
2001
0
        // Weighted average of source and destination, normalised by the
        // total weight.
        dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2002
0
                 (dfDensity + dfDstInfluence);
2003
0
    }
2004
2005
    /* -------------------------------------------------------------------- */
2006
    /*      Actually apply the destination value.                           */
2007
    /*                                                                      */
2008
    /*      Avoid using the destination nodata value for integer datatypes  */
2009
    /*      if by chance it is equal to the computed pixel value.           */
2010
    /* -------------------------------------------------------------------- */
2011
2012
0
    switch (poWK->eWorkingDataType)
2013
0
    {
2014
0
        case GDT_Byte:
2015
0
            ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
2016
0
            break;
2017
2018
0
        case GDT_Int8:
2019
0
            ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
2020
0
            break;
2021
2022
0
        case GDT_Int16:
2023
0
            ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
2024
0
            break;
2025
2026
0
        case GDT_UInt16:
2027
0
            ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
2028
0
            break;
2029
2030
0
        case GDT_UInt32:
2031
0
            ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
2032
0
            break;
2033
2034
0
        case GDT_Int32:
2035
0
            ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
2036
0
            break;
2037
2038
0
        case GDT_UInt64:
2039
0
            ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
2040
0
                                                    dfReal);
2041
0
            break;
2042
2043
0
        case GDT_Int64:
2044
0
            ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
2045
0
                                                   dfReal);
2046
0
            break;
2047
2048
0
        case GDT_Float16:
2049
0
            ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
2050
0
            break;
2051
2052
0
        case GDT_Float32:
2053
0
            ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
2054
0
            break;
2055
2056
0
        case GDT_Float64:
2057
0
            ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
2058
0
            break;
2059
2060
0
        // Complex types: nothing is written; failure is reported without
        // asserting.
        case GDT_CInt16:
2061
0
        case GDT_CInt32:
2062
0
        case GDT_CFloat16:
2063
0
        case GDT_CFloat32:
2064
0
        case GDT_CFloat64:
2065
0
            return false;
2066
2067
0
        case GDT_Unknown:
2068
0
        case GDT_TypeCount:
2069
0
            CPLAssert(false);
2070
0
            return false;
2071
0
    }
2072
2073
0
    return true;
2074
0
}
2075
2076
/************************************************************************/
2077
/*                          GWKGetPixelValue()                          */
2078
/************************************************************************/
2079
2080
/* It is assumed that panUnifiedSrcValid has been checked before */
2081
2082
// Read the pixel at index iSrcOffset of source band iBand and return it as
// doubles through *pdfReal and *pdfImag, together with its density through
// *pdfDensity.  The element type is selected at run time from
// poWK->eWorkingDataType; complex types keep their real and imaginary parts
// in consecutive elements, at iSrcOffset * 2 and iSrcOffset * 2 + 1.
//
// Returns false, with *pdfDensity set to 0.0, when the per-band validity mask
// marks the pixel invalid (in that case *pdfReal and *pdfImag are not
// written) or when the working type is GDT_Unknown / GDT_TypeCount.
// Otherwise *pdfDensity comes from pafUnifiedSrcDensity when that buffer
// exists, or is 1.0, and the return value tells whether it is non-zero.
static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2083
                             GPtrDiff_t iSrcOffset, double *pdfDensity,
2084
                             double *pdfReal, double *pdfImag)
2085
2086
0
{
2087
0
    GByte *pabySrc = poWK->papabySrcImage[iBand];
2088
2089
0
    // A per-band validity mask, when present, can veto the pixel outright.
    if (poWK->papanBandSrcValid != nullptr &&
2090
0
        poWK->papanBandSrcValid[iBand] != nullptr &&
2091
0
        !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2092
0
    {
2093
0
        *pdfDensity = 0.0;
2094
0
        return false;
2095
0
    }
2096
2097
0
    *pdfReal = 0.0;
2098
0
    *pdfImag = 0.0;
2099
2100
    // TODO(schwehr): Fix casting.
2101
0
    switch (poWK->eWorkingDataType)
2102
0
    {
2103
0
        case GDT_Byte:
2104
0
            *pdfReal = pabySrc[iSrcOffset];
2105
0
            *pdfImag = 0.0;
2106
0
            break;
2107
2108
0
        case GDT_Int8:
2109
0
            *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2110
0
            *pdfImag = 0.0;
2111
0
            break;
2112
2113
0
        case GDT_Int16:
2114
0
            *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2115
0
            *pdfImag = 0.0;
2116
0
            break;
2117
2118
0
        case GDT_UInt16:
2119
0
            *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2120
0
            *pdfImag = 0.0;
2121
0
            break;
2122
2123
0
        case GDT_Int32:
2124
0
            *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2125
0
            *pdfImag = 0.0;
2126
0
            break;
2127
2128
0
        case GDT_UInt32:
2129
0
            *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2130
0
            *pdfImag = 0.0;
2131
0
            break;
2132
2133
0
        case GDT_Int64:
2134
0
            *pdfReal = static_cast<double>(
2135
0
                reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2136
0
            *pdfImag = 0.0;
2137
0
            break;
2138
2139
0
        case GDT_UInt64:
2140
0
            *pdfReal = static_cast<double>(
2141
0
                reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2142
0
            *pdfImag = 0.0;
2143
0
            break;
2144
2145
0
        case GDT_Float16:
2146
0
            *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2147
0
            *pdfImag = 0.0;
2148
0
            break;
2149
2150
0
        case GDT_Float32:
2151
0
            *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2152
0
            *pdfImag = 0.0;
2153
0
            break;
2154
2155
0
        case GDT_Float64:
2156
0
            *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2157
0
            *pdfImag = 0.0;
2158
0
            break;
2159
2160
0
        // Complex types: real part at index 2 * offset, imaginary part at
        // 2 * offset + 1.
        case GDT_CInt16:
2161
0
            *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2162
0
            *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2163
0
            break;
2164
2165
0
        case GDT_CInt32:
2166
0
            *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2167
0
            *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2168
0
            break;
2169
2170
0
        case GDT_CFloat16:
2171
0
            *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2172
0
            *pdfImag =
2173
0
                reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2174
0
            break;
2175
2176
0
        case GDT_CFloat32:
2177
0
            *pdfReal =
2178
0
                double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
2179
0
            *pdfImag =
2180
0
                double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
2181
0
            break;
2182
2183
0
        case GDT_CFloat64:
2184
0
            *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2185
0
            *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2186
0
            break;
2187
2188
0
        case GDT_Unknown:
2189
0
        case GDT_TypeCount:
2190
0
            CPLAssert(false);
2191
0
            *pdfDensity = 0.0;
2192
0
            return false;
2193
0
    }
2194
2195
0
    // Density defaults to fully opaque when no unified density buffer exists.
    if (poWK->pafUnifiedSrcDensity != nullptr)
2196
0
        *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2197
0
    else
2198
0
        *pdfDensity = 1.0;
2199
2200
0
    // A zero density is reported to the caller as "no usable value".
    return *pdfDensity != 0.0;
2201
0
}
2202
2203
/************************************************************************/
2204
/*                       GWKGetPixelValueReal()                         */
2205
/************************************************************************/
2206
2207
static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2208
                                 GPtrDiff_t iSrcOffset, double *pdfDensity,
2209
                                 double *pdfReal)
2210
2211
0
{
2212
0
    GByte *pabySrc = poWK->papabySrcImage[iBand];
2213
2214
0
    if (poWK->papanBandSrcValid != nullptr &&
2215
0
        poWK->papanBandSrcValid[iBand] != nullptr &&
2216
0
        !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2217
0
    {
2218
0
        *pdfDensity = 0.0;
2219
0
        return false;
2220
0
    }
2221
2222
0
    switch (poWK->eWorkingDataType)
2223
0
    {
2224
0
        case GDT_Byte:
2225
0
            *pdfReal = pabySrc[iSrcOffset];
2226
0
            break;
2227
2228
0
        case GDT_Int8:
2229
0
            *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2230
0
            break;
2231
2232
0
        case GDT_Int16:
2233
0
            *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2234
0
            break;
2235
2236
0
        case GDT_UInt16:
2237
0
            *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2238
0
            break;
2239
2240
0
        case GDT_Int32:
2241
0
            *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2242
0
            break;
2243
2244
0
        case GDT_UInt32:
2245
0
            *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2246
0
            break;
2247
2248
0
        case GDT_Int64:
2249
0
            *pdfReal = static_cast<double>(
2250
0
                reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2251
0
            break;
2252
2253
0
        case GDT_UInt64:
2254
0
            *pdfReal = static_cast<double>(
2255
0
                reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2256
0
            break;
2257
2258
0
        case GDT_Float16:
2259
0
            *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2260
0
            break;
2261
2262
0
        case GDT_Float32:
2263
0
            *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2264
0
            break;
2265
2266
0
        case GDT_Float64:
2267
0
            *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2268
0
            break;
2269
2270
0
        case GDT_CInt16:
2271
0
        case GDT_CInt32:
2272
0
        case GDT_CFloat16:
2273
0
        case GDT_CFloat32:
2274
0
        case GDT_CFloat64:
2275
0
        case GDT_Unknown:
2276
0
        case GDT_TypeCount:
2277
0
            CPLAssert(false);
2278
0
            return false;
2279
0
    }
2280
2281
0
    if (poWK->pafUnifiedSrcDensity != nullptr)
2282
0
        *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2283
0
    else
2284
0
        *pdfDensity = 1.0;
2285
2286
0
    return *pdfDensity != 0.0;
2287
0
}
2288
2289
/************************************************************************/
2290
/*                          GWKGetPixelRow()                            */
2291
/************************************************************************/
2292
2293
/* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2294
/* data-types. */
2295
2296
static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2297
                           GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2298
                           double *padfDensity, double adfReal[],
2299
                           double *padfImag)
2300
0
{
2301
    // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2302
0
    const int nSrcLen = nHalfSrcLen * 2;
2303
0
    bool bHasValid = false;
2304
2305
0
    if (padfDensity != nullptr)
2306
0
    {
2307
        // Init the density.
2308
0
        for (int i = 0; i < nSrcLen; i += 2)
2309
0
        {
2310
0
            padfDensity[i] = 1.0;
2311
0
            padfDensity[i + 1] = 1.0;
2312
0
        }
2313
2314
0
        if (poWK->panUnifiedSrcValid != nullptr)
2315
0
        {
2316
0
            for (int i = 0; i < nSrcLen; i += 2)
2317
0
            {
2318
0
                if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2319
0
                    bHasValid = true;
2320
0
                else
2321
0
                    padfDensity[i] = 0.0;
2322
2323
0
                if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2324
0
                    bHasValid = true;
2325
0
                else
2326
0
                    padfDensity[i + 1] = 0.0;
2327
0
            }
2328
2329
            // Reset or fail as needed.
2330
0
            if (bHasValid)
2331
0
                bHasValid = false;
2332
0
            else
2333
0
                return false;
2334
0
        }
2335
2336
0
        if (poWK->papanBandSrcValid != nullptr &&
2337
0
            poWK->papanBandSrcValid[iBand] != nullptr)
2338
0
        {
2339
0
            for (int i = 0; i < nSrcLen; i += 2)
2340
0
            {
2341
0
                if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2342
0
                    bHasValid = true;
2343
0
                else
2344
0
                    padfDensity[i] = 0.0;
2345
2346
0
                if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2347
0
                               iSrcOffset + i + 1))
2348
0
                    bHasValid = true;
2349
0
                else
2350
0
                    padfDensity[i + 1] = 0.0;
2351
0
            }
2352
2353
            // Reset or fail as needed.
2354
0
            if (bHasValid)
2355
0
                bHasValid = false;
2356
0
            else
2357
0
                return false;
2358
0
        }
2359
0
    }
2360
2361
    // TODO(schwehr): Fix casting.
2362
    // Fetch data.
2363
0
    switch (poWK->eWorkingDataType)
2364
0
    {
2365
0
        case GDT_Byte:
2366
0
        {
2367
0
            GByte *pSrc =
2368
0
                reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2369
0
            pSrc += iSrcOffset;
2370
0
            for (int i = 0; i < nSrcLen; i += 2)
2371
0
            {
2372
0
                adfReal[i] = pSrc[i];
2373
0
                adfReal[i + 1] = pSrc[i + 1];
2374
0
            }
2375
0
            break;
2376
0
        }
2377
2378
0
        case GDT_Int8:
2379
0
        {
2380
0
            GInt8 *pSrc =
2381
0
                reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2382
0
            pSrc += iSrcOffset;
2383
0
            for (int i = 0; i < nSrcLen; i += 2)
2384
0
            {
2385
0
                adfReal[i] = pSrc[i];
2386
0
                adfReal[i + 1] = pSrc[i + 1];
2387
0
            }
2388
0
            break;
2389
0
        }
2390
2391
0
        case GDT_Int16:
2392
0
        {
2393
0
            GInt16 *pSrc =
2394
0
                reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2395
0
            pSrc += iSrcOffset;
2396
0
            for (int i = 0; i < nSrcLen; i += 2)
2397
0
            {
2398
0
                adfReal[i] = pSrc[i];
2399
0
                adfReal[i + 1] = pSrc[i + 1];
2400
0
            }
2401
0
            break;
2402
0
        }
2403
2404
0
        case GDT_UInt16:
2405
0
        {
2406
0
            GUInt16 *pSrc =
2407
0
                reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2408
0
            pSrc += iSrcOffset;
2409
0
            for (int i = 0; i < nSrcLen; i += 2)
2410
0
            {
2411
0
                adfReal[i] = pSrc[i];
2412
0
                adfReal[i + 1] = pSrc[i + 1];
2413
0
            }
2414
0
            break;
2415
0
        }
2416
2417
0
        case GDT_Int32:
2418
0
        {
2419
0
            GInt32 *pSrc =
2420
0
                reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2421
0
            pSrc += iSrcOffset;
2422
0
            for (int i = 0; i < nSrcLen; i += 2)
2423
0
            {
2424
0
                adfReal[i] = pSrc[i];
2425
0
                adfReal[i + 1] = pSrc[i + 1];
2426
0
            }
2427
0
            break;
2428
0
        }
2429
2430
0
        case GDT_UInt32:
2431
0
        {
2432
0
            GUInt32 *pSrc =
2433
0
                reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2434
0
            pSrc += iSrcOffset;
2435
0
            for (int i = 0; i < nSrcLen; i += 2)
2436
0
            {
2437
0
                adfReal[i] = pSrc[i];
2438
0
                adfReal[i + 1] = pSrc[i + 1];
2439
0
            }
2440
0
            break;
2441
0
        }
2442
2443
0
        case GDT_Int64:
2444
0
        {
2445
0
            auto pSrc =
2446
0
                reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2447
0
            pSrc += iSrcOffset;
2448
0
            for (int i = 0; i < nSrcLen; i += 2)
2449
0
            {
2450
0
                adfReal[i] = static_cast<double>(pSrc[i]);
2451
0
                adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2452
0
            }
2453
0
            break;
2454
0
        }
2455
2456
0
        case GDT_UInt64:
2457
0
        {
2458
0
            auto pSrc =
2459
0
                reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2460
0
            pSrc += iSrcOffset;
2461
0
            for (int i = 0; i < nSrcLen; i += 2)
2462
0
            {
2463
0
                adfReal[i] = static_cast<double>(pSrc[i]);
2464
0
                adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2465
0
            }
2466
0
            break;
2467
0
        }
2468
2469
0
        case GDT_Float16:
2470
0
        {
2471
0
            GFloat16 *pSrc =
2472
0
                reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2473
0
            pSrc += iSrcOffset;
2474
0
            for (int i = 0; i < nSrcLen; i += 2)
2475
0
            {
2476
0
                adfReal[i] = pSrc[i];
2477
0
                adfReal[i + 1] = pSrc[i + 1];
2478
0
            }
2479
0
            break;
2480
0
        }
2481
2482
0
        case GDT_Float32:
2483
0
        {
2484
0
            float *pSrc =
2485
0
                reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2486
0
            pSrc += iSrcOffset;
2487
0
            for (int i = 0; i < nSrcLen; i += 2)
2488
0
            {
2489
0
                adfReal[i] = double(pSrc[i]);
2490
0
                adfReal[i + 1] = double(pSrc[i + 1]);
2491
0
            }
2492
0
            break;
2493
0
        }
2494
2495
0
        case GDT_Float64:
2496
0
        {
2497
0
            double *pSrc =
2498
0
                reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2499
0
            pSrc += iSrcOffset;
2500
0
            for (int i = 0; i < nSrcLen; i += 2)
2501
0
            {
2502
0
                adfReal[i] = pSrc[i];
2503
0
                adfReal[i + 1] = pSrc[i + 1];
2504
0
            }
2505
0
            break;
2506
0
        }
2507
2508
0
        case GDT_CInt16:
2509
0
        {
2510
0
            GInt16 *pSrc =
2511
0
                reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2512
0
            pSrc += 2 * iSrcOffset;
2513
0
            for (int i = 0; i < nSrcLen; i += 2)
2514
0
            {
2515
0
                adfReal[i] = pSrc[2 * i];
2516
0
                padfImag[i] = pSrc[2 * i + 1];
2517
2518
0
                adfReal[i + 1] = pSrc[2 * i + 2];
2519
0
                padfImag[i + 1] = pSrc[2 * i + 3];
2520
0
            }
2521
0
            break;
2522
0
        }
2523
2524
0
        case GDT_CInt32:
2525
0
        {
2526
0
            GInt32 *pSrc =
2527
0
                reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2528
0
            pSrc += 2 * iSrcOffset;
2529
0
            for (int i = 0; i < nSrcLen; i += 2)
2530
0
            {
2531
0
                adfReal[i] = pSrc[2 * i];
2532
0
                padfImag[i] = pSrc[2 * i + 1];
2533
2534
0
                adfReal[i + 1] = pSrc[2 * i + 2];
2535
0
                padfImag[i + 1] = pSrc[2 * i + 3];
2536
0
            }
2537
0
            break;
2538
0
        }
2539
2540
0
        case GDT_CFloat16:
2541
0
        {
2542
0
            GFloat16 *pSrc =
2543
0
                reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2544
0
            pSrc += 2 * iSrcOffset;
2545
0
            for (int i = 0; i < nSrcLen; i += 2)
2546
0
            {
2547
0
                adfReal[i] = pSrc[2 * i];
2548
0
                padfImag[i] = pSrc[2 * i + 1];
2549
2550
0
                adfReal[i + 1] = pSrc[2 * i + 2];
2551
0
                padfImag[i + 1] = pSrc[2 * i + 3];
2552
0
            }
2553
0
            break;
2554
0
        }
2555
2556
0
        case GDT_CFloat32:
2557
0
        {
2558
0
            float *pSrc =
2559
0
                reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2560
0
            pSrc += 2 * iSrcOffset;
2561
0
            for (int i = 0; i < nSrcLen; i += 2)
2562
0
            {
2563
0
                adfReal[i] = double(pSrc[2 * i]);
2564
0
                padfImag[i] = double(pSrc[2 * i + 1]);
2565
2566
0
                adfReal[i + 1] = double(pSrc[2 * i + 2]);
2567
0
                padfImag[i + 1] = double(pSrc[2 * i + 3]);
2568
0
            }
2569
0
            break;
2570
0
        }
2571
2572
0
        case GDT_CFloat64:
2573
0
        {
2574
0
            double *pSrc =
2575
0
                reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2576
0
            pSrc += 2 * iSrcOffset;
2577
0
            for (int i = 0; i < nSrcLen; i += 2)
2578
0
            {
2579
0
                adfReal[i] = pSrc[2 * i];
2580
0
                padfImag[i] = pSrc[2 * i + 1];
2581
2582
0
                adfReal[i + 1] = pSrc[2 * i + 2];
2583
0
                padfImag[i + 1] = pSrc[2 * i + 3];
2584
0
            }
2585
0
            break;
2586
0
        }
2587
2588
0
        case GDT_Unknown:
2589
0
        case GDT_TypeCount:
2590
0
            CPLAssert(false);
2591
0
            if (padfDensity)
2592
0
                memset(padfDensity, 0, nSrcLen * sizeof(double));
2593
0
            return false;
2594
0
    }
2595
2596
0
    if (padfDensity == nullptr)
2597
0
        return true;
2598
2599
0
    if (poWK->pafUnifiedSrcDensity == nullptr)
2600
0
    {
2601
0
        for (int i = 0; i < nSrcLen; i += 2)
2602
0
        {
2603
            // Take into account earlier calcs.
2604
0
            if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2605
0
            {
2606
0
                padfDensity[i] = 1.0;
2607
0
                bHasValid = true;
2608
0
            }
2609
2610
0
            if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2611
0
            {
2612
0
                padfDensity[i + 1] = 1.0;
2613
0
                bHasValid = true;
2614
0
            }
2615
0
        }
2616
0
    }
2617
0
    else
2618
0
    {
2619
0
        for (int i = 0; i < nSrcLen; i += 2)
2620
0
        {
2621
0
            if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2622
0
                padfDensity[i] =
2623
0
                    double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
2624
0
            if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2625
0
                bHasValid = true;
2626
2627
0
            if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2628
0
                padfDensity[i + 1] =
2629
0
                    double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
2630
0
            if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2631
0
                bHasValid = true;
2632
0
        }
2633
0
    }
2634
2635
0
    return bHasValid;
2636
0
}
2637
2638
/************************************************************************/
2639
/*                          GWKGetPixelT()                              */
2640
/************************************************************************/
2641
2642
template <class T>
2643
static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2644
                         GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2645
2646
0
{
2647
0
    T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2648
2649
0
    if ((poWK->panUnifiedSrcValid != nullptr &&
2650
0
         !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2651
0
        (poWK->papanBandSrcValid != nullptr &&
2652
0
         poWK->papanBandSrcValid[iBand] != nullptr &&
2653
0
         !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2654
0
    {
2655
0
        *pdfDensity = 0.0;
2656
0
        return false;
2657
0
    }
2658
2659
0
    *pValue = pSrc[iSrcOffset];
2660
2661
0
    if (poWK->pafUnifiedSrcDensity == nullptr)
2662
0
        *pdfDensity = 1.0;
2663
0
    else
2664
0
        *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2665
2666
0
    return *pdfDensity != 0.0;
2667
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<unsigned char>(GDALWarpKernel const*, int, long long, double*, unsigned char*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<short>(GDALWarpKernel const*, int, long long, double*, short*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<unsigned short>(GDALWarpKernel const*, int, long long, double*, unsigned short*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<float>(GDALWarpKernel const*, int, long long, double*, float*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<int>(GDALWarpKernel const*, int, long long, double*, int*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<unsigned int>(GDALWarpKernel const*, int, long long, double*, unsigned int*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<long>(GDALWarpKernel const*, int, long long, double*, long*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<unsigned long>(GDALWarpKernel const*, int, long long, double*, unsigned long*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<cpl::Float16>(GDALWarpKernel const*, int, long long, double*, cpl::Float16*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKGetPixelT<double>(GDALWarpKernel const*, int, long long, double*, double*)
2668
2669
/************************************************************************/
2670
/*                        GWKBilinearResample()                         */
2671
/*     Set of bilinear interpolators                                    */
2672
/************************************************************************/
2673
2674
static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2675
                                       double dfSrcX, double dfSrcY,
2676
                                       double *pdfDensity, double *pdfReal,
2677
                                       double *pdfImag)
2678
2679
0
{
2680
    // Save as local variables to avoid following pointers.
2681
0
    const int nSrcXSize = poWK->nSrcXSize;
2682
0
    const int nSrcYSize = poWK->nSrcYSize;
2683
2684
0
    int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2685
0
    int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2686
0
    double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2687
0
    double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2688
0
    bool bShifted = false;
2689
2690
0
    if (iSrcX == -1)
2691
0
    {
2692
0
        iSrcX = 0;
2693
0
        dfRatioX = 1;
2694
0
    }
2695
0
    if (iSrcY == -1)
2696
0
    {
2697
0
        iSrcY = 0;
2698
0
        dfRatioY = 1;
2699
0
    }
2700
0
    GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2701
2702
    // Shift so we don't overrun the array.
2703
0
    if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2704
0
        static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2705
0
            iSrcOffset + nSrcXSize + 1)
2706
0
    {
2707
0
        bShifted = true;
2708
0
        --iSrcOffset;
2709
0
    }
2710
2711
0
    double adfDensity[2] = {0.0, 0.0};
2712
0
    double adfReal[2] = {0.0, 0.0};
2713
0
    double adfImag[2] = {0.0, 0.0};
2714
0
    double dfAccumulatorReal = 0.0;
2715
0
    double dfAccumulatorImag = 0.0;
2716
0
    double dfAccumulatorDensity = 0.0;
2717
0
    double dfAccumulatorDivisor = 0.0;
2718
2719
0
    const GPtrDiff_t nSrcPixels =
2720
0
        static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2721
    // Get pixel row.
2722
0
    if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2723
0
        iSrcOffset < nSrcPixels &&
2724
0
        GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2725
0
                       adfImag))
2726
0
    {
2727
0
        double dfMult1 = dfRatioX * dfRatioY;
2728
0
        double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2729
2730
        // Shifting corrected.
2731
0
        if (bShifted)
2732
0
        {
2733
0
            adfReal[0] = adfReal[1];
2734
0
            adfImag[0] = adfImag[1];
2735
0
            adfDensity[0] = adfDensity[1];
2736
0
        }
2737
2738
        // Upper Left Pixel.
2739
0
        if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2740
0
            adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
2741
0
        {
2742
0
            dfAccumulatorDivisor += dfMult1;
2743
2744
0
            dfAccumulatorReal += adfReal[0] * dfMult1;
2745
0
            dfAccumulatorImag += adfImag[0] * dfMult1;
2746
0
            dfAccumulatorDensity += adfDensity[0] * dfMult1;
2747
0
        }
2748
2749
        // Upper Right Pixel.
2750
0
        if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2751
0
            adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2752
0
        {
2753
0
            dfAccumulatorDivisor += dfMult2;
2754
2755
0
            dfAccumulatorReal += adfReal[1] * dfMult2;
2756
0
            dfAccumulatorImag += adfImag[1] * dfMult2;
2757
0
            dfAccumulatorDensity += adfDensity[1] * dfMult2;
2758
0
        }
2759
0
    }
2760
2761
    // Get pixel row.
2762
0
    if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
2763
0
        iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
2764
0
        GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
2765
0
                       adfReal, adfImag))
2766
0
    {
2767
0
        double dfMult1 = dfRatioX * (1.0 - dfRatioY);
2768
0
        double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2769
2770
        // Shifting corrected
2771
0
        if (bShifted)
2772
0
        {
2773
0
            adfReal[0] = adfReal[1];
2774
0
            adfImag[0] = adfImag[1];
2775
0
            adfDensity[0] = adfDensity[1];
2776
0
        }
2777
2778
        // Lower Left Pixel
2779
0
        if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2780
0
            adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
2781
0
        {
2782
0
            dfAccumulatorDivisor += dfMult1;
2783
2784
0
            dfAccumulatorReal += adfReal[0] * dfMult1;
2785
0
            dfAccumulatorImag += adfImag[0] * dfMult1;
2786
0
            dfAccumulatorDensity += adfDensity[0] * dfMult1;
2787
0
        }
2788
2789
        // Lower Right Pixel.
2790
0
        if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2791
0
            adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2792
0
        {
2793
0
            dfAccumulatorDivisor += dfMult2;
2794
2795
0
            dfAccumulatorReal += adfReal[1] * dfMult2;
2796
0
            dfAccumulatorImag += adfImag[1] * dfMult2;
2797
0
            dfAccumulatorDensity += adfDensity[1] * dfMult2;
2798
0
        }
2799
0
    }
2800
2801
    /* -------------------------------------------------------------------- */
2802
    /*      Return result.                                                  */
2803
    /* -------------------------------------------------------------------- */
2804
0
    if (dfAccumulatorDivisor == 1.0)
2805
0
    {
2806
0
        *pdfReal = dfAccumulatorReal;
2807
0
        *pdfImag = dfAccumulatorImag;
2808
0
        *pdfDensity = dfAccumulatorDensity;
2809
0
        return false;
2810
0
    }
2811
0
    else if (dfAccumulatorDivisor < 0.00001)
2812
0
    {
2813
0
        *pdfReal = 0.0;
2814
0
        *pdfImag = 0.0;
2815
0
        *pdfDensity = 0.0;
2816
0
        return false;
2817
0
    }
2818
0
    else
2819
0
    {
2820
0
        *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
2821
0
        *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
2822
0
        *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
2823
0
        return true;
2824
0
    }
2825
0
}
2826
2827
template <class T>
2828
static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
2829
                                               int iBand, double dfSrcX,
2830
                                               double dfSrcY, T *pValue)
2831
2832
0
{
2833
2834
0
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2835
0
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2836
0
    GPtrDiff_t iSrcOffset =
2837
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2838
0
    const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2839
0
    const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2840
2841
0
    const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2842
2843
0
    if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2844
0
        iSrcY + 1 < poWK->nSrcYSize)
2845
0
    {
2846
0
        const double dfAccumulator =
2847
0
            (double(pSrc[iSrcOffset]) * dfRatioX +
2848
0
             double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
2849
0
                dfRatioY +
2850
0
            (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
2851
0
             double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
2852
0
                 (1.0 - dfRatioX)) *
2853
0
                (1.0 - dfRatioY);
2854
2855
0
        *pValue = GWKRoundValueT<T>(dfAccumulator);
2856
2857
0
        return true;
2858
0
    }
2859
2860
0
    double dfAccumulatorDivisor = 0.0;
2861
0
    double dfAccumulator = 0.0;
2862
2863
    // Upper Left Pixel.
2864
0
    if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
2865
0
        iSrcY < poWK->nSrcYSize)
2866
0
    {
2867
0
        const double dfMult = dfRatioX * dfRatioY;
2868
2869
0
        dfAccumulatorDivisor += dfMult;
2870
2871
0
        dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
2872
0
    }
2873
2874
    // Upper Right Pixel.
2875
0
    if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2876
0
        iSrcY < poWK->nSrcYSize)
2877
0
    {
2878
0
        const double dfMult = (1.0 - dfRatioX) * dfRatioY;
2879
2880
0
        dfAccumulatorDivisor += dfMult;
2881
2882
0
        dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
2883
0
    }
2884
2885
    // Lower Right Pixel.
2886
0
    if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2887
0
        iSrcY + 1 < poWK->nSrcYSize)
2888
0
    {
2889
0
        const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2890
2891
0
        dfAccumulatorDivisor += dfMult;
2892
2893
0
        dfAccumulator +=
2894
0
            double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
2895
0
    }
2896
2897
    // Lower Left Pixel.
2898
0
    if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2899
0
        iSrcY + 1 < poWK->nSrcYSize)
2900
0
    {
2901
0
        const double dfMult = dfRatioX * (1.0 - dfRatioY);
2902
2903
0
        dfAccumulatorDivisor += dfMult;
2904
2905
0
        dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
2906
0
    }
2907
2908
    /* -------------------------------------------------------------------- */
2909
    /*      Return result.                                                  */
2910
    /* -------------------------------------------------------------------- */
2911
0
    double dfValue = 0.0;
2912
2913
0
    if (dfAccumulatorDivisor < 0.00001)
2914
0
    {
2915
0
        *pValue = 0;
2916
0
        return false;
2917
0
    }
2918
0
    else if (dfAccumulatorDivisor == 1.0)
2919
0
    {
2920
0
        dfValue = dfAccumulator;
2921
0
    }
2922
0
    else
2923
0
    {
2924
0
        dfValue = dfAccumulator / dfAccumulatorDivisor;
2925
0
    }
2926
2927
0
    *pValue = GWKRoundValueT<T>(dfValue);
2928
2929
0
    return true;
2930
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKBilinearResampleNoMasks4SampleT<unsigned char>(GDALWarpKernel const*, int, double, double, unsigned char*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKBilinearResampleNoMasks4SampleT<float>(GDALWarpKernel const*, int, double, double, float*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKBilinearResampleNoMasks4SampleT<short>(GDALWarpKernel const*, int, double, double, short*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKBilinearResampleNoMasks4SampleT<unsigned short>(GDALWarpKernel const*, int, double, double, unsigned short*)
2931
2932
/************************************************************************/
2933
/*                        GWKCubicResample()                            */
2934
/*     Set of bicubic interpolators using cubic convolution.            */
2935
/************************************************************************/
2936
2937
// http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
2938
// or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
2939
// http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
2940
2941
// One-dimensional cubic convolution of four consecutive samples f0..f3.
// distance1, distance2 and distance3 are supplied by the caller and multiply
// the first-, second- and third-order terms respectively; the result equals
// f1 when all three are zero.
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T firstOrder = distance1 * (f2 - f0);
    const T secondOrder = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T thirdOrder = distance3 * (3 * (f1 - f2) + f3 - f0);

    return (f1 + T(0.5) * (firstOrder + secondOrder + thirdOrder));
}
2949
2950
/************************************************************************/
2951
/*                       GWKCubicComputeWeights()                       */
2952
/************************************************************************/
2953
2954
// The four weights sum to 1, so:
// adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
2955
2956
// Fills coeffs[0..3] with the four cubic convolution weights for offset x.
// For x == 0 the weights are {0, 1, 0, 0}; for x == 1 they are {0, 0, 1, 0}.
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    const T xHalf = T(0.5) * x;
    const T xTriple = T(3.0) * x;
    const T xSquaredHalf = xHalf * x;

    coeffs[0] = xHalf * (-1 + x * (2 - x));
    coeffs[1] = 1 + xSquaredHalf * (-5 + xTriple);
    coeffs[2] = xHalf * (1 + x * (4 - xTriple));
    coeffs[3] = xSquaredHalf * (-1 + x);
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKCubicComputeWeights<double>(double, double*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKCubicComputeWeights<float>(float, float*)
2968
2969
// Dot product of four double weights v1 with four samples v2 of type T,
// each sample being converted to double before multiplication.
template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
{
    // Seed with the first product and add the rest left to right, which is
    // the same summation order as a single chained expression.
    double dfSum = v1[0] * double(v2[0]);
    for (int i = 1; i < 4; ++i)
        dfSum += v1[i] * double(v2[i]);
    return dfSum;
}
Unexecuted instantiation: double CONVOL4<double>(double const*, double const*)
Unexecuted instantiation: double CONVOL4<float>(double const*, float const*)
Unexecuted instantiation: double CONVOL4<unsigned char>(double const*, unsigned char const*)
Unexecuted instantiation: double CONVOL4<unsigned short>(double const*, unsigned short const*)
Unexecuted instantiation: double CONVOL4<short>(double const*, short const*)
2974
2975
#if 0
2976
// Optimal (in theory...) for max 2 convolutions: 14 multiplications
2977
// instead of 17.
2978
// TODO(schwehr): Use an inline function.
2979
#define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX)             \
2980
    {                                                                          \
2981
        const double dfX = dfX_;                                               \
2982
        dfHalfX = 0.5 * dfX;                                                   \
2983
        const double dfThreeX = 3.0 * dfX;                                     \
2984
        const double dfXMinus1 = dfX - 1;                                      \
2985
                                                                               \
2986
        adfCoeffs[0] = -1 + dfX * (2 - dfX);                                   \
2987
        adfCoeffs[1] = dfX * (-5 + dfThreeX);                                  \
2988
        /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/                           \
2989
        adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1];                              \
2990
        /*adfCoeffs[3] = dfX * (-1 + dfX); */                                  \
2991
        adfCoeffs[3] = dfXMinus1 - adfCoeffs[0];                               \
2992
    }
2993
2994
// TODO(schwehr): Use an inline function.
2995
#define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX)                               \
2996
    ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
2997
                           (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
2998
#endif
2999
3000
static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
3001
                                    double dfSrcX, double dfSrcY,
3002
                                    double *pdfDensity, double *pdfReal,
3003
                                    double *pdfImag)
3004
3005
0
{
3006
0
    const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3007
0
    const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3008
0
    GPtrDiff_t iSrcOffset =
3009
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3010
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3011
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3012
0
    double adfDensity[4] = {};
3013
0
    double adfReal[4] = {};
3014
0
    double adfImag[4] = {};
3015
3016
    // Get the bilinear interpolation at the image borders.
3017
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3018
0
        iSrcY + 2 >= poWK->nSrcYSize)
3019
0
        return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3020
0
                                          pdfDensity, pdfReal, pdfImag);
3021
3022
0
    double adfValueDens[4] = {};
3023
0
    double adfValueReal[4] = {};
3024
0
    double adfValueImag[4] = {};
3025
3026
0
    double adfCoeffsX[4] = {};
3027
0
    GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3028
3029
0
    for (GPtrDiff_t i = -1; i < 3; i++)
3030
0
    {
3031
0
        if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3032
0
                            2, adfDensity, adfReal, adfImag) ||
3033
0
            adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3034
0
            adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3035
0
            adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3036
0
            adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3037
0
        {
3038
0
            return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3039
0
                                              pdfDensity, pdfReal, pdfImag);
3040
0
        }
3041
3042
0
        adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3043
0
        adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3044
0
        adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3045
0
    }
3046
3047
    /* -------------------------------------------------------------------- */
3048
    /*      For now, if we have any pixels missing in the kernel area,      */
3049
    /*      we fallback on using bilinear interpolation.  Ideally we        */
3050
    /*      should do "weight adjustment" of our results similarly to       */
3051
    /*      what is done for the cubic spline and lanc. interpolators.      */
3052
    /* -------------------------------------------------------------------- */
3053
3054
0
    double adfCoeffsY[4] = {};
3055
0
    GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3056
3057
0
    *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3058
0
    *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3059
0
    *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3060
3061
0
    return true;
3062
0
}
3063
3064
#ifdef USE_SSE2
3065
3066
/************************************************************************/
3067
/*                           XMMLoad4Values()                           */
3068
/*                                                                      */
3069
/*  Load 4 packed byte or uint16, cast them to float and put them in a  */
3070
/*  m128 register.                                                      */
3071
/************************************************************************/
3072
3073
// Load 4 consecutive bytes starting at ptr and return them as 4 floats.
static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
{
    // Copy the 4 bytes into an integer with memcpy, then move that integer
    // into the low 32 bits of a vector register.
    unsigned int nPacked;
    memcpy(&nPacked, ptr, 4);
    const __m128i xmmPacked = _mm_cvtsi32_si128(nPacked);

    // Zero extend 4 packed unsigned 8-bit integers to packed 32-bit
    // integers.
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    const __m128i xmmInts = _mm_cvtepu8_epi32(xmmPacked);
#else
    // Two interleaves with zero: 8 -> 16 bits, then 16 -> 32 bits.
    const __m128i xmmZero = _mm_setzero_si128();
    const __m128i xmmWords = _mm_unpacklo_epi8(xmmPacked, xmmZero);
    const __m128i xmmInts = _mm_unpacklo_epi16(xmmWords, xmmZero);
#endif
    return _mm_cvtepi32_ps(xmmInts);
}
3088
3089
// Load 4 consecutive 16-bit unsigned values starting at ptr and return them
// as 4 floats.
static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
{
    // Copy the 8 bytes into a 64-bit integer with memcpy, then move that
    // integer into the low 64 bits of a vector register.
    GUInt64 nPacked;
    memcpy(&nPacked, ptr, 8);
    const __m128i xmmPacked = _mm_cvtsi64_si128(nPacked);

    // Zero extend 4 packed unsigned 16-bit integers to packed 32-bit
    // integers.
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    const __m128i xmmInts = _mm_cvtepu16_epi32(xmmPacked);
#else
    const __m128i xmmInts =
        _mm_unpacklo_epi16(xmmPacked, _mm_setzero_si128());
#endif
    return _mm_cvtepi32_ps(xmmInts);
}
3103
3104
/************************************************************************/
3105
/*                           XMMHorizontalAdd()                         */
3106
/*                                                                      */
3107
/*  Return the sum of the 4 floating points of the register.            */
3108
/************************************************************************/
3109
3110
#if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3111
static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3112
{
3113
    __m128 shuf = _mm_movehdup_ps(v);   // (v3   , v3   , v1   , v1)
3114
    __m128 sums = _mm_add_ps(v, shuf);  // (v3+v3, v3+v2, v1+v1, v1+v0)
3115
    shuf = _mm_movehl_ps(shuf, sums);   // (v3   , v3   , v3+v3, v3+v2)
3116
    sums = _mm_add_ss(sums, shuf);      // (v1+v0)+(v3+v2)
3117
    return _mm_cvtss_f32(sums);
3118
}
3119
#else
3120
static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3121
0
{
3122
0
    __m128 shuf = _mm_movehl_ps(v, v);     // (v3   , v2   , v3   , v2)
3123
0
    __m128 sums = _mm_add_ps(v, shuf);     // (v3+v3, v2+v2, v3+v1, v2+v0)
3124
0
    shuf = _mm_shuffle_ps(sums, sums, 1);  // (v2+v0, v2+v0, v2+v0, v3+v1)
3125
0
    sums = _mm_add_ss(sums, shuf);         // (v2+v0)+(v3+v1)
3126
0
    return _mm_cvtss_f32(sums);
3127
0
}
3128
#endif
3129
3130
#endif  // define USE_SSE2
3131
3132
/************************************************************************/
3133
/*            GWKCubicResampleSrcMaskIsDensity4SampleRealT()            */
3134
/************************************************************************/
3135
3136
// Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
3137
// because there are a few assumptions about those types.
3138
// We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3139
// perf benefit.
3140
3141
template <class T>
3142
static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3143
    const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3144
    double *pdfDensity, double *pdfReal)
3145
0
{
3146
0
    const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3147
0
    const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3148
0
    const GPtrDiff_t iSrcOffset =
3149
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3150
3151
    // Get the bilinear interpolation at the image borders.
3152
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3153
0
        iSrcY + 2 >= poWK->nSrcYSize)
3154
0
    {
3155
0
        double adfImagIgnored[4] = {};
3156
0
        return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3157
0
                                          pdfDensity, pdfReal, adfImagIgnored);
3158
0
    }
3159
3160
#if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3161
    const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3162
    const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3163
3164
    // TODO(schwehr): Explain the magic numbers.
3165
    float afTemp[4 + 4 + 4 + 1];
3166
    float *pafAligned =
3167
        reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3168
    float *pafCoeffs = pafAligned;
3169
    float *pafDensity = pafAligned + 4;
3170
    float *pafValue = pafAligned + 8;
3171
3172
    const float fHalfDeltaX = 0.5f * fDeltaX;
3173
    const float fThreeDeltaX = 3.0f * fDeltaX;
3174
    const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3175
3176
    pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3177
    pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3178
    pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3179
    pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3180
    __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3181
    const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
3182
3183
    __m128 xmmMaskLowDensity = _mm_setzero_ps();
3184
    for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3185
         i++, iOffset += poWK->nSrcXSize)
3186
    {
3187
        const __m128 xmmDensity =
3188
            _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3189
        xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3190
                                      _mm_cmplt_ps(xmmDensity, xmmThreshold));
3191
        pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3192
3193
        const __m128 xmmValues =
3194
            XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3195
        pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3196
    }
3197
    if (_mm_movemask_ps(xmmMaskLowDensity))
3198
    {
3199
        double adfImagIgnored[4] = {};
3200
        return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3201
                                          pdfDensity, pdfReal, adfImagIgnored);
3202
    }
3203
3204
    const float fHalfDeltaY = 0.5f * fDeltaY;
3205
    const float fThreeDeltaY = 3.0f * fDeltaY;
3206
    const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3207
3208
    pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3209
    pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3210
    pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3211
    pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3212
3213
    xmmCoeffs = _mm_load_ps(pafCoeffs);
3214
3215
    const __m128 xmmDensity = _mm_load_ps(pafDensity);
3216
    const __m128 xmmValue = _mm_load_ps(pafValue);
3217
    *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3218
    *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3219
3220
    // We did all above computations on float32 whereas the general case is
3221
    // float64. Not sure if one is fundamentally more correct than the other
3222
    // one, but we want our optimization to give the same result as the
3223
    // general case as much as possible, so if the resulting value is
3224
    // close to some_int_value + 0.5, redo the computation with the general
3225
    // case.
3226
    // Note: If other types than Byte or UInt16, will need changes.
3227
    if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3228
        return true;
3229
3230
#endif  // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3231
3232
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3233
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3234
3235
0
    double adfValueDens[4] = {};
3236
0
    double adfValueReal[4] = {};
3237
3238
0
    double adfCoeffsX[4] = {};
3239
0
    GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3240
3241
0
    double adfCoeffsY[4] = {};
3242
0
    GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3243
3244
0
    for (GPtrDiff_t i = -1; i < 3; i++)
3245
0
    {
3246
0
        const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3247
0
#if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3248
0
        if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
3249
0
                SRC_DENSITY_THRESHOLD_FLOAT ||
3250
0
            poWK->pafUnifiedSrcDensity[iOffset + 1] <
3251
0
                SRC_DENSITY_THRESHOLD_FLOAT ||
3252
0
            poWK->pafUnifiedSrcDensity[iOffset + 2] <
3253
0
                SRC_DENSITY_THRESHOLD_FLOAT ||
3254
0
            poWK->pafUnifiedSrcDensity[iOffset + 3] <
3255
0
                SRC_DENSITY_THRESHOLD_FLOAT)
3256
0
        {
3257
0
            double adfImagIgnored[4] = {};
3258
0
            return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3259
0
                                              pdfDensity, pdfReal,
3260
0
                                              adfImagIgnored);
3261
0
        }
3262
0
#endif
3263
3264
0
        adfValueDens[i + 1] =
3265
0
            CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3266
3267
0
        adfValueReal[i + 1] = CONVOL4(
3268
0
            adfCoeffsX,
3269
0
            reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3270
0
    }
3271
3272
0
    *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3273
0
    *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3274
3275
0
    return true;
3276
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleSrcMaskIsDensity4SampleRealT<unsigned char>(GDALWarpKernel const*, int, double, double, double*, double*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleSrcMaskIsDensity4SampleRealT<unsigned short>(GDALWarpKernel const*, int, double, double, double*, double*)
3277
3278
/************************************************************************/
3279
/*              GWKCubicResampleSrcMaskIsDensity4SampleReal()             */
3280
/*     Bi-cubic when source has and only has pafUnifiedSrcDensity.      */
3281
/************************************************************************/
3282
3283
static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3284
    const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3285
    double *pdfDensity, double *pdfReal)
3286
3287
0
{
3288
0
    const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3289
0
    const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3290
0
    const GPtrDiff_t iSrcOffset =
3291
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3292
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3293
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3294
3295
    // Get the bilinear interpolation at the image borders.
3296
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3297
0
        iSrcY + 2 >= poWK->nSrcYSize)
3298
0
    {
3299
0
        double adfImagIgnored[4] = {};
3300
0
        return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3301
0
                                          pdfDensity, pdfReal, adfImagIgnored);
3302
0
    }
3303
3304
0
    double adfCoeffsX[4] = {};
3305
0
    GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3306
3307
0
    double adfCoeffsY[4] = {};
3308
0
    GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3309
3310
0
    double adfValueDens[4] = {};
3311
0
    double adfValueReal[4] = {};
3312
0
    double adfDensity[4] = {};
3313
0
    double adfReal[4] = {};
3314
0
    double adfImagIgnored[4] = {};
3315
3316
0
    for (GPtrDiff_t i = -1; i < 3; i++)
3317
0
    {
3318
0
        if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3319
0
                            2, adfDensity, adfReal, adfImagIgnored) ||
3320
0
            adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3321
0
            adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3322
0
            adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3323
0
            adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3324
0
        {
3325
0
            return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3326
0
                                              pdfDensity, pdfReal,
3327
0
                                              adfImagIgnored);
3328
0
        }
3329
3330
0
        adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3331
0
        adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3332
0
    }
3333
3334
0
    *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3335
0
    *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3336
3337
0
    return true;
3338
0
}
3339
3340
template <class T>
3341
static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3342
                                            int iBand, double dfSrcX,
3343
                                            double dfSrcY, T *pValue)
3344
3345
0
{
3346
0
    const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3347
0
    const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3348
0
    const GPtrDiff_t iSrcOffset =
3349
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3350
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3351
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3352
0
    const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3353
0
    const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3354
3355
    // Get the bilinear interpolation at the image borders.
3356
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3357
0
        iSrcY + 2 >= poWK->nSrcYSize)
3358
0
        return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3359
0
                                                  pValue);
3360
3361
0
    double adfCoeffs[4] = {};
3362
0
    GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3363
3364
0
    double adfValue[4] = {};
3365
3366
0
    for (GPtrDiff_t i = -1; i < 3; i++)
3367
0
    {
3368
0
        const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3369
3370
0
        adfValue[i + 1] = CONVOL4(
3371
0
            adfCoeffs,
3372
0
            reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3373
0
    }
3374
3375
0
    const double dfValue =
3376
0
        CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3377
0
                         adfValue[1], adfValue[2], adfValue[3]);
3378
3379
0
    *pValue = GWKClampValueT<T>(dfValue);
3380
3381
0
    return true;
3382
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleNoMasks4SampleT<unsigned char>(GDALWarpKernel const*, int, double, double, unsigned char*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleNoMasks4SampleT<float>(GDALWarpKernel const*, int, double, double, float*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleNoMasks4SampleT<short>(GDALWarpKernel const*, int, double, double, short*)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKCubicResampleNoMasks4SampleT<unsigned short>(GDALWarpKernel const*, int, double, double, unsigned short*)
3383
3384
/************************************************************************/
3385
/*                          GWKLanczosSinc()                            */
3386
/************************************************************************/
3387
3388
/*
3389
 * Lanczos windowed sinc interpolation kernel with radius r.
3390
 *        /
3391
 *        | sinc(x) * sinc(x/r), if |x| < r
3392
 * L(x) = | 1, if x = 0                     ,
3393
 *        | 0, otherwise
3394
 *        \
3395
 *
3396
 * where sinc(x) = sin(PI * x) / (PI * x).
3397
 */
3398
3399
// Evaluate sinc(x) * sinc(x / 3) with sinc(t) = sin(PI * t) / (PI * t),
// returning 1 for x == 0.  No test on |x| is made here.
static double GWKLanczosSinc(double dfX)
{
    if (dfX == 0.0)
    {
        return 1.0;
    }

    const double dfPiX = M_PI * dfX;
    const double dfThird = dfPiX / 3;
    // Denominator: (PI * x) * (PI * x / 3).
    const double dfDenom = dfPiX * dfThird;

    // Given that sin(3a) = 3 sin(a) - 4 sin^3(a), with a = PI * x / 3,
    // sin(PI * x) * sin(a) = (3 - 4 sin^2(a)) * sin^2(a),
    // so a single call to sin() is enough.
    const double dfSinThird = sin(dfThird);
    const double dfSinSq = dfSinThird * dfSinThird;
    const double dfNumer = (3 - 4 * dfSinSq) * dfSinSq;

    return dfNumer / dfDenom;
}
3415
3416
// Replace each of the 4 entries of padfValues by
// sinc(x) * sinc(x / 3), with sinc(t) = sin(PI * t) / (PI * t) and the value
// 1 for x == 0, and return the sum of the 4 resulting values.
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (double *pdfCur = padfValues; pdfCur != padfValues + 4; ++pdfCur)
    {
        if (*pdfCur == 0.0)
        {
            *pdfCur = 1.0;
            continue;
        }

        const double dfPiX = M_PI * *pdfCur;
        const double dfThird = dfPiX / 3;
        const double dfDenom = dfPiX * dfThird;

        // Given that sin(3a) = 3 sin(a) - 4 sin^3(a), with a = PI * x / 3,
        // sin(PI * x) * sin(a) = (3 - 4 sin^2(a)) * sin^2(a).
        const double dfSinThird = sin(dfThird);
        const double dfSinSq = dfSinThird * dfSinThird;
        const double dfNumer = (3 - 4 * dfSinSq) * dfSinSq;

        *pdfCur = dfNumer / dfDenom;
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3440
3441
/************************************************************************/
3442
/*                           GWKBilinear()                              */
3443
/************************************************************************/
3444
3445
// Triangle ("tent") weight: 1 - |x| when |x| <= 1, otherwise 0.
static double GWKBilinear(double dfX)
{
    const double dfDist = fabs(dfX);
    return (dfDist <= 1.0) ? 1 - dfDist : 0.0;
}
3453
3454
// Replace each of the 4 entries of padfValues by its triangle weight
// (1 - |x| when |x| <= 1, otherwise 0) and return the sum of the 4 weights.
static double GWKBilinear4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfDist = fabs(padfValues[i]);
        padfValues[i] = (dfDist <= 1.0) ? 1 - dfDist : 0.0;
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3478
3479
/************************************************************************/
3480
/*                            GWKCubic()                                */
3481
/************************************************************************/
3482
3483
static double GWKCubic(double dfX)
3484
0
{
3485
0
    return CubicKernel(dfX);
3486
0
}
3487
3488
// Replace each of the 4 entries of padfValues by its cubic weight and return
// the sum of the 4 weights.  With a = |x|:
//   a <= 1      : x^2 * (1.5 * a - 2.5) + 1
//   1 < a <= 2  : x^2 * (-0.5 * a + 2.5) - 4 * a + 2
//   otherwise   : 0
static double GWKCubic4Values(double *padfValues)
{
    double adfWeight[4] = {0.0, 0.0, 0.0, 0.0};

    for (int i = 0; i < 4; ++i)
    {
        const double dfAbsX = fabs(padfValues[i]);
        const double dfX2 = padfValues[i] * padfValues[i];

        if (dfAbsX <= 1.0)
        {
            adfWeight[i] = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        }
        else if (dfAbsX <= 2.0)
        {
            adfWeight[i] = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
        }
    }

    for (int i = 0; i < 4; ++i)
    {
        padfValues[i] = adfWeight[i];
    }
    return adfWeight[0] + adfWeight[1] + adfWeight[2] + adfWeight[3];
}
3529
3530
/************************************************************************/
3531
/*                           GWKBSpline()                               */
3532
/************************************************************************/
3533
3534
// https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3535
// Equation 8 with (B,C)=(1,0)
3536
// 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4) |x| < 1
3537
// 1/6 * ( -|x|^3 + 6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
3538
3539
// Cubic B-spline weight, not scaled by 1/6, built from one-sided cubes:
//   (x+2)^3            when x + 2 > 0
//   - 4 * (x+1)^3      additionally when x + 1 > 0
//   + 6 * x^3          additionally when x > 0
//   - 4 * (x-1)^3      additionally when x - 1 > 0
// and 0 when x + 2 is not > 0.
static double GWKBSpline(double x)
{
    const double dfPlus2 = x + 2.0;
    const double dfPlus1 = x + 1.0;
    const double dfMinus1 = x - 1.0;

    // Needed on every path that does not return 0.
    const double dfPlus2Cubed = dfPlus2 * dfPlus2 * dfPlus2;

    if (!(dfPlus2 > 0.0))
    {
        return 0.0;  // * 0.166666666666666666666
    }

    // Sum of the terms that only exist once x + 1 > 0.
    double dfInner = 0.0;
    if (dfPlus1 > 0.0)
    {
        // Sum of the terms that only exist once x > 0.
        double dfPositive = 0.0;
        if (x > 0.0)
        {
            const double dfTail =
                (dfMinus1 > 0.0) ? -4.0 * dfMinus1 * dfMinus1 * dfMinus1 : 0.0;
            dfPositive = dfTail + 6.0 * x * x * x;
        }
        dfInner = dfPositive + -4.0 * dfPlus1 * dfPlus1 * dfPlus1;
    }

    return dfInner + dfPlus2Cubed;  // * 0.166666666666666666666
}
3562
3563
// Replace each of the 4 entries of padfValues by its cubic B-spline weight
// (not scaled by 1/6) and return the sum of the 4 weights.  Each weight is
// built from one-sided cubes:
//   (x+2)^3 - 4 * (x+1)^3 + 6 * x^3 - 4 * (x-1)^3
// where a term is only included once its base, and the base of every term
// before it, is > 0.
static double GWKBSpline4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double x = padfValues[i];
        const double dfPlus2 = x + 2.0;
        const double dfPlus1 = x + 1.0;
        const double dfMinus1 = x - 1.0;

        // Needed on every path that does not produce 0.
        const double dfPlus2Cubed = dfPlus2 * dfPlus2 * dfPlus2;

        if (!(dfPlus2 > 0.0))
        {
            padfValues[i] = 0.0;  // * 0.166666666666666666666
            continue;
        }

        // Sum of the terms that only exist once x + 1 > 0.
        double dfInner = 0.0;
        if (dfPlus1 > 0.0)
        {
            // Sum of the terms that only exist once x > 0.
            double dfPositive = 0.0;
            if (x > 0.0)
            {
                const double dfTail =
                    (dfMinus1 > 0.0) ? -4.0 * dfMinus1 * dfMinus1 * dfMinus1
                                     : 0.0;
                dfPositive = dfTail + 6.0 * x * x * x;
            }
            dfInner = dfPositive + -4.0 * dfPlus1 * dfPlus1 * dfPlus1;
        }

        padfValues[i] = dfInner + dfPlus2Cubed;  // * 0.166666666666666666666
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3592
/************************************************************************/
3593
/*                       GWKResampleWrkStruct                           */
3594
/************************************************************************/
3595
3596
typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3597
3598
typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3599
                                   double dfSrcX, double dfSrcY,
3600
                                   double *pdfDensity, double *pdfReal,
3601
                                   double *pdfImag,
3602
                                   GWKResampleWrkStruct *psWrkStruct);
3603
3604
struct _GWKResampleWrkStruct
3605
{
3606
    pfnGWKResampleType pfnGWKResample;
3607
3608
    // Space for saved X weights.
3609
    double *padfWeightsX;
3610
    bool *pabCalcX;
3611
3612
    double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
3613
    int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
3614
    int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
3615
    double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
3616
    double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
3617
    double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
3618
    double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
3619
    double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
3620
    double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
3621
    double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
3622
    double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
3623
    double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
3624
    double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
3625
3626
    // Space for saving a row of pixels.
3627
    double *padfRowDensity;
3628
    double *padfRowReal;
3629
    double *padfRowImag;
3630
};
3631
3632
/************************************************************************/
3633
/*                    GWKResampleCreateWrkStruct()                      */
3634
/************************************************************************/
3635
3636
static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3637
                        double dfSrcY, double *pdfDensity, double *pdfReal,
3638
                        double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3639
3640
static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3641
                                        double dfSrcX, double dfSrcY,
3642
                                        double *pdfDensity, double *pdfReal,
3643
                                        double *pdfImag,
3644
                                        GWKResampleWrkStruct *psWrkStruct);
3645
3646
/** Allocate and initialise the scratch structure used by the resamplers.
 *
 * The structure owns one weight array per axis, a "weight already computed"
 * flag array for the X axis, and row buffers for the real and imaginary
 * parts of the source pixels. A density row buffer is only allocated when
 * the kernel carries a unified density, a unified validity mask or per-band
 * validity masks. All arrays along X are sized (nXRadius + 1) * 2 and the
 * Y weight array (nYRadius + 1) * 2.
 *
 * For GRA_Lanczos the resampling callback is GWKResampleOptimizedLanczos and,
 * for each axis whose scale is below 1, the cosine/sine of pi*scale and
 * pi*scale/3 are stored. Those fields are not assigned here otherwise. Any
 * other resampling algorithm gets GWKResample as callback.
 *
 * @param poWK warp kernel providing radii, scales, masks and the algorithm.
 * @return the new structure, to be released with
 *         GWKResampleDeleteWrkStruct().
 */
static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
{
    // Number of slots needed along each axis of the kernel footprint.
    const int nSlotsX = (poWK->nXRadius + 1) * 2;
    const int nSlotsY = (poWK->nYRadius + 1) * 2;

    auto *psNew = static_cast<GWKResampleWrkStruct *>(
        CPLCalloc(1, sizeof(GWKResampleWrkStruct)));

    // X weights and the flags telling which of them have been computed.
    psNew->padfWeightsX =
        static_cast<double *>(CPLCalloc(nSlotsX, sizeof(double)));
    psNew->pabCalcX = static_cast<bool *>(CPLMalloc(nSlotsX * sizeof(bool)));

    // Y weights.
    psNew->padfWeightsY =
        static_cast<double *>(CPLCalloc(nSlotsY, sizeof(double)));

    // Cache keys of the Lanczos resampler. The fractional offsets it compares
    // against lie in [0, 1), so -10 forces a computation on the first call.
    psNew->iLastSrcX = -10;
    psNew->iLastSrcY = -10;
    psNew->dfLastDeltaX = -10;
    psNew->dfLastDeltaY = -10;

    // Row buffers. The density row is only needed when some density or
    // validity information is attached to the source.
    const bool bSourceHasMasking = poWK->pafUnifiedSrcDensity != nullptr ||
                                   poWK->panUnifiedSrcValid != nullptr ||
                                   poWK->papanBandSrcValid != nullptr;
    psNew->padfRowDensity =
        bSourceHasMasking
            ? static_cast<double *>(CPLCalloc(nSlotsX, sizeof(double)))
            : nullptr;
    psNew->padfRowReal =
        static_cast<double *>(CPLCalloc(nSlotsX, sizeof(double)));
    psNew->padfRowImag =
        static_cast<double *>(CPLCalloc(nSlotsX, sizeof(double)));

    if (poWK->eResample != GRA_Lanczos)
    {
        psNew->pfnGWKResample = GWKResample;
        return psNew;
    }

    psNew->pfnGWKResample = GWKResampleOptimizedLanczos;

    // Stores cos/sin of (pi * dfScale / 3) and of (pi * dfScale).
    // The "naive" form of the last two would be cos(M_PI * dfScale) and
    // sin(M_PI * dfScale), but cos(3x) = 4 cos^3(x) - 3 cos(x), and with x
    // between 0 and M_PI the sine can be recovered as sqrt(1 - cos^2).
    const auto StoreScaleTrig = [](double dfScale, double &dfCosOver3,
                                   double &dfSinOver3, double &dfCosFull,
                                   double &dfSinFull)
    {
        dfCosOver3 = cos(M_PI / 3 * dfScale);
        dfSinOver3 = sqrt(1 - dfCosOver3 * dfCosOver3);
        dfCosFull = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
        dfSinFull = sqrt(1 - dfCosFull * dfCosFull);
    };

    if (poWK->dfXScale < 1)
    {
        StoreScaleTrig(poWK->dfXScale, psNew->dfCosPiXScaleOver3,
                       psNew->dfSinPiXScaleOver3, psNew->dfCosPiXScale,
                       psNew->dfSinPiXScale);
    }

    if (poWK->dfYScale < 1)
    {
        StoreScaleTrig(poWK->dfYScale, psNew->dfCosPiYScaleOver3,
                       psNew->dfSinPiYScaleOver3, psNew->dfCosPiYScale,
                       psNew->dfSinPiYScale);
    }

    return psNew;
}
3729
3730
/************************************************************************/
3731
/*                    GWKResampleDeleteWrkStruct()                      */
3732
/************************************************************************/
3733
3734
/** Release a working structure and every scratch array it owns.
 *
 * @param psWrkStruct structure obtained from GWKResampleCreateWrkStruct().
 *                    It is dereferenced unconditionally.
 */
static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
{
    // Owned arrays, listed in release order. padfRowDensity is null when the
    // structure was created for a source without density/validity data.
    void *const apOwnedArrays[] = {
        psWrkStruct->padfWeightsX,   psWrkStruct->padfWeightsY,
        psWrkStruct->pabCalcX,       psWrkStruct->padfRowDensity,
        psWrkStruct->padfRowReal,    psWrkStruct->padfRowImag};

    for (void *pArray : apOwnedArrays)
        CPLFree(pArray);

    CPLFree(psWrkStruct);
}
3744
3745
/************************************************************************/
3746
/*                           GWKResample()                              */
3747
/************************************************************************/
3748
3749
static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3750
                        double dfSrcY, double *pdfDensity, double *pdfReal,
3751
                        double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
3752
3753
0
{
3754
    // Save as local variables to avoid following pointers in loops.
3755
0
    const int nSrcXSize = poWK->nSrcXSize;
3756
0
    const int nSrcYSize = poWK->nSrcYSize;
3757
3758
0
    double dfAccumulatorReal = 0.0;
3759
0
    double dfAccumulatorImag = 0.0;
3760
0
    double dfAccumulatorDensity = 0.0;
3761
0
    double dfAccumulatorWeight = 0.0;
3762
0
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3763
0
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3764
0
    const GPtrDiff_t iSrcOffset =
3765
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3766
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3767
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3768
3769
0
    const double dfXScale = poWK->dfXScale;
3770
0
    const double dfYScale = poWK->dfYScale;
3771
3772
0
    const int nXDist = (poWK->nXRadius + 1) * 2;
3773
3774
    // Space for saved X weights.
3775
0
    double *padfWeightsX = psWrkStruct->padfWeightsX;
3776
0
    bool *pabCalcX = psWrkStruct->pabCalcX;
3777
3778
    // Space for saving a row of pixels.
3779
0
    double *padfRowDensity = psWrkStruct->padfRowDensity;
3780
0
    double *padfRowReal = psWrkStruct->padfRowReal;
3781
0
    double *padfRowImag = psWrkStruct->padfRowImag;
3782
3783
    // Mark as needing calculation (don't calculate the weights yet,
3784
    // because a mask may render it unnecessary).
3785
0
    memset(pabCalcX, false, nXDist * sizeof(bool));
3786
3787
0
    FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3788
0
    CPLAssert(pfnGetWeight);
3789
3790
    // Skip sampling over edge of image.
3791
0
    int j = poWK->nFiltInitY;
3792
0
    int jMax = poWK->nYRadius;
3793
0
    if (iSrcY + j < 0)
3794
0
        j = -iSrcY;
3795
0
    if (iSrcY + jMax >= nSrcYSize)
3796
0
        jMax = nSrcYSize - iSrcY - 1;
3797
3798
0
    int iMin = poWK->nFiltInitX;
3799
0
    int iMax = poWK->nXRadius;
3800
0
    if (iSrcX + iMin < 0)
3801
0
        iMin = -iSrcX;
3802
0
    if (iSrcX + iMax >= nSrcXSize)
3803
0
        iMax = nSrcXSize - iSrcX - 1;
3804
3805
0
    const int bXScaleBelow1 = (dfXScale < 1.0);
3806
0
    const int bYScaleBelow1 = (dfYScale < 1.0);
3807
3808
0
    GPtrDiff_t iRowOffset =
3809
0
        iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
3810
3811
    // Loop over pixel rows in the kernel.
3812
0
    for (; j <= jMax; ++j)
3813
0
    {
3814
0
        iRowOffset += nSrcXSize;
3815
3816
        // Get pixel values.
3817
        // We can potentially read extra elements after the "normal" end of the
3818
        // source arrays, but the contract of papabySrcImage[iBand],
3819
        // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3820
        // is to have WARP_EXTRA_ELTS reserved at their end.
3821
0
        if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
3822
0
                            padfRowDensity, padfRowReal, padfRowImag))
3823
0
            continue;
3824
3825
        // Calculate the Y weight.
3826
0
        double dfWeight1 = (bYScaleBelow1)
3827
0
                               ? pfnGetWeight((j - dfDeltaY) * dfYScale)
3828
0
                               : pfnGetWeight(j - dfDeltaY);
3829
3830
        // Iterate over pixels in row.
3831
0
        double dfAccumulatorRealLocal = 0.0;
3832
0
        double dfAccumulatorImagLocal = 0.0;
3833
0
        double dfAccumulatorDensityLocal = 0.0;
3834
0
        double dfAccumulatorWeightLocal = 0.0;
3835
3836
0
        for (int i = iMin; i <= iMax; ++i)
3837
0
        {
3838
            // Skip sampling if pixel has zero density.
3839
0
            if (padfRowDensity != nullptr &&
3840
0
                padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
3841
0
                continue;
3842
3843
0
            double dfWeight2 = 0.0;
3844
3845
            // Make or use a cached set of weights for this row.
3846
0
            if (pabCalcX[i - iMin])
3847
0
            {
3848
                // Use saved weight value instead of recomputing it.
3849
0
                dfWeight2 = padfWeightsX[i - iMin];
3850
0
            }
3851
0
            else
3852
0
            {
3853
                // Calculate & save the X weight.
3854
0
                padfWeightsX[i - iMin] = dfWeight2 =
3855
0
                    (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
3856
0
                                    : pfnGetWeight(i - dfDeltaX);
3857
3858
0
                pabCalcX[i - iMin] = true;
3859
0
            }
3860
3861
            // Accumulate!
3862
0
            dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
3863
0
            dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
3864
0
            if (padfRowDensity != nullptr)
3865
0
                dfAccumulatorDensityLocal +=
3866
0
                    padfRowDensity[i - iMin] * dfWeight2;
3867
0
            dfAccumulatorWeightLocal += dfWeight2;
3868
0
        }
3869
3870
0
        dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
3871
0
        dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
3872
0
        dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
3873
0
        dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
3874
0
    }
3875
3876
0
    if (dfAccumulatorWeight < 0.000001 ||
3877
0
        (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
3878
0
    {
3879
0
        *pdfDensity = 0.0;
3880
0
        return false;
3881
0
    }
3882
3883
    // Calculate the output taking into account weighting.
3884
0
    if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
3885
0
    {
3886
0
        *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
3887
0
        *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
3888
0
        if (padfRowDensity != nullptr)
3889
0
            *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
3890
0
        else
3891
0
            *pdfDensity = 1.0;
3892
0
    }
3893
0
    else
3894
0
    {
3895
0
        *pdfReal = dfAccumulatorReal;
3896
0
        *pdfImag = dfAccumulatorImag;
3897
0
        if (padfRowDensity != nullptr)
3898
0
            *pdfDensity = dfAccumulatorDensity;
3899
0
        else
3900
0
            *pdfDensity = 1.0;
3901
0
    }
3902
3903
0
    return true;
3904
0
}
3905
3906
/************************************************************************/
3907
/*                      GWKResampleOptimizedLanczos()                   */
3908
/************************************************************************/
3909
3910
static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3911
                                        double dfSrcX, double dfSrcY,
3912
                                        double *pdfDensity, double *pdfReal,
3913
                                        double *pdfImag,
3914
                                        GWKResampleWrkStruct *psWrkStruct)
3915
3916
0
{
3917
    // Save as local variables to avoid following pointers in loops.
3918
0
    const int nSrcXSize = poWK->nSrcXSize;
3919
0
    const int nSrcYSize = poWK->nSrcYSize;
3920
3921
0
    double dfAccumulatorReal = 0.0;
3922
0
    double dfAccumulatorImag = 0.0;
3923
0
    double dfAccumulatorDensity = 0.0;
3924
0
    double dfAccumulatorWeight = 0.0;
3925
0
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3926
0
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3927
0
    const GPtrDiff_t iSrcOffset =
3928
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3929
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3930
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3931
3932
0
    const double dfXScale = poWK->dfXScale;
3933
0
    const double dfYScale = poWK->dfYScale;
3934
3935
    // Space for saved X weights.
3936
0
    double *const padfWeightsXShifted =
3937
0
        psWrkStruct->padfWeightsX - poWK->nFiltInitX;
3938
0
    double *const padfWeightsYShifted =
3939
0
        psWrkStruct->padfWeightsY - poWK->nFiltInitY;
3940
3941
    // Space for saving a row of pixels.
3942
0
    double *const padfRowDensity = psWrkStruct->padfRowDensity;
3943
0
    double *const padfRowReal = psWrkStruct->padfRowReal;
3944
0
    double *const padfRowImag = psWrkStruct->padfRowImag;
3945
3946
    // Skip sampling over edge of image.
3947
0
    int jMin = poWK->nFiltInitY;
3948
0
    int jMax = poWK->nYRadius;
3949
0
    if (iSrcY + jMin < 0)
3950
0
        jMin = -iSrcY;
3951
0
    if (iSrcY + jMax >= nSrcYSize)
3952
0
        jMax = nSrcYSize - iSrcY - 1;
3953
3954
0
    int iMin = poWK->nFiltInitX;
3955
0
    int iMax = poWK->nXRadius;
3956
0
    if (iSrcX + iMin < 0)
3957
0
        iMin = -iSrcX;
3958
0
    if (iSrcX + iMax >= nSrcXSize)
3959
0
        iMax = nSrcXSize - iSrcX - 1;
3960
3961
0
    if (dfXScale < 1.0)
3962
0
    {
3963
0
        while ((iMin - dfDeltaX) * dfXScale < -3.0)
3964
0
            iMin++;
3965
0
        while ((iMax - dfDeltaX) * dfXScale > 3.0)
3966
0
            iMax--;
3967
3968
        // clang-format off
3969
        /*
3970
        Naive version:
3971
        for (int i = iMin; i <= iMax; ++i)
3972
        {
3973
            psWrkStruct->padfWeightsXShifted[i] =
3974
                GWKLanczosSinc((i - dfDeltaX) * dfXScale);
3975
        }
3976
3977
        but given that:
3978
3979
        GWKLanczosSinc(x):
3980
            if (dfX == 0.0)
3981
                return 1.0;
3982
3983
            const double dfPIX = M_PI * dfX;
3984
            const double dfPIXoverR = dfPIX / 3;
3985
            const double dfPIX2overR = dfPIX * dfPIXoverR;
3986
            return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
3987
3988
        and
3989
            sin (a + b) = sin a cos b + cos a sin b.
3990
            cos (a + b) = cos a cos b - sin a sin b.
3991
3992
        we can skip any sin() computation within the loop
3993
        */
3994
        // clang-format on
3995
3996
0
        if (iSrcX != psWrkStruct->iLastSrcX ||
3997
0
            dfDeltaX != psWrkStruct->dfLastDeltaX)
3998
0
        {
3999
0
            double dfX = (iMin - dfDeltaX) * dfXScale;
4000
4001
0
            double dfPIXover3 = M_PI / 3 * dfX;
4002
0
            double dfCosOver3 = cos(dfPIXover3);
4003
0
            double dfSinOver3 = sin(dfPIXover3);
4004
4005
            // "Naive":
4006
            // double dfSin = sin( M_PI * dfX );
4007
            // double dfCos = cos( M_PI * dfX );
4008
            // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4009
0
            double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4010
0
            double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4011
4012
0
            const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
4013
0
            const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
4014
0
            const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
4015
0
            const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4016
0
            constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4017
0
            padfWeightsXShifted[iMin] =
4018
0
                dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4019
0
            for (int i = iMin + 1; i <= iMax; ++i)
4020
0
            {
4021
0
                dfX += dfXScale;
4022
0
                const double dfNewSin =
4023
0
                    dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4024
0
                const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4025
0
                                             dfCosOver3 * dfSinPiXScaleOver3;
4026
0
                padfWeightsXShifted[i] =
4027
0
                    dfX == 0
4028
0
                        ? 1.0
4029
0
                        : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4030
0
                const double dfNewCos =
4031
0
                    dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4032
0
                const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4033
0
                                             dfSinOver3 * dfSinPiXScaleOver3;
4034
0
                dfSin = dfNewSin;
4035
0
                dfCos = dfNewCos;
4036
0
                dfSinOver3 = dfNewSinOver3;
4037
0
                dfCosOver3 = dfNewCosOver3;
4038
0
            }
4039
4040
0
            psWrkStruct->iLastSrcX = iSrcX;
4041
0
            psWrkStruct->dfLastDeltaX = dfDeltaX;
4042
0
        }
4043
0
    }
4044
0
    else
4045
0
    {
4046
0
        while (iMin - dfDeltaX < -3.0)
4047
0
            iMin++;
4048
0
        while (iMax - dfDeltaX > 3.0)
4049
0
            iMax--;
4050
4051
0
        if (iSrcX != psWrkStruct->iLastSrcX ||
4052
0
            dfDeltaX != psWrkStruct->dfLastDeltaX)
4053
0
        {
4054
            // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4055
            // following trigonometric formulas.
4056
4057
            // TODO(schwehr): Move this somewhere where it can be rendered at
4058
            // LaTeX.
4059
            // clang-format off
4060
            // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4061
            //                            cos(M_PI * dfBase) * sin(M_PI * k)
4062
            // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4063
            // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4064
            // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4065
4066
            // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4067
            //                                  cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4068
            // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4069
            // clang-format on
4070
4071
0
            const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4072
0
            const double dfSin2PIDeltaXOver3 =
4073
0
                dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4074
            // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4075
0
            const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4076
0
            const double dfSinPIDeltaX =
4077
0
                (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4078
0
            const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4079
0
            const double dfInvPI2Over3xSinPIDeltaX =
4080
0
                dfInvPI2Over3 * dfSinPIDeltaX;
4081
0
            const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4082
0
                -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4083
0
            const double dfSinPIOver3 = 0.8660254037844386;
4084
0
            const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4085
0
                dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4086
0
            const double padfCst[] = {
4087
0
                dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4088
0
                dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4089
0
                    dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4090
0
                dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4091
0
                    dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4092
4093
0
            for (int i = iMin; i <= iMax; ++i)
4094
0
            {
4095
0
                const double dfX = i - dfDeltaX;
4096
0
                if (dfX == 0.0)
4097
0
                    padfWeightsXShifted[i] = 1.0;
4098
0
                else
4099
0
                    padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4100
#if DEBUG_VERBOSE
4101
                    // TODO(schwehr): AlmostEqual.
4102
                    // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4103
                    //               GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4104
#endif
4105
0
            }
4106
4107
0
            psWrkStruct->iLastSrcX = iSrcX;
4108
0
            psWrkStruct->dfLastDeltaX = dfDeltaX;
4109
0
        }
4110
0
    }
4111
4112
0
    if (dfYScale < 1.0)
4113
0
    {
4114
0
        while ((jMin - dfDeltaY) * dfYScale < -3.0)
4115
0
            jMin++;
4116
0
        while ((jMax - dfDeltaY) * dfYScale > 3.0)
4117
0
            jMax--;
4118
4119
        // clang-format off
4120
        /*
4121
        Naive version:
4122
        for (int j = jMin; j <= jMax; ++j)
4123
        {
4124
            padfWeightsYShifted[j] =
4125
                GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4126
        }
4127
        */
4128
        // clang-format on
4129
4130
0
        if (iSrcY != psWrkStruct->iLastSrcY ||
4131
0
            dfDeltaY != psWrkStruct->dfLastDeltaY)
4132
0
        {
4133
0
            double dfY = (jMin - dfDeltaY) * dfYScale;
4134
4135
0
            double dfPIYover3 = M_PI / 3 * dfY;
4136
0
            double dfCosOver3 = cos(dfPIYover3);
4137
0
            double dfSinOver3 = sin(dfPIYover3);
4138
4139
            // "Naive":
4140
            // double dfSin = sin( M_PI * dfY );
4141
            // double dfCos = cos( M_PI * dfY );
4142
            // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4143
0
            double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4144
0
            double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4145
4146
0
            const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4147
0
            const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4148
0
            const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4149
0
            const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4150
0
            constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4151
0
            padfWeightsYShifted[jMin] =
4152
0
                dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4153
0
            for (int j = jMin + 1; j <= jMax; ++j)
4154
0
            {
4155
0
                dfY += dfYScale;
4156
0
                const double dfNewSin =
4157
0
                    dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4158
0
                const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4159
0
                                             dfCosOver3 * dfSinPiYScaleOver3;
4160
0
                padfWeightsYShifted[j] =
4161
0
                    dfY == 0
4162
0
                        ? 1.0
4163
0
                        : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4164
0
                const double dfNewCos =
4165
0
                    dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4166
0
                const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4167
0
                                             dfSinOver3 * dfSinPiYScaleOver3;
4168
0
                dfSin = dfNewSin;
4169
0
                dfCos = dfNewCos;
4170
0
                dfSinOver3 = dfNewSinOver3;
4171
0
                dfCosOver3 = dfNewCosOver3;
4172
0
            }
4173
4174
0
            psWrkStruct->iLastSrcY = iSrcY;
4175
0
            psWrkStruct->dfLastDeltaY = dfDeltaY;
4176
0
        }
4177
0
    }
4178
0
    else
4179
0
    {
4180
0
        while (jMin - dfDeltaY < -3.0)
4181
0
            jMin++;
4182
0
        while (jMax - dfDeltaY > 3.0)
4183
0
            jMax--;
4184
4185
0
        if (iSrcY != psWrkStruct->iLastSrcY ||
4186
0
            dfDeltaY != psWrkStruct->dfLastDeltaY)
4187
0
        {
4188
0
            const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4189
0
            const double dfSin2PIDeltaYOver3 =
4190
0
                dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4191
            // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4192
0
            const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4193
0
            const double dfSinPIDeltaY =
4194
0
                (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4195
0
            const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4196
0
            const double dfInvPI2Over3xSinPIDeltaY =
4197
0
                dfInvPI2Over3 * dfSinPIDeltaY;
4198
0
            const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4199
0
                -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4200
0
            const double dfSinPIOver3 = 0.8660254037844386;
4201
0
            const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4202
0
                dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4203
0
            const double padfCst[] = {
4204
0
                dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4205
0
                dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4206
0
                    dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4207
0
                dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4208
0
                    dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4209
4210
0
            for (int j = jMin; j <= jMax; ++j)
4211
0
            {
4212
0
                const double dfY = j - dfDeltaY;
4213
0
                if (dfY == 0.0)
4214
0
                    padfWeightsYShifted[j] = 1.0;
4215
0
                else
4216
0
                    padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4217
#if DEBUG_VERBOSE
4218
                    // TODO(schwehr): AlmostEqual.
4219
                    // CPLAssert(fabs(padfWeightsYShifted[j] -
4220
                    //               GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4221
#endif
4222
0
            }
4223
4224
0
            psWrkStruct->iLastSrcY = iSrcY;
4225
0
            psWrkStruct->dfLastDeltaY = dfDeltaY;
4226
0
        }
4227
0
    }
4228
4229
    // If we have no density information, we can simply compute the
4230
    // accumulated weight.
4231
0
    if (padfRowDensity == nullptr)
4232
0
    {
4233
0
        double dfRowAccWeight = 0.0;
4234
0
        for (int i = iMin; i <= iMax; ++i)
4235
0
        {
4236
0
            dfRowAccWeight += padfWeightsXShifted[i];
4237
0
        }
4238
0
        double dfColAccWeight = 0.0;
4239
0
        for (int j = jMin; j <= jMax; ++j)
4240
0
        {
4241
0
            dfColAccWeight += padfWeightsYShifted[j];
4242
0
        }
4243
0
        dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4244
0
    }
4245
4246
    // Loop over pixel rows in the kernel.
4247
4248
0
    if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
4249
0
        !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4250
0
        !padfRowDensity)
4251
0
    {
4252
        // Optimization for Byte case without any masking/alpha
4253
4254
0
        if (dfAccumulatorWeight < 0.000001)
4255
0
        {
4256
0
            *pdfDensity = 0.0;
4257
0
            return false;
4258
0
        }
4259
4260
0
        const GByte *pSrc =
4261
0
            reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4262
0
        pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4263
4264
0
#if defined(USE_SSE2)
4265
0
        if (iMax - iMin + 1 == 6)
4266
0
        {
4267
            // This is just an optimized version of the general case in
4268
            // the else clause.
4269
4270
0
            pSrc += iMin;
4271
0
            int j = jMin;
4272
0
            const auto fourXWeights =
4273
0
                XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4274
4275
            // Process 2 lines at the same time.
4276
0
            for (; j < jMax; j += 2)
4277
0
            {
4278
0
                const XMMReg4Double v_acc =
4279
0
                    XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4280
0
                const XMMReg4Double v_acc2 =
4281
0
                    XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4282
0
                const double dfRowAcc = v_acc.GetHorizSum();
4283
0
                const double dfRowAccEnd =
4284
0
                    pSrc[4] * padfWeightsXShifted[iMin + 4] +
4285
0
                    pSrc[5] * padfWeightsXShifted[iMin + 5];
4286
0
                dfAccumulatorReal +=
4287
0
                    (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4288
0
                const double dfRowAcc2 = v_acc2.GetHorizSum();
4289
0
                const double dfRowAcc2End =
4290
0
                    pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4291
0
                    pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4292
0
                dfAccumulatorReal +=
4293
0
                    (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4294
0
                pSrc += 2 * nSrcXSize;
4295
0
            }
4296
0
            if (j == jMax)
4297
0
            {
4298
                // Process last line if there's an odd number of them.
4299
4300
0
                const XMMReg4Double v_acc =
4301
0
                    XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4302
0
                const double dfRowAcc = v_acc.GetHorizSum();
4303
0
                const double dfRowAccEnd =
4304
0
                    pSrc[4] * padfWeightsXShifted[iMin + 4] +
4305
0
                    pSrc[5] * padfWeightsXShifted[iMin + 5];
4306
0
                dfAccumulatorReal +=
4307
0
                    (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4308
0
            }
4309
0
        }
4310
0
        else
4311
0
#endif
4312
0
        {
4313
0
            for (int j = jMin; j <= jMax; ++j)
4314
0
            {
4315
0
                int i = iMin;
4316
0
                double dfRowAcc1 = 0.0;
4317
0
                double dfRowAcc2 = 0.0;
4318
                // A bit of loop unrolling
4319
0
                for (; i < iMax; i += 2)
4320
0
                {
4321
0
                    dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4322
0
                    dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4323
0
                }
4324
0
                if (i == iMax)
4325
0
                {
4326
                    // Process last column if there's an odd number of them.
4327
0
                    dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4328
0
                }
4329
4330
0
                dfAccumulatorReal +=
4331
0
                    (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4332
0
                pSrc += nSrcXSize;
4333
0
            }
4334
0
        }
4335
4336
        // Calculate the output taking into account weighting.
4337
0
        if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4338
0
        {
4339
0
            const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4340
0
            *pdfReal = dfAccumulatorReal * dfInvAcc;
4341
0
            *pdfDensity = 1.0;
4342
0
        }
4343
0
        else
4344
0
        {
4345
0
            *pdfReal = dfAccumulatorReal;
4346
0
            *pdfDensity = 1.0;
4347
0
        }
4348
4349
0
        return true;
4350
0
    }
4351
4352
0
    GPtrDiff_t iRowOffset =
4353
0
        iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4354
4355
0
    int nCountValid = 0;
4356
0
    const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4357
4358
0
    for (int j = jMin; j <= jMax; ++j)
4359
0
    {
4360
0
        iRowOffset += nSrcXSize;
4361
4362
        // Get pixel values.
4363
        // We can potentially read extra elements after the "normal" end of the
4364
        // source arrays, but the contract of papabySrcImage[iBand],
4365
        // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4366
        // is to have WARP_EXTRA_ELTS reserved at their end.
4367
0
        if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4368
0
                            padfRowDensity, padfRowReal, padfRowImag))
4369
0
            continue;
4370
4371
0
        const double dfWeight1 = padfWeightsYShifted[j];
4372
4373
        // Iterate over pixels in row.
4374
0
        if (padfRowDensity != nullptr)
4375
0
        {
4376
0
            for (int i = iMin; i <= iMax; ++i)
4377
0
            {
4378
                // Skip sampling if pixel has zero density.
4379
0
                if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4380
0
                    continue;
4381
4382
0
                nCountValid++;
4383
4384
                //  Use a cached set of weights for this row.
4385
0
                const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4386
4387
                // Accumulate!
4388
0
                dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4389
0
                dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4390
0
                dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4391
0
                dfAccumulatorWeight += dfWeight2;
4392
0
            }
4393
0
        }
4394
0
        else if (bIsNonComplex)
4395
0
        {
4396
0
            double dfRowAccReal = 0.0;
4397
0
            for (int i = iMin; i <= iMax; ++i)
4398
0
            {
4399
0
                const double dfWeight2 = padfWeightsXShifted[i];
4400
4401
                // Accumulate!
4402
0
                dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4403
0
            }
4404
4405
0
            dfAccumulatorReal += dfRowAccReal * dfWeight1;
4406
0
        }
4407
0
        else
4408
0
        {
4409
0
            double dfRowAccReal = 0.0;
4410
0
            double dfRowAccImag = 0.0;
4411
0
            for (int i = iMin; i <= iMax; ++i)
4412
0
            {
4413
0
                const double dfWeight2 = padfWeightsXShifted[i];
4414
4415
                // Accumulate!
4416
0
                dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4417
0
                dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4418
0
            }
4419
4420
0
            dfAccumulatorReal += dfRowAccReal * dfWeight1;
4421
0
            dfAccumulatorImag += dfRowAccImag * dfWeight1;
4422
0
        }
4423
0
    }
4424
4425
0
    if (dfAccumulatorWeight < 0.000001 ||
4426
0
        (padfRowDensity != nullptr &&
4427
0
         (dfAccumulatorDensity < 0.000001 ||
4428
0
          nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4429
0
    {
4430
0
        *pdfDensity = 0.0;
4431
0
        return false;
4432
0
    }
4433
4434
    // Calculate the output taking into account weighting.
4435
0
    if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4436
0
    {
4437
0
        const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4438
0
        *pdfReal = dfAccumulatorReal * dfInvAcc;
4439
0
        *pdfImag = dfAccumulatorImag * dfInvAcc;
4440
0
        if (padfRowDensity != nullptr)
4441
0
            *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4442
0
        else
4443
0
            *pdfDensity = 1.0;
4444
0
    }
4445
0
    else
4446
0
    {
4447
0
        *pdfReal = dfAccumulatorReal;
4448
0
        *pdfImag = dfAccumulatorImag;
4449
0
        if (padfRowDensity != nullptr)
4450
0
            *pdfDensity = dfAccumulatorDensity;
4451
0
        else
4452
0
            *pdfDensity = 1.0;
4453
0
    }
4454
4455
0
    return true;
4456
0
}
4457
4458
/************************************************************************/
4459
/*                        GWKComputeWeights()                           */
4460
/************************************************************************/
4461
4462
// Fill the per-column and per-row weight tables of a resampling kernel.
//
// eResample selects the filter functions from apfGWKFilter and
// apfGWKFilter4Values. For every column offset i in [iMin, iMax] the filter
// is evaluated at (i - dfDeltaX) * dfXScale and stored consecutively in
// padfWeightsHorizontal starting at index 0; rows j in [jMin, jMax] are
// handled the same way with dfDeltaY / dfYScale into padfWeightsVertical.
// dfInvWeights receives 1 / (sum of horizontal weights * sum of vertical
// weights).
static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
                              double dfDeltaX, double dfXScale, int jMin,
                              int jMax, double dfDeltaY, double dfYScale,
                              double *padfWeightsHorizontal,
                              double *padfWeightsVertical, double &dfInvWeights)
{
    const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
    CPLAssert(pfnGetWeight);
    const FilterFunc4ValuesType pfnGetWeight4Values =
        apfGWKFilter4Values[eResample];
    CPLAssert(pfnGetWeight4Values);

    // Computes the weights of one axis and returns their sum.
    const auto FillAxis = [pfnGetWeight, pfnGetWeight4Values](
                              int nFirst, int nLast, double dfDelta,
                              double dfScale, double *padfWeights) -> double
    {
        // Not zero, but as close as possible to it, to avoid a potential
        // division by zero when the caller inverts the product of the sums.
        double dfSum = cpl::NumericLimits<double>::min();

        int nPos = nFirst;
        double *padfOut = padfWeights;

        // Groups of 4: store the filter arguments, then let the 4-value
        // filter process them (presumably it overwrites them in place with
        // the weights and returns their sum -- confirm against the
        // FilterFunc4ValuesType implementations).
        while (nPos + 2 < nLast)
        {
            padfOut[0] = (nPos - dfDelta) * dfScale;
            padfOut[1] = padfOut[0] + dfScale;
            padfOut[2] = padfOut[1] + dfScale;
            padfOut[3] = padfOut[2] + dfScale;
            dfSum += pfnGetWeight4Values(padfOut);
            nPos += 4;
            padfOut += 4;
        }

        // Remaining 0 to 3 positions, one at a time.
        while (nPos <= nLast)
        {
            const double dfWeight = pfnGetWeight((nPos - dfDelta) * dfScale);
            *padfOut = dfWeight;
            dfSum += dfWeight;
            ++nPos;
            ++padfOut;
        }

        return dfSum;
    };

    const double dfSumHorizontal =
        FillAxis(iMin, iMax, dfDeltaX, dfXScale, padfWeightsHorizontal);
    const double dfSumVertical =
        FillAxis(jMin, jMax, dfDeltaY, dfYScale, padfWeightsVertical);

    dfInvWeights = 1. / (dfSumHorizontal * dfSumVertical);
}
4522
4523
/************************************************************************/
4524
/*                        GWKResampleNoMasksT()                         */
4525
/************************************************************************/
4526
4527
template <class T>
4528
static bool
4529
GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4530
                    double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4531
                    double *padfWeightsVertical, double &dfInvWeights)
4532
4533
{
4534
    // Commonly used; save locally.
4535
    const int nSrcXSize = poWK->nSrcXSize;
4536
    const int nSrcYSize = poWK->nSrcYSize;
4537
4538
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4539
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4540
    const GPtrDiff_t iSrcOffset =
4541
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4542
4543
    const int nXRadius = poWK->nXRadius;
4544
    const int nYRadius = poWK->nYRadius;
4545
4546
    // Politely refuse to process invalid coordinates or obscenely small image.
4547
    if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4548
        nYRadius > nSrcYSize)
4549
        return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4550
                                                  pValue);
4551
4552
    T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4553
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4554
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4555
4556
    const double dfXScale = std::min(poWK->dfXScale, 1.0);
4557
    const double dfYScale = std::min(poWK->dfYScale, 1.0);
4558
4559
    int iMin = 1 - nXRadius;
4560
    if (iSrcX + iMin < 0)
4561
        iMin = -iSrcX;
4562
    int iMax = nXRadius;
4563
    if (iSrcX + iMax >= nSrcXSize - 1)
4564
        iMax = nSrcXSize - 1 - iSrcX;
4565
4566
    int jMin = 1 - nYRadius;
4567
    if (iSrcY + jMin < 0)
4568
        jMin = -iSrcY;
4569
    int jMax = nYRadius;
4570
    if (iSrcY + jMax >= nSrcYSize - 1)
4571
        jMax = nSrcYSize - 1 - iSrcY;
4572
4573
    if (iBand == 0)
4574
    {
4575
        GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4576
                          jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4577
                          padfWeightsVertical, dfInvWeights);
4578
    }
4579
4580
    // Loop over all rows in the kernel.
4581
    double dfAccumulator = 0.0;
4582
    for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4583
    {
4584
        const GPtrDiff_t iSampJ =
4585
            iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4586
4587
        // Loop over all pixels in the row.
4588
        double dfAccumulatorLocal = 0.0;
4589
        double dfAccumulatorLocal2 = 0.0;
4590
        int iC = 0;
4591
        int i = iMin;
4592
        // Process by chunk of 4 cols.
4593
        for (; i + 2 < iMax; i += 4, iC += 4)
4594
        {
4595
            // Retrieve the pixel & accumulate.
4596
            dfAccumulatorLocal +=
4597
                double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4598
            dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4599
                                  padfWeightsHorizontal[iC + 1];
4600
            dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
4601
                                   padfWeightsHorizontal[iC + 2];
4602
            dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
4603
                                   padfWeightsHorizontal[iC + 3];
4604
        }
4605
        dfAccumulatorLocal += dfAccumulatorLocal2;
4606
        if (i < iMax)
4607
        {
4608
            dfAccumulatorLocal +=
4609
                double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4610
            dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4611
                                  padfWeightsHorizontal[iC + 1];
4612
            i += 2;
4613
            iC += 2;
4614
        }
4615
        if (i == iMax)
4616
        {
4617
            dfAccumulatorLocal +=
4618
                double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4619
        }
4620
4621
        dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4622
    }
4623
4624
    *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4625
4626
    return true;
4627
}
4628
4629
/* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4630
/* Could possibly be used too on 32bit, but we would need to check at runtime */
4631
#if defined(USE_SSE2)
4632
4633
/************************************************************************/
4634
/*                    GWKResampleNoMasks_SSE2_T()                       */
4635
/************************************************************************/
4636
4637
template <class T>
4638
static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4639
                                      double dfSrcX, double dfSrcY, T *pValue,
4640
                                      double *padfWeightsHorizontal,
4641
                                      double *padfWeightsVertical,
4642
                                      double &dfInvWeights)
4643
0
{
4644
    // Commonly used; save locally.
4645
0
    const int nSrcXSize = poWK->nSrcXSize;
4646
0
    const int nSrcYSize = poWK->nSrcYSize;
4647
4648
0
    const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4649
0
    const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4650
0
    const GPtrDiff_t iSrcOffset =
4651
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4652
0
    const int nXRadius = poWK->nXRadius;
4653
0
    const int nYRadius = poWK->nYRadius;
4654
4655
    // Politely refuse to process invalid coordinates or obscenely small image.
4656
0
    if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4657
0
        nYRadius > nSrcYSize)
4658
0
        return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4659
0
                                                  pValue);
4660
4661
0
    const T *pSrcBand =
4662
0
        reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4663
4664
0
    const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4665
0
    const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4666
0
    const double dfXScale = std::min(poWK->dfXScale, 1.0);
4667
0
    const double dfYScale = std::min(poWK->dfYScale, 1.0);
4668
4669
0
    int iMin = 1 - nXRadius;
4670
0
    if (iSrcX + iMin < 0)
4671
0
        iMin = -iSrcX;
4672
0
    int iMax = nXRadius;
4673
0
    if (iSrcX + iMax >= nSrcXSize - 1)
4674
0
        iMax = nSrcXSize - 1 - iSrcX;
4675
4676
0
    int jMin = 1 - nYRadius;
4677
0
    if (iSrcY + jMin < 0)
4678
0
        jMin = -iSrcY;
4679
0
    int jMax = nYRadius;
4680
0
    if (iSrcY + jMax >= nSrcYSize - 1)
4681
0
        jMax = nSrcYSize - 1 - iSrcY;
4682
4683
0
    if (iBand == 0)
4684
0
    {
4685
0
        GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4686
0
                          jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4687
0
                          padfWeightsVertical, dfInvWeights);
4688
0
    }
4689
4690
0
    GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4691
    // Process by chunk of 4 rows.
4692
0
    int jC = 0;
4693
0
    int j = jMin;
4694
0
    double dfAccumulator = 0.0;
4695
0
    for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4696
0
    {
4697
        // Loop over all pixels in the row.
4698
0
        int iC = 0;
4699
0
        int i = iMin;
4700
        // Process by chunk of 4 cols.
4701
0
        XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4702
0
        XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4703
0
        XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4704
0
        XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4705
0
        for (; i + 2 < iMax; i += 4, iC += 4)
4706
0
        {
4707
            // Retrieve the pixel & accumulate.
4708
0
            XMMReg4Double v_pixels_1 =
4709
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4710
0
            XMMReg4Double v_pixels_2 =
4711
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4712
0
            XMMReg4Double v_pixels_3 =
4713
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4714
0
            XMMReg4Double v_pixels_4 =
4715
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4716
4717
0
            XMMReg4Double v_padfWeight =
4718
0
                XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4719
4720
0
            v_acc_1 += v_pixels_1 * v_padfWeight;
4721
0
            v_acc_2 += v_pixels_2 * v_padfWeight;
4722
0
            v_acc_3 += v_pixels_3 * v_padfWeight;
4723
0
            v_acc_4 += v_pixels_4 * v_padfWeight;
4724
0
        }
4725
4726
0
        if (i < iMax)
4727
0
        {
4728
0
            XMMReg2Double v_pixels_1 =
4729
0
                XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4730
0
            XMMReg2Double v_pixels_2 =
4731
0
                XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4732
0
            XMMReg2Double v_pixels_3 =
4733
0
                XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4734
0
            XMMReg2Double v_pixels_4 =
4735
0
                XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4736
4737
0
            XMMReg2Double v_padfWeight =
4738
0
                XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4739
4740
0
            v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
4741
0
            v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
4742
0
            v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
4743
0
            v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
4744
4745
0
            i += 2;
4746
0
            iC += 2;
4747
0
        }
4748
4749
0
        double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
4750
0
        double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
4751
0
        double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
4752
0
        double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
4753
4754
0
        if (i == iMax)
4755
0
        {
4756
0
            dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
4757
0
                                    padfWeightsHorizontal[iC];
4758
0
            dfAccumulatorLocal_2 +=
4759
0
                static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
4760
0
                padfWeightsHorizontal[iC];
4761
0
            dfAccumulatorLocal_3 +=
4762
0
                static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
4763
0
                padfWeightsHorizontal[iC];
4764
0
            dfAccumulatorLocal_4 +=
4765
0
                static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
4766
0
                padfWeightsHorizontal[iC];
4767
0
        }
4768
4769
0
        dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
4770
0
        dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
4771
0
        dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
4772
0
        dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
4773
0
    }
4774
0
    for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
4775
0
    {
4776
        // Loop over all pixels in the row.
4777
0
        int iC = 0;
4778
0
        int i = iMin;
4779
        // Process by chunk of 4 cols.
4780
0
        XMMReg4Double v_acc = XMMReg4Double::Zero();
4781
0
        for (; i + 2 < iMax; i += 4, iC += 4)
4782
0
        {
4783
            // Retrieve the pixel & accumulate.
4784
0
            XMMReg4Double v_pixels =
4785
0
                XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4786
0
            XMMReg4Double v_padfWeight =
4787
0
                XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4788
4789
0
            v_acc += v_pixels * v_padfWeight;
4790
0
        }
4791
4792
0
        double dfAccumulatorLocal = v_acc.GetHorizSum();
4793
4794
0
        if (i < iMax)
4795
0
        {
4796
0
            dfAccumulatorLocal +=
4797
0
                double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4798
0
            dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4799
0
                                  padfWeightsHorizontal[iC + 1];
4800
0
            i += 2;
4801
0
            iC += 2;
4802
0
        }
4803
0
        if (i == iMax)
4804
0
        {
4805
0
            dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
4806
0
                                  padfWeightsHorizontal[iC];
4807
0
        }
4808
4809
0
        dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4810
0
    }
4811
4812
0
    *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4813
4814
0
    return true;
4815
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKResampleNoMasks_SSE2_T<unsigned char>(GDALWarpKernel const*, int, double, double, unsigned char*, double*, double*, double&)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKResampleNoMasks_SSE2_T<float>(GDALWarpKernel const*, int, double, double, float*, double*, double*, double&)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKResampleNoMasks_SSE2_T<short>(GDALWarpKernel const*, int, double, double, short*, double*, double*, double&)
Unexecuted instantiation: gdalwarpkernel.cpp:bool GWKResampleNoMasks_SSE2_T<unsigned short>(GDALWarpKernel const*, int, double, double, unsigned short*, double*, double*, double&)
4816
4817
/************************************************************************/
4818
/*                     GWKResampleNoMasksT<GByte>()                     */
4819
/************************************************************************/
4820
4821
template <>
4822
bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
4823
                                double dfSrcX, double dfSrcY, GByte *pValue,
4824
                                double *padfWeightsHorizontal,
4825
                                double *padfWeightsVertical,
4826
                                double &dfInvWeights)
4827
0
{
4828
0
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4829
0
                                     padfWeightsHorizontal, padfWeightsVertical,
4830
0
                                     dfInvWeights);
4831
0
}
4832
4833
/************************************************************************/
4834
/*                     GWKResampleNoMasksT<GInt16>()                    */
4835
/************************************************************************/
4836
4837
template <>
4838
bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
4839
                                 double dfSrcX, double dfSrcY, GInt16 *pValue,
4840
                                 double *padfWeightsHorizontal,
4841
                                 double *padfWeightsVertical,
4842
                                 double &dfInvWeights)
4843
0
{
4844
0
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4845
0
                                     padfWeightsHorizontal, padfWeightsVertical,
4846
0
                                     dfInvWeights);
4847
0
}
4848
4849
/************************************************************************/
4850
/*                     GWKResampleNoMasksT<GUInt16>()                   */
4851
/************************************************************************/
4852
4853
template <>
4854
bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
4855
                                  double dfSrcX, double dfSrcY, GUInt16 *pValue,
4856
                                  double *padfWeightsHorizontal,
4857
                                  double *padfWeightsVertical,
4858
                                  double &dfInvWeights)
4859
0
{
4860
0
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4861
0
                                     padfWeightsHorizontal, padfWeightsVertical,
4862
0
                                     dfInvWeights);
4863
0
}
4864
4865
/************************************************************************/
4866
/*                     GWKResampleNoMasksT<float>()                     */
4867
/************************************************************************/
4868
4869
template <>
4870
bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
4871
                                double dfSrcX, double dfSrcY, float *pValue,
4872
                                double *padfWeightsHorizontal,
4873
                                double *padfWeightsVertical,
4874
                                double &dfInvWeights)
4875
0
{
4876
0
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4877
0
                                     padfWeightsHorizontal, padfWeightsVertical,
4878
0
                                     dfInvWeights);
4879
0
}
4880
4881
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
4882
4883
/************************************************************************/
4884
/*                     GWKResampleNoMasksT<double>()                    */
4885
/************************************************************************/
4886
4887
template <>
4888
bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
4889
                                 double dfSrcX, double dfSrcY, double *pValue,
4890
                                 double *padfWeightsHorizontal,
4891
                                 double *padfWeightsVertical,
4892
                                 double &dfInvWeights)
4893
{
4894
    return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4895
                                     padfWeightsHorizontal, padfWeightsVertical,
4896
                                     dfInvWeights);
4897
}
4898
4899
#endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
4900
4901
#endif /* defined(USE_SSE2) */
4902
4903
/************************************************************************/
4904
/*                     GWKRoundSourceCoordinates()                      */
4905
/************************************************************************/
4906
4907
static void GWKRoundSourceCoordinates(
4908
    int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
4909
    double dfSrcCoordPrecision, double dfErrorThreshold,
4910
    GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
4911
    double dfDstY)
4912
0
{
4913
0
    double dfPct = 0.8;
4914
0
    if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
4915
0
    {
4916
0
        dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
4917
0
    }
4918
0
    const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
4919
4920
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
4921
0
    {
4922
0
        const double dfXBefore = padfX[iDstX];
4923
0
        const double dfYBefore = padfY[iDstX];
4924
0
        padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4925
0
                       dfSrcCoordPrecision;
4926
0
        padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4927
0
                       dfSrcCoordPrecision;
4928
4929
        // If we are in an uncertainty zone, go to non-approximated
4930
        // transformation.
4931
        // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
4932
        // be at least 10 times greater than the approximation error.
4933
0
        if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
4934
0
            fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
4935
0
        {
4936
0
            padfX[iDstX] = iDstX + dfDstXOff;
4937
0
            padfY[iDstX] = dfDstY;
4938
0
            padfZ[iDstX] = 0.0;
4939
0
            pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
4940
0
                           padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
4941
0
            padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4942
0
                           dfSrcCoordPrecision;
4943
0
            padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4944
0
                           dfSrcCoordPrecision;
4945
0
        }
4946
0
    }
4947
0
}
4948
4949
/************************************************************************/
4950
/*                     GWKCheckAndComputeSrcOffsets()                   */
4951
/************************************************************************/
4952
// Validate the source coordinate already computed for destination pixel
// _iDstX (stored in _padfX[_iDstX] / _padfY[_iDstX]) and, when it falls inside
// the source window, store the linear index of the matching source pixel in
// iSrcOffset (iSrcX + iSrcY * _nSrcXSize, both relative to the window origin).
//
// When the coordinate misses the window by less than one pixel, the
// destination pixel centre is transformed again on its own (a single-point
// call to the transformer) and the checks are repeated once.
//
// Returns false when the transformation failed, produced NaN, or the
// coordinate is outside of the source window; iSrcOffset is then untouched.
static CPL_INLINE bool
GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
                             int _iDstY, double *_padfX, double *_padfY,
                             int _nSrcXSize, int _nSrcYSize,
                             GPtrDiff_t &iSrcOffset)
{
    const GDALWarpKernel *poKernel = psJob->poWK;

    // Aliases on the coordinate slots of the pixel being examined.
    double &dfSrcX = _padfX[_iDstX];
    double &dfSrcY = _padfY[_iDstX];

    // Bounds of the source window, in the same pixel space as dfSrcX/dfSrcY.
    const int nSrcXMin = poKernel->nSrcXOff;
    const int nSrcYMin = poKernel->nSrcYOff;
    const int nSrcXEnd = _nSrcXSize + poKernel->nSrcXOff;
    const int nSrcYEnd = _nSrcYSize + poKernel->nSrcYOff;

    bool bRetryAllowed = true;
    while (true)
    {
        if (!_pabSuccess[_iDstX])
            return false;

        // If this happens this is likely the symptom of a bug somewhere.
        if (std::isnan(dfSrcX) || std::isnan(dfSrcY))
        {
            // NOTE(review): plain (non-atomic) static flag, only used to
            // limit the debug output; confirm whether concurrent callers
            // are a concern here.
            static bool bNanAlreadyReported = false;
            if (!bNanAlreadyReported)
            {
                CPLDebug("WARP",
                         "GWKCheckAndComputeSrcOffsets(): "
                         "NaN coordinate found on point %d.",
                         _iDstX);
                bNanAlreadyReported = true;
            }
            return false;
        }

        // Figure out whether the coordinate is inside the source window.
        // The comparisons are done on the floating point value, before any
        // cast to int: this avoids the asymmetric truncation around zero
        // (-0.5 would become 0) and the undefined behavior of casting a
        // huge value to int (see #6753). The first failing test, in this
        // exact order, decides whether the miss is "near" (< 1 pixel).
        bool bNearMiss;
        if (dfSrcX < nSrcXMin)
            bNearMiss = dfSrcX > nSrcXMin - 1;
        else if (dfSrcY < nSrcYMin)
            bNearMiss = dfSrcY > nSrcYMin - 1;
        else if (dfSrcX + 1e-10 > nSrcXEnd)
            bNearMiss = dfSrcX < nSrcXEnd + 1;
        else if (dfSrcY + 1e-10 > nSrcYEnd)
            bNearMiss = dfSrcY < nSrcYEnd + 1;
        else
            break;  // Inside the source window.

        if (!bRetryAllowed || !bNearMiss)
            return false;
        bRetryAllowed = false;

        // The source coordinate is slightly outside of the source raster:
        // transform this point alone, so that the exact coordinate
        // transformer is used, then run the checks one more time.
        dfSrcX = _iDstX + 0.5 + poKernel->nDstXOff;
        dfSrcY = _iDstY + 0.5 + poKernel->nDstYOff;
        double dfUnusedZ = 0;
        poKernel->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &dfSrcX,
                                 &dfSrcY, &dfUnusedZ, _pabSuccess + _iDstX);
    }

    int iSrcX = static_cast<int>(dfSrcX + 1.0e-10) - poKernel->nSrcXOff;
    int iSrcY = static_cast<int>(dfSrcY + 1.0e-10) - poKernel->nSrcYOff;
    // A coordinate lying exactly on the far edge maps to the last pixel.
    if (iSrcX == _nSrcXSize)
        iSrcX--;
    if (iSrcY == _nSrcYSize)
        iSrcY--;

    // Those checks should normally be OK given the previous ones.
    CPLAssert(iSrcX >= 0);
    CPLAssert(iSrcY >= 0);
    CPLAssert(iSrcX < _nSrcXSize);
    CPLAssert(iSrcY < _nSrcYSize);

    iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;

    return true;
}
5065
5066
/************************************************************************/
5067
/*                   GWKOneSourceCornerFailsToReproject()               */
5068
/************************************************************************/
5069
5070
// Returns true as soon as one of the four corners of the source window
// (nSrcXOff/nSrcYOff, extended by nSrcXSize/nSrcYSize) is reported as failed
// by the transformer when called with bDstToSrc = FALSE; false when all four
// corners are transformed successfully.
static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
{
    GDALWarpKernel *poWK = psJob->poWK;

    // Corner index -> (iX, iY): 0 -> (0,0), 1 -> (1,0), 2 -> (0,1), 3 -> (1,1)
    for (int iCorner = 0; iCorner < 4; ++iCorner)
    {
        const int iX = iCorner % 2;
        const int iY = iCorner / 2;

        double dfCornerX = poWK->nSrcXOff + iX * poWK->nSrcXSize;
        double dfCornerY = poWK->nSrcYOff + iY * poWK->nSrcYSize;
        double dfCornerZ = 0;
        int bTransformed = FALSE;
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfCornerX,
                             &dfCornerY, &dfCornerZ, &bTransformed);
        if (!bTransformed)
            return true;
    }

    return false;
}
5089
5090
/************************************************************************/
5091
/*                       GWKAdjustSrcOffsetOnEdge()                     */
5092
/************************************************************************/
5093
5094
static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5095
                                     GPtrDiff_t &iSrcOffset)
5096
0
{
5097
0
    GDALWarpKernel *poWK = psJob->poWK;
5098
0
    const int nSrcXSize = poWK->nSrcXSize;
5099
0
    const int nSrcYSize = poWK->nSrcYSize;
5100
5101
    // Check if the computed source position slightly altered
5102
    // fails to reproject. If so, then we are at the edge of
5103
    // the validity area, and it is worth checking neighbour
5104
    // source pixels for validity.
5105
0
    int nSuccess = FALSE;
5106
0
    {
5107
0
        double dfXTmp =
5108
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5109
0
        double dfYTmp =
5110
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5111
0
        double dfZTmp = 0;
5112
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5113
0
                             &dfZTmp, &nSuccess);
5114
0
    }
5115
0
    if (nSuccess)
5116
0
    {
5117
0
        double dfXTmp =
5118
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5119
0
        double dfYTmp =
5120
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5121
0
        double dfZTmp = 0;
5122
0
        nSuccess = FALSE;
5123
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5124
0
                             &dfZTmp, &nSuccess);
5125
0
    }
5126
0
    if (nSuccess)
5127
0
    {
5128
0
        double dfXTmp =
5129
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5130
0
        double dfYTmp =
5131
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5132
0
        double dfZTmp = 0;
5133
0
        nSuccess = FALSE;
5134
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5135
0
                             &dfZTmp, &nSuccess);
5136
0
    }
5137
5138
0
    if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5139
0
        CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5140
0
    {
5141
0
        iSrcOffset++;
5142
0
        return true;
5143
0
    }
5144
0
    else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5145
0
             CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5146
0
    {
5147
0
        iSrcOffset += nSrcXSize;
5148
0
        return true;
5149
0
    }
5150
0
    else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5151
0
             CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5152
0
    {
5153
0
        iSrcOffset--;
5154
0
        return true;
5155
0
    }
5156
0
    else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5157
0
             CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5158
0
    {
5159
0
        iSrcOffset -= nSrcXSize;
5160
0
        return true;
5161
0
    }
5162
5163
0
    return false;
5164
0
}
5165
5166
/************************************************************************/
5167
/*                 GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity()          */
5168
/************************************************************************/
5169
5170
static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5171
                                                      GPtrDiff_t &iSrcOffset)
5172
0
{
5173
0
    GDALWarpKernel *poWK = psJob->poWK;
5174
0
    const int nSrcXSize = poWK->nSrcXSize;
5175
0
    const int nSrcYSize = poWK->nSrcYSize;
5176
5177
    // Check if the computed source position slightly altered
5178
    // fails to reproject. If so, then we are at the edge of
5179
    // the validity area, and it is worth checking neighbour
5180
    // source pixels for validity.
5181
0
    int nSuccess = FALSE;
5182
0
    {
5183
0
        double dfXTmp =
5184
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5185
0
        double dfYTmp =
5186
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5187
0
        double dfZTmp = 0;
5188
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5189
0
                             &dfZTmp, &nSuccess);
5190
0
    }
5191
0
    if (nSuccess)
5192
0
    {
5193
0
        double dfXTmp =
5194
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5195
0
        double dfYTmp =
5196
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5197
0
        double dfZTmp = 0;
5198
0
        nSuccess = FALSE;
5199
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5200
0
                             &dfZTmp, &nSuccess);
5201
0
    }
5202
0
    if (nSuccess)
5203
0
    {
5204
0
        double dfXTmp =
5205
0
            poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5206
0
        double dfYTmp =
5207
0
            poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5208
0
        double dfZTmp = 0;
5209
0
        nSuccess = FALSE;
5210
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5211
0
                             &dfZTmp, &nSuccess);
5212
0
    }
5213
5214
0
    if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5215
0
        poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
5216
0
            SRC_DENSITY_THRESHOLD_FLOAT)
5217
0
    {
5218
0
        iSrcOffset++;
5219
0
        return true;
5220
0
    }
5221
0
    else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5222
0
             poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5223
0
                 SRC_DENSITY_THRESHOLD_FLOAT)
5224
0
    {
5225
0
        iSrcOffset += nSrcXSize;
5226
0
        return true;
5227
0
    }
5228
0
    else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5229
0
             poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5230
0
                 SRC_DENSITY_THRESHOLD_FLOAT)
5231
0
    {
5232
0
        iSrcOffset--;
5233
0
        return true;
5234
0
    }
5235
0
    else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5236
0
             poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5237
0
                 SRC_DENSITY_THRESHOLD_FLOAT)
5238
0
    {
5239
0
        iSrcOffset -= nSrcXSize;
5240
0
        return true;
5241
0
    }
5242
5243
0
    return false;
5244
0
}
5245
5246
/************************************************************************/
5247
/*                           GWKGeneralCase()                           */
5248
/*                                                                      */
5249
/*      This is the most general case.  It attempts to handle all       */
5250
/*      possible features with relatively little concern for            */
5251
/*      efficiency.                                                     */
5252
/************************************************************************/
5253
5254
static void GWKGeneralCaseThread(void *pData)
5255
0
{
5256
0
    GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5257
0
    GDALWarpKernel *poWK = psJob->poWK;
5258
0
    const int iYMin = psJob->iYMin;
5259
0
    const int iYMax = psJob->iYMax;
5260
0
    const double dfMultFactorVerticalShiftPipeline =
5261
0
        poWK->bApplyVerticalShift
5262
0
            ? CPLAtof(CSLFetchNameValueDef(
5263
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5264
0
                  "1.0"))
5265
0
            : 0.0;
5266
5267
0
    int nDstXSize = poWK->nDstXSize;
5268
0
    int nSrcXSize = poWK->nSrcXSize;
5269
0
    int nSrcYSize = poWK->nSrcYSize;
5270
5271
    /* -------------------------------------------------------------------- */
5272
    /*      Allocate x,y,z coordinate arrays for transformation ... one     */
5273
    /*      scanlines worth of positions.                                   */
5274
    /* -------------------------------------------------------------------- */
5275
    // For x, 2 *, because we cache the precomputed values at the end.
5276
0
    double *padfX =
5277
0
        static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5278
0
    double *padfY =
5279
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5280
0
    double *padfZ =
5281
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5282
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5283
5284
0
    const bool bUse4SamplesFormula =
5285
0
        poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5286
5287
0
    GWKResampleWrkStruct *psWrkStruct = nullptr;
5288
0
    if (poWK->eResample != GRA_NearestNeighbour)
5289
0
    {
5290
0
        psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5291
0
    }
5292
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5293
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5294
0
    const double dfErrorThreshold = CPLAtof(
5295
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5296
5297
0
    const bool bOneSourceCornerFailsToReproject =
5298
0
        GWKOneSourceCornerFailsToReproject(psJob);
5299
5300
    // Precompute values.
5301
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5302
0
        padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5303
5304
    /* ==================================================================== */
5305
    /*      Loop over output lines.                                         */
5306
    /* ==================================================================== */
5307
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5308
0
    {
5309
        /* --------------------------------------------------------------------
5310
         */
5311
        /*      Setup points to transform to source image space. */
5312
        /* --------------------------------------------------------------------
5313
         */
5314
0
        memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5315
0
        const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5316
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5317
0
            padfY[iDstX] = dfY;
5318
0
        memset(padfZ, 0, sizeof(double) * nDstXSize);
5319
5320
        /* --------------------------------------------------------------------
5321
         */
5322
        /*      Transform the points from destination pixel/line coordinates */
5323
        /*      to source pixel/line coordinates. */
5324
        /* --------------------------------------------------------------------
5325
         */
5326
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5327
0
                             padfY, padfZ, pabSuccess);
5328
0
        if (dfSrcCoordPrecision > 0.0)
5329
0
        {
5330
0
            GWKRoundSourceCoordinates(
5331
0
                nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5332
0
                dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5333
0
                0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5334
0
        }
5335
5336
        /* ====================================================================
5337
         */
5338
        /*      Loop over pixels in output scanline. */
5339
        /* ====================================================================
5340
         */
5341
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5342
0
        {
5343
0
            GPtrDiff_t iSrcOffset = 0;
5344
0
            if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5345
0
                                              padfX, padfY, nSrcXSize,
5346
0
                                              nSrcYSize, iSrcOffset))
5347
0
                continue;
5348
5349
            /* --------------------------------------------------------------------
5350
             */
5351
            /*      Do not try to apply transparent/invalid source pixels to the
5352
             */
5353
            /*      destination.  This currently ignores the multi-pixel input
5354
             */
5355
            /*      of bilinear and cubic resamples. */
5356
            /* --------------------------------------------------------------------
5357
             */
5358
0
            double dfDensity = 1.0;
5359
5360
0
            if (poWK->pafUnifiedSrcDensity != nullptr)
5361
0
            {
5362
0
                dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5363
0
                if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5364
0
                {
5365
0
                    if (!bOneSourceCornerFailsToReproject)
5366
0
                    {
5367
0
                        continue;
5368
0
                    }
5369
0
                    else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5370
0
                                 psJob, iSrcOffset))
5371
0
                    {
5372
0
                        dfDensity =
5373
0
                            double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5374
0
                    }
5375
0
                    else
5376
0
                    {
5377
0
                        continue;
5378
0
                    }
5379
0
                }
5380
0
            }
5381
5382
0
            if (poWK->panUnifiedSrcValid != nullptr &&
5383
0
                !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5384
0
            {
5385
0
                if (!bOneSourceCornerFailsToReproject)
5386
0
                {
5387
0
                    continue;
5388
0
                }
5389
0
                else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5390
0
                {
5391
0
                    continue;
5392
0
                }
5393
0
            }
5394
5395
            /* ====================================================================
5396
             */
5397
            /*      Loop processing each band. */
5398
            /* ====================================================================
5399
             */
5400
0
            bool bHasFoundDensity = false;
5401
5402
0
            const GPtrDiff_t iDstOffset =
5403
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5404
0
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
5405
0
            {
5406
0
                double dfBandDensity = 0.0;
5407
0
                double dfValueReal = 0.0;
5408
0
                double dfValueImag = 0.0;
5409
5410
                /* --------------------------------------------------------------------
5411
                 */
5412
                /*      Collect the source value. */
5413
                /* --------------------------------------------------------------------
5414
                 */
5415
0
                if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5416
0
                    nSrcYSize == 1)
5417
0
                {
5418
                    // FALSE is returned if dfBandDensity == 0, which is
5419
                    // checked below.
5420
0
                    CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5421
0
                        poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5422
0
                        &dfValueImag));
5423
0
                }
5424
0
                else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5425
0
                {
5426
0
                    GWKBilinearResample4Sample(
5427
0
                        poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5428
0
                        padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5429
0
                        &dfValueReal, &dfValueImag);
5430
0
                }
5431
0
                else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5432
0
                {
5433
0
                    GWKCubicResample4Sample(
5434
0
                        poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5435
0
                        padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5436
0
                        &dfValueReal, &dfValueImag);
5437
0
                }
5438
0
                else
5439
0
#ifdef DEBUG
5440
                    // Only useful for clang static analyzer.
5441
0
                    if (psWrkStruct != nullptr)
5442
0
#endif
5443
0
                    {
5444
0
                        psWrkStruct->pfnGWKResample(
5445
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5446
0
                            padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5447
0
                            &dfValueReal, &dfValueImag, psWrkStruct);
5448
0
                    }
5449
5450
                // If we didn't find any valid inputs skip to next band.
5451
0
                if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5452
0
                    continue;
5453
5454
0
                if (poWK->bApplyVerticalShift)
5455
0
                {
5456
0
                    if (!std::isfinite(padfZ[iDstX]))
5457
0
                        continue;
5458
                    // Subtract padfZ[] since the coordinate transformation is
5459
                    // from target to source
5460
0
                    dfValueReal =
5461
0
                        dfValueReal * poWK->dfMultFactorVerticalShift -
5462
0
                        padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5463
0
                }
5464
5465
0
                bHasFoundDensity = true;
5466
5467
                /* --------------------------------------------------------------------
5468
                 */
5469
                /*      We have a computed value from the source.  Now apply it
5470
                 * to      */
5471
                /*      the destination pixel. */
5472
                /* --------------------------------------------------------------------
5473
                 */
5474
0
                GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5475
0
                                 dfValueReal, dfValueImag);
5476
0
            }
5477
5478
0
            if (!bHasFoundDensity)
5479
0
                continue;
5480
5481
            /* --------------------------------------------------------------------
5482
             */
5483
            /*      Update destination density/validity masks. */
5484
            /* --------------------------------------------------------------------
5485
             */
5486
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5487
5488
0
            if (poWK->panDstValid != nullptr)
5489
0
            {
5490
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
5491
0
            }
5492
0
        } /* Next iDstX */
5493
5494
        /* --------------------------------------------------------------------
5495
         */
5496
        /*      Report progress to the user, and optionally cancel out. */
5497
        /* --------------------------------------------------------------------
5498
         */
5499
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5500
0
            break;
5501
0
    }
5502
5503
    /* -------------------------------------------------------------------- */
5504
    /*      Cleanup and return.                                             */
5505
    /* -------------------------------------------------------------------- */
5506
0
    CPLFree(padfX);
5507
0
    CPLFree(padfY);
5508
0
    CPLFree(padfZ);
5509
0
    CPLFree(pabSuccess);
5510
0
    if (psWrkStruct)
5511
0
        GWKResampleDeleteWrkStruct(psWrkStruct);
5512
0
}
5513
5514
// Entry point of the general case: hands GWKGeneralCaseThread() over to
// GWKRun() and returns the error code GWKRun() reports.
static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
{
    const CPLErr eErr = GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
    return eErr;
}
5518
5519
/************************************************************************/
5520
/*                            GWKRealCase()                             */
5521
/*                                                                      */
5522
/*      General case for non-complex data types.                        */
5523
/************************************************************************/
5524
5525
static void GWKRealCaseThread(void *pData)
5526
5527
0
{
5528
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5529
0
    GDALWarpKernel *poWK = psJob->poWK;
5530
0
    const int iYMin = psJob->iYMin;
5531
0
    const int iYMax = psJob->iYMax;
5532
5533
0
    const int nDstXSize = poWK->nDstXSize;
5534
0
    const int nSrcXSize = poWK->nSrcXSize;
5535
0
    const int nSrcYSize = poWK->nSrcYSize;
5536
0
    const double dfMultFactorVerticalShiftPipeline =
5537
0
        poWK->bApplyVerticalShift
5538
0
            ? CPLAtof(CSLFetchNameValueDef(
5539
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5540
0
                  "1.0"))
5541
0
            : 0.0;
5542
5543
    /* -------------------------------------------------------------------- */
5544
    /*      Allocate x,y,z coordinate arrays for transformation ... one     */
5545
    /*      scanlines worth of positions.                                   */
5546
    /* -------------------------------------------------------------------- */
5547
5548
    // For x, 2 *, because we cache the precomputed values at the end.
5549
0
    double *padfX =
5550
0
        static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5551
0
    double *padfY =
5552
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5553
0
    double *padfZ =
5554
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5555
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5556
5557
0
    const bool bUse4SamplesFormula =
5558
0
        poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5559
5560
0
    GWKResampleWrkStruct *psWrkStruct = nullptr;
5561
0
    if (poWK->eResample != GRA_NearestNeighbour)
5562
0
    {
5563
0
        psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5564
0
    }
5565
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5566
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5567
0
    const double dfErrorThreshold = CPLAtof(
5568
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5569
5570
0
    const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5571
0
                                   poWK->papanBandSrcValid == nullptr &&
5572
0
                                   poWK->pafUnifiedSrcDensity != nullptr;
5573
5574
0
    const bool bOneSourceCornerFailsToReproject =
5575
0
        GWKOneSourceCornerFailsToReproject(psJob);
5576
5577
    // Precompute values.
5578
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5579
0
        padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5580
5581
    /* ==================================================================== */
5582
    /*      Loop over output lines.                                         */
5583
    /* ==================================================================== */
5584
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5585
0
    {
5586
        /* --------------------------------------------------------------------
5587
         */
5588
        /*      Setup points to transform to source image space. */
5589
        /* --------------------------------------------------------------------
5590
         */
5591
0
        memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5592
0
        const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5593
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5594
0
            padfY[iDstX] = dfY;
5595
0
        memset(padfZ, 0, sizeof(double) * nDstXSize);
5596
5597
        /* --------------------------------------------------------------------
5598
         */
5599
        /*      Transform the points from destination pixel/line coordinates */
5600
        /*      to source pixel/line coordinates. */
5601
        /* --------------------------------------------------------------------
5602
         */
5603
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5604
0
                             padfY, padfZ, pabSuccess);
5605
0
        if (dfSrcCoordPrecision > 0.0)
5606
0
        {
5607
0
            GWKRoundSourceCoordinates(
5608
0
                nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5609
0
                dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5610
0
                0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5611
0
        }
5612
5613
        /* ====================================================================
5614
         */
5615
        /*      Loop over pixels in output scanline. */
5616
        /* ====================================================================
5617
         */
5618
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5619
0
        {
5620
0
            GPtrDiff_t iSrcOffset = 0;
5621
0
            if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5622
0
                                              padfX, padfY, nSrcXSize,
5623
0
                                              nSrcYSize, iSrcOffset))
5624
0
                continue;
5625
5626
            /* --------------------------------------------------------------------
5627
             */
5628
            /*      Do not try to apply transparent/invalid source pixels to the
5629
             */
5630
            /*      destination.  This currently ignores the multi-pixel input
5631
             */
5632
            /*      of bilinear and cubic resamples. */
5633
            /* --------------------------------------------------------------------
5634
             */
5635
0
            double dfDensity = 1.0;
5636
5637
0
            if (poWK->pafUnifiedSrcDensity != nullptr)
5638
0
            {
5639
0
                dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5640
0
                if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5641
0
                {
5642
0
                    if (!bOneSourceCornerFailsToReproject)
5643
0
                    {
5644
0
                        continue;
5645
0
                    }
5646
0
                    else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5647
0
                                 psJob, iSrcOffset))
5648
0
                    {
5649
0
                        dfDensity =
5650
0
                            double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5651
0
                    }
5652
0
                    else
5653
0
                    {
5654
0
                        continue;
5655
0
                    }
5656
0
                }
5657
0
            }
5658
5659
0
            if (poWK->panUnifiedSrcValid != nullptr &&
5660
0
                !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5661
0
            {
5662
0
                if (!bOneSourceCornerFailsToReproject)
5663
0
                {
5664
0
                    continue;
5665
0
                }
5666
0
                else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5667
0
                {
5668
0
                    continue;
5669
0
                }
5670
0
            }
5671
5672
            /* ====================================================================
5673
             */
5674
            /*      Loop processing each band. */
5675
            /* ====================================================================
5676
             */
5677
0
            bool bHasFoundDensity = false;
5678
5679
0
            const GPtrDiff_t iDstOffset =
5680
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5681
0
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
5682
0
            {
5683
0
                double dfBandDensity = 0.0;
5684
0
                double dfValueReal = 0.0;
5685
5686
                /* --------------------------------------------------------------------
5687
                 */
5688
                /*      Collect the source value. */
5689
                /* --------------------------------------------------------------------
5690
                 */
5691
0
                if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5692
0
                    nSrcYSize == 1)
5693
0
                {
5694
                    // FALSE is returned if dfBandDensity == 0, which is
5695
                    // checked below.
5696
0
                    CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5697
0
                        poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5698
0
                }
5699
0
                else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5700
0
                {
5701
0
                    double dfValueImagIgnored = 0.0;
5702
0
                    GWKBilinearResample4Sample(
5703
0
                        poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5704
0
                        padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5705
0
                        &dfValueReal, &dfValueImagIgnored);
5706
0
                }
5707
0
                else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5708
0
                {
5709
0
                    if (bSrcMaskIsDensity)
5710
0
                    {
5711
0
                        if (poWK->eWorkingDataType == GDT_Byte)
5712
0
                        {
5713
0
                            GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
5714
0
                                poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5715
0
                                padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5716
0
                                &dfValueReal);
5717
0
                        }
5718
0
                        else if (poWK->eWorkingDataType == GDT_UInt16)
5719
0
                        {
5720
0
                            GWKCubicResampleSrcMaskIsDensity4SampleRealT<
5721
0
                                GUInt16>(poWK, iBand,
5722
0
                                         padfX[iDstX] - poWK->nSrcXOff,
5723
0
                                         padfY[iDstX] - poWK->nSrcYOff,
5724
0
                                         &dfBandDensity, &dfValueReal);
5725
0
                        }
5726
0
                        else
5727
0
                        {
5728
0
                            GWKCubicResampleSrcMaskIsDensity4SampleReal(
5729
0
                                poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5730
0
                                padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5731
0
                                &dfValueReal);
5732
0
                        }
5733
0
                    }
5734
0
                    else
5735
0
                    {
5736
0
                        double dfValueImagIgnored = 0.0;
5737
0
                        GWKCubicResample4Sample(
5738
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5739
0
                            padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5740
0
                            &dfValueReal, &dfValueImagIgnored);
5741
0
                    }
5742
0
                }
5743
0
                else
5744
0
#ifdef DEBUG
5745
                    // Only useful for clang static analyzer.
5746
0
                    if (psWrkStruct != nullptr)
5747
0
#endif
5748
0
                    {
5749
0
                        double dfValueImagIgnored = 0.0;
5750
0
                        psWrkStruct->pfnGWKResample(
5751
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5752
0
                            padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5753
0
                            &dfValueReal, &dfValueImagIgnored, psWrkStruct);
5754
0
                    }
5755
5756
                // If we didn't find any valid inputs skip to next band.
5757
0
                if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5758
0
                    continue;
5759
5760
0
                if (poWK->bApplyVerticalShift)
5761
0
                {
5762
0
                    if (!std::isfinite(padfZ[iDstX]))
5763
0
                        continue;
5764
                    // Subtract padfZ[] since the coordinate transformation is
5765
                    // from target to source
5766
0
                    dfValueReal =
5767
0
                        dfValueReal * poWK->dfMultFactorVerticalShift -
5768
0
                        padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5769
0
                }
5770
5771
0
                bHasFoundDensity = true;
5772
5773
                /* --------------------------------------------------------------------
5774
                 */
5775
                /*      We have a computed value from the source.  Now apply it
5776
                 * to      */
5777
                /*      the destination pixel. */
5778
                /* --------------------------------------------------------------------
5779
                 */
5780
0
                GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
5781
0
                                     dfValueReal);
5782
0
            }
5783
5784
0
            if (!bHasFoundDensity)
5785
0
                continue;
5786
5787
            /* --------------------------------------------------------------------
5788
             */
5789
            /*      Update destination density/validity masks. */
5790
            /* --------------------------------------------------------------------
5791
             */
5792
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5793
5794
0
            if (poWK->panDstValid != nullptr)
5795
0
            {
5796
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
5797
0
            }
5798
0
        }  // Next iDstX.
5799
5800
        /* --------------------------------------------------------------------
5801
         */
5802
        /*      Report progress to the user, and optionally cancel out. */
5803
        /* --------------------------------------------------------------------
5804
         */
5805
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5806
0
            break;
5807
0
    }
5808
5809
    /* -------------------------------------------------------------------- */
5810
    /*      Cleanup and return.                                             */
5811
    /* -------------------------------------------------------------------- */
5812
0
    CPLFree(padfX);
5813
0
    CPLFree(padfY);
5814
0
    CPLFree(padfZ);
5815
0
    CPLFree(pabSuccess);
5816
0
    if (psWrkStruct)
5817
0
        GWKResampleDeleteWrkStruct(psWrkStruct);
5818
0
}
5819
5820
/**
 * Entry point for the real (non-complex) warp: hands GWKRealCaseThread to
 * GWKRun() under the name "GWKRealCase" and returns GWKRun()'s result.
 *
 * @param poWK warp kernel forwarded unchanged to GWKRun().
 * @return the CPLErr value returned by GWKRun().
 */
static CPLErr GWKRealCase(GDALWarpKernel *poWK)
{
    const CPLErr eErr = GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
    return eErr;
}
5824
5825
/************************************************************************/
5826
/*                 GWKCubicResampleNoMasks4MultiBandT()                 */
5827
/************************************************************************/
5828
5829
/* We restrict to 64bit processors because they are guaranteed to have SSE2 */
5830
/* and enough SSE registers */
5831
#if defined(USE_SSE2)
5832
5833
static inline float Convolute4x4(const __m128 row0, const __m128 row1,
5834
                                 const __m128 row2, const __m128 row3,
5835
                                 const __m128 weightsXY0,
5836
                                 const __m128 weightsXY1,
5837
                                 const __m128 weightsXY2,
5838
                                 const __m128 weightsXY3)
5839
0
{
5840
0
    return XMMHorizontalAdd(_mm_add_ps(
5841
0
        _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
5842
0
        _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
5843
0
                   _mm_mul_ps(row3, weightsXY3))));
5844
0
}
5845
5846
template <class T>
5847
static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
5848
                                               double dfSrcX, double dfSrcY,
5849
                                               const GPtrDiff_t iDstOffset)
5850
0
{
5851
0
    const double dfSrcXShifted = dfSrcX - 0.5;
5852
0
    const int iSrcX = static_cast<int>(dfSrcXShifted);
5853
0
    const double dfSrcYShifted = dfSrcY - 0.5;
5854
0
    const int iSrcY = static_cast<int>(dfSrcYShifted);
5855
0
    const GPtrDiff_t iSrcOffset =
5856
0
        iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
5857
5858
    // Get the bilinear interpolation at the image borders.
5859
0
    if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
5860
0
        iSrcY + 2 >= poWK->nSrcYSize)
5861
0
    {
5862
0
        for (int iBand = 0; iBand < poWK->nBands; iBand++)
5863
0
        {
5864
0
            T value;
5865
0
            GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
5866
0
                                               &value);
5867
0
            reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
5868
0
                value;
5869
0
        }
5870
0
    }
5871
0
    else
5872
0
    {
5873
0
        const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
5874
0
        const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
5875
5876
0
        float afCoeffsX[4];
5877
0
        float afCoeffsY[4];
5878
0
        GWKCubicComputeWeights(fDeltaX, afCoeffsX);
5879
0
        GWKCubicComputeWeights(fDeltaY, afCoeffsY);
5880
0
        const auto weightsX = _mm_loadu_ps(afCoeffsX);
5881
0
        const auto weightsXY0 =
5882
0
            _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
5883
0
        const auto weightsXY1 =
5884
0
            _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
5885
0
        const auto weightsXY2 =
5886
0
            _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
5887
0
        const auto weightsXY3 =
5888
0
            _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
5889
5890
0
        const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
5891
5892
0
        int iBand = 0;
5893
        // Process 2 bands at a time
5894
0
        for (; iBand + 1 < poWK->nBands; iBand += 2)
5895
0
        {
5896
0
            const T *CPL_RESTRICT pBand0 =
5897
0
                reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5898
0
            const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
5899
0
            const auto row1_0 =
5900
0
                XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
5901
0
            const auto row2_0 =
5902
0
                XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
5903
0
            const auto row3_0 =
5904
0
                XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
5905
5906
0
            const T *CPL_RESTRICT pBand1 =
5907
0
                reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
5908
0
            const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
5909
0
            const auto row1_1 =
5910
0
                XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
5911
0
            const auto row2_1 =
5912
0
                XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
5913
0
            const auto row3_1 =
5914
0
                XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
5915
5916
0
            const float fValue_0 =
5917
0
                Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
5918
0
                             weightsXY1, weightsXY2, weightsXY3);
5919
5920
0
            const float fValue_1 =
5921
0
                Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
5922
0
                             weightsXY1, weightsXY2, weightsXY3);
5923
5924
0
            T *CPL_RESTRICT pDstBand0 =
5925
0
                reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
5926
0
            pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
5927
5928
0
            T *CPL_RESTRICT pDstBand1 =
5929
0
                reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
5930
0
            pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
5931
0
        }
5932
0
        if (iBand < poWK->nBands)
5933
0
        {
5934
0
            const T *CPL_RESTRICT pBand0 =
5935
0
                reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5936
0
            const auto row0 = XMMLoad4Values(pBand0 + iOffset);
5937
0
            const auto row1 =
5938
0
                XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
5939
0
            const auto row2 =
5940
0
                XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
5941
0
            const auto row3 =
5942
0
                XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
5943
5944
0
            const float fValue =
5945
0
                Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
5946
0
                             weightsXY2, weightsXY3);
5947
5948
0
            T *CPL_RESTRICT pDstBand =
5949
0
                reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
5950
0
            pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
5951
0
        }
5952
0
    }
5953
5954
0
    if (poWK->pafDstDensity)
5955
0
        poWK->pafDstDensity[iDstOffset] = 1.0f;
5956
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKCubicResampleNoMasks4MultiBandT<unsigned char>(GDALWarpKernel const*, double, double, long long)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKCubicResampleNoMasks4MultiBandT<unsigned short>(GDALWarpKernel const*, double, double, long long)
5957
5958
#endif  // defined(USE_SSE2)
5959
5960
/************************************************************************/
5961
/*                GWKResampleNoMasksOrDstDensityOnlyThreadInternal()    */
5962
/************************************************************************/
5963
5964
template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
5965
static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
5966
5967
0
{
5968
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5969
0
    GDALWarpKernel *poWK = psJob->poWK;
5970
0
    const int iYMin = psJob->iYMin;
5971
0
    const int iYMax = psJob->iYMax;
5972
0
    const double dfMultFactorVerticalShiftPipeline =
5973
0
        poWK->bApplyVerticalShift
5974
0
            ? CPLAtof(CSLFetchNameValueDef(
5975
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5976
0
                  "1.0"))
5977
0
            : 0.0;
5978
5979
0
    const int nDstXSize = poWK->nDstXSize;
5980
0
    const int nSrcXSize = poWK->nSrcXSize;
5981
0
    const int nSrcYSize = poWK->nSrcYSize;
5982
5983
    /* -------------------------------------------------------------------- */
5984
    /*      Allocate x,y,z coordinate arrays for transformation ... one     */
5985
    /*      scanlines worth of positions.                                   */
5986
    /* -------------------------------------------------------------------- */
5987
5988
    // For x, 2 *, because we cache the precomputed values at the end.
5989
0
    double *padfX =
5990
0
        static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5991
0
    double *padfY =
5992
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5993
0
    double *padfZ =
5994
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5995
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5996
5997
0
    const int nXRadius = poWK->nXRadius;
5998
0
    double *padfWeightsX =
5999
0
        static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6000
0
    double *padfWeightsY = static_cast<double *>(
6001
0
        CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6002
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6003
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6004
0
    const double dfErrorThreshold = CPLAtof(
6005
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6006
6007
    // Precompute values.
6008
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6009
0
        padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6010
6011
    /* ==================================================================== */
6012
    /*      Loop over output lines.                                         */
6013
    /* ==================================================================== */
6014
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6015
0
    {
6016
        /* --------------------------------------------------------------------
6017
         */
6018
        /*      Setup points to transform to source image space. */
6019
        /* --------------------------------------------------------------------
6020
         */
6021
0
        memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6022
0
        const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6023
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6024
0
            padfY[iDstX] = dfY;
6025
0
        memset(padfZ, 0, sizeof(double) * nDstXSize);
6026
6027
        /* --------------------------------------------------------------------
6028
         */
6029
        /*      Transform the points from destination pixel/line coordinates */
6030
        /*      to source pixel/line coordinates. */
6031
        /* --------------------------------------------------------------------
6032
         */
6033
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6034
0
                             padfY, padfZ, pabSuccess);
6035
0
        if (dfSrcCoordPrecision > 0.0)
6036
0
        {
6037
0
            GWKRoundSourceCoordinates(
6038
0
                nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6039
0
                dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6040
0
                0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6041
0
        }
6042
6043
        /* ====================================================================
6044
         */
6045
        /*      Loop over pixels in output scanline. */
6046
        /* ====================================================================
6047
         */
6048
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6049
0
        {
6050
0
            GPtrDiff_t iSrcOffset = 0;
6051
0
            if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6052
0
                                              padfX, padfY, nSrcXSize,
6053
0
                                              nSrcYSize, iSrcOffset))
6054
0
                continue;
6055
6056
            /* ====================================================================
6057
             */
6058
            /*      Loop processing each band. */
6059
            /* ====================================================================
6060
             */
6061
0
            const GPtrDiff_t iDstOffset =
6062
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6063
6064
0
#if defined(USE_SSE2)
6065
            if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6066
                          (std::is_same<T, GByte>::value ||
6067
                           std::is_same<T, GUInt16>::value))
6068
0
            {
6069
0
                if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6070
0
                {
6071
0
                    GWKCubicResampleNoMasks4MultiBandT<T>(
6072
0
                        poWK, padfX[iDstX] - poWK->nSrcXOff,
6073
0
                        padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6074
6075
0
                    continue;
6076
0
                }
6077
0
            }
6078
0
#endif  // defined(USE_SSE2)
6079
6080
0
            [[maybe_unused]] double dfInvWeights = 0;
6081
0
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
6082
0
            {
6083
0
                T value = 0;
6084
                if constexpr (eResample == GRA_NearestNeighbour)
6085
0
                {
6086
0
                    value = reinterpret_cast<T *>(
6087
0
                        poWK->papabySrcImage[iBand])[iSrcOffset];
6088
                }
6089
                else if constexpr (bUse4SamplesFormula)
6090
0
                {
6091
                    if constexpr (eResample == GRA_Bilinear)
6092
0
                        GWKBilinearResampleNoMasks4SampleT(
6093
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6094
                            padfY[iDstX] - poWK->nSrcYOff, &value);
6095
                    else
6096
0
                        GWKCubicResampleNoMasks4SampleT(
6097
0
                            poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6098
0
                            padfY[iDstX] - poWK->nSrcYOff, &value);
6099
                }
6100
                else
6101
0
                {
6102
0
                    GWKResampleNoMasksT(
6103
0
                        poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6104
0
                        padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6105
0
                        padfWeightsY, dfInvWeights);
6106
0
                }
6107
6108
0
                if (poWK->bApplyVerticalShift)
6109
0
                {
6110
0
                    if (!std::isfinite(padfZ[iDstX]))
6111
0
                        continue;
6112
                    // Subtract padfZ[] since the coordinate transformation is
6113
                    // from target to source
6114
0
                    value = GWKClampValueT<T>(
6115
0
                        double(value) * poWK->dfMultFactorVerticalShift -
6116
0
                        padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6117
0
                }
6118
6119
0
                if (poWK->pafDstDensity)
6120
0
                    poWK->pafDstDensity[iDstOffset] = 1.0f;
6121
6122
0
                reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6123
0
                    value;
6124
0
            }
6125
0
        }
6126
6127
        /* --------------------------------------------------------------------
6128
         */
6129
        /*      Report progress to the user, and optionally cancel out. */
6130
        /* --------------------------------------------------------------------
6131
         */
6132
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6133
0
            break;
6134
0
    }
6135
6136
    /* -------------------------------------------------------------------- */
6137
    /*      Cleanup and return.                                             */
6138
    /* -------------------------------------------------------------------- */
6139
0
    CPLFree(padfX);
6140
0
    CPLFree(padfY);
6141
0
    CPLFree(padfZ);
6142
0
    CPLFree(pabSuccess);
6143
0
    CPLFree(padfWeightsX);
6144
0
    CPLFree(padfWeightsY);
6145
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)0, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)1, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)1, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)2, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)2, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)2, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)2, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned char, (GDALResampleAlg)3, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)0, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)1, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)1, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)1, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)1, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)1, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)1, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)2, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)2, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)2, 1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)2, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<short, (GDALResampleAlg)3, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<unsigned short, (GDALResampleAlg)3, 0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThreadInternal<float, (GDALResampleAlg)0, 0>(void*)
6146
6147
template <class T, GDALResampleAlg eResample>
6148
static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6149
0
{
6150
0
    GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6151
0
        pData);
6152
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<unsigned char, (GDALResampleAlg)0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<unsigned char, (GDALResampleAlg)3>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<short, (GDALResampleAlg)0>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<short, (GDALResampleAlg)3>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<unsigned short, (GDALResampleAlg)3>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyThread<float, (GDALResampleAlg)0>(void*)
6153
6154
template <class T, GDALResampleAlg eResample>
6155
static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6156
6157
0
{
6158
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6159
0
    GDALWarpKernel *poWK = psJob->poWK;
6160
0
    static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6161
0
    const bool bUse4SamplesFormula =
6162
0
        poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
6163
0
    if (bUse4SamplesFormula)
6164
0
        GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6165
0
            pData);
6166
0
    else
6167
0
        GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6168
0
            pData);
6169
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<unsigned char, (GDALResampleAlg)1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<unsigned char, (GDALResampleAlg)2>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, (GDALResampleAlg)2>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<short, (GDALResampleAlg)1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<unsigned short, (GDALResampleAlg)1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, (GDALResampleAlg)1>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<short, (GDALResampleAlg)2>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<unsigned short, (GDALResampleAlg)2>(void*)
6170
6171
// Runs the nearest neighbour GByte worker through GWKRun() and returns its
// result.
static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>;
    return GWKRun(poWK, "GWKNearestNoMasksOrDstDensityOnlyByte", pfnWorker);
}
6177
6178
// Runs the bilinear GByte worker through GWKRun() and returns its result.
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
                                                           GRA_Bilinear>;
    return GWKRun(poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte", pfnWorker);
}
6185
6186
// Runs the cubic GByte worker through GWKRun() and returns its result.
static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>;
    return GWKRun(poWK, "GWKCubicNoMasksOrDstDensityOnlyByte", pfnWorker);
}
6192
6193
// Runs the cubic float worker through GWKRun() and returns its result.
static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>;
    return GWKRun(poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat", pfnWorker);
}
6199
6200
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6201
6202
// Runs the cubic double worker through GWKRun() and returns its result.
// Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>;
    return GWKRun(poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble", pfnWorker);
}
6208
#endif
6209
6210
// Runs the cubic spline GByte worker through GWKRun() and returns its result.
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>;
    return GWKRun(poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
                  pfnWorker);
}
6216
6217
/************************************************************************/
6218
/*                          GWKNearestByte()                            */
6219
/*                                                                      */
6220
/*      Case for 8bit input data with nearest neighbour resampling      */
6221
/*      using valid flags. Should be as fast as possible for this       */
6222
/*      particular transformation type.                                 */
6223
/************************************************************************/
6224
6225
template <class T> static void GWKNearestThread(void *pData)
6226
6227
0
{
6228
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6229
0
    GDALWarpKernel *poWK = psJob->poWK;
6230
0
    const int iYMin = psJob->iYMin;
6231
0
    const int iYMax = psJob->iYMax;
6232
0
    const double dfMultFactorVerticalShiftPipeline =
6233
0
        poWK->bApplyVerticalShift
6234
0
            ? CPLAtof(CSLFetchNameValueDef(
6235
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6236
0
                  "1.0"))
6237
0
            : 0.0;
6238
6239
0
    const int nDstXSize = poWK->nDstXSize;
6240
0
    const int nSrcXSize = poWK->nSrcXSize;
6241
0
    const int nSrcYSize = poWK->nSrcYSize;
6242
6243
    /* -------------------------------------------------------------------- */
6244
    /*      Allocate x,y,z coordinate arrays for transformation ... one     */
6245
    /*      scanlines worth of positions.                                   */
6246
    /* -------------------------------------------------------------------- */
6247
6248
    // For x, 2 *, because we cache the precomputed values at the end.
6249
0
    double *padfX =
6250
0
        static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6251
0
    double *padfY =
6252
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6253
0
    double *padfZ =
6254
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6255
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6256
6257
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6258
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6259
0
    const double dfErrorThreshold = CPLAtof(
6260
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6261
6262
0
    const bool bOneSourceCornerFailsToReproject =
6263
0
        GWKOneSourceCornerFailsToReproject(psJob);
6264
6265
    // Precompute values.
6266
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6267
0
        padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6268
6269
    /* ==================================================================== */
6270
    /*      Loop over output lines.                                         */
6271
    /* ==================================================================== */
6272
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6273
0
    {
6274
6275
        /* --------------------------------------------------------------------
6276
         */
6277
        /*      Setup points to transform to source image space. */
6278
        /* --------------------------------------------------------------------
6279
         */
6280
0
        memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6281
0
        const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6282
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6283
0
            padfY[iDstX] = dfY;
6284
0
        memset(padfZ, 0, sizeof(double) * nDstXSize);
6285
6286
        /* --------------------------------------------------------------------
6287
         */
6288
        /*      Transform the points from destination pixel/line coordinates */
6289
        /*      to source pixel/line coordinates. */
6290
        /* --------------------------------------------------------------------
6291
         */
6292
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6293
0
                             padfY, padfZ, pabSuccess);
6294
0
        if (dfSrcCoordPrecision > 0.0)
6295
0
        {
6296
0
            GWKRoundSourceCoordinates(
6297
0
                nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6298
0
                dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6299
0
                0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6300
0
        }
6301
        /* ====================================================================
6302
         */
6303
        /*      Loop over pixels in output scanline. */
6304
        /* ====================================================================
6305
         */
6306
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6307
0
        {
6308
0
            GPtrDiff_t iSrcOffset = 0;
6309
0
            if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6310
0
                                              padfX, padfY, nSrcXSize,
6311
0
                                              nSrcYSize, iSrcOffset))
6312
0
                continue;
6313
6314
            /* --------------------------------------------------------------------
6315
             */
6316
            /*      Do not try to apply invalid source pixels to the dest. */
6317
            /* --------------------------------------------------------------------
6318
             */
6319
0
            if (poWK->panUnifiedSrcValid != nullptr &&
6320
0
                !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6321
0
            {
6322
0
                if (!bOneSourceCornerFailsToReproject)
6323
0
                {
6324
0
                    continue;
6325
0
                }
6326
0
                else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6327
0
                {
6328
0
                    continue;
6329
0
                }
6330
0
            }
6331
6332
            /* --------------------------------------------------------------------
6333
             */
6334
            /*      Do not try to apply transparent source pixels to the
6335
             * destination.*/
6336
            /* --------------------------------------------------------------------
6337
             */
6338
0
            double dfDensity = 1.0;
6339
6340
0
            if (poWK->pafUnifiedSrcDensity != nullptr)
6341
0
            {
6342
0
                dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
6343
0
                if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
6344
0
                    continue;
6345
0
            }
6346
6347
            /* ====================================================================
6348
             */
6349
            /*      Loop processing each band. */
6350
            /* ====================================================================
6351
             */
6352
6353
0
            const GPtrDiff_t iDstOffset =
6354
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6355
6356
0
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
6357
0
            {
6358
0
                T value = 0;
6359
0
                double dfBandDensity = 0.0;
6360
6361
                /* --------------------------------------------------------------------
6362
                 */
6363
                /*      Collect the source value. */
6364
                /* --------------------------------------------------------------------
6365
                 */
6366
0
                if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6367
0
                                 &value))
6368
0
                {
6369
6370
0
                    if (poWK->bApplyVerticalShift)
6371
0
                    {
6372
0
                        if (!std::isfinite(padfZ[iDstX]))
6373
0
                            continue;
6374
                        // Subtract padfZ[] since the coordinate transformation
6375
                        // is from target to source
6376
0
                        value = GWKClampValueT<T>(
6377
0
                            double(value) * poWK->dfMultFactorVerticalShift -
6378
0
                            padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6379
0
                    }
6380
6381
0
                    GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6382
0
                                          dfBandDensity, value);
6383
0
                }
6384
0
            }
6385
6386
            /* --------------------------------------------------------------------
6387
             */
6388
            /*      Mark this pixel valid/opaque in the output. */
6389
            /* --------------------------------------------------------------------
6390
             */
6391
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6392
6393
0
            if (poWK->panDstValid != nullptr)
6394
0
            {
6395
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
6396
0
            }
6397
0
        } /* Next iDstX */
6398
6399
        /* --------------------------------------------------------------------
6400
         */
6401
        /*      Report progress to the user, and optionally cancel out. */
6402
        /* --------------------------------------------------------------------
6403
         */
6404
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6405
0
            break;
6406
0
    }
6407
6408
    /* -------------------------------------------------------------------- */
6409
    /*      Cleanup and return.                                             */
6410
    /* -------------------------------------------------------------------- */
6411
0
    CPLFree(padfX);
6412
0
    CPLFree(padfY);
6413
0
    CPLFree(padfZ);
6414
0
    CPLFree(pabSuccess);
6415
0
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKNearestThread<unsigned char>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKNearestThread<short>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKNearestThread<unsigned short>(void*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKNearestThread<float>(void*)
6416
6417
// Runs GWKNearestThread<GByte> through GWKRun() and returns its result.
static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) = GWKNearestThread<GByte>;
    return GWKRun(poWK, "GWKNearestByte", pfnWorker);
}
6421
6422
// Runs the nearest neighbour GInt16 worker through GWKRun() and returns its
// result.
static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>;
    return GWKRun(poWK, "GWKNearestNoMasksOrDstDensityOnlyShort", pfnWorker);
}
6428
6429
// Runs the bilinear GInt16 worker through GWKRun() and returns its result.
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
                                                           GRA_Bilinear>;
    return GWKRun(poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort", pfnWorker);
}
6436
6437
// Runs the bilinear GUInt16 worker through GWKRun() and returns its result.
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
                                                           GRA_Bilinear>;
    return GWKRun(poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort", pfnWorker);
}
6444
6445
// Runs the bilinear float worker through GWKRun() and returns its result.
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
                                                           GRA_Bilinear>;
    return GWKRun(poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat", pfnWorker);
}
6452
6453
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6454
6455
// Runs the bilinear double worker through GWKRun() and returns its result.
// Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
                                                           GRA_Bilinear>;
    return GWKRun(poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble", pfnWorker);
}
6462
#endif
6463
6464
// Runs the cubic GInt16 worker through GWKRun() and returns its result.
static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>;
    return GWKRun(poWK, "GWKCubicNoMasksOrDstDensityOnlyShort", pfnWorker);
}
6470
6471
// Runs the cubic GUInt16 worker through GWKRun() and returns its result.
static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>;
    return GWKRun(poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort", pfnWorker);
}
6477
6478
// Runs the cubic spline GInt16 worker through GWKRun() and returns its result.
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>;
    return GWKRun(poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
                  pfnWorker);
}
6484
6485
// Runs the cubic spline GUInt16 worker through GWKRun() and returns its
// result.
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>;
    return GWKRun(poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
                  pfnWorker);
}
6491
6492
// Runs GWKNearestThread<GInt16> through GWKRun() and returns its result.
static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) = GWKNearestThread<GInt16>;
    return GWKRun(poWK, "GWKNearestShort", pfnWorker);
}
6496
6497
// Runs GWKNearestThread<GUInt16> through GWKRun() and returns its result.
static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) = GWKNearestThread<GUInt16>;
    return GWKRun(poWK, "GWKNearestUnsignedShort", pfnWorker);
}
6501
6502
// Runs the nearest neighbour float worker through GWKRun() and returns its
// result.
static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) =
        GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>;
    return GWKRun(poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat", pfnWorker);
}
6508
6509
// Runs GWKNearestThread<float> through GWKRun() and returns its result.
static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) = GWKNearestThread<float>;
    return GWKRun(poWK, "GWKNearestFloat", pfnWorker);
}
6513
6514
/************************************************************************/
6515
/*                           GWKAverageOrMode()                         */
6516
/*                                                                      */
6517
/************************************************************************/
6518
6519
// Vertical weight of source row iSrcY within the window
// [iSrcYMin, iSrcYMax) whose exact bounds are [dfYMin, dfYMax]:
//   - first row: 1.0 if the window is a single row, otherwise the fraction
//     of that row at or after dfYMin;
//   - last row: the fraction of that row before dfYMax;
//   - any other row: 1.0.
// Reads iSrcYMin, iSrcYMax, dfYMin and dfYMax from the enclosing scope.
// The argument is parenthesized so that an expression can be passed safely;
// it may be evaluated twice, so it must not have side effects.
#define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    (((iSrcY) == iSrcYMin)                                                     \
         ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
     : ((iSrcY) + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                     \
                                 : 1.0)
6524
6525
// Weight of source pixel (iSrcX, row) given the row weight dfWeightY, within
// the window [iSrcXMin, iSrcXMax) whose exact bounds are [dfXMin, dfXMax]:
//   - first column: dfWeightY if the window is a single column, otherwise
//     dfWeightY scaled by the fraction of that column at or after dfXMin;
//   - last column: dfWeightY scaled by the fraction of that column before
//     dfXMax;
//   - any other column: dfWeightY.
// Reads iSrcXMin, iSrcXMax, dfXMin and dfXMax from the enclosing scope.
// Both arguments are parenthesized so that expressions such as "a + b" are
// scaled as a whole; iSrcX may be evaluated twice, so it must not have side
// effects.
#define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    (((iSrcX) == iSrcXMin)                                                     \
         ? ((iSrcXMin + 1 == iSrcXMax)                                         \
                ? (dfWeightY)                                                  \
                : (dfWeightY) * (1 - (dfXMin - iSrcXMin)))                     \
     : ((iSrcX) + 1 == iSrcXMax) ? (dfWeightY) * (1 - (iSrcXMax - dfXMax))     \
                                 : (dfWeightY))
6531
6532
static void GWKAverageOrModeThread(void *pData);
6533
6534
// Runs GWKAverageOrModeThread through GWKRun() and returns its result.
static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
{
    void (*const pfnWorker)(void *) = GWKAverageOrModeThread;
    return GWKRun(poWK, "GWKAverageOrMode", pfnWorker);
}
6538
6539
/************************************************************************/
6540
/*                   GWKAverageOrModeComputeLineCoords()                */
6541
/************************************************************************/
6542
6543
static void GWKAverageOrModeComputeLineCoords(
6544
    const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6545
    double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
6546
    int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
6547
    double dfErrorThreshold)
6548
0
{
6549
0
    const GDALWarpKernel *poWK = psJob->poWK;
6550
0
    const int nDstXSize = poWK->nDstXSize;
6551
6552
    // Setup points to transform to source image space.
6553
0
    for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6554
0
    {
6555
0
        padfX[iDstX] = iDstX + poWK->nDstXOff;
6556
0
        padfY[iDstX] = iDstY + poWK->nDstYOff;
6557
0
        padfZ[iDstX] = 0.0;
6558
0
        padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6559
0
        padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6560
0
        padfZ2[iDstX] = 0.0;
6561
0
    }
6562
6563
    /* ----------------------------------------------------------------- */
6564
    /*      Transform the points from destination pixel/line coordinates */
6565
    /*      to source pixel/line coordinates.                            */
6566
    /* ----------------------------------------------------------------- */
6567
0
    poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
6568
0
                         padfZ, pabSuccess);
6569
0
    poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6570
0
                         padfY2, padfZ2, pabSuccess2);
6571
6572
0
    if (dfSrcCoordPrecision > 0.0)
6573
0
    {
6574
0
        GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
6575
0
                                  dfSrcCoordPrecision, dfErrorThreshold,
6576
0
                                  poWK->pfnTransformer, psJob->pTransformerArg,
6577
0
                                  poWK->nDstXOff, iDstY + poWK->nDstYOff);
6578
0
        GWKRoundSourceCoordinates(
6579
0
            nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
6580
0
            dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6581
0
            1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
6582
0
    }
6583
0
}
6584
6585
/************************************************************************/
6586
/*              GWKAverageOrModeComputeSourceCoords()                   */
6587
/************************************************************************/
6588
6589
static bool GWKAverageOrModeComputeSourceCoords(
6590
    const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6591
    double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
6592
    // Output:
6593
    bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
6594
    double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
6595
0
{
6596
0
    const GDALWarpKernel *poWK = psJob->poWK;
6597
0
    const int nSrcXSize = poWK->nSrcXSize;
6598
0
    const int nSrcYSize = poWK->nSrcYSize;
6599
6600
    // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
6601
    // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
6602
0
    if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6603
0
          padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6604
0
          padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6605
0
          padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6606
0
          padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6607
0
          padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6608
0
          padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
6609
0
          padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
6610
0
    {
6611
0
        return false;
6612
0
    }
6613
6614
    // Compute corners in source crs.
6615
6616
    // The transformation might not have preserved ordering of
6617
    // coordinates so do the necessary swapping (#5433).
6618
    // NOTE: this is really an approximative fix. To do something
6619
    // more precise we would for example need to compute the
6620
    // transformation of coordinates in the
6621
    // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
6622
    // coordinates, and take the bounding box of the got source
6623
    // coordinates.
6624
6625
0
    if (padfX[iDstX] > padfX2[iDstX])
6626
0
        std::swap(padfX[iDstX], padfX2[iDstX]);
6627
6628
    // Detect situations where the target pixel is close to the
6629
    // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
6630
    // close to the left-most and right-most columns of the source
6631
    // raster. The 2 value below was experimentally determined to
6632
    // avoid false-positives and false-negatives.
6633
    // Addresses https://github.com/OSGeo/gdal/issues/6478
6634
0
    bWrapOverX = false;
6635
0
    const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
6636
0
    if (poWK->nSrcXOff == 0 &&
6637
0
        padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
6638
0
        (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale < nThresholdWrapOverX)
6639
0
    {
6640
        // Check there is a discontinuity by checking at mid-pixel.
6641
        // NOTE: all this remains fragile. To confidently
6642
        // detect antimeridian warping we should probably try to access
6643
        // georeferenced coordinates, and not rely only on tests on
6644
        // image space coordinates. But accessing georeferenced
6645
        // coordinates from here is not trivial, and we would for example
6646
        // have to handle both geographic, Mercator, etc.
6647
        // Let's hope this heuristics is good enough for now.
6648
0
        double x = iDstX + 0.5 + poWK->nDstXOff;
6649
0
        double y = iDstY + poWK->nDstYOff;
6650
0
        double z = 0;
6651
0
        int bSuccess = FALSE;
6652
0
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
6653
0
                             &bSuccess);
6654
0
        if (bSuccess && x < padfX[iDstX])
6655
0
        {
6656
0
            bWrapOverX = true;
6657
0
            std::swap(padfX[iDstX], padfX2[iDstX]);
6658
0
            padfX2[iDstX] += nSrcXSize;
6659
0
        }
6660
0
    }
6661
6662
0
    dfXMin = padfX[iDstX] - poWK->nSrcXOff;
6663
0
    dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
6664
0
    constexpr double EPSILON = 1e-10;
6665
    // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
6666
0
    if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
6667
0
        return false;
6668
0
    iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
6669
0
    iSrcXMax = static_cast<int>(
6670
0
        std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
6671
0
    if (!bWrapOverX)
6672
0
        iSrcXMax = std::min(iSrcXMax, nSrcXSize);
6673
0
    if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
6674
0
        iSrcXMax++;
6675
6676
0
    if (padfY[iDstX] > padfY2[iDstX])
6677
0
        std::swap(padfY[iDstX], padfY2[iDstX]);
6678
0
    dfYMin = padfY[iDstX] - poWK->nSrcYOff;
6679
0
    dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
6680
    // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
6681
0
    if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
6682
0
        return false;
6683
0
    iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
6684
0
    iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
6685
0
    if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
6686
0
        iSrcYMax++;
6687
6688
0
    return true;
6689
0
}
6690
6691
/************************************************************************/
6692
/*                         GWKModeRealType()                            */
6693
/************************************************************************/
6694
6695
// Generic equality test: true when a == b.
template <class T> static inline bool IsSame(T a, T b)
{
    const bool bEqual = (a == b);
    return bEqual;
}
Unexecuted instantiation: gdalwarpkernel.cpp:bool IsSame<int>(int, int)
Unexecuted instantiation: gdalwarpkernel.cpp:bool IsSame<unsigned int>(unsigned int, unsigned int)
Unexecuted instantiation: gdalwarpkernel.cpp:bool IsSame<long>(long, long)
Unexecuted instantiation: gdalwarpkernel.cpp:bool IsSame<unsigned long>(unsigned long, unsigned long)
6699
6700
// GFloat16 specialization: equal values are the same, and two NaNs are also
// treated as the same.
template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
{
    if (a == b)
        return true;
    return CPLIsNan(a) && CPLIsNan(b);
}
6704
6705
// float specialization: equal values are the same, and two NaNs are also
// treated as the same.
template <> bool IsSame<float>(float a, float b)
{
    if (a == b)
        return true;
    return std::isnan(a) && std::isnan(b);
}
6709
6710
// double specialization: equal values are the same, and two NaNs are also
// treated as the same.
template <> bool IsSame<double>(double a, double b)
{
    if (a == b)
        return true;
    return std::isnan(a) && std::isnan(b);
}
6714
6715
/**
 * Mode resampling worker for non-complex working data types.
 *
 * For every destination pixel of lines [psJob->iYMin, psJob->iYMax), the
 * source window computed by GWKAverageOrModeComputeSourceCoords() is scanned
 * and, band by band, the weight (COMPUTE_WEIGHT) of each valid source pixel
 * is accumulated in a bin keyed by the pixel value (compared with IsSame()).
 * The value of the bin with the largest accumulated weight is written to the
 * destination pixel. When two bins have exactly the same accumulated weight,
 * poWK->eTieStrategy decides: GWKTS_First keeps the current mode, GWKTS_Min /
 * GWKTS_Max switch to the contending value if it is smaller / larger.
 *
 * The contention for the mode is evaluated each time a bin is created or
 * updated, so that a value met for the first time with a weight larger than
 * (or tying with) the current mode is taken into account too.
 *
 * If the bin arrays cannot be allocated, the function returns without
 * processing anything (the allocation macro is the _VERBOSE variant).
 *
 * @tparam T type in which source values are fetched (GWKGetPixelT) and
 *           destination values are written (GWKSetPixelValueRealT).
 * @param psJob job to process: warp kernel, range of destination lines and
 *              optional progress/cancellation callback.
 */
template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
{
    const GDALWarpKernel *poWK = psJob->poWK;
    const int iYMin = psJob->iYMin;
    const int iYMax = psJob->iYMax;
    const int nDstXSize = poWK->nDstXSize;
    const int nSrcXSize = poWK->nSrcXSize;
    const int nSrcYSize = poWK->nSrcYSize;
    const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;

    // Bin storage: pVals[i] is the value of bin i, pafCounts[i] its
    // accumulated weight. Sized to one bin per pixel of the source chunk.
    T *pVals = nullptr;
    float *pafCounts = nullptr;

    if (nSrcXSize > 0 && nSrcYSize > 0)
    {
        pVals = static_cast<T *>(
            VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
        pafCounts = static_cast<float *>(
            VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
        if (pVals == nullptr || pafCounts == nullptr)
        {
            VSIFree(pVals);
            VSIFree(pafCounts);
            return;
        }
    }

    /* -------------------------------------------------------------------- */
    /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    /*      scanlines worth of positions.                                   */
    /* -------------------------------------------------------------------- */

    double *padfX =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfY =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfZ =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfX2 =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfY2 =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfZ2 =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));

    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    const double dfErrorThreshold = CPLAtof(
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));

    const int nXMargin =
        2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    const int nYMargin =
        2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));

    /* ==================================================================== */
    /*      Loop over output lines.                                         */
    /* ==================================================================== */
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    {
        GWKAverageOrModeComputeLineCoords(
            psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
            pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);

        // Loop over pixels in output scanline.
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
        {
            GPtrDiff_t iSrcOffset = 0;
            double dfDensity = 1.0;
            bool bHasFoundDensity = false;

            bool bWrapOverX = false;
            double dfXMin = 0;
            double dfYMin = 0;
            double dfXMax = 0;
            double dfYMax = 0;
            int iSrcXMin = 0;
            int iSrcYMin = 0;
            int iSrcXMax = 0;
            int iSrcYMax = 0;
            if (!GWKAverageOrModeComputeSourceCoords(
                    psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
                    nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
                    iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
            {
                continue;
            }

            const GPtrDiff_t iDstOffset =
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;

            // Loop processing each band.
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
            {
                double dfBandDensity = 0.0;

                int nBins = 0;
                int iModeIndex = -1;
                T nVal{};

                for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
                {
                    const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
                    iSrcOffset =
                        iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
                    for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
                         iSrcX++, iSrcOffset++)
                    {
                        // When wrapping, iSrcX may exceed the source width:
                        // fold it back into [0, nSrcXSize).
                        if (bWrapOverX)
                            iSrcOffset =
                                (iSrcX % nSrcXSize) +
                                static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;

                        if (poWK->panUnifiedSrcValid != nullptr &&
                            !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
                            continue;

                        if (GWKGetPixelT(poWK, iBand, iSrcOffset,
                                         &dfBandDensity, &nVal) &&
                            dfBandDensity > BAND_DENSITY_THRESHOLD)
                        {
                            const double dfWeight =
                                COMPUTE_WEIGHT(iSrcX, dfWeightY);

                            // Check array for existing entry.
                            int i = 0;
                            while (i < nBins && !IsSame(pVals[i], nVal))
                                ++i;

                            if (i == nBins)
                            {
                                // Add to arr if entry not already there.
                                pVals[i] = nVal;
                                pafCounts[i] = static_cast<float>(dfWeight);
                                ++nBins;
                            }
                            else
                            {
                                pafCounts[i] += static_cast<float>(dfWeight);
                            }

                            // Let bin i contend for the mode, whether it
                            // has just been created or updated.
                            if (iModeIndex < 0)
                            {
                                iModeIndex = i;
                            }
                            else if (i != iModeIndex)
                            {
                                bool bValIsMaxCount =
                                    (pafCounts[i] > pafCounts[iModeIndex]);

                                if (!bValIsMaxCount &&
                                    pafCounts[i] == pafCounts[iModeIndex])
                                {
                                    switch (eTieStrategy)
                                    {
                                        case GWKTS_First:
                                            break;
                                        case GWKTS_Min:
                                            bValIsMaxCount =
                                                nVal < pVals[iModeIndex];
                                            break;
                                        case GWKTS_Max:
                                            bValIsMaxCount =
                                                nVal > pVals[iModeIndex];
                                            break;
                                    }
                                }

                                if (bValIsMaxCount)
                                {
                                    iModeIndex = i;
                                }
                            }
                        }
                    }
                }

                if (iModeIndex != -1)
                {
                    nVal = pVals[iModeIndex];
                    dfBandDensity = 1;
                    bHasFoundDensity = true;
                }

                // We have a computed value from the source.  Now apply it
                // to the destination pixel
                if (bHasFoundDensity)
                {
                    GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
                                          dfBandDensity, nVal);
                }
            }

            if (!bHasFoundDensity)
                continue;

            /* ------------------------------------------------------------ */
            /*      Update destination density/validity masks.              */
            /* ------------------------------------------------------------ */
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);

            if (poWK->panDstValid != nullptr)
            {
                CPLMaskSet(poWK->panDstValid, iDstOffset);
            }
        } /* Next iDstX */

        /* ---------------------------------------------------------------- */
        /*      Report progress to the user, and optionally cancel out.     */
        /* ---------------------------------------------------------------- */
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
            break;
    }

    /* -------------------------------------------------------------------- */
    /*      Cleanup and return.                                             */
    /* -------------------------------------------------------------------- */
    CPLFree(padfX);
    CPLFree(padfY);
    CPLFree(padfZ);
    CPLFree(padfX2);
    CPLFree(padfY2);
    CPLFree(padfZ2);
    CPLFree(pabSuccess);
    CPLFree(pabSuccess2);
    VSIFree(pVals);
    VSIFree(pafCounts);
}
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKModeRealType<int>(GWKJobStruct*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKModeRealType<unsigned int>(GWKJobStruct*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKModeRealType<long>(GWKJobStruct*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKModeRealType<unsigned long>(GWKJobStruct*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKModeRealType<cpl::Float16>(GWKJobStruct*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKModeRealType<float>(GWKJobStruct*)
Unexecuted instantiation: gdalwarpkernel.cpp:void GWKModeRealType<double>(GWKJobStruct*)
6950
6951
/************************************************************************/
6952
/*                        GWKModeComplexType()                          */
6953
/************************************************************************/
6954
6955
/**
 * Mode resampling worker operating on (real, imaginary) pairs of doubles.
 *
 * GWKAverageOrModeThread() dispatches here for complex working data types,
 * and for any data type when poWK->bApplyVerticalShift is set.
 *
 * For every destination pixel of lines [psJob->iYMin, psJob->iYMax), the
 * source window computed by GWKAverageOrModeComputeSourceCoords() is scanned
 * and, band by band, the weight (COMPUTE_WEIGHT) of each valid source pixel
 * is accumulated in a bin keyed by its (real, imaginary) value. The value of
 * the bin with the largest accumulated weight is written to the destination
 * pixel. When two bins have exactly the same accumulated weight,
 * poWK->eTieStrategy decides, comparing real parts only: GWKTS_First keeps
 * the current mode, GWKTS_Min / GWKTS_Max switch to the contending value if
 * its real part is smaller / larger.
 *
 * The contention for the mode is evaluated each time a bin is created or
 * updated, so that a value met for the first time with a weight larger than
 * (or tying with) the current mode is taken into account too.
 *
 * When poWK->bApplyVerticalShift is set, the real part of the selected value
 * is scaled by poWK->dfMultFactorVerticalShift and shifted by padfZ[iDstX]
 * times the MULT_FACTOR_VERTICAL_SHIFT_PIPELINE warp option (default 1.0);
 * a band whose padfZ[iDstX] is not finite is skipped for that pixel.
 *
 * If the bin arrays cannot be allocated, the function returns without
 * processing anything (the allocation macro is the _VERBOSE variant).
 *
 * @param psJob job to process: warp kernel, range of destination lines and
 *              optional progress/cancellation callback.
 */
static void GWKModeComplexType(GWKJobStruct *psJob)
{
    const GDALWarpKernel *poWK = psJob->poWK;
    const int iYMin = psJob->iYMin;
    const int iYMax = psJob->iYMax;
    const int nDstXSize = poWK->nDstXSize;
    const int nSrcXSize = poWK->nSrcXSize;
    const int nSrcYSize = poWK->nSrcYSize;
    const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
    const double dfMultFactorVerticalShiftPipeline =
        poWK->bApplyVerticalShift
            ? CPLAtof(CSLFetchNameValueDef(
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
                  "1.0"))
            : 0.0;

    // Bin storage: (padfRealVals[i], padfImagVals[i]) is the value of bin i,
    // pafCounts[i] its accumulated weight. Sized to one bin per pixel of the
    // source chunk.
    double *padfRealVals = nullptr;
    double *padfImagVals = nullptr;
    float *pafCounts = nullptr;

    if (nSrcXSize > 0 && nSrcYSize > 0)
    {
        padfRealVals = static_cast<double *>(
            VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
        padfImagVals = static_cast<double *>(
            VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
        pafCounts = static_cast<float *>(
            VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
        if (padfRealVals == nullptr || padfImagVals == nullptr ||
            pafCounts == nullptr)
        {
            VSIFree(padfRealVals);
            VSIFree(padfImagVals);
            VSIFree(pafCounts);
            return;
        }
    }

    /* -------------------------------------------------------------------- */
    /*      Allocate x,y,z coordinate arrays for transformation ... two     */
    /*      scanlines worth of positions.                                   */
    /* -------------------------------------------------------------------- */

    double *padfX =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfY =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfZ =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfX2 =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfY2 =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    double *padfZ2 =
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
    int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));

    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
    const double dfErrorThreshold = CPLAtof(
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));

    const int nXMargin =
        2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
    const int nYMargin =
        2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));

    /* ==================================================================== */
    /*      Loop over output lines.                                         */
    /* ==================================================================== */
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
    {
        GWKAverageOrModeComputeLineCoords(
            psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
            pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);

        // Loop over pixels in output scanline.
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
        {
            GPtrDiff_t iSrcOffset = 0;
            double dfDensity = 1.0;
            bool bHasFoundDensity = false;

            bool bWrapOverX = false;
            double dfXMin = 0;
            double dfYMin = 0;
            double dfXMax = 0;
            double dfYMax = 0;
            int iSrcXMin = 0;
            int iSrcYMin = 0;
            int iSrcXMax = 0;
            int iSrcYMax = 0;
            if (!GWKAverageOrModeComputeSourceCoords(
                    psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
                    nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
                    iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
            {
                continue;
            }

            const GPtrDiff_t iDstOffset =
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;

            // Loop processing each band.
            for (int iBand = 0; iBand < poWK->nBands; iBand++)
            {
                double dfBandDensity = 0.0;

                int nBins = 0;
                int iModeIndex = -1;
                double dfValueReal = 0;
                double dfValueImag = 0;

                for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
                {
                    const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
                    iSrcOffset =
                        iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
                    for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
                         iSrcX++, iSrcOffset++)
                    {
                        // When wrapping, iSrcX may exceed the source width:
                        // fold it back into [0, nSrcXSize).
                        if (bWrapOverX)
                            iSrcOffset =
                                (iSrcX % nSrcXSize) +
                                static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;

                        if (poWK->panUnifiedSrcValid != nullptr &&
                            !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
                            continue;

                        if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
                                             &dfBandDensity, &dfValueReal,
                                             &dfValueImag) &&
                            dfBandDensity > BAND_DENSITY_THRESHOLD)
                        {
                            const double dfWeight =
                                COMPUTE_WEIGHT(iSrcX, dfWeightY);

                            // Check array for existing entry.
                            int i = 0;
                            while (i < nBins &&
                                   !(IsSame(padfRealVals[i], dfValueReal) &&
                                     IsSame(padfImagVals[i], dfValueImag)))
                            {
                                ++i;
                            }

                            if (i == nBins)
                            {
                                // Add to arr if entry not already there.
                                padfRealVals[i] = dfValueReal;
                                padfImagVals[i] = dfValueImag;
                                pafCounts[i] = static_cast<float>(dfWeight);
                                ++nBins;
                            }
                            else
                            {
                                pafCounts[i] += static_cast<float>(dfWeight);
                            }

                            // Let bin i contend for the mode, whether it
                            // has just been created or updated.
                            if (iModeIndex < 0)
                            {
                                iModeIndex = i;
                            }
                            else if (i != iModeIndex)
                            {
                                bool bValIsMaxCount =
                                    (pafCounts[i] > pafCounts[iModeIndex]);

                                if (!bValIsMaxCount &&
                                    pafCounts[i] == pafCounts[iModeIndex])
                                {
                                    switch (eTieStrategy)
                                    {
                                        case GWKTS_First:
                                            break;
                                        case GWKTS_Min:
                                            bValIsMaxCount =
                                                dfValueReal <
                                                padfRealVals[iModeIndex];
                                            break;
                                        case GWKTS_Max:
                                            bValIsMaxCount =
                                                dfValueReal >
                                                padfRealVals[iModeIndex];
                                            break;
                                    }
                                }

                                if (bValIsMaxCount)
                                {
                                    iModeIndex = i;
                                }
                            }
                        }
                    }
                }

                if (iModeIndex != -1)
                {
                    dfValueReal = padfRealVals[iModeIndex];
                    dfValueImag = padfImagVals[iModeIndex];
                    dfBandDensity = 1;

                    if (poWK->bApplyVerticalShift)
                    {
                        // Skip this band (not the whole pixel) when no
                        // finite Z is available.
                        if (!std::isfinite(padfZ[iDstX]))
                            continue;
                        // Subtract padfZ[] since the coordinate
                        // transformation is from target to source
                        dfValueReal =
                            dfValueReal * poWK->dfMultFactorVerticalShift -
                            padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
                    }

                    bHasFoundDensity = true;
                }

                // We have a computed value from the source.  Now apply it
                // to the destination pixel
                if (bHasFoundDensity)
                {
                    GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
                                     dfValueReal, dfValueImag);
                }
            }

            if (!bHasFoundDensity)
                continue;

            /* ------------------------------------------------------------ */
            /*      Update destination density/validity masks.              */
            /* ------------------------------------------------------------ */
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);

            if (poWK->panDstValid != nullptr)
            {
                CPLMaskSet(poWK->panDstValid, iDstOffset);
            }
        } /* Next iDstX */

        /* ---------------------------------------------------------------- */
        /*      Report progress to the user, and optionally cancel out.     */
        /* ---------------------------------------------------------------- */
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
            break;
    }

    /* -------------------------------------------------------------------- */
    /*      Cleanup and return.                                             */
    /* -------------------------------------------------------------------- */
    CPLFree(padfX);
    CPLFree(padfY);
    CPLFree(padfZ);
    CPLFree(padfX2);
    CPLFree(padfY2);
    CPLFree(padfZ2);
    CPLFree(pabSuccess);
    CPLFree(pabSuccess2);
    VSIFree(padfRealVals);
    VSIFree(padfImagVals);
    VSIFree(pafCounts);
}
7221
7222
/************************************************************************/
7223
/*                       GWKAverageOrModeThread()                       */
7224
/************************************************************************/
7225
7226
// Overall logic based on GWKGeneralCaseThread().
7227
static void GWKAverageOrModeThread(void *pData)
7228
0
{
7229
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7230
0
    const GDALWarpKernel *poWK = psJob->poWK;
7231
0
    const int iYMin = psJob->iYMin;
7232
0
    const int iYMax = psJob->iYMax;
7233
0
    const double dfMultFactorVerticalShiftPipeline =
7234
0
        poWK->bApplyVerticalShift
7235
0
            ? CPLAtof(CSLFetchNameValueDef(
7236
0
                  poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7237
0
                  "1.0"))
7238
0
            : 0.0;
7239
7240
0
    const int nDstXSize = poWK->nDstXSize;
7241
0
    const int nSrcXSize = poWK->nSrcXSize;
7242
7243
    /* -------------------------------------------------------------------- */
7244
    /*      Find out which algorithm to use (small optim.)                  */
7245
    /* -------------------------------------------------------------------- */
7246
7247
    // Only used for GRA_Mode
7248
0
    float *pafCounts = nullptr;
7249
0
    int nBins = 0;
7250
0
    int nBinsOffset = 0;
7251
0
    const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7252
7253
    // Only used with Q1, Med and Q3
7254
0
    float quant = 0.0f;
7255
7256
    // To control array allocation only when data type is complex
7257
0
    const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
7258
7259
0
    if (poWK->eResample == GRA_Mode)
7260
0
    {
7261
0
        if (poWK->bApplyVerticalShift)
7262
0
        {
7263
0
            return GWKModeComplexType(psJob);
7264
0
        }
7265
7266
0
        switch (poWK->eWorkingDataType)
7267
0
        {
7268
0
            case GDT_Byte:
7269
0
                nBins = 256;
7270
0
                break;
7271
7272
0
            case GDT_Int8:
7273
0
                nBins = 256;
7274
0
                nBinsOffset = nBins / 2;
7275
0
                break;
7276
7277
0
            case GDT_UInt16:
7278
0
                nBins = 65536;
7279
0
                break;
7280
7281
0
            case GDT_Int16:
7282
0
                nBins = 65536;
7283
0
                nBinsOffset = nBins / 2;
7284
0
                break;
7285
7286
0
            case GDT_Int32:
7287
0
                return GWKModeRealType<int32_t>(psJob);
7288
7289
0
            case GDT_UInt32:
7290
0
                return GWKModeRealType<uint32_t>(psJob);
7291
7292
0
            case GDT_Int64:
7293
0
                return GWKModeRealType<int64_t>(psJob);
7294
7295
0
            case GDT_UInt64:
7296
0
                return GWKModeRealType<uint64_t>(psJob);
7297
7298
0
            case GDT_Float16:
7299
0
                return GWKModeRealType<GFloat16>(psJob);
7300
7301
0
            case GDT_Float32:
7302
0
                return GWKModeRealType<float>(psJob);
7303
7304
0
            case GDT_Float64:
7305
0
                return GWKModeRealType<double>(psJob);
7306
7307
0
            case GDT_CInt16:
7308
0
            case GDT_CInt32:
7309
0
            case GDT_CFloat16:
7310
0
            case GDT_CFloat32:
7311
0
            case GDT_CFloat64:
7312
0
                return GWKModeComplexType(psJob);
7313
7314
0
            case GDT_Unknown:
7315
0
            case GDT_TypeCount:
7316
0
                CPLAssert(false);
7317
0
                return;
7318
0
        }
7319
7320
0
        if (nBins)
7321
0
        {
7322
0
            pafCounts =
7323
0
                static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
7324
0
            if (pafCounts == nullptr)
7325
0
                return;
7326
0
        }
7327
0
    }
7328
0
    else if (poWK->eResample == GRA_Med)
7329
0
    {
7330
0
        quant = 0.5f;
7331
0
    }
7332
0
    else if (poWK->eResample == GRA_Q1)
7333
0
    {
7334
0
        quant = 0.25f;
7335
0
    }
7336
0
    else if (poWK->eResample == GRA_Q3)
7337
0
    {
7338
0
        quant = 0.75f;
7339
0
    }
7340
0
    else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
7341
0
             poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
7342
0
    {
7343
        // Other resample algorithms not permitted here.
7344
0
        CPLError(CE_Fatal, CPLE_AppDefined,
7345
0
                 "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
7346
0
                 "illegal resample");
7347
0
    }
7348
7349
0
    CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
7350
7351
    /* -------------------------------------------------------------------- */
7352
    /*      Allocate x,y,z coordinate arrays for transformation ... two     */
7353
    /*      scanlines worth of positions.                                   */
7354
    /* -------------------------------------------------------------------- */
7355
7356
0
    double *padfX =
7357
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7358
0
    double *padfY =
7359
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7360
0
    double *padfZ =
7361
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7362
0
    double *padfX2 =
7363
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7364
0
    double *padfY2 =
7365
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7366
0
    double *padfZ2 =
7367
0
        static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7368
0
    int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7369
0
    int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7370
7371
0
    const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7372
0
        poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7373
0
    const double dfErrorThreshold = CPLAtof(
7374
0
        CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7375
7376
0
    const double dfExcludedValuesThreshold =
7377
0
        CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7378
0
                                     "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7379
0
        100.0;
7380
0
    const double dfNodataValuesThreshold =
7381
0
        CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7382
0
                                     "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7383
0
        100.0;
7384
7385
0
    const int nXMargin =
7386
0
        2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7387
0
    const int nYMargin =
7388
0
        2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7389
7390
    /* ==================================================================== */
7391
    /*      Loop over output lines.                                         */
7392
    /* ==================================================================== */
7393
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7394
0
    {
7395
0
        GWKAverageOrModeComputeLineCoords(
7396
0
            psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7397
0
            pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7398
7399
        /* ====================================================================
7400
         */
7401
        /*      Loop over pixels in output scanline. */
7402
        /* ====================================================================
7403
         */
7404
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7405
0
        {
7406
0
            GPtrDiff_t iSrcOffset = 0;
7407
0
            double dfDensity = 1.0;
7408
0
            bool bHasFoundDensity = false;
7409
7410
0
            bool bWrapOverX = false;
7411
0
            double dfXMin = 0;
7412
0
            double dfYMin = 0;
7413
0
            double dfXMax = 0;
7414
0
            double dfYMax = 0;
7415
0
            int iSrcXMin = 0;
7416
0
            int iSrcYMin = 0;
7417
0
            int iSrcXMax = 0;
7418
0
            int iSrcYMax = 0;
7419
0
            if (!GWKAverageOrModeComputeSourceCoords(
7420
0
                    psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7421
0
                    nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7422
0
                    iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7423
0
            {
7424
0
                continue;
7425
0
            }
7426
7427
0
            const GPtrDiff_t iDstOffset =
7428
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7429
7430
0
            bool bDone = false;
7431
7432
            // Special Average mode where we process all bands together,
7433
            // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7434
0
            constexpr double EPSILON = 1e-10;
7435
0
            if (poWK->eResample == GRA_Average &&
7436
0
                (!poWK->m_aadfExcludedValues.empty() ||
7437
0
                 dfNodataValuesThreshold < 1 - EPSILON) &&
7438
0
                !poWK->bApplyVerticalShift && !bIsComplex)
7439
0
            {
7440
0
                double dfTotalWeightInvalid = 0.0;
7441
0
                double dfTotalWeightExcluded = 0.0;
7442
0
                double dfTotalWeightRegular = 0.0;
7443
0
                std::vector<double> adfValueReal(poWK->nBands, 0);
7444
0
                std::vector<double> adfValueAveraged(poWK->nBands, 0);
7445
0
                std::vector<int> anCountExcludedValues(
7446
0
                    poWK->m_aadfExcludedValues.size(), 0);
7447
7448
0
                for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7449
0
                {
7450
0
                    const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7451
0
                    iSrcOffset =
7452
0
                        iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7453
0
                    for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7454
0
                         iSrcX++, iSrcOffset++)
7455
0
                    {
7456
0
                        if (bWrapOverX)
7457
0
                            iSrcOffset =
7458
0
                                (iSrcX % nSrcXSize) +
7459
0
                                static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7460
7461
0
                        const double dfWeight =
7462
0
                            COMPUTE_WEIGHT(iSrcX, dfWeightY);
7463
0
                        if (dfWeight <= 0)
7464
0
                            continue;
7465
7466
0
                        if (poWK->panUnifiedSrcValid != nullptr &&
7467
0
                            !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7468
0
                        {
7469
0
                            dfTotalWeightInvalid += dfWeight;
7470
0
                            continue;
7471
0
                        }
7472
7473
0
                        bool bAllValid = true;
7474
0
                        for (int iBand = 0; iBand < poWK->nBands; iBand++)
7475
0
                        {
7476
0
                            double dfBandDensity = 0;
7477
0
                            double dfValueImagTmp = 0;
7478
0
                            if (!(GWKGetPixelValue(
7479
0
                                      poWK, iBand, iSrcOffset, &dfBandDensity,
7480
0
                                      &adfValueReal[iBand], &dfValueImagTmp) &&
7481
0
                                  dfBandDensity > BAND_DENSITY_THRESHOLD))
7482
0
                            {
7483
0
                                bAllValid = false;
7484
0
                                break;
7485
0
                            }
7486
0
                        }
7487
7488
0
                        if (!bAllValid)
7489
0
                        {
7490
0
                            dfTotalWeightInvalid += dfWeight;
7491
0
                            continue;
7492
0
                        }
7493
7494
0
                        bool bExcludedValueFound = false;
7495
0
                        for (size_t i = 0;
7496
0
                             i < poWK->m_aadfExcludedValues.size(); ++i)
7497
0
                        {
7498
0
                            if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7499
0
                            {
7500
0
                                bExcludedValueFound = true;
7501
0
                                ++anCountExcludedValues[i];
7502
0
                                dfTotalWeightExcluded += dfWeight;
7503
0
                                break;
7504
0
                            }
7505
0
                        }
7506
0
                        if (!bExcludedValueFound)
7507
0
                        {
7508
                            // Weighted incremental algorithm mean
7509
                            // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7510
0
                            dfTotalWeightRegular += dfWeight;
7511
0
                            for (int iBand = 0; iBand < poWK->nBands; iBand++)
7512
0
                            {
7513
0
                                adfValueAveraged[iBand] +=
7514
0
                                    (dfWeight / dfTotalWeightRegular) *
7515
0
                                    (adfValueReal[iBand] -
7516
0
                                     adfValueAveraged[iBand]);
7517
0
                            }
7518
0
                        }
7519
0
                    }
7520
0
                }
7521
7522
0
                const double dfTotalWeight = dfTotalWeightInvalid +
7523
0
                                             dfTotalWeightExcluded +
7524
0
                                             dfTotalWeightRegular;
7525
0
                if (dfTotalWeightInvalid > 0 &&
7526
0
                    dfTotalWeightInvalid >=
7527
0
                        dfNodataValuesThreshold * dfTotalWeight)
7528
0
                {
7529
                    // Do nothing. Let bHasFoundDensity to false.
7530
0
                }
7531
0
                else if (dfTotalWeightExcluded > 0 &&
7532
0
                         dfTotalWeightExcluded >=
7533
0
                             dfExcludedValuesThreshold * dfTotalWeight)
7534
0
                {
7535
                    // Find the most represented excluded value tuple
7536
0
                    size_t iExcludedValue = 0;
7537
0
                    int nExcludedValueCount = 0;
7538
0
                    for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7539
0
                         ++i)
7540
0
                    {
7541
0
                        if (anCountExcludedValues[i] > nExcludedValueCount)
7542
0
                        {
7543
0
                            iExcludedValue = i;
7544
0
                            nExcludedValueCount = anCountExcludedValues[i];
7545
0
                        }
7546
0
                    }
7547
7548
0
                    bHasFoundDensity = true;
7549
7550
0
                    for (int iBand = 0; iBand < poWK->nBands; iBand++)
7551
0
                    {
7552
0
                        GWKSetPixelValue(
7553
0
                            poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7554
0
                            poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7555
0
                            0);
7556
0
                    }
7557
0
                }
7558
0
                else if (dfTotalWeightRegular > 0)
7559
0
                {
7560
0
                    bHasFoundDensity = true;
7561
7562
0
                    for (int iBand = 0; iBand < poWK->nBands; iBand++)
7563
0
                    {
7564
0
                        GWKSetPixelValue(poWK, iBand, iDstOffset,
7565
0
                                         /* dfBandDensity = */ 1.0,
7566
0
                                         adfValueAveraged[iBand], 0);
7567
0
                    }
7568
0
                }
7569
7570
                // Skip below loop on bands
7571
0
                bDone = true;
7572
0
            }
7573
7574
            /* ====================================================================
7575
             */
7576
            /*      Loop processing each band. */
7577
            /* ====================================================================
7578
             */
7579
7580
0
            for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7581
0
            {
7582
0
                double dfBandDensity = 0.0;
7583
0
                double dfValueReal = 0.0;
7584
0
                double dfValueImag = 0.0;
7585
0
                double dfValueRealTmp = 0.0;
7586
0
                double dfValueImagTmp = 0.0;
7587
7588
                /* --------------------------------------------------------------------
7589
                 */
7590
                /*      Collect the source value. */
7591
                /* --------------------------------------------------------------------
7592
                 */
7593
7594
                // Loop over source lines and pixels - 3 possible algorithms.
7595
7596
0
                if (poWK->eResample == GRA_Average)
7597
0
                {
7598
0
                    double dfTotalWeight = 0.0;
7599
7600
                    // This code adapted from GDALDownsampleChunk32R_AverageT()
7601
                    // in gcore/overview.cpp.
7602
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7603
0
                    {
7604
0
                        const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7605
0
                        iSrcOffset = iSrcXMin +
7606
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7607
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7608
0
                             iSrcX++, iSrcOffset++)
7609
0
                        {
7610
0
                            if (bWrapOverX)
7611
0
                                iSrcOffset =
7612
0
                                    (iSrcX % nSrcXSize) +
7613
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7614
7615
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7616
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7617
0
                                            iSrcOffset))
7618
0
                            {
7619
0
                                continue;
7620
0
                            }
7621
7622
0
                            if (GWKGetPixelValue(
7623
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7624
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7625
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7626
0
                            {
7627
0
                                const double dfWeight =
7628
0
                                    COMPUTE_WEIGHT(iSrcX, dfWeightY);
7629
0
                                if (dfWeight > 0)
7630
0
                                {
7631
                                    // Weighted incremental algorithm mean
7632
                                    // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7633
0
                                    dfTotalWeight += dfWeight;
7634
0
                                    dfValueReal +=
7635
0
                                        (dfWeight / dfTotalWeight) *
7636
0
                                        (dfValueRealTmp - dfValueReal);
7637
0
                                    if (bIsComplex)
7638
0
                                    {
7639
0
                                        dfValueImag +=
7640
0
                                            (dfWeight / dfTotalWeight) *
7641
0
                                            (dfValueImagTmp - dfValueImag);
7642
0
                                    }
7643
0
                                }
7644
0
                            }
7645
0
                        }
7646
0
                    }
7647
7648
0
                    if (dfTotalWeight > 0)
7649
0
                    {
7650
0
                        if (poWK->bApplyVerticalShift)
7651
0
                        {
7652
0
                            if (!std::isfinite(padfZ[iDstX]))
7653
0
                                continue;
7654
                            // Subtract padfZ[] since the coordinate
7655
                            // transformation is from target to source
7656
0
                            dfValueReal =
7657
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7658
0
                                padfZ[iDstX] *
7659
0
                                    dfMultFactorVerticalShiftPipeline;
7660
0
                        }
7661
7662
0
                        dfBandDensity = 1;
7663
0
                        bHasFoundDensity = true;
7664
0
                    }
7665
0
                }  // GRA_Average.
7666
7667
0
                else if (poWK->eResample == GRA_RMS)
7668
0
                {
7669
0
                    double dfTotalReal = 0.0;
7670
0
                    double dfTotalImag = 0.0;
7671
0
                    double dfTotalWeight = 0.0;
7672
                    // This code adapted from GDALDownsampleChunk32R_AverageT()
7673
                    // in gcore/overview.cpp.
7674
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7675
0
                    {
7676
0
                        const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7677
0
                        iSrcOffset = iSrcXMin +
7678
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7679
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7680
0
                             iSrcX++, iSrcOffset++)
7681
0
                        {
7682
0
                            if (bWrapOverX)
7683
0
                                iSrcOffset =
7684
0
                                    (iSrcX % nSrcXSize) +
7685
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7686
7687
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7688
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7689
0
                                            iSrcOffset))
7690
0
                            {
7691
0
                                continue;
7692
0
                            }
7693
7694
0
                            if (GWKGetPixelValue(
7695
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7696
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7697
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7698
0
                            {
7699
0
                                const double dfWeight =
7700
0
                                    COMPUTE_WEIGHT(iSrcX, dfWeightY);
7701
0
                                dfTotalWeight += dfWeight;
7702
0
                                dfTotalReal +=
7703
0
                                    dfValueRealTmp * dfValueRealTmp * dfWeight;
7704
0
                                if (bIsComplex)
7705
0
                                    dfTotalImag += dfValueImagTmp *
7706
0
                                                   dfValueImagTmp * dfWeight;
7707
0
                            }
7708
0
                        }
7709
0
                    }
7710
7711
0
                    if (dfTotalWeight > 0)
7712
0
                    {
7713
0
                        dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
7714
7715
0
                        if (poWK->bApplyVerticalShift)
7716
0
                        {
7717
0
                            if (!std::isfinite(padfZ[iDstX]))
7718
0
                                continue;
7719
                            // Subtract padfZ[] since the coordinate
7720
                            // transformation is from target to source
7721
0
                            dfValueReal =
7722
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7723
0
                                padfZ[iDstX] *
7724
0
                                    dfMultFactorVerticalShiftPipeline;
7725
0
                        }
7726
7727
0
                        if (bIsComplex)
7728
0
                            dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
7729
7730
0
                        dfBandDensity = 1;
7731
0
                        bHasFoundDensity = true;
7732
0
                    }
7733
0
                }  // GRA_RMS.
7734
7735
0
                else if (poWK->eResample == GRA_Mode)
7736
0
                {
7737
0
                    float fMaxCount = 0.0f;
7738
0
                    int nMode = -1;
7739
0
                    bool bHasSourceValues = false;
7740
7741
0
                    memset(pafCounts, 0, nBins * sizeof(float));
7742
7743
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7744
0
                    {
7745
0
                        const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7746
0
                        iSrcOffset = iSrcXMin +
7747
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7748
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7749
0
                             iSrcX++, iSrcOffset++)
7750
0
                        {
7751
0
                            if (bWrapOverX)
7752
0
                                iSrcOffset =
7753
0
                                    (iSrcX % nSrcXSize) +
7754
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7755
7756
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7757
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7758
0
                                            iSrcOffset))
7759
0
                                continue;
7760
7761
0
                            if (GWKGetPixelValue(
7762
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7763
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7764
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7765
0
                            {
7766
0
                                bHasSourceValues = true;
7767
0
                                const int nVal =
7768
0
                                    static_cast<int>(dfValueRealTmp);
7769
0
                                const int iBin = nVal + nBinsOffset;
7770
0
                                const double dfWeight =
7771
0
                                    COMPUTE_WEIGHT(iSrcX, dfWeightY);
7772
7773
                                // Sum the density.
7774
0
                                pafCounts[iBin] += static_cast<float>(dfWeight);
7775
                                // Is it the most common value so far?
7776
0
                                bool bUpdateMode = pafCounts[iBin] > fMaxCount;
7777
0
                                if (!bUpdateMode &&
7778
0
                                    pafCounts[iBin] == fMaxCount)
7779
0
                                {
7780
0
                                    switch (eTieStrategy)
7781
0
                                    {
7782
0
                                        case GWKTS_First:
7783
0
                                            break;
7784
0
                                        case GWKTS_Min:
7785
0
                                            bUpdateMode = nVal < nMode;
7786
0
                                            break;
7787
0
                                        case GWKTS_Max:
7788
0
                                            bUpdateMode = nVal > nMode;
7789
0
                                            break;
7790
0
                                    }
7791
0
                                }
7792
0
                                if (bUpdateMode)
7793
0
                                {
7794
0
                                    nMode = nVal;
7795
0
                                    fMaxCount = pafCounts[iBin];
7796
0
                                }
7797
0
                            }
7798
0
                        }
7799
0
                    }
7800
7801
0
                    if (bHasSourceValues)
7802
0
                    {
7803
0
                        dfValueReal = nMode;
7804
0
                        dfBandDensity = 1;
7805
0
                        bHasFoundDensity = true;
7806
0
                    }
7807
0
                }  // GRA_Mode.
7808
7809
0
                else if (poWK->eResample == GRA_Max)
7810
0
                {
7811
0
                    bool bFoundValid = false;
7812
0
                    double dfTotalReal = cpl::NumericLimits<double>::lowest();
7813
                    // This code adapted from nAlgo 1 method, GRA_Average.
7814
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7815
0
                    {
7816
0
                        iSrcOffset = iSrcXMin +
7817
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7818
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7819
0
                             iSrcX++, iSrcOffset++)
7820
0
                        {
7821
0
                            if (bWrapOverX)
7822
0
                                iSrcOffset =
7823
0
                                    (iSrcX % nSrcXSize) +
7824
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7825
7826
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7827
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7828
0
                                            iSrcOffset))
7829
0
                            {
7830
0
                                continue;
7831
0
                            }
7832
7833
                            // Returns pixel value if it is not no data.
7834
0
                            if (GWKGetPixelValue(
7835
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7836
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7837
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7838
0
                            {
7839
0
                                bFoundValid = true;
7840
0
                                if (dfTotalReal < dfValueRealTmp)
7841
0
                                {
7842
0
                                    dfTotalReal = dfValueRealTmp;
7843
0
                                }
7844
0
                            }
7845
0
                        }
7846
0
                    }
7847
7848
0
                    if (bFoundValid)
7849
0
                    {
7850
0
                        dfValueReal = dfTotalReal;
7851
7852
0
                        if (poWK->bApplyVerticalShift)
7853
0
                        {
7854
0
                            if (!std::isfinite(padfZ[iDstX]))
7855
0
                                continue;
7856
                            // Subtract padfZ[] since the coordinate
7857
                            // transformation is from target to source
7858
0
                            dfValueReal =
7859
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7860
0
                                padfZ[iDstX] *
7861
0
                                    dfMultFactorVerticalShiftPipeline;
7862
0
                        }
7863
7864
0
                        dfBandDensity = 1;
7865
0
                        bHasFoundDensity = true;
7866
0
                    }
7867
0
                }
7868
7869
0
                else if (poWK->eResample == GRA_Min)
7870
0
                {
7871
0
                    bool bFoundValid = false;
7872
0
                    double dfTotalReal = cpl::NumericLimits<double>::max();
7873
                    // This code adapted from nAlgo 1 method, GRA_Average.
7874
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7875
0
                    {
7876
0
                        iSrcOffset = iSrcXMin +
7877
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7878
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7879
0
                             iSrcX++, iSrcOffset++)
7880
0
                        {
7881
0
                            if (bWrapOverX)
7882
0
                                iSrcOffset =
7883
0
                                    (iSrcX % nSrcXSize) +
7884
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7885
7886
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7887
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7888
0
                                            iSrcOffset))
7889
0
                            {
7890
0
                                continue;
7891
0
                            }
7892
7893
                            // Returns pixel value if it is not no data.
7894
0
                            if (GWKGetPixelValue(
7895
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7896
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7897
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7898
0
                            {
7899
0
                                bFoundValid = true;
7900
0
                                if (dfTotalReal > dfValueRealTmp)
7901
0
                                {
7902
0
                                    dfTotalReal = dfValueRealTmp;
7903
0
                                }
7904
0
                            }
7905
0
                        }
7906
0
                    }
7907
7908
0
                    if (bFoundValid)
7909
0
                    {
7910
0
                        dfValueReal = dfTotalReal;
7911
7912
0
                        if (poWK->bApplyVerticalShift)
7913
0
                        {
7914
0
                            if (!std::isfinite(padfZ[iDstX]))
7915
0
                                continue;
7916
                            // Subtract padfZ[] since the coordinate
7917
                            // transformation is from target to source
7918
0
                            dfValueReal =
7919
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7920
0
                                padfZ[iDstX] *
7921
0
                                    dfMultFactorVerticalShiftPipeline;
7922
0
                        }
7923
7924
0
                        dfBandDensity = 1;
7925
0
                        bHasFoundDensity = true;
7926
0
                    }
7927
0
                }  // GRA_Min.
7928
7929
0
                else
7930
                // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
7931
0
                {
7932
0
                    CPLAssert(quant > 0.0f);
7933
7934
0
                    bool bFoundValid = false;
7935
0
                    std::vector<double> dfRealValuesTmp;
7936
7937
                    // This code adapted from nAlgo 1 method, GRA_Average.
7938
0
                    for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7939
0
                    {
7940
0
                        iSrcOffset = iSrcXMin +
7941
0
                                     static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7942
0
                        for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7943
0
                             iSrcX++, iSrcOffset++)
7944
0
                        {
7945
0
                            if (bWrapOverX)
7946
0
                                iSrcOffset =
7947
0
                                    (iSrcX % nSrcXSize) +
7948
0
                                    static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7949
7950
0
                            if (poWK->panUnifiedSrcValid != nullptr &&
7951
0
                                !CPLMaskGet(poWK->panUnifiedSrcValid,
7952
0
                                            iSrcOffset))
7953
0
                            {
7954
0
                                continue;
7955
0
                            }
7956
7957
                            // Returns pixel value if it is not no data.
7958
0
                            if (GWKGetPixelValue(
7959
0
                                    poWK, iBand, iSrcOffset, &dfBandDensity,
7960
0
                                    &dfValueRealTmp, &dfValueImagTmp) &&
7961
0
                                dfBandDensity > BAND_DENSITY_THRESHOLD)
7962
0
                            {
7963
0
                                bFoundValid = true;
7964
0
                                dfRealValuesTmp.push_back(dfValueRealTmp);
7965
0
                            }
7966
0
                        }
7967
0
                    }
7968
7969
0
                    if (bFoundValid)
7970
0
                    {
7971
0
                        std::sort(dfRealValuesTmp.begin(),
7972
0
                                  dfRealValuesTmp.end());
7973
0
                        int quantIdx = static_cast<int>(
7974
0
                            std::ceil(quant * dfRealValuesTmp.size() - 1));
7975
0
                        dfValueReal = dfRealValuesTmp[quantIdx];
7976
7977
0
                        if (poWK->bApplyVerticalShift)
7978
0
                        {
7979
0
                            if (!std::isfinite(padfZ[iDstX]))
7980
0
                                continue;
7981
                            // Subtract padfZ[] since the coordinate
7982
                            // transformation is from target to source
7983
0
                            dfValueReal =
7984
0
                                dfValueReal * poWK->dfMultFactorVerticalShift -
7985
0
                                padfZ[iDstX] *
7986
0
                                    dfMultFactorVerticalShiftPipeline;
7987
0
                        }
7988
7989
0
                        dfBandDensity = 1;
7990
0
                        bHasFoundDensity = true;
7991
0
                        dfRealValuesTmp.clear();
7992
0
                    }
7993
0
                }  // Quantile.
7994
7995
                /* --------------------------------------------------------------------
7996
                 */
7997
                /*      We have a computed value from the source.  Now apply it
7998
                 * to      */
7999
                /*      the destination pixel. */
8000
                /* --------------------------------------------------------------------
8001
                 */
8002
0
                if (bHasFoundDensity)
8003
0
                {
8004
                    // TODO: Should we compute dfBandDensity in fct of
8005
                    // nCount/nCount2, or use as a threshold to set the dest
8006
                    // value?
8007
                    // dfBandDensity = (float) nCount / nCount2;
8008
                    // if( (float) nCount / nCount2 > 0.1 )
8009
                    // or fix gdalwarp crop_to_cutline to crop partially
8010
                    // overlapping pixels.
8011
0
                    GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8012
0
                                     dfValueReal, dfValueImag);
8013
0
                }
8014
0
            }
8015
8016
0
            if (!bHasFoundDensity)
8017
0
                continue;
8018
8019
            /* --------------------------------------------------------------------
8020
             */
8021
            /*      Update destination density/validity masks. */
8022
            /* --------------------------------------------------------------------
8023
             */
8024
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity);
8025
8026
0
            if (poWK->panDstValid != nullptr)
8027
0
            {
8028
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
8029
0
            }
8030
0
        } /* Next iDstX */
8031
8032
        /* --------------------------------------------------------------------
8033
         */
8034
        /*      Report progress to the user, and optionally cancel out. */
8035
        /* --------------------------------------------------------------------
8036
         */
8037
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8038
0
            break;
8039
0
    }
8040
8041
    /* -------------------------------------------------------------------- */
8042
    /*      Cleanup and return.                                             */
8043
    /* -------------------------------------------------------------------- */
8044
0
    CPLFree(padfX);
8045
0
    CPLFree(padfY);
8046
0
    CPLFree(padfZ);
8047
0
    CPLFree(padfX2);
8048
0
    CPLFree(padfY2);
8049
0
    CPLFree(padfZ2);
8050
0
    CPLFree(pabSuccess);
8051
0
    CPLFree(pabSuccess2);
8052
0
    VSIFree(pafCounts);
8053
0
}
8054
8055
/************************************************************************/
8056
/*                         getOrientation()                             */
8057
/************************************************************************/
8058
8059
typedef std::pair<double, double> XYPair;

// Classifies the turn made when walking p1 -> p2 -> p3.
//
// Each XYPair stores x in .first and y in .second. The sign of the cross
// product of the two consecutive edge vectors decides the result:
//   1  if (p1,p2,p3) is clockwise oriented,
//  -1  if it is counter-clockwise oriented,
//   0  if the three points are colinear (|cross product| below 1e-20).
static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
{
    // Edge vector p1 -> p2.
    const double dfDX12 = p2.first - p1.first;
    const double dfDY12 = p2.second - p1.second;
    // Edge vector p2 -> p3.
    const double dfDX23 = p3.first - p2.first;
    const double dfDY23 = p3.second - p2.second;

    const double dfCross = dfDY12 * dfDX23 - dfDX12 * dfDY23;

    // Absolute tolerance under which the points are treated as colinear.
    constexpr double COLINEARITY_TOLERANCE = 1e-20;
    if (std::abs(dfCross) < COLINEARITY_TOLERANCE)
        return 0;

    return dfCross > 0 ? 1 : -1;
}
8080
8081
/************************************************************************/
8082
/*                          isConvex()                                  */
8083
/************************************************************************/
8084
8085
typedef std::vector<XYPair> XYPoly;
8086
8087
// poly must be closed
8088
static bool isConvex(const XYPoly &poly)
8089
0
{
8090
0
    const size_t n = poly.size();
8091
0
    size_t i = 0;
8092
0
    int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8093
0
    ++i;
8094
0
    for (; i < n - 2; ++i)
8095
0
    {
8096
0
        const int orientation =
8097
0
            getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8098
0
        if (orientation != 0)
8099
0
        {
8100
0
            if (last_orientation == 0)
8101
0
                last_orientation = orientation;
8102
0
            else if (orientation != last_orientation)
8103
0
                return false;
8104
0
        }
8105
0
    }
8106
0
    return true;
8107
0
}
8108
8109
/************************************************************************/
8110
/*                     pointIntersectsConvexPoly()                      */
8111
/************************************************************************/
8112
8113
// Returns whether xy intersects poly, that must be closed and convex.
8114
static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8115
0
{
8116
0
    const size_t n = poly.size();
8117
0
    double dx1 = xy.first - poly[0].first;
8118
0
    double dy1 = xy.second - poly[0].second;
8119
0
    double dx2 = poly[1].first - poly[0].first;
8120
0
    double dy2 = poly[1].second - poly[0].second;
8121
0
    double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8122
8123
    // Check if the point remains on the same side (left/right) of all edges
8124
0
    for (size_t i = 2; i < n; i++)
8125
0
    {
8126
0
        dx1 = xy.first - poly[i - 1].first;
8127
0
        dy1 = xy.second - poly[i - 1].second;
8128
8129
0
        dx2 = poly[i].first - poly[i - 1].first;
8130
0
        dy2 = poly[i].second - poly[i - 1].second;
8131
8132
0
        double crossProduct = dx1 * dy2 - dx2 * dy1;
8133
0
        if (std::abs(prevCrossProduct) < 1e-20)
8134
0
            prevCrossProduct = crossProduct;
8135
0
        else if (prevCrossProduct * crossProduct < 0)
8136
0
            return false;
8137
0
    }
8138
8139
0
    return true;
8140
0
}
8141
8142
/************************************************************************/
8143
/*                     getIntersection()                                */
8144
/************************************************************************/
8145
8146
/* Returns intersection of [p1,p2] with [p3,p4], if
8147
 * it is a single point, and the 2 segments are not colinear.
8148
 */
8149
static bool getIntersection(const XYPair &p1, const XYPair &p2,
8150
                            const XYPair &p3, const XYPair &p4, XYPair &xy)
8151
0
{
8152
0
    const double x1 = p1.first;
8153
0
    const double y1 = p1.second;
8154
0
    const double x2 = p2.first;
8155
0
    const double y2 = p2.second;
8156
0
    const double x3 = p3.first;
8157
0
    const double y3 = p3.second;
8158
0
    const double x4 = p4.first;
8159
0
    const double y4 = p4.second;
8160
0
    const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8161
0
    const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8162
0
    if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8163
0
        return false;
8164
8165
0
    const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8166
0
    if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8167
0
        return false;
8168
8169
0
    const double t = t_num / denom;
8170
0
    xy.first = x1 + t * (x2 - x1);
8171
0
    xy.second = y1 + t * (y2 - y1);
8172
0
    return true;
8173
0
}
8174
8175
/************************************************************************/
8176
/*                     getConvexPolyIntersection()                      */
8177
/************************************************************************/
8178
8179
// poly1 and poly2 must be closed and convex.
8180
// The returned intersection will not necessary be closed.
8181
static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8182
                                      XYPoly &intersection)
8183
0
{
8184
0
    intersection.clear();
8185
8186
    // Add all points of poly1 inside poly2
8187
0
    for (size_t i = 0; i < poly1.size() - 1; ++i)
8188
0
    {
8189
0
        if (pointIntersectsConvexPoly(poly1[i], poly2))
8190
0
            intersection.push_back(poly1[i]);
8191
0
    }
8192
0
    if (intersection.size() == poly1.size() - 1)
8193
0
    {
8194
        // poly1 is inside poly2
8195
0
        return;
8196
0
    }
8197
8198
    // Add all points of poly2 inside poly1
8199
0
    for (size_t i = 0; i < poly2.size() - 1; ++i)
8200
0
    {
8201
0
        if (pointIntersectsConvexPoly(poly2[i], poly1))
8202
0
            intersection.push_back(poly2[i]);
8203
0
    }
8204
8205
    // Compute the intersection of all edges of both polygons
8206
0
    XYPair xy;
8207
0
    for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8208
0
    {
8209
0
        for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8210
0
        {
8211
0
            if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8212
0
                                poly2[i2 + 1], xy))
8213
0
            {
8214
0
                intersection.push_back(xy);
8215
0
            }
8216
0
        }
8217
0
    }
8218
8219
0
    if (intersection.empty())
8220
0
        return;
8221
8222
    // Find lowest-left point in intersection set
8223
0
    double lowest_x = cpl::NumericLimits<double>::max();
8224
0
    double lowest_y = cpl::NumericLimits<double>::max();
8225
0
    for (const auto &pair : intersection)
8226
0
    {
8227
0
        const double x = pair.first;
8228
0
        const double y = pair.second;
8229
0
        if (y < lowest_y || (y == lowest_y && x < lowest_x))
8230
0
        {
8231
0
            lowest_x = x;
8232
0
            lowest_y = y;
8233
0
        }
8234
0
    }
8235
8236
0
    const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8237
0
    {
8238
0
        const double p1x_diff = p1.first - lowest_x;
8239
0
        const double p1y_diff = p1.second - lowest_y;
8240
0
        const double p2x_diff = p2.first - lowest_x;
8241
0
        const double p2y_diff = p2.second - lowest_y;
8242
0
        if (p2y_diff == 0.0 && p1y_diff == 0.0)
8243
0
        {
8244
0
            if (p1x_diff >= 0)
8245
0
            {
8246
0
                if (p2x_diff >= 0)
8247
0
                    return p1.first < p2.first;
8248
0
                return true;
8249
0
            }
8250
0
            else
8251
0
            {
8252
0
                if (p2x_diff >= 0)
8253
0
                    return false;
8254
0
                return p1.first < p2.first;
8255
0
            }
8256
0
        }
8257
8258
0
        if (p2x_diff == 0.0 && p1x_diff == 0.0)
8259
0
            return p1.second < p2.second;
8260
8261
0
        double tan_p1;
8262
0
        if (p1x_diff == 0.0)
8263
0
            tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8264
0
        else
8265
0
            tan_p1 = p1y_diff / p1x_diff;
8266
8267
0
        double tan_p2;
8268
0
        if (p2x_diff == 0.0)
8269
0
            tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8270
0
        else
8271
0
            tan_p2 = p2y_diff / p2x_diff;
8272
8273
0
        if (tan_p1 >= 0)
8274
0
        {
8275
0
            if (tan_p2 >= 0)
8276
0
                return tan_p1 < tan_p2;
8277
0
            else
8278
0
                return true;
8279
0
        }
8280
0
        else
8281
0
        {
8282
0
            if (tan_p2 >= 0)
8283
0
                return false;
8284
0
            else
8285
0
                return tan_p1 < tan_p2;
8286
0
        }
8287
0
    };
8288
8289
    // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8290
    // hull
8291
0
    std::sort(intersection.begin(), intersection.end(), sortFunc);
8292
8293
    // Remove duplicated points
8294
0
    size_t j = 1;
8295
0
    for (size_t i = 1; i < intersection.size(); ++i)
8296
0
    {
8297
0
        if (intersection[i] != intersection[i - 1])
8298
0
        {
8299
0
            if (j < i)
8300
0
                intersection[j] = intersection[i];
8301
0
            ++j;
8302
0
        }
8303
0
    }
8304
0
    intersection.resize(j);
8305
0
}
8306
8307
/************************************************************************/
8308
/*                            getArea()                                 */
8309
/************************************************************************/
8310
8311
// poly may or may not be closed.
8312
static double getArea(const XYPoly &poly)
8313
0
{
8314
    // CPLAssert(poly.size() >= 2);
8315
0
    const size_t nPointCount = poly.size();
8316
0
    double dfAreaSum =
8317
0
        poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
8318
8319
0
    for (size_t i = 1; i < nPointCount - 1; i++)
8320
0
    {
8321
0
        dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
8322
0
    }
8323
8324
0
    dfAreaSum += poly[nPointCount - 1].first *
8325
0
                 (poly[0].second - poly[nPointCount - 2].second);
8326
8327
0
    return 0.5 * std::fabs(dfAreaSum);
8328
0
}
8329
8330
/************************************************************************/
8331
/*                           GWKSumPreserving()                         */
8332
/************************************************************************/
8333
8334
static void GWKSumPreservingThread(void *pData);
8335
8336
// Hands GWKSumPreservingThread over to GWKRun() under the name
// "GWKSumPreserving" for the given warp kernel, and returns the status
// reported by GWKRun().
static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
{
    const CPLErr eErr =
        GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
    return eErr;
}
8340
8341
static void GWKSumPreservingThread(void *pData)
8342
0
{
8343
0
    GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8344
0
    GDALWarpKernel *poWK = psJob->poWK;
8345
0
    const int iYMin = psJob->iYMin;
8346
0
    const int iYMax = psJob->iYMax;
8347
0
    const bool bIsAffineNoRotation =
8348
0
        GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8349
0
                                        poWK->pTransformerArg) &&
8350
        // for debug/testing purposes
8351
0
        CPLTestBool(
8352
0
            CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8353
8354
0
    const int nDstXSize = poWK->nDstXSize;
8355
0
    const int nSrcXSize = poWK->nSrcXSize;
8356
0
    const int nSrcYSize = poWK->nSrcYSize;
8357
8358
0
    std::vector<double> adfX0(nSrcXSize + 1);
8359
0
    std::vector<double> adfY0(nSrcXSize + 1);
8360
0
    std::vector<double> adfZ0(nSrcXSize + 1);
8361
0
    std::vector<double> adfX1(nSrcXSize + 1);
8362
0
    std::vector<double> adfY1(nSrcXSize + 1);
8363
0
    std::vector<double> adfZ1(nSrcXSize + 1);
8364
0
    std::vector<int> abSuccess0(nSrcXSize + 1);
8365
0
    std::vector<int> abSuccess1(nSrcXSize + 1);
8366
8367
0
    CPLRectObj sGlobalBounds;
8368
0
    sGlobalBounds.minx = -2 * poWK->dfXScale;
8369
0
    sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8370
0
    sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8371
0
    sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8372
0
    CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8373
8374
0
    struct SourcePixel
8375
0
    {
8376
0
        int iSrcX;
8377
0
        int iSrcY;
8378
8379
        // Coordinates of source pixel in target pixel coordinates
8380
0
        double dfDstX0;
8381
0
        double dfDstY0;
8382
0
        double dfDstX1;
8383
0
        double dfDstY1;
8384
0
        double dfDstX2;
8385
0
        double dfDstY2;
8386
0
        double dfDstX3;
8387
0
        double dfDstY3;
8388
8389
        // Source pixel total area (might be larger than the one described
8390
        // by above coordinates, if the pixel was crossing the antimeridian
8391
        // and split)
8392
0
        double dfArea;
8393
0
    };
8394
8395
0
    std::vector<SourcePixel> sourcePixels;
8396
8397
0
    XYPoly discontinuityLeft(5);
8398
0
    XYPoly discontinuityRight(5);
8399
8400
    /* ==================================================================== */
8401
    /*      First pass: transform the 4 corners of each potential           */
8402
    /*      contributing source pixel to target pixel coordinates.          */
8403
    /* ==================================================================== */
8404
8405
    // Special case for top line
8406
0
    {
8407
0
        int iY = 0;
8408
0
        for (int iX = 0; iX <= nSrcXSize; ++iX)
8409
0
        {
8410
0
            adfX1[iX] = iX + poWK->nSrcXOff;
8411
0
            adfY1[iX] = iY + poWK->nSrcYOff;
8412
0
            adfZ1[iX] = 0;
8413
0
        }
8414
8415
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8416
0
                             adfX1.data(), adfY1.data(), adfZ1.data(),
8417
0
                             abSuccess1.data());
8418
8419
0
        for (int iX = 0; iX <= nSrcXSize; ++iX)
8420
0
        {
8421
0
            if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8422
0
                abSuccess1[iX] = FALSE;
8423
0
            else
8424
0
            {
8425
0
                adfX1[iX] -= poWK->nDstXOff;
8426
0
                adfY1[iX] -= poWK->nDstYOff;
8427
0
            }
8428
0
        }
8429
0
    }
8430
8431
0
    const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8432
0
    {
8433
0
        return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8434
0
                       dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8435
0
                   ? 1
8436
0
                   : -1;
8437
0
    };
8438
8439
0
    const auto FindDiscontinuity =
8440
0
        [poWK, psJob, getInsideXSign](
8441
0
            double dfXLeft, double dfXRight, double dfY,
8442
0
            int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8443
0
            double &dfXMidReprojectedRight, double &dfYMidReprojected)
8444
0
    {
8445
0
        for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8446
0
        {
8447
0
            double dfXMid = (dfXLeft + dfXRight) / 2;
8448
0
            double dfXMidReprojected = dfXMid;
8449
0
            dfYMidReprojected = dfY;
8450
0
            double dfZ = 0;
8451
0
            int nSuccess = 0;
8452
0
            poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8453
0
                                 &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8454
0
                                 &nSuccess);
8455
0
            if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8456
0
            {
8457
0
                dfXRight = dfXMid;
8458
0
                dfXMidReprojectedRight = dfXMidReprojected;
8459
0
            }
8460
0
            else
8461
0
            {
8462
0
                dfXLeft = dfXMid;
8463
0
                dfXMidReprojectedLeft = dfXMidReprojected;
8464
0
            }
8465
0
        }
8466
0
    };
8467
8468
0
    for (int iY = 0; iY < nSrcYSize; ++iY)
8469
0
    {
8470
0
        std::swap(adfX0, adfX1);
8471
0
        std::swap(adfY0, adfY1);
8472
0
        std::swap(adfZ0, adfZ1);
8473
0
        std::swap(abSuccess0, abSuccess1);
8474
8475
0
        for (int iX = 0; iX <= nSrcXSize; ++iX)
8476
0
        {
8477
0
            adfX1[iX] = iX + poWK->nSrcXOff;
8478
0
            adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8479
0
            adfZ1[iX] = 0;
8480
0
        }
8481
8482
0
        poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8483
0
                             adfX1.data(), adfY1.data(), adfZ1.data(),
8484
0
                             abSuccess1.data());
8485
8486
0
        for (int iX = 0; iX <= nSrcXSize; ++iX)
8487
0
        {
8488
0
            if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8489
0
                abSuccess1[iX] = FALSE;
8490
0
            else
8491
0
            {
8492
0
                adfX1[iX] -= poWK->nDstXOff;
8493
0
                adfY1[iX] -= poWK->nDstYOff;
8494
0
            }
8495
0
        }
8496
8497
0
        for (int iX = 0; iX < nSrcXSize; ++iX)
8498
0
        {
8499
0
            if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8500
0
                abSuccess1[iX + 1])
8501
0
            {
8502
                /* --------------------------------------------------------------------
8503
                 */
8504
                /*      Do not try to apply transparent source pixels to the
8505
                 * destination.*/
8506
                /* --------------------------------------------------------------------
8507
                 */
8508
0
                const auto iSrcOffset =
8509
0
                    iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8510
0
                if (poWK->panUnifiedSrcValid != nullptr &&
8511
0
                    !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8512
0
                {
8513
0
                    continue;
8514
0
                }
8515
8516
0
                if (poWK->pafUnifiedSrcDensity != nullptr)
8517
0
                {
8518
0
                    if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8519
0
                        SRC_DENSITY_THRESHOLD_FLOAT)
8520
0
                        continue;
8521
0
                }
8522
8523
0
                SourcePixel sp;
8524
0
                sp.dfArea = 0;
8525
0
                sp.dfDstX0 = adfX0[iX];
8526
0
                sp.dfDstY0 = adfY0[iX];
8527
0
                sp.dfDstX1 = adfX0[iX + 1];
8528
0
                sp.dfDstY1 = adfY0[iX + 1];
8529
0
                sp.dfDstX2 = adfX1[iX + 1];
8530
0
                sp.dfDstY2 = adfY1[iX + 1];
8531
0
                sp.dfDstX3 = adfX1[iX];
8532
0
                sp.dfDstY3 = adfY1[iX];
8533
8534
                // Detect pixel that likely cross the anti-meridian and
8535
                // introduce a discontinuity when reprojected.
8536
8537
0
                if (getInsideXSign(adfX0[iX]) !=
8538
0
                        getInsideXSign(adfX0[iX + 1]) &&
8539
0
                    getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8540
0
                    getInsideXSign(adfX0[iX + 1]) ==
8541
0
                        getInsideXSign(adfX1[iX + 1]) &&
8542
0
                    (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8543
0
                        0)
8544
0
                {
8545
0
                    double dfXMidReprojectedLeftTop = 0;
8546
0
                    double dfXMidReprojectedRightTop = 0;
8547
0
                    double dfYMidReprojectedTop = 0;
8548
0
                    FindDiscontinuity(
8549
0
                        iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8550
0
                        iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8551
0
                        dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8552
0
                        dfYMidReprojectedTop);
8553
0
                    double dfXMidReprojectedLeftBottom = 0;
8554
0
                    double dfXMidReprojectedRightBottom = 0;
8555
0
                    double dfYMidReprojectedBottom = 0;
8556
0
                    FindDiscontinuity(
8557
0
                        iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8558
0
                        iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8559
0
                        dfXMidReprojectedLeftBottom,
8560
0
                        dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8561
8562
0
                    discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8563
0
                    discontinuityLeft[1] =
8564
0
                        XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8565
0
                    discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8566
0
                                                  dfYMidReprojectedBottom);
8567
0
                    discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8568
0
                    discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8569
8570
0
                    discontinuityRight[0] =
8571
0
                        XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8572
0
                    discontinuityRight[1] =
8573
0
                        XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8574
0
                    discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8575
0
                                                   dfYMidReprojectedBottom);
8576
0
                    discontinuityRight[3] =
8577
0
                        XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8578
0
                    discontinuityRight[4] =
8579
0
                        XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8580
8581
0
                    sp.dfArea = getArea(discontinuityLeft) +
8582
0
                                getArea(discontinuityRight);
8583
0
                    if (getInsideXSign(adfX0[iX]) >= 1)
8584
0
                    {
8585
0
                        sp.dfDstX1 = dfXMidReprojectedLeftTop;
8586
0
                        sp.dfDstY1 = dfYMidReprojectedTop;
8587
0
                        sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8588
0
                        sp.dfDstY2 = dfYMidReprojectedBottom;
8589
0
                    }
8590
0
                    else
8591
0
                    {
8592
0
                        sp.dfDstX0 = dfXMidReprojectedRightTop;
8593
0
                        sp.dfDstY0 = dfYMidReprojectedTop;
8594
0
                        sp.dfDstX3 = dfXMidReprojectedRightBottom;
8595
0
                        sp.dfDstY3 = dfYMidReprojectedBottom;
8596
0
                    }
8597
0
                }
8598
8599
                // Bounding box of source pixel (expressed in target pixel
8600
                // coordinates)
8601
0
                CPLRectObj sRect;
8602
0
                sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8603
0
                                      std::min(sp.dfDstX2, sp.dfDstX3));
8604
0
                sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8605
0
                                      std::min(sp.dfDstY2, sp.dfDstY3));
8606
0
                sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8607
0
                                      std::max(sp.dfDstX2, sp.dfDstX3));
8608
0
                sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8609
0
                                      std::max(sp.dfDstY2, sp.dfDstY3));
8610
0
                if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8611
0
                      sRect.miny < iYMax && sRect.maxy > iYMin))
8612
0
                {
8613
0
                    continue;
8614
0
                }
8615
8616
0
                sp.iSrcX = iX;
8617
0
                sp.iSrcY = iY;
8618
8619
0
                if (!bIsAffineNoRotation)
8620
0
                {
8621
                    // Check polygon validity (no self-crossing)
8622
0
                    XYPair xy;
8623
0
                    if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8624
0
                                        XYPair(sp.dfDstX1, sp.dfDstY1),
8625
0
                                        XYPair(sp.dfDstX2, sp.dfDstY2),
8626
0
                                        XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8627
0
                        getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8628
0
                                        XYPair(sp.dfDstX2, sp.dfDstY2),
8629
0
                                        XYPair(sp.dfDstX0, sp.dfDstY0),
8630
0
                                        XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8631
0
                    {
8632
0
                        continue;
8633
0
                    }
8634
0
                }
8635
8636
0
                CPLQuadTreeInsertWithBounds(
8637
0
                    hQuadTree,
8638
0
                    reinterpret_cast<void *>(
8639
0
                        static_cast<uintptr_t>(sourcePixels.size())),
8640
0
                    &sRect);
8641
8642
0
                sourcePixels.push_back(sp);
8643
0
            }
8644
0
        }
8645
0
    }
8646
8647
0
    std::vector<double> adfRealValue(poWK->nBands);
8648
0
    std::vector<double> adfImagValue(poWK->nBands);
8649
0
    std::vector<double> adfBandDensity(poWK->nBands);
8650
0
    std::vector<double> adfWeight(poWK->nBands);
8651
8652
#ifdef CHECK_SUM_WITH_GEOS
8653
    auto hGEOSContext = OGRGeometry::createGEOSContext();
8654
    auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8655
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8656
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8657
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8658
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8659
    GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8660
    auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8661
    auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8662
8663
    auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8664
    auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8665
    auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8666
#endif
8667
8668
0
    const XYPoly xy1{
8669
0
        {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8670
0
    XYPoly xy2(5);
8671
0
    XYPoly xy2_triangle(4);
8672
0
    XYPoly intersection;
8673
8674
    /* ==================================================================== */
8675
    /*      Loop over output lines.                                         */
8676
    /* ==================================================================== */
8677
0
    for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
8678
0
    {
8679
0
        CPLRectObj sRect;
8680
0
        sRect.miny = iDstY;
8681
0
        sRect.maxy = iDstY + 1;
8682
8683
        /* ====================================================================
8684
         */
8685
        /*      Loop over pixels in output scanline. */
8686
        /* ====================================================================
8687
         */
8688
0
        for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
8689
0
        {
8690
0
            sRect.minx = iDstX;
8691
0
            sRect.maxx = iDstX + 1;
8692
0
            int nSourcePixels = 0;
8693
0
            void **pahSourcePixel =
8694
0
                CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
8695
0
            if (nSourcePixels == 0)
8696
0
            {
8697
0
                CPLFree(pahSourcePixel);
8698
0
                continue;
8699
0
            }
8700
8701
0
            std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
8702
0
            std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
8703
0
            std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
8704
0
            std::fill(adfWeight.begin(), adfWeight.end(), 0);
8705
0
            double dfDensity = 0;
8706
            // Just above zero to please Coverity Scan
8707
0
            double dfTotalWeight = std::numeric_limits<double>::min();
8708
8709
            /* ====================================================================
8710
             */
8711
            /*          Iterate over each contributing source pixel to add its
8712
             */
8713
            /*          value weighed by the ratio of the area of its
8714
             * intersection  */
8715
            /*          with the target pixel divided by the area of the source
8716
             */
8717
            /*          pixel. */
8718
            /* ====================================================================
8719
             */
8720
0
            for (int i = 0; i < nSourcePixels; ++i)
8721
0
            {
8722
0
                const int iSourcePixel = static_cast<int>(
8723
0
                    reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
8724
0
                auto &sp = sourcePixels[iSourcePixel];
8725
8726
0
                double dfWeight = 0.0;
8727
0
                if (bIsAffineNoRotation)
8728
0
                {
8729
                    // Optimization since the source pixel is a rectangle in
8730
                    // target pixel coordinates
8731
0
                    double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
8732
0
                    double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
8733
0
                    double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
8734
0
                    double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
8735
0
                    double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
8736
0
                    double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
8737
0
                    double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
8738
0
                    double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
8739
0
                    dfWeight =
8740
0
                        ((dfIntersMaxX - dfIntersMinX) *
8741
0
                         (dfIntersMaxY - dfIntersMinY)) /
8742
0
                        ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
8743
0
                }
8744
0
                else
8745
0
                {
8746
                    // Compute the polygon of the source pixel in target pixel
8747
                    // coordinates, and shifted to the target pixel (unit square
8748
                    // coordinates)
8749
8750
0
                    xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8751
0
                    xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
8752
0
                    xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
8753
0
                    xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
8754
0
                    xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8755
8756
0
                    if (isConvex(xy2))
8757
0
                    {
8758
0
                        getConvexPolyIntersection(xy1, xy2, intersection);
8759
0
                        if (intersection.size() >= 3)
8760
0
                        {
8761
0
                            dfWeight = getArea(intersection);
8762
0
                        }
8763
0
                    }
8764
0
                    else
8765
0
                    {
8766
                        // Split xy2 into 2 triangles.
8767
0
                        xy2_triangle[0] = xy2[0];
8768
0
                        xy2_triangle[1] = xy2[1];
8769
0
                        xy2_triangle[2] = xy2[2];
8770
0
                        xy2_triangle[3] = xy2[0];
8771
0
                        getConvexPolyIntersection(xy1, xy2_triangle,
8772
0
                                                  intersection);
8773
0
                        if (intersection.size() >= 3)
8774
0
                        {
8775
0
                            dfWeight = getArea(intersection);
8776
0
                        }
8777
8778
0
                        xy2_triangle[1] = xy2[2];
8779
0
                        xy2_triangle[2] = xy2[3];
8780
0
                        getConvexPolyIntersection(xy1, xy2_triangle,
8781
0
                                                  intersection);
8782
0
                        if (intersection.size() >= 3)
8783
0
                        {
8784
0
                            dfWeight += getArea(intersection);
8785
0
                        }
8786
0
                    }
8787
0
                    if (dfWeight > 0.0)
8788
0
                    {
8789
0
                        if (sp.dfArea == 0)
8790
0
                            sp.dfArea = getArea(xy2);
8791
0
                        dfWeight /= sp.dfArea;
8792
0
                    }
8793
8794
#ifdef CHECK_SUM_WITH_GEOS
8795
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
8796
                                         sp.dfDstX0 - iDstX,
8797
                                         sp.dfDstY0 - iDstY);
8798
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
8799
                                         sp.dfDstX1 - iDstX,
8800
                                         sp.dfDstY1 - iDstY);
8801
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
8802
                                         sp.dfDstX2 - iDstX,
8803
                                         sp.dfDstY2 - iDstY);
8804
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
8805
                                         sp.dfDstX3 - iDstX,
8806
                                         sp.dfDstY3 - iDstY);
8807
                    GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
8808
                                         sp.dfDstX0 - iDstX,
8809
                                         sp.dfDstY0 - iDstY);
8810
8811
                    double dfWeightGEOS = 0.0;
8812
                    auto hIntersection =
8813
                        GEOSIntersection_r(hGEOSContext, hP1, hP2);
8814
                    if (hIntersection)
8815
                    {
8816
                        double dfIntersArea = 0.0;
8817
                        if (GEOSArea_r(hGEOSContext, hIntersection,
8818
                                       &dfIntersArea) &&
8819
                            dfIntersArea > 0)
8820
                        {
8821
                            double dfSourceArea = 0.0;
8822
                            if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
8823
                            {
8824
                                dfWeightGEOS = dfIntersArea / dfSourceArea;
8825
                            }
8826
                        }
8827
                        GEOSGeom_destroy_r(hGEOSContext, hIntersection);
8828
                    }
8829
                    if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
8830
                    {
8831
                        /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
8832
                                        dfWeight, dfWeightGEOS);
8833
                        printf("xy2: ");  // ok
8834
                        for (const auto &xy : xy2)
8835
                            printf("[%f, %f], ", xy.first, xy.second);  // ok
8836
                        printf("\n");                                   // ok
8837
                        printf("intersection: ");                       // ok
8838
                        for (const auto &xy : intersection)
8839
                            printf("[%f, %f], ", xy.first, xy.second);  // ok
8840
                        printf("\n");                                   // ok
8841
                    }
8842
#endif
8843
0
                }
8844
0
                if (dfWeight > 0.0)
8845
0
                {
8846
0
                    const GPtrDiff_t iSrcOffset =
8847
0
                        sp.iSrcX +
8848
0
                        static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
8849
0
                    dfTotalWeight += dfWeight;
8850
8851
0
                    if (poWK->pafUnifiedSrcDensity != nullptr)
8852
0
                    {
8853
0
                        dfDensity +=
8854
0
                            dfWeight *
8855
0
                            double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
8856
0
                    }
8857
0
                    else
8858
0
                    {
8859
0
                        dfDensity += dfWeight;
8860
0
                    }
8861
8862
0
                    for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8863
0
                    {
8864
                        // Returns pixel value if it is not no data.
8865
0
                        double dfBandDensity;
8866
0
                        double dfRealValue;
8867
0
                        double dfImagValue;
8868
0
                        if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
8869
0
                                               &dfBandDensity, &dfRealValue,
8870
0
                                               &dfImagValue) &&
8871
0
                              dfBandDensity > BAND_DENSITY_THRESHOLD))
8872
0
                        {
8873
0
                            continue;
8874
0
                        }
8875
8876
0
                        adfRealValue[iBand] += dfRealValue * dfWeight;
8877
0
                        adfImagValue[iBand] += dfImagValue * dfWeight;
8878
0
                        adfBandDensity[iBand] += dfBandDensity * dfWeight;
8879
0
                        adfWeight[iBand] += dfWeight;
8880
0
                    }
8881
0
                }
8882
0
            }
8883
8884
0
            CPLFree(pahSourcePixel);
8885
8886
            /* --------------------------------------------------------------------
8887
             */
8888
            /*          Update destination pixel value. */
8889
            /* --------------------------------------------------------------------
8890
             */
8891
0
            bool bHasFoundDensity = false;
8892
0
            const GPtrDiff_t iDstOffset =
8893
0
                iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
8894
0
            for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8895
0
            {
8896
0
                if (adfWeight[iBand] > 0)
8897
0
                {
8898
0
                    const double dfBandDensity =
8899
0
                        adfBandDensity[iBand] / adfWeight[iBand];
8900
0
                    if (dfBandDensity > BAND_DENSITY_THRESHOLD)
8901
0
                    {
8902
0
                        bHasFoundDensity = true;
8903
0
                        GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8904
0
                                         adfRealValue[iBand],
8905
0
                                         adfImagValue[iBand]);
8906
0
                    }
8907
0
                }
8908
0
            }
8909
8910
0
            if (!bHasFoundDensity)
8911
0
                continue;
8912
8913
            /* --------------------------------------------------------------------
8914
             */
8915
            /*          Update destination density/validity masks. */
8916
            /* --------------------------------------------------------------------
8917
             */
8918
0
            GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
8919
8920
0
            if (poWK->panDstValid != nullptr)
8921
0
            {
8922
0
                CPLMaskSet(poWK->panDstValid, iDstOffset);
8923
0
            }
8924
0
        }
8925
8926
        /* --------------------------------------------------------------------
8927
         */
8928
        /*      Report progress to the user, and optionally cancel out. */
8929
        /* --------------------------------------------------------------------
8930
         */
8931
0
        if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8932
0
            break;
8933
0
    }
8934
8935
#ifdef CHECK_SUM_WITH_GEOS
8936
    GEOSGeom_destroy_r(hGEOSContext, hP1);
8937
    GEOSGeom_destroy_r(hGEOSContext, hP2);
8938
    OGRGeometry::freeGEOSContext(hGEOSContext);
8939
#endif
8940
0
    CPLQuadTreeDestroy(hQuadTree);
8941
0
}