/src/gdal/gcore/overview.cpp
Line | Count | Source |
1 | | |
2 | | /****************************************************************************** |
3 | | * |
4 | | * Project: GDAL Core |
5 | | * Purpose: Helper code to implement overview support in different drivers. |
6 | | * Author: Frank Warmerdam, warmerdam@pobox.com |
7 | | * |
8 | | ****************************************************************************** |
9 | | * Copyright (c) 2000, Frank Warmerdam |
10 | | * Copyright (c) 2007-2010, Even Rouault <even dot rouault at spatialys.com> |
11 | | * |
12 | | * SPDX-License-Identifier: MIT |
13 | | ****************************************************************************/ |
14 | | |
15 | | #include "cpl_port.h" |
16 | | #include "gdal_priv.h" |
17 | | |
18 | | #include <cmath> |
19 | | #include <cstddef> |
20 | | #include <cstdlib> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <complex> |
24 | | #include <condition_variable> |
25 | | #include <limits> |
26 | | #include <list> |
27 | | #include <memory> |
28 | | #include <mutex> |
29 | | #include <vector> |
30 | | |
31 | | #include "cpl_conv.h" |
32 | | #include "cpl_error.h" |
33 | | #include "cpl_float.h" |
34 | | #include "cpl_progress.h" |
35 | | #include "cpl_vsi.h" |
36 | | #include "gdal.h" |
37 | | #include "gdal_thread_pool.h" |
38 | | #include "gdalwarper.h" |
39 | | #include "gdal_vrt.h" |
40 | | #include "vrtdataset.h" |
41 | | |
42 | | #ifdef USE_NEON_OPTIMIZATIONS |
43 | | #include "include_sse2neon.h" |
44 | | |
45 | | #if (!defined(__aarch64__) && !defined(_M_ARM64)) |
46 | | #define ARM_V7 |
47 | | #endif |
48 | | |
49 | | #define USE_SSE2 |
50 | | |
51 | | #include "gdalsse_priv.h" |
52 | | |
53 | | // Restrict to 64bit processors because they are guaranteed to have SSE2, |
54 | | // or if __AVX2__ is defined. |
55 | | #elif defined(__x86_64) || defined(_M_X64) || defined(__AVX2__) |
56 | | #define USE_SSE2 |
57 | | |
58 | | #include "gdalsse_priv.h" |
59 | | |
60 | | #ifdef __SSE3__ |
61 | | #include <pmmintrin.h> |
62 | | #endif |
63 | | #ifdef __SSSE3__ |
64 | | #include <tmmintrin.h> |
65 | | #endif |
66 | | #ifdef __SSE4_1__ |
67 | | #include <smmintrin.h> |
68 | | #endif |
69 | | #ifdef __AVX2__ |
70 | | #include <immintrin.h> |
71 | | #endif |
72 | | |
73 | | #endif |
74 | | |
75 | | // To be included after above USE_SSE2 and include gdalsse_priv.h |
76 | | // to avoid build issue on Windows x86 |
77 | | #include "gdal_priv_templates.hpp" |
78 | | |
79 | | /************************************************************************/ |
80 | | /* GDALResampleChunk_Near() */ |
81 | | /************************************************************************/ |
82 | | |
83 | | template <class T> |
84 | | static CPLErr GDALResampleChunk_NearT(const GDALOverviewResampleArgs &args, |
85 | | const T *pChunk, T **ppDstBuffer) |
86 | | |
87 | 0 | { |
88 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
89 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
90 | 0 | const GDALDataType eWrkDataType = args.eWrkDataType; |
91 | 0 | const int nChunkXOff = args.nChunkXOff; |
92 | 0 | const int nChunkXSize = args.nChunkXSize; |
93 | 0 | const int nChunkYOff = args.nChunkYOff; |
94 | 0 | const int nDstXOff = args.nDstXOff; |
95 | 0 | const int nDstXOff2 = args.nDstXOff2; |
96 | 0 | const int nDstYOff = args.nDstYOff; |
97 | 0 | const int nDstYOff2 = args.nDstYOff2; |
98 | 0 | const int nDstXWidth = nDstXOff2 - nDstXOff; |
99 | | |
100 | | /* -------------------------------------------------------------------- */ |
101 | | /* Allocate buffers. */ |
102 | | /* -------------------------------------------------------------------- */ |
103 | 0 | *ppDstBuffer = static_cast<T *>( |
104 | 0 | VSI_MALLOC3_VERBOSE(nDstXWidth, nDstYOff2 - nDstYOff, |
105 | 0 | GDALGetDataTypeSizeBytes(eWrkDataType))); |
106 | 0 | if (*ppDstBuffer == nullptr) |
107 | 0 | { |
108 | 0 | return CE_Failure; |
109 | 0 | } |
110 | 0 | T *const pDstBuffer = *ppDstBuffer; |
111 | |
|
112 | 0 | int *panSrcXOff = |
113 | 0 | static_cast<int *>(VSI_MALLOC2_VERBOSE(nDstXWidth, sizeof(int))); |
114 | |
|
115 | 0 | if (panSrcXOff == nullptr) |
116 | 0 | { |
117 | 0 | return CE_Failure; |
118 | 0 | } |
119 | | |
120 | | /* ==================================================================== */ |
121 | | /* Precompute inner loop constants. */ |
122 | | /* ==================================================================== */ |
123 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
124 | 0 | { |
125 | 0 | int nSrcXOff = static_cast<int>(0.5 + iDstPixel * dfXRatioDstToSrc); |
126 | 0 | if (nSrcXOff < nChunkXOff) |
127 | 0 | nSrcXOff = nChunkXOff; |
128 | |
|
129 | 0 | panSrcXOff[iDstPixel - nDstXOff] = nSrcXOff; |
130 | 0 | } |
131 | | |
132 | | /* ==================================================================== */ |
133 | | /* Loop over destination scanlines. */ |
134 | | /* ==================================================================== */ |
135 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
136 | 0 | { |
137 | 0 | int nSrcYOff = static_cast<int>(0.5 + iDstLine * dfYRatioDstToSrc); |
138 | 0 | if (nSrcYOff < nChunkYOff) |
139 | 0 | nSrcYOff = nChunkYOff; |
140 | |
|
141 | 0 | const T *const pSrcScanline = |
142 | 0 | pChunk + |
143 | 0 | (static_cast<size_t>(nSrcYOff - nChunkYOff) * nChunkXSize) - |
144 | 0 | nChunkXOff; |
145 | | |
146 | | /* -------------------------------------------------------------------- |
147 | | */ |
148 | | /* Loop over destination pixels */ |
149 | | /* -------------------------------------------------------------------- |
150 | | */ |
151 | 0 | T *pDstScanline = |
152 | 0 | pDstBuffer + static_cast<size_t>(iDstLine - nDstYOff) * nDstXWidth; |
153 | 0 | for (int iDstPixel = 0; iDstPixel < nDstXWidth; ++iDstPixel) |
154 | 0 | { |
155 | 0 | pDstScanline[iDstPixel] = pSrcScanline[panSrcXOff[iDstPixel]]; |
156 | 0 | } |
157 | 0 | } |
158 | |
|
159 | 0 | CPLFree(panSrcXOff); |
160 | |
|
161 | 0 | return CE_None; |
162 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<unsigned char>(GDALOverviewResampleArgs const&, unsigned char const*, unsigned char**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<unsigned short>(GDALOverviewResampleArgs const&, unsigned short const*, unsigned short**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<unsigned int>(GDALOverviewResampleArgs const&, unsigned int const*, unsigned int**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<unsigned long>(GDALOverviewResampleArgs const&, unsigned long const*, unsigned long**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<std::__1::complex<double> >(GDALOverviewResampleArgs const&, std::__1::complex<double> const*, std::__1::complex<double>**) |
163 | | |
164 | | static CPLErr GDALResampleChunk_Near(const GDALOverviewResampleArgs &args, |
165 | | const void *pChunk, void **ppDstBuffer, |
166 | | GDALDataType *peDstBufferDataType) |
167 | 0 | { |
168 | 0 | *peDstBufferDataType = args.eWrkDataType; |
169 | 0 | switch (args.eWrkDataType) |
170 | 0 | { |
171 | | // For nearest resampling, as no computation is done, only the |
172 | | // size of the data type matters. |
173 | 0 | case GDT_Byte: |
174 | 0 | case GDT_Int8: |
175 | 0 | { |
176 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 1); |
177 | 0 | return GDALResampleChunk_NearT( |
178 | 0 | args, static_cast<const uint8_t *>(pChunk), |
179 | 0 | reinterpret_cast<uint8_t **>(ppDstBuffer)); |
180 | 0 | } |
181 | | |
182 | 0 | case GDT_Int16: |
183 | 0 | case GDT_UInt16: |
184 | 0 | case GDT_Float16: |
185 | 0 | { |
186 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 2); |
187 | 0 | return GDALResampleChunk_NearT( |
188 | 0 | args, static_cast<const uint16_t *>(pChunk), |
189 | 0 | reinterpret_cast<uint16_t **>(ppDstBuffer)); |
190 | 0 | } |
191 | | |
192 | 0 | case GDT_CInt16: |
193 | 0 | case GDT_CFloat16: |
194 | 0 | case GDT_Int32: |
195 | 0 | case GDT_UInt32: |
196 | 0 | case GDT_Float32: |
197 | 0 | { |
198 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 4); |
199 | 0 | return GDALResampleChunk_NearT( |
200 | 0 | args, static_cast<const uint32_t *>(pChunk), |
201 | 0 | reinterpret_cast<uint32_t **>(ppDstBuffer)); |
202 | 0 | } |
203 | | |
204 | 0 | case GDT_CInt32: |
205 | 0 | case GDT_CFloat32: |
206 | 0 | case GDT_Int64: |
207 | 0 | case GDT_UInt64: |
208 | 0 | case GDT_Float64: |
209 | 0 | { |
210 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 8); |
211 | 0 | return GDALResampleChunk_NearT( |
212 | 0 | args, static_cast<const uint64_t *>(pChunk), |
213 | 0 | reinterpret_cast<uint64_t **>(ppDstBuffer)); |
214 | 0 | } |
215 | | |
216 | 0 | case GDT_CFloat64: |
217 | 0 | { |
218 | 0 | return GDALResampleChunk_NearT( |
219 | 0 | args, static_cast<const std::complex<double> *>(pChunk), |
220 | 0 | reinterpret_cast<std::complex<double> **>(ppDstBuffer)); |
221 | 0 | } |
222 | | |
223 | 0 | case GDT_Unknown: |
224 | 0 | case GDT_TypeCount: |
225 | 0 | break; |
226 | 0 | } |
227 | 0 | CPLAssert(false); |
228 | 0 | return CE_Failure; |
229 | 0 | } |
230 | | |
231 | | namespace |
232 | | { |
233 | | |
234 | | // Find in the color table the entry whose RGB value is the closest |
235 | | // (using quadratic distance) to the test color, ignoring transparent entries. |
236 | | int BestColorEntry(const std::vector<GDALColorEntry> &entries, |
237 | | const GDALColorEntry &test) |
238 | 0 | { |
239 | 0 | int nMinDist = std::numeric_limits<int>::max(); |
240 | 0 | size_t bestEntry = 0; |
241 | 0 | for (size_t i = 0; i < entries.size(); ++i) |
242 | 0 | { |
243 | 0 | const GDALColorEntry &entry = entries[i]; |
244 | | // Ignore transparent entries |
245 | 0 | if (entry.c4 == 0) |
246 | 0 | continue; |
247 | | |
248 | 0 | int nDist = ((test.c1 - entry.c1) * (test.c1 - entry.c1)) + |
249 | 0 | ((test.c2 - entry.c2) * (test.c2 - entry.c2)) + |
250 | 0 | ((test.c3 - entry.c3) * (test.c3 - entry.c3)); |
251 | 0 | if (nDist < nMinDist) |
252 | 0 | { |
253 | 0 | nMinDist = nDist; |
254 | 0 | bestEntry = i; |
255 | 0 | } |
256 | 0 | } |
257 | 0 | return static_cast<int>(bestEntry); |
258 | 0 | } |
259 | | |
260 | | std::vector<GDALColorEntry> ReadColorTable(const GDALColorTable &table, |
261 | | int &transparentIdx) |
262 | 0 | { |
263 | 0 | std::vector<GDALColorEntry> entries(table.GetColorEntryCount()); |
264 | |
|
265 | 0 | transparentIdx = -1; |
266 | 0 | int i = 0; |
267 | 0 | for (auto &entry : entries) |
268 | 0 | { |
269 | 0 | table.GetColorEntryAsRGB(i, &entry); |
270 | 0 | if (transparentIdx < 0 && entry.c4 == 0) |
271 | 0 | transparentIdx = i; |
272 | 0 | ++i; |
273 | 0 | } |
274 | 0 | return entries; |
275 | 0 | } |
276 | | |
277 | | } // unnamed namespace |
278 | | |
279 | | /************************************************************************/ |
280 | | /* SQUARE() */ |
281 | | /************************************************************************/ |
282 | | |
// Return val * val. The first operand is converted to Tsquare before the
// multiplication, so a wider Tsquare can be requested to hold the product.
template <class T, class Tsquare = T> inline Tsquare SQUARE(T val)
{
    const Tsquare widened = static_cast<Tsquare>(val);
    return widened * val;
}
287 | | |
288 | | /************************************************************************/ |
289 | | /* ComputeIntegerRMS() */ |
290 | | /************************************************************************/ |
// Integer root-mean-square: return the integer r that minimizes
// abs(r**2 - sumSquares / weight).
//
// r is first taken as the truncation of sqrt(sumSquares / weight), and is
// then bumped to r + 1 when (r+1)**2 is strictly closer to the mean square
// than r**2. That condition,
//     (r+1)**2 - m < m - r**2      with m = sumSquares / weight,
// is evaluated in the rearranged form
//     2 * r * (r + 1) + 1 < 2 * m
// whose left-hand side is computed with Twork arithmetic.
template <class T, class Twork>
inline T ComputeIntegerRMS(double sumSquares, double weight)
{
    const double dfMeanSquare = sumSquares / weight;
    const T nFloorRoot = static_cast<T>(std::sqrt(dfMeanSquare));

    const auto nLeftSide =
        static_cast<Twork>(2) * nFloorRoot * (nFloorRoot + 1) + 1;
    const bool bRoundUp = static_cast<double>(nLeftSide) < 2 * dfMeanSquare;

    return bRoundUp ? static_cast<T>(nFloorRoot + 1) : nFloorRoot;
}
307 | | |
308 | | template <class T, class Tsum> inline T ComputeIntegerRMS_4values(Tsum) |
309 | | { |
310 | | CPLAssert(false); |
311 | | return 0; |
312 | | } |
313 | | |
314 | | template <> inline GByte ComputeIntegerRMS_4values<GByte, int>(int sumSquares) |
315 | 0 | { |
316 | | // It has been verified that given the correction on rms below, using |
317 | | // sqrt((float)((sumSquares + 1)/ 4)) or sqrt((float)sumSquares * 0.25f) |
318 | | // is equivalent, so use the former as it is used twice. |
319 | 0 | const int sumSquaresPlusOneDiv4 = (sumSquares + 1) / 4; |
320 | 0 | const float sumDivWeight = static_cast<float>(sumSquaresPlusOneDiv4); |
321 | 0 | GByte rms = static_cast<GByte>(std::sqrt(sumDivWeight)); |
322 | | |
323 | | // Is rms**2 or (rms+1)**2 closest to sumSquares / weight ? |
324 | | // Naive version: |
325 | | // if( weight * (rms+1)**2 - sumSquares < sumSquares - weight * rms**2 ) |
326 | | // Optimized version for integer case and weight == 4 |
327 | 0 | if (static_cast<int>(rms) * (rms + 1) < sumSquaresPlusOneDiv4) |
328 | 0 | rms += 1; |
329 | 0 | return rms; |
330 | 0 | } |
331 | | |
332 | | template <> |
333 | | inline GUInt16 ComputeIntegerRMS_4values<GUInt16, double>(double sumSquares) |
334 | 0 | { |
335 | 0 | const double sumDivWeight = sumSquares * 0.25; |
336 | 0 | GUInt16 rms = static_cast<GUInt16>(std::sqrt(sumDivWeight)); |
337 | | |
338 | | // Is rms**2 or (rms+1)**2 closest to sumSquares / weight ? |
339 | | // Naive version: |
340 | | // if( weight * (rms+1)**2 - sumSquares < sumSquares - weight * rms**2 ) |
341 | | // Optimized version for integer case and weight == 4 |
342 | 0 | if (static_cast<GUInt32>(rms) * (rms + 1) < |
343 | 0 | static_cast<GUInt32>(sumDivWeight + 0.25)) |
344 | 0 | rms += 1; |
345 | 0 | return rms; |
346 | 0 | } |
347 | | |
348 | | #ifdef USE_SSE2 |
349 | | |
350 | | /************************************************************************/ |
351 | | /* QuadraticMeanByteSSE2OrAVX2() */ |
352 | | /************************************************************************/ |
353 | | |
354 | | #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS) |
355 | | #define sse2_packus_epi32 _mm_packus_epi32 |
356 | | #else |
357 | | inline __m128i sse2_packus_epi32(__m128i a, __m128i b) |
358 | 0 | { |
359 | 0 | const auto minus32768_32 = _mm_set1_epi32(-32768); |
360 | 0 | const auto minus32768_16 = _mm_set1_epi16(-32768); |
361 | 0 | a = _mm_add_epi32(a, minus32768_32); |
362 | 0 | b = _mm_add_epi32(b, minus32768_32); |
363 | 0 | a = _mm_packs_epi32(a, b); |
364 | 0 | a = _mm_sub_epi16(a, minus32768_16); |
365 | 0 | return a; |
366 | 0 | } |
367 | | #endif |
368 | | |
369 | | #if defined(__SSSE3__) || defined(USE_NEON_OPTIMIZATIONS) |
370 | | #define sse2_hadd_epi16 _mm_hadd_epi16 |
371 | | #else |
372 | | inline __m128i sse2_hadd_epi16(__m128i a, __m128i b) |
373 | 0 | { |
374 | | // Horizontal addition of adjacent pairs |
375 | 0 | const auto mask = _mm_set1_epi32(0xFFFF); |
376 | 0 | const auto horizLo = |
377 | 0 | _mm_add_epi32(_mm_and_si128(a, mask), _mm_srli_epi32(a, 16)); |
378 | 0 | const auto horizHi = |
379 | 0 | _mm_add_epi32(_mm_and_si128(b, mask), _mm_srli_epi32(b, 16)); |
380 | | |
381 | | // Recombine low and high parts |
382 | 0 | return _mm_packs_epi32(horizLo, horizHi); |
383 | 0 | } |
384 | | #endif |
385 | | |
// Width-neutral aliases for the integer / single-precision intrinsics used by
// the Byte kernels below, so that the same source can be compiled either with
// 256 bit (AVX2) or 128 bit (SSE2) registers.

#ifdef __AVX2__

// --- Constants ---
#define setzero _mm256_setzero_si256
#define set1_epi16 _mm256_set1_epi16
#define set1_epi32 _mm256_set1_epi32
#define set1_ps _mm256_set1_ps

// --- Unaligned load ---
#define loadu_int(x) _mm256_loadu_si256(reinterpret_cast<__m256i const *>(x))

// --- Widening / narrowing ---
#define unpacklo_epi8 _mm256_unpacklo_epi8
#define unpackhi_epi8 _mm256_unpackhi_epi8
#define packs_epi32 _mm256_packs_epi32
#define packus_epi32 _mm256_packus_epi32
#define packus_epi16 _mm256_packus_epi16

// --- 16 bit integer operations ---
#define add_epi16 _mm256_add_epi16
#define sub_epi16 _mm256_sub_epi16
#define mullo_epi16 _mm256_mullo_epi16
#define madd_epi16 _mm256_madd_epi16
#define hadd_epi16 _mm256_hadd_epi16
#define srli_epi16 _mm256_srli_epi16
#define cmpgt_epi16 _mm256_cmpgt_epi16

// --- 32 bit integer operations ---
#define add_epi32 _mm256_add_epi32
#define srli_epi32 _mm256_srli_epi32

// --- Single precision operations ---
#define cvtepi32_ps _mm256_cvtepi32_ps
#define cvttps_epi32 _mm256_cvttps_epi32
#define mul_ps _mm256_mul_ps
#define sqrt_ps _mm256_sqrt_ps

// AVX2 operates on 2 separate 128 bit lanes, so a shuffle is needed to get
// the values in the order they would have in a true 256 bit vector register.
// This swaps the two middle 64 bit elements of x (order 0, 2, 1, 3).
inline __m256i FIXUP_LANES(__m256i x)
{
    return _mm256_permute4x64_epi64(x, _MM_SHUFFLE(3, 1, 2, 0));
}

// --- Stores (after lane fix-up) ---
// store_lo: write the lower 128 bits; storeu_int: write the full register.
#define store_lo(x, y)                                                         \
    _mm_storeu_si128(reinterpret_cast<__m128i *>(x),                           \
                     _mm256_extracti128_si256(FIXUP_LANES(y), 0))
#define storeu_int(x, y)                                                       \
    _mm256_storeu_si256(reinterpret_cast<__m256i *>(x), FIXUP_LANES(y))

#else

// --- Constants ---
#define setzero _mm_setzero_si128
#define set1_epi16 _mm_set1_epi16
#define set1_epi32 _mm_set1_epi32
#define set1_ps _mm_set1_ps

// --- Unaligned load ---
#define loadu_int(x) _mm_loadu_si128(reinterpret_cast<__m128i const *>(x))

// --- Widening / narrowing ---
#define unpacklo_epi8 _mm_unpacklo_epi8
#define unpackhi_epi8 _mm_unpackhi_epi8
#define packs_epi32 _mm_packs_epi32
#define packus_epi32 sse2_packus_epi32
#define packus_epi16 _mm_packus_epi16

// --- 16 bit integer operations ---
#define add_epi16 _mm_add_epi16
#define sub_epi16 _mm_sub_epi16
#define mullo_epi16 _mm_mullo_epi16
#define madd_epi16 _mm_madd_epi16
#define hadd_epi16 sse2_hadd_epi16
#define srli_epi16 _mm_srli_epi16
#define cmpgt_epi16 _mm_cmpgt_epi16

// --- 32 bit integer operations ---
#define add_epi32 _mm_add_epi32
#define srli_epi32 _mm_srli_epi32

// --- Single precision operations ---
#define cvtepi32_ps _mm_cvtepi32_ps
#define cvttps_epi32 _mm_cvttps_epi32
#define mul_ps _mm_mul_ps
#define sqrt_ps _mm_sqrt_ps

// --- Stores ---
// store_lo: write the lower 64 bits; storeu_int: write the full register.
#define store_lo(x, y) _mm_storel_epi64(reinterpret_cast<__m128i *>(x), (y))
#define storeu_int(x, y) _mm_storeu_si128(reinterpret_cast<__m128i *>(x), (y))

#endif
453 | | |
454 | | template <class T> |
455 | | static int |
456 | | #if defined(__GNUC__) |
457 | | __attribute__((noinline)) |
458 | | #endif |
459 | | QuadraticMeanByteSSE2OrAVX2(int nDstXWidth, int nChunkXSize, |
460 | | const T *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
461 | | T *CPL_RESTRICT pDstScanline) |
462 | 0 | { |
463 | | // Optimized implementation for RMS on Byte by |
464 | | // processing by group of 8 output pixels, so as to use |
465 | | // a single _mm_sqrt_ps() call for 4 output pixels |
466 | 0 | const T *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
467 | |
|
468 | 0 | int iDstPixel = 0; |
469 | 0 | const auto one16 = set1_epi16(1); |
470 | 0 | const auto one32 = set1_epi32(1); |
471 | 0 | const auto zero = setzero(); |
472 | 0 | const auto minus32768 = set1_epi16(-32768); |
473 | |
|
474 | 0 | constexpr int DEST_ELTS = static_cast<int>(sizeof(zero)) / 2; |
475 | 0 | for (; iDstPixel < nDstXWidth - (DEST_ELTS - 1); iDstPixel += DEST_ELTS) |
476 | 0 | { |
477 | | // Load 2 * DEST_ELTS bytes from each line |
478 | 0 | auto firstLine = loadu_int(pSrcScanlineShifted); |
479 | 0 | auto secondLine = loadu_int(pSrcScanlineShifted + nChunkXSize); |
480 | | // Extend those Bytes as UInt16s |
481 | 0 | auto firstLineLo = unpacklo_epi8(firstLine, zero); |
482 | 0 | auto firstLineHi = unpackhi_epi8(firstLine, zero); |
483 | 0 | auto secondLineLo = unpacklo_epi8(secondLine, zero); |
484 | 0 | auto secondLineHi = unpackhi_epi8(secondLine, zero); |
485 | | |
486 | | // Multiplication of 16 bit values and horizontal |
487 | | // addition of 32 bit results |
488 | | // [ src[2*i+0]^2 + src[2*i+1]^2 for i in range(4) ] |
489 | 0 | firstLineLo = madd_epi16(firstLineLo, firstLineLo); |
490 | 0 | firstLineHi = madd_epi16(firstLineHi, firstLineHi); |
491 | 0 | secondLineLo = madd_epi16(secondLineLo, secondLineLo); |
492 | 0 | secondLineHi = madd_epi16(secondLineHi, secondLineHi); |
493 | | |
494 | | // Vertical addition |
495 | 0 | const auto sumSquaresLo = add_epi32(firstLineLo, secondLineLo); |
496 | 0 | const auto sumSquaresHi = add_epi32(firstLineHi, secondLineHi); |
497 | |
|
498 | 0 | const auto sumSquaresPlusOneDiv4Lo = |
499 | 0 | srli_epi32(add_epi32(sumSquaresLo, one32), 2); |
500 | 0 | const auto sumSquaresPlusOneDiv4Hi = |
501 | 0 | srli_epi32(add_epi32(sumSquaresHi, one32), 2); |
502 | | |
503 | | // Take square root and truncate/floor to int32 |
504 | 0 | const auto rmsLo = |
505 | 0 | cvttps_epi32(sqrt_ps(cvtepi32_ps(sumSquaresPlusOneDiv4Lo))); |
506 | 0 | const auto rmsHi = |
507 | 0 | cvttps_epi32(sqrt_ps(cvtepi32_ps(sumSquaresPlusOneDiv4Hi))); |
508 | | |
509 | | // Merge back low and high registers with each RMS value |
510 | | // as a 16 bit value. |
511 | 0 | auto rms = packs_epi32(rmsLo, rmsHi); |
512 | | |
513 | | // Round to upper value if it minimizes the |
514 | | // error |rms^2 - sumSquares/4| |
515 | | // if( 2 * (2 * rms * (rms + 1) + 1) < sumSquares ) |
516 | | // rms += 1; |
517 | | // which is equivalent to: |
518 | | // if( rms * (rms + 1) < (sumSquares+1) / 4 ) |
519 | | // rms += 1; |
520 | | // And both left and right parts fit on 16 (unsigned) bits |
521 | 0 | const auto sumSquaresPlusOneDiv4 = |
522 | 0 | packus_epi32(sumSquaresPlusOneDiv4Lo, sumSquaresPlusOneDiv4Hi); |
523 | | // cmpgt_epi16 operates on signed int16, but here |
524 | | // we have unsigned values, so shift them by -32768 before |
525 | 0 | const auto mask = cmpgt_epi16( |
526 | 0 | add_epi16(sumSquaresPlusOneDiv4, minus32768), |
527 | 0 | add_epi16(mullo_epi16(rms, add_epi16(rms, one16)), minus32768)); |
528 | | // The value of the mask will be -1 when the correction needs to be |
529 | | // applied |
530 | 0 | rms = sub_epi16(rms, mask); |
531 | | |
532 | | // Pack each 16 bit RMS value to 8 bits |
533 | 0 | rms = packus_epi16(rms, rms /* could be anything */); |
534 | 0 | store_lo(&pDstScanline[iDstPixel], rms); |
535 | 0 | pSrcScanlineShifted += 2 * DEST_ELTS; |
536 | 0 | } |
537 | |
|
538 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
539 | 0 | return iDstPixel; |
540 | 0 | } |
541 | | |
542 | | /************************************************************************/ |
543 | | /* AverageByteSSE2OrAVX2() */ |
544 | | /************************************************************************/ |
545 | | |
546 | | static int |
547 | | AverageByteSSE2OrAVX2(int nDstXWidth, int nChunkXSize, |
548 | | const GByte *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
549 | | GByte *CPL_RESTRICT pDstScanline) |
550 | 0 | { |
551 | | // Optimized implementation for average on Byte by |
552 | | // processing by group of 16 output pixels for SSE2, or 32 for AVX2 |
553 | |
|
554 | 0 | const auto zero = setzero(); |
555 | 0 | const auto two16 = set1_epi16(2); |
556 | 0 | const GByte *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
557 | |
|
558 | 0 | constexpr int DEST_ELTS = static_cast<int>(sizeof(zero)) / 2; |
559 | 0 | int iDstPixel = 0; |
560 | 0 | for (; iDstPixel < nDstXWidth - (2 * DEST_ELTS - 1); |
561 | 0 | iDstPixel += 2 * DEST_ELTS) |
562 | 0 | { |
563 | 0 | decltype(setzero()) average0; |
564 | 0 | { |
565 | | // Load 2 * DEST_ELTS bytes from each line |
566 | 0 | const auto firstLine = loadu_int(pSrcScanlineShifted); |
567 | 0 | const auto secondLine = |
568 | 0 | loadu_int(pSrcScanlineShifted + nChunkXSize); |
569 | | // Extend those Bytes as UInt16s |
570 | 0 | const auto firstLineLo = unpacklo_epi8(firstLine, zero); |
571 | 0 | const auto firstLineHi = unpackhi_epi8(firstLine, zero); |
572 | 0 | const auto secondLineLo = unpacklo_epi8(secondLine, zero); |
573 | 0 | const auto secondLineHi = unpackhi_epi8(secondLine, zero); |
574 | | |
575 | | // Vertical addition |
576 | 0 | const auto sumLo = add_epi16(firstLineLo, secondLineLo); |
577 | 0 | const auto sumHi = add_epi16(firstLineHi, secondLineHi); |
578 | | |
579 | | // Horizontal addition of adjacent pairs, and recombine low and high |
580 | | // parts |
581 | 0 | const auto sum = hadd_epi16(sumLo, sumHi); |
582 | | |
583 | | // average = (sum + 2) / 4 |
584 | 0 | average0 = srli_epi16(add_epi16(sum, two16), 2); |
585 | |
|
586 | 0 | pSrcScanlineShifted += 2 * DEST_ELTS; |
587 | 0 | } |
588 | |
|
589 | 0 | decltype(setzero()) average1; |
590 | 0 | { |
591 | | // Load 2 * DEST_ELTS bytes from each line |
592 | 0 | const auto firstLine = loadu_int(pSrcScanlineShifted); |
593 | 0 | const auto secondLine = |
594 | 0 | loadu_int(pSrcScanlineShifted + nChunkXSize); |
595 | | // Extend those Bytes as UInt16s |
596 | 0 | const auto firstLineLo = unpacklo_epi8(firstLine, zero); |
597 | 0 | const auto firstLineHi = unpackhi_epi8(firstLine, zero); |
598 | 0 | const auto secondLineLo = unpacklo_epi8(secondLine, zero); |
599 | 0 | const auto secondLineHi = unpackhi_epi8(secondLine, zero); |
600 | | |
601 | | // Vertical addition |
602 | 0 | const auto sumLo = add_epi16(firstLineLo, secondLineLo); |
603 | 0 | const auto sumHi = add_epi16(firstLineHi, secondLineHi); |
604 | | |
605 | | // Horizontal addition of adjacent pairs, and recombine low and high |
606 | | // parts |
607 | 0 | const auto sum = hadd_epi16(sumLo, sumHi); |
608 | | |
609 | | // average = (sum + 2) / 4 |
610 | 0 | average1 = srli_epi16(add_epi16(sum, two16), 2); |
611 | |
|
612 | 0 | pSrcScanlineShifted += 2 * DEST_ELTS; |
613 | 0 | } |
614 | | |
615 | | // Pack each 16 bit average value to 8 bits |
616 | 0 | const auto average = packus_epi16(average0, average1); |
617 | 0 | storeu_int(&pDstScanline[iDstPixel], average); |
618 | 0 | } |
619 | |
|
620 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
621 | 0 | return iDstPixel; |
622 | 0 | } |
623 | | |
624 | | /************************************************************************/ |
625 | | /* QuadraticMeanUInt16SSE2() */ |
626 | | /************************************************************************/ |
627 | | |
628 | | #ifdef __SSE3__ |
629 | | #define sse2_hadd_pd _mm_hadd_pd |
630 | | #else |
631 | | inline __m128d sse2_hadd_pd(__m128d a, __m128d b) |
632 | 0 | { |
633 | 0 | auto aLo_bLo = |
634 | 0 | _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(a), _mm_castpd_ps(b))); |
635 | 0 | auto aHi_bHi = |
636 | 0 | _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(b), _mm_castpd_ps(a))); |
637 | 0 | return _mm_add_pd(aLo_bLo, aHi_bHi); // (aLo + aHi, bLo + bHi) |
638 | 0 | } |
639 | | #endif |
640 | | |
641 | | inline __m128d SQUARE_PD(__m128d x) |
642 | 0 | { |
643 | 0 | return _mm_mul_pd(x, x); |
644 | 0 | } |
645 | | |
646 | | #ifdef __AVX2__ |
647 | | |
648 | | inline __m256d SQUARE_PD(__m256d x) |
649 | | { |
650 | | return _mm256_mul_pd(x, x); |
651 | | } |
652 | | |
653 | | inline __m256d FIXUP_LANES(__m256d x) |
654 | | { |
655 | | return _mm256_permute4x64_pd(x, _MM_SHUFFLE(3, 1, 2, 0)); |
656 | | } |
657 | | |
658 | | inline __m256 FIXUP_LANES(__m256 x) |
659 | | { |
660 | | return _mm256_castpd_ps(FIXUP_LANES(_mm256_castps_pd(x))); |
661 | | } |
662 | | |
663 | | #endif |
664 | | |
665 | | static int |
666 | | QuadraticMeanUInt16SSE2(int nDstXWidth, int nChunkXSize, |
667 | | const uint16_t *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
668 | | uint16_t *CPL_RESTRICT pDstScanline) |
669 | 0 | { |
670 | | // Optimized implementation for RMS on UInt16 by |
671 | | // processing by group of 4 output pixels. |
672 | 0 | const uint16_t *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
673 | |
|
674 | 0 | int iDstPixel = 0; |
675 | 0 | const auto zero = _mm_setzero_si128(); |
676 | |
|
677 | | #ifdef __AVX2__ |
678 | | const auto zeroDot25 = _mm256_set1_pd(0.25); |
679 | | const auto zeroDot5 = _mm256_set1_pd(0.5); |
680 | | |
681 | | // The first four 0's could be anything, as we only take the bottom |
682 | | // 128 bits. |
683 | | const auto permutation = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0); |
684 | | #else |
685 | 0 | const auto zeroDot25 = _mm_set1_pd(0.25); |
686 | 0 | const auto zeroDot5 = _mm_set1_pd(0.5); |
687 | 0 | #endif |
688 | |
|
689 | 0 | constexpr int DEST_ELTS = |
690 | 0 | static_cast<int>(sizeof(zero) / sizeof(uint16_t)) / 2; |
691 | 0 | for (; iDstPixel < nDstXWidth - (DEST_ELTS - 1); iDstPixel += DEST_ELTS) |
692 | 0 | { |
693 | | // Load 8 UInt16 from each line |
694 | 0 | const auto firstLine = _mm_loadu_si128( |
695 | 0 | reinterpret_cast<__m128i const *>(pSrcScanlineShifted)); |
696 | 0 | const auto secondLine = |
697 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>( |
698 | 0 | pSrcScanlineShifted + nChunkXSize)); |
699 | | |
700 | | // Detect if all of the source values fit in 14 bits. |
701 | | // because if x < 2^14, then 4 * x^2 < 2^30 which fits in a signed int32 |
702 | | // and we can do a much faster implementation. |
703 | 0 | const auto maskTmp = |
704 | 0 | _mm_srli_epi16(_mm_or_si128(firstLine, secondLine), 14); |
705 | | #if defined(__i386__) || defined(_M_IX86) |
706 | | uint64_t nMaskFitsIn14Bits = 0; |
707 | | _mm_storel_epi64( |
708 | | reinterpret_cast<__m128i *>(&nMaskFitsIn14Bits), |
709 | | _mm_packus_epi16(maskTmp, maskTmp /* could be anything */)); |
710 | | #else |
711 | 0 | const auto nMaskFitsIn14Bits = _mm_cvtsi128_si64( |
712 | 0 | _mm_packus_epi16(maskTmp, maskTmp /* could be anything */)); |
713 | 0 | #endif |
714 | 0 | if (nMaskFitsIn14Bits == 0) |
715 | 0 | { |
716 | | // Multiplication of 16 bit values and horizontal |
717 | | // addition of 32 bit results |
718 | 0 | const auto firstLineHSumSquare = |
719 | 0 | _mm_madd_epi16(firstLine, firstLine); |
720 | 0 | const auto secondLineHSumSquare = |
721 | 0 | _mm_madd_epi16(secondLine, secondLine); |
722 | | // Vertical addition |
723 | 0 | const auto sumSquares = |
724 | 0 | _mm_add_epi32(firstLineHSumSquare, secondLineHSumSquare); |
725 | | // In theory we should take sqrt(sumSquares * 0.25f) |
726 | | // but given the rounding we do, this is equivalent to |
727 | | // sqrt((sumSquares + 1)/4). This has been verified exhaustively for |
728 | | // sumSquares <= 4 * 16383^2 |
729 | 0 | const auto one32 = _mm_set1_epi32(1); |
730 | 0 | const auto sumSquaresPlusOneDiv4 = |
731 | 0 | _mm_srli_epi32(_mm_add_epi32(sumSquares, one32), 2); |
732 | | // Take square root and truncate/floor to int32 |
733 | 0 | auto rms = _mm_cvttps_epi32( |
734 | 0 | _mm_sqrt_ps(_mm_cvtepi32_ps(sumSquaresPlusOneDiv4))); |
735 | | |
736 | | // Round to upper value if it minimizes the |
737 | | // error |rms^2 - sumSquares/4| |
738 | | // if( 2 * (2 * rms * (rms + 1) + 1) < sumSquares ) |
739 | | // rms += 1; |
740 | | // which is equivalent to: |
741 | | // if( rms * rms + rms < (sumSquares+1) / 4 ) |
742 | | // rms += 1; |
743 | 0 | auto mask = |
744 | 0 | _mm_cmpgt_epi32(sumSquaresPlusOneDiv4, |
745 | 0 | _mm_add_epi32(_mm_madd_epi16(rms, rms), rms)); |
746 | 0 | rms = _mm_sub_epi32(rms, mask); |
747 | | // Pack each 32 bit RMS value to 16 bits |
748 | 0 | rms = _mm_packs_epi32(rms, rms /* could be anything */); |
749 | 0 | _mm_storel_epi64( |
750 | 0 | reinterpret_cast<__m128i *>(&pDstScanline[iDstPixel]), rms); |
751 | 0 | pSrcScanlineShifted += 2 * DEST_ELTS; |
752 | 0 | continue; |
753 | 0 | } |
754 | | |
755 | | // An approach using _mm_mullo_epi16, _mm_mulhi_epu16 before extending |
756 | | // to 32 bit would result in 4 multiplications instead of 8, but |
757 | | // mullo/mulhi have a worse throughput than mul_pd. |
758 | | |
759 | | // Extend those UInt16s as UInt32s |
760 | 0 | const auto firstLineLo = _mm_unpacklo_epi16(firstLine, zero); |
761 | 0 | const auto firstLineHi = _mm_unpackhi_epi16(firstLine, zero); |
762 | 0 | const auto secondLineLo = _mm_unpacklo_epi16(secondLine, zero); |
763 | 0 | const auto secondLineHi = _mm_unpackhi_epi16(secondLine, zero); |
764 | |
|
765 | | #ifdef __AVX2__ |
766 | | // Multiplication of 32 bit values previously converted to 64 bit double |
767 | | const auto firstLineLoDbl = SQUARE_PD(_mm256_cvtepi32_pd(firstLineLo)); |
768 | | const auto firstLineHiDbl = SQUARE_PD(_mm256_cvtepi32_pd(firstLineHi)); |
769 | | const auto secondLineLoDbl = |
770 | | SQUARE_PD(_mm256_cvtepi32_pd(secondLineLo)); |
771 | | const auto secondLineHiDbl = |
772 | | SQUARE_PD(_mm256_cvtepi32_pd(secondLineHi)); |
773 | | |
774 | | // Vertical addition of squares |
775 | | const auto sumSquaresLo = |
776 | | _mm256_add_pd(firstLineLoDbl, secondLineLoDbl); |
777 | | const auto sumSquaresHi = |
778 | | _mm256_add_pd(firstLineHiDbl, secondLineHiDbl); |
779 | | |
780 | | // Horizontal addition of squares |
781 | | const auto sumSquares = |
782 | | FIXUP_LANES(_mm256_hadd_pd(sumSquaresLo, sumSquaresHi)); |
783 | | |
784 | | const auto sumDivWeight = _mm256_mul_pd(sumSquares, zeroDot25); |
785 | | |
786 | | // Take square root and truncate/floor to int32 |
787 | | auto rms = _mm256_cvttpd_epi32(_mm256_sqrt_pd(sumDivWeight)); |
788 | | const auto rmsDouble = _mm256_cvtepi32_pd(rms); |
789 | | const auto right = _mm256_sub_pd( |
790 | | sumDivWeight, _mm256_add_pd(SQUARE_PD(rmsDouble), rmsDouble)); |
791 | | |
792 | | auto mask = |
793 | | _mm256_castpd_ps(_mm256_cmp_pd(zeroDot5, right, _CMP_LT_OS)); |
794 | | // Extract 32-bit from each of the 4 64-bit masks |
795 | | // mask = FIXUP_LANES(_mm256_shuffle_ps(mask, mask, |
796 | | // _MM_SHUFFLE(2,0,2,0))); |
797 | | mask = _mm256_permutevar8x32_ps(mask, permutation); |
798 | | const auto maskI = _mm_castps_si128(_mm256_extractf128_ps(mask, 0)); |
799 | | |
800 | | // Apply the correction |
801 | | rms = _mm_sub_epi32(rms, maskI); |
802 | | |
803 | | // Pack each 32 bit RMS value to 16 bits |
804 | | rms = _mm_packus_epi32(rms, rms /* could be anything */); |
805 | | #else |
806 | | // Multiplication of 32 bit values previously converted to 64 bit double |
807 | 0 | const auto firstLineLoLo = SQUARE_PD(_mm_cvtepi32_pd(firstLineLo)); |
808 | 0 | const auto firstLineLoHi = |
809 | 0 | SQUARE_PD(_mm_cvtepi32_pd(_mm_srli_si128(firstLineLo, 8))); |
810 | 0 | const auto firstLineHiLo = SQUARE_PD(_mm_cvtepi32_pd(firstLineHi)); |
811 | 0 | const auto firstLineHiHi = |
812 | 0 | SQUARE_PD(_mm_cvtepi32_pd(_mm_srli_si128(firstLineHi, 8))); |
813 | |
|
814 | 0 | const auto secondLineLoLo = SQUARE_PD(_mm_cvtepi32_pd(secondLineLo)); |
815 | 0 | const auto secondLineLoHi = |
816 | 0 | SQUARE_PD(_mm_cvtepi32_pd(_mm_srli_si128(secondLineLo, 8))); |
817 | 0 | const auto secondLineHiLo = SQUARE_PD(_mm_cvtepi32_pd(secondLineHi)); |
818 | 0 | const auto secondLineHiHi = |
819 | 0 | SQUARE_PD(_mm_cvtepi32_pd(_mm_srli_si128(secondLineHi, 8))); |
820 | | |
821 | | // Vertical addition of squares |
822 | 0 | const auto sumSquaresLoLo = _mm_add_pd(firstLineLoLo, secondLineLoLo); |
823 | 0 | const auto sumSquaresLoHi = _mm_add_pd(firstLineLoHi, secondLineLoHi); |
824 | 0 | const auto sumSquaresHiLo = _mm_add_pd(firstLineHiLo, secondLineHiLo); |
825 | 0 | const auto sumSquaresHiHi = _mm_add_pd(firstLineHiHi, secondLineHiHi); |
826 | | |
827 | | // Horizontal addition of squares |
828 | 0 | const auto sumSquaresLo = sse2_hadd_pd(sumSquaresLoLo, sumSquaresLoHi); |
829 | 0 | const auto sumSquaresHi = sse2_hadd_pd(sumSquaresHiLo, sumSquaresHiHi); |
830 | |
|
831 | 0 | const auto sumDivWeightLo = _mm_mul_pd(sumSquaresLo, zeroDot25); |
832 | 0 | const auto sumDivWeightHi = _mm_mul_pd(sumSquaresHi, zeroDot25); |
833 | | // Take square root and truncate/floor to int32 |
834 | 0 | const auto rmsLo = _mm_cvttpd_epi32(_mm_sqrt_pd(sumDivWeightLo)); |
835 | 0 | const auto rmsHi = _mm_cvttpd_epi32(_mm_sqrt_pd(sumDivWeightHi)); |
836 | | |
837 | | // Correctly round rms to minimize | rms^2 - sumSquares / 4 | |
838 | | // if( 0.5 < sumDivWeight - (rms * rms + rms) ) |
839 | | // rms += 1; |
840 | 0 | const auto rmsLoDouble = _mm_cvtepi32_pd(rmsLo); |
841 | 0 | const auto rmsHiDouble = _mm_cvtepi32_pd(rmsHi); |
842 | 0 | const auto rightLo = _mm_sub_pd( |
843 | 0 | sumDivWeightLo, _mm_add_pd(SQUARE_PD(rmsLoDouble), rmsLoDouble)); |
844 | 0 | const auto rightHi = _mm_sub_pd( |
845 | 0 | sumDivWeightHi, _mm_add_pd(SQUARE_PD(rmsHiDouble), rmsHiDouble)); |
846 | |
|
847 | 0 | const auto maskLo = _mm_castpd_ps(_mm_cmplt_pd(zeroDot5, rightLo)); |
848 | 0 | const auto maskHi = _mm_castpd_ps(_mm_cmplt_pd(zeroDot5, rightHi)); |
849 | | // The value of the mask will be -1 when the correction needs to be |
850 | | // applied |
851 | 0 | const auto mask = _mm_castps_si128(_mm_shuffle_ps( |
852 | 0 | maskLo, maskHi, (0 << 0) | (2 << 2) | (0 << 4) | (2 << 6))); |
853 | |
|
854 | 0 | auto rms = _mm_castps_si128( |
855 | 0 | _mm_movelh_ps(_mm_castsi128_ps(rmsLo), _mm_castsi128_ps(rmsHi))); |
856 | | // Apply the correction |
857 | 0 | rms = _mm_sub_epi32(rms, mask); |
858 | | |
859 | | // Pack each 32 bit RMS value to 16 bits |
860 | 0 | rms = sse2_packus_epi32(rms, rms /* could be anything */); |
861 | 0 | #endif |
862 | |
|
863 | 0 | _mm_storel_epi64(reinterpret_cast<__m128i *>(&pDstScanline[iDstPixel]), |
864 | 0 | rms); |
865 | 0 | pSrcScanlineShifted += 2 * DEST_ELTS; |
866 | 0 | } |
867 | |
|
868 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
869 | 0 | return iDstPixel; |
870 | 0 | } |
871 | | |
872 | | /************************************************************************/ |
873 | | /* AverageUInt16SSE2() */ |
874 | | /************************************************************************/ |
875 | | |
876 | | static int |
877 | | AverageUInt16SSE2(int nDstXWidth, int nChunkXSize, |
878 | | const uint16_t *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
879 | | uint16_t *CPL_RESTRICT pDstScanline) |
880 | 0 | { |
881 | | // Optimized implementation for average on UInt16 by |
882 | | // processing by group of 8 output pixels. |
883 | |
|
884 | 0 | const auto mask = _mm_set1_epi32(0xFFFF); |
885 | 0 | const auto two = _mm_set1_epi32(2); |
886 | 0 | const uint16_t *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
887 | |
|
888 | 0 | int iDstPixel = 0; |
889 | 0 | constexpr int DEST_ELTS = static_cast<int>(sizeof(mask) / sizeof(uint16_t)); |
890 | 0 | for (; iDstPixel < nDstXWidth - (DEST_ELTS - 1); iDstPixel += DEST_ELTS) |
891 | 0 | { |
892 | 0 | __m128i averageLow; |
893 | | // Load 8 UInt16 from each line |
894 | 0 | { |
895 | 0 | const auto firstLine = _mm_loadu_si128( |
896 | 0 | reinterpret_cast<__m128i const *>(pSrcScanlineShifted)); |
897 | 0 | const auto secondLine = |
898 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>( |
899 | 0 | pSrcScanlineShifted + nChunkXSize)); |
900 | | |
901 | | // Horizontal addition and extension to 32 bit |
902 | 0 | const auto horizAddFirstLine = _mm_add_epi32( |
903 | 0 | _mm_and_si128(firstLine, mask), _mm_srli_epi32(firstLine, 16)); |
904 | 0 | const auto horizAddSecondLine = |
905 | 0 | _mm_add_epi32(_mm_and_si128(secondLine, mask), |
906 | 0 | _mm_srli_epi32(secondLine, 16)); |
907 | | |
908 | | // Vertical addition and average computation |
909 | | // average = (sum + 2) >> 2 |
910 | 0 | const auto sum = _mm_add_epi32( |
911 | 0 | _mm_add_epi32(horizAddFirstLine, horizAddSecondLine), two); |
912 | 0 | averageLow = _mm_srli_epi32(sum, 2); |
913 | 0 | } |
914 | | // Load 8 UInt16 from each line |
915 | 0 | __m128i averageHigh; |
916 | 0 | { |
917 | 0 | const auto firstLine = |
918 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>( |
919 | 0 | pSrcScanlineShifted + DEST_ELTS)); |
920 | 0 | const auto secondLine = |
921 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>( |
922 | 0 | pSrcScanlineShifted + DEST_ELTS + nChunkXSize)); |
923 | | |
924 | | // Horizontal addition and extension to 32 bit |
925 | 0 | const auto horizAddFirstLine = _mm_add_epi32( |
926 | 0 | _mm_and_si128(firstLine, mask), _mm_srli_epi32(firstLine, 16)); |
927 | 0 | const auto horizAddSecondLine = |
928 | 0 | _mm_add_epi32(_mm_and_si128(secondLine, mask), |
929 | 0 | _mm_srli_epi32(secondLine, 16)); |
930 | | |
931 | | // Vertical addition and average computation |
932 | | // average = (sum + 2) >> 2 |
933 | 0 | const auto sum = _mm_add_epi32( |
934 | 0 | _mm_add_epi32(horizAddFirstLine, horizAddSecondLine), two); |
935 | 0 | averageHigh = _mm_srli_epi32(sum, 2); |
936 | 0 | } |
937 | | |
938 | | // Pack each 32 bit average value to 16 bits |
939 | 0 | auto average = sse2_packus_epi32(averageLow, averageHigh); |
940 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(&pDstScanline[iDstPixel]), |
941 | 0 | average); |
942 | 0 | pSrcScanlineShifted += 2 * DEST_ELTS; |
943 | 0 | } |
944 | |
|
945 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
946 | 0 | return iDstPixel; |
947 | 0 | } |
948 | | |
949 | | /************************************************************************/ |
950 | | /* QuadraticMeanFloatSSE2() */ |
951 | | /************************************************************************/ |
952 | | |
953 | | #if !defined(ARM_V7) |
954 | | |
955 | | #ifdef __SSE3__ |
956 | | #define sse2_hadd_ps _mm_hadd_ps |
957 | | #else |
/** SSE2-only substitute for SSE3 _mm_hadd_ps().
 *
 * @return (a0 + a1, a2 + a3, b0 + b1, b2 + b3)
 */
inline __m128 sse2_hadd_ps(__m128 a, __m128 b)
{
    // Even-indexed lanes (a0, a2, b0, b2) + odd-indexed lanes (a1, a3, b1, b3)
    return _mm_add_ps(_mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0)),
                      _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1)));
}
964 | | #endif |
965 | | |
// Width-agnostic aliases for single-precision vector operations, so that
// QuadraticMeanFloatSSE2() can be written once: they map to the 256 bit
// _mm256_xxx intrinsics when __AVX2__ is defined, and to the 128 bit _mm_xxx
// ones (or local fallbacks) otherwise.
#ifdef __AVX2__
#define set1_ps _mm256_set1_ps
#define loadu_ps _mm256_loadu_ps
#define andnot_ps _mm256_andnot_ps
#define and_ps _mm256_and_ps
#define max_ps _mm256_max_ps
#define shuffle_ps _mm256_shuffle_ps
#define div_ps _mm256_div_ps
// _mm256_cmp_ps() takes an explicit predicate, hence a function-like macro
#define cmpeq_ps(x, y) _mm256_cmp_ps((x), (y), _CMP_EQ_OQ)
#define mul_ps _mm256_mul_ps
#define add_ps _mm256_add_ps
#define hadd_ps _mm256_hadd_ps
#define sqrt_ps _mm256_sqrt_ps
#define or_ps _mm256_or_ps
#define unpacklo_ps _mm256_unpacklo_ps
#define unpackhi_ps _mm256_unpackhi_ps
#define storeu_ps _mm256_storeu_ps
#define blendv_ps _mm256_blendv_ps

// Element-wise square of 8 floats
inline __m256 SQUARE_PS(__m256 x)
{
    return _mm256_mul_ps(x, x);
}

#else

#define set1_ps _mm_set1_ps
#define loadu_ps _mm_loadu_ps
#define andnot_ps _mm_andnot_ps
#define and_ps _mm_and_ps
#define max_ps _mm_max_ps
#define shuffle_ps _mm_shuffle_ps
#define div_ps _mm_div_ps
#define cmpeq_ps _mm_cmpeq_ps
#define mul_ps _mm_mul_ps
#define add_ps _mm_add_ps
// sse2_hadd_ps is either _mm_hadd_ps (SSE3) or its SSE2 emulation
#define hadd_ps sse2_hadd_ps
#define sqrt_ps _mm_sqrt_ps
#define or_ps _mm_or_ps
#define unpacklo_ps _mm_unpacklo_ps
#define unpackhi_ps _mm_unpackhi_ps
#define storeu_ps _mm_storeu_ps

// Per-lane selection: returns b where mask is set, a elsewhere.
// The bitwise fallback gives the same result as _mm_blendv_ps() as long as
// each mask lane is all-ones or all-zeros, which is the case for the
// comparison result passed by QuadraticMeanFloatSSE2().
inline __m128 blendv_ps(__m128 a, __m128 b, __m128 mask)
{
#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
    return _mm_blendv_ps(a, b, mask);
#else
    return _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, b));
#endif
}

// Element-wise square of 4 floats
inline __m128 SQUARE_PS(__m128 x)
{
    return _mm_mul_ps(x, x);
}

// Identity for 128 bit vectors.
// NOTE(review): presumably the __m256 overload, defined outside of this
// excerpt, reorders the 128 bit lanes after per-lane shuffles - to confirm.
inline __m128 FIXUP_LANES(__m128 x)
{
    return x;
}

#endif
1029 | | |
1030 | | static int |
1031 | | #if defined(__GNUC__) |
1032 | | __attribute__((noinline)) |
1033 | | #endif |
1034 | | QuadraticMeanFloatSSE2(int nDstXWidth, int nChunkXSize, |
1035 | | const float *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
1036 | | float *CPL_RESTRICT pDstScanline) |
1037 | 0 | { |
1038 | | // Optimized implementation for RMS on Float32 by |
1039 | | // processing by group of output pixels. |
1040 | 0 | const float *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
1041 | |
|
1042 | 0 | int iDstPixel = 0; |
1043 | 0 | const auto minus_zero = set1_ps(-0.0f); |
1044 | 0 | const auto zeroDot25 = set1_ps(0.25f); |
1045 | 0 | const auto one = set1_ps(1.0f); |
1046 | 0 | const auto infv = set1_ps(std::numeric_limits<float>::infinity()); |
1047 | 0 | constexpr int DEST_ELTS = static_cast<int>(sizeof(one) / sizeof(float)); |
1048 | |
|
1049 | 0 | for (; iDstPixel < nDstXWidth - (DEST_ELTS - 1); iDstPixel += DEST_ELTS) |
1050 | 0 | { |
1051 | | // Load 2*DEST_ELTS Float32 from each line |
1052 | 0 | auto firstLineLo = loadu_ps(pSrcScanlineShifted); |
1053 | 0 | auto firstLineHi = loadu_ps(pSrcScanlineShifted + DEST_ELTS); |
1054 | 0 | auto secondLineLo = loadu_ps(pSrcScanlineShifted + nChunkXSize); |
1055 | 0 | auto secondLineHi = |
1056 | 0 | loadu_ps(pSrcScanlineShifted + DEST_ELTS + nChunkXSize); |
1057 | | |
1058 | | // Take the absolute value |
1059 | 0 | firstLineLo = andnot_ps(minus_zero, firstLineLo); |
1060 | 0 | firstLineHi = andnot_ps(minus_zero, firstLineHi); |
1061 | 0 | secondLineLo = andnot_ps(minus_zero, secondLineLo); |
1062 | 0 | secondLineHi = andnot_ps(minus_zero, secondLineHi); |
1063 | |
|
1064 | 0 | auto firstLineEven = |
1065 | 0 | shuffle_ps(firstLineLo, firstLineHi, _MM_SHUFFLE(2, 0, 2, 0)); |
1066 | 0 | auto firstLineOdd = |
1067 | 0 | shuffle_ps(firstLineLo, firstLineHi, _MM_SHUFFLE(3, 1, 3, 1)); |
1068 | 0 | auto secondLineEven = |
1069 | 0 | shuffle_ps(secondLineLo, secondLineHi, _MM_SHUFFLE(2, 0, 2, 0)); |
1070 | 0 | auto secondLineOdd = |
1071 | 0 | shuffle_ps(secondLineLo, secondLineHi, _MM_SHUFFLE(3, 1, 3, 1)); |
1072 | | |
1073 | | // Compute the maximum of each DEST_ELTS value to RMS-average |
1074 | 0 | const auto maxV = max_ps(max_ps(firstLineEven, firstLineOdd), |
1075 | 0 | max_ps(secondLineEven, secondLineEven)); |
1076 | | |
1077 | | // Normalize each value by the maximum of the DEST_ELTS ones. |
1078 | | // This step is important to avoid that the square evaluates to infinity |
1079 | | // for sufficiently big input. |
1080 | 0 | auto invMax = div_ps(one, maxV); |
1081 | | // Deal with 0 being the maximum to correct division by zero |
1082 | | // note: comparing to -0 leads to identical results as to comparing with |
1083 | | // 0 |
1084 | 0 | invMax = andnot_ps(cmpeq_ps(maxV, minus_zero), invMax); |
1085 | |
|
1086 | 0 | firstLineEven = mul_ps(firstLineEven, invMax); |
1087 | 0 | firstLineOdd = mul_ps(firstLineOdd, invMax); |
1088 | 0 | secondLineEven = mul_ps(secondLineEven, invMax); |
1089 | 0 | secondLineOdd = mul_ps(secondLineOdd, invMax); |
1090 | | |
1091 | | // Compute squares |
1092 | 0 | firstLineEven = SQUARE_PS(firstLineEven); |
1093 | 0 | firstLineOdd = SQUARE_PS(firstLineOdd); |
1094 | 0 | secondLineEven = SQUARE_PS(secondLineEven); |
1095 | 0 | secondLineOdd = SQUARE_PS(secondLineOdd); |
1096 | |
|
1097 | 0 | const auto sumSquares = add_ps(add_ps(firstLineEven, firstLineOdd), |
1098 | 0 | add_ps(secondLineEven, secondLineOdd)); |
1099 | |
|
1100 | 0 | auto rms = mul_ps(maxV, sqrt_ps(mul_ps(sumSquares, zeroDot25))); |
1101 | | |
1102 | | // Deal with infinity being the maximum |
1103 | 0 | const auto maskIsInf = cmpeq_ps(maxV, infv); |
1104 | 0 | rms = blendv_ps(rms, infv, maskIsInf); |
1105 | |
|
1106 | 0 | rms = FIXUP_LANES(rms); |
1107 | |
|
1108 | 0 | storeu_ps(&pDstScanline[iDstPixel], rms); |
1109 | 0 | pSrcScanlineShifted += DEST_ELTS * 2; |
1110 | 0 | } |
1111 | |
|
1112 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
1113 | 0 | return iDstPixel; |
1114 | 0 | } |
1115 | | |
1116 | | /************************************************************************/ |
1117 | | /* AverageFloatSSE2() */ |
1118 | | /************************************************************************/ |
1119 | | |
1120 | | static int AverageFloatSSE2(int nDstXWidth, int nChunkXSize, |
1121 | | const float *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
1122 | | float *CPL_RESTRICT pDstScanline) |
1123 | 0 | { |
1124 | | // Optimized implementation for average on Float32 by |
1125 | | // processing by group of output pixels. |
1126 | 0 | const float *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
1127 | |
|
1128 | 0 | int iDstPixel = 0; |
1129 | 0 | const auto zeroDot25 = _mm_set1_ps(0.25f); |
1130 | 0 | constexpr int DEST_ELTS = |
1131 | 0 | static_cast<int>(sizeof(zeroDot25) / sizeof(float)); |
1132 | |
|
1133 | 0 | for (; iDstPixel < nDstXWidth - (DEST_ELTS - 1); iDstPixel += DEST_ELTS) |
1134 | 0 | { |
1135 | | // Load 2 * DEST_ELTS Float32 from each line |
1136 | 0 | const auto firstLineLo = |
1137 | 0 | _mm_mul_ps(_mm_loadu_ps(pSrcScanlineShifted), zeroDot25); |
1138 | 0 | const auto firstLineHi = _mm_mul_ps( |
1139 | 0 | _mm_loadu_ps(pSrcScanlineShifted + DEST_ELTS), zeroDot25); |
1140 | 0 | const auto secondLineLo = _mm_mul_ps( |
1141 | 0 | _mm_loadu_ps(pSrcScanlineShifted + nChunkXSize), zeroDot25); |
1142 | 0 | const auto secondLineHi = _mm_mul_ps( |
1143 | 0 | _mm_loadu_ps(pSrcScanlineShifted + DEST_ELTS + nChunkXSize), |
1144 | 0 | zeroDot25); |
1145 | | |
1146 | | // Vertical addition |
1147 | 0 | const auto tmpLo = _mm_add_ps(firstLineLo, secondLineLo); |
1148 | 0 | const auto tmpHi = _mm_add_ps(firstLineHi, secondLineHi); |
1149 | | |
1150 | | // Horizontal addition |
1151 | 0 | const auto average = sse2_hadd_ps(tmpLo, tmpHi); |
1152 | |
|
1153 | 0 | _mm_storeu_ps(&pDstScanline[iDstPixel], average); |
1154 | 0 | pSrcScanlineShifted += DEST_ELTS * 2; |
1155 | 0 | } |
1156 | |
|
1157 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
1158 | 0 | return iDstPixel; |
1159 | 0 | } |
1160 | | |
1161 | | /************************************************************************/ |
1162 | | /* AverageDoubleSSE2() */ |
1163 | | /************************************************************************/ |
1164 | | |
1165 | | static int |
1166 | | AverageDoubleSSE2(int nDstXWidth, int nChunkXSize, |
1167 | | const double *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
1168 | | double *CPL_RESTRICT pDstScanline) |
1169 | 0 | { |
1170 | | // Optimized implementation for average on Float64 by |
1171 | | // processing by group of output pixels. |
1172 | 0 | const double *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
1173 | |
|
1174 | 0 | int iDstPixel = 0; |
1175 | 0 | const auto zeroDot25 = _mm_set1_pd(0.25); |
1176 | 0 | constexpr int DEST_ELTS = |
1177 | 0 | static_cast<int>(sizeof(zeroDot25) / sizeof(double)); |
1178 | |
|
1179 | 0 | for (; iDstPixel < nDstXWidth - (DEST_ELTS - 1); iDstPixel += DEST_ELTS) |
1180 | 0 | { |
1181 | | // Load 4 * DEST_ELTS Float64 from each line |
1182 | 0 | const auto firstLine0 = _mm_mul_pd( |
1183 | 0 | _mm_loadu_pd(pSrcScanlineShifted + 0 * DEST_ELTS), zeroDot25); |
1184 | 0 | const auto firstLine1 = _mm_mul_pd( |
1185 | 0 | _mm_loadu_pd(pSrcScanlineShifted + 1 * DEST_ELTS), zeroDot25); |
1186 | 0 | const auto secondLine0 = _mm_mul_pd( |
1187 | 0 | _mm_loadu_pd(pSrcScanlineShifted + 0 * DEST_ELTS + nChunkXSize), |
1188 | 0 | zeroDot25); |
1189 | 0 | const auto secondLine1 = _mm_mul_pd( |
1190 | 0 | _mm_loadu_pd(pSrcScanlineShifted + 1 * DEST_ELTS + nChunkXSize), |
1191 | 0 | zeroDot25); |
1192 | | |
1193 | | // Vertical addition |
1194 | 0 | const auto tmp0 = _mm_add_pd(firstLine0, secondLine0); |
1195 | 0 | const auto tmp1 = _mm_add_pd(firstLine1, secondLine1); |
1196 | | |
1197 | | // Horizontal addition |
1198 | 0 | const auto average0 = sse2_hadd_pd(tmp0, tmp1); |
1199 | |
|
1200 | 0 | _mm_storeu_pd(&pDstScanline[iDstPixel + 0], average0); |
1201 | 0 | pSrcScanlineShifted += DEST_ELTS * 2; |
1202 | 0 | } |
1203 | |
|
1204 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
1205 | 0 | return iDstPixel; |
1206 | 0 | } |
1207 | | |
1208 | | #endif |
1209 | | |
1210 | | #endif |
1211 | | |
1212 | | /************************************************************************/ |
1213 | | /* GDALResampleChunk_AverageOrRMS() */ |
1214 | | /************************************************************************/ |
1215 | | |
1216 | | template <class T, class Tsum, GDALDataType eWrkDataType, bool bQuadraticMean> |
1217 | | static CPLErr |
1218 | | GDALResampleChunk_AverageOrRMS_T(const GDALOverviewResampleArgs &args, |
1219 | | const T *pChunk, void **ppDstBuffer) |
1220 | 0 | { |
1221 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
1222 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
1223 | 0 | const double dfSrcXDelta = args.dfSrcXDelta; |
1224 | 0 | const double dfSrcYDelta = args.dfSrcYDelta; |
1225 | 0 | const GByte *pabyChunkNodataMask = args.pabyChunkNodataMask; |
1226 | 0 | const int nChunkXOff = args.nChunkXOff; |
1227 | 0 | const int nChunkYOff = args.nChunkYOff; |
1228 | 0 | const int nChunkXSize = args.nChunkXSize; |
1229 | 0 | const int nChunkYSize = args.nChunkYSize; |
1230 | 0 | const int nDstXOff = args.nDstXOff; |
1231 | 0 | const int nDstXOff2 = args.nDstXOff2; |
1232 | 0 | const int nDstYOff = args.nDstYOff; |
1233 | 0 | const int nDstYOff2 = args.nDstYOff2; |
1234 | 0 | const char *pszResampling = args.pszResampling; |
1235 | 0 | bool bHasNoData = args.bHasNoData; |
1236 | 0 | const double dfNoDataValue = args.dfNoDataValue; |
1237 | 0 | const GDALColorTable *const poColorTable = |
1238 | 0 | !bQuadraticMean && |
1239 | | // AVERAGE_BIT2GRAYSCALE |
1240 | 0 | CPL_TO_BOOL(STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2G")) |
1241 | 0 | ? nullptr |
1242 | 0 | : args.poColorTable; |
1243 | 0 | const bool bPropagateNoData = args.bPropagateNoData; |
1244 | |
|
1245 | 0 | T tNoDataValue = (!bHasNoData) ? 0 : static_cast<T>(dfNoDataValue); |
1246 | 0 | const T tReplacementVal = |
1247 | 0 | bHasNoData ? static_cast<T>(GDALGetNoDataReplacementValue( |
1248 | 0 | args.eOvrDataType, dfNoDataValue)) |
1249 | 0 | : 0; |
1250 | |
|
1251 | 0 | const int nChunkRightXOff = nChunkXOff + nChunkXSize; |
1252 | 0 | const int nChunkBottomYOff = nChunkYOff + nChunkYSize; |
1253 | 0 | const int nDstXWidth = nDstXOff2 - nDstXOff; |
1254 | | |
1255 | | /* -------------------------------------------------------------------- */ |
1256 | | /* Allocate buffers. */ |
1257 | | /* -------------------------------------------------------------------- */ |
1258 | 0 | *ppDstBuffer = static_cast<T *>( |
1259 | 0 | VSI_MALLOC3_VERBOSE(nDstXWidth, nDstYOff2 - nDstYOff, |
1260 | 0 | GDALGetDataTypeSizeBytes(eWrkDataType))); |
1261 | 0 | if (*ppDstBuffer == nullptr) |
1262 | 0 | { |
1263 | 0 | return CE_Failure; |
1264 | 0 | } |
1265 | 0 | T *const pDstBuffer = static_cast<T *>(*ppDstBuffer); |
1266 | |
|
1267 | 0 | struct PrecomputedXValue |
1268 | 0 | { |
1269 | 0 | int nLeftXOffShifted; |
1270 | 0 | int nRightXOffShifted; |
1271 | 0 | double dfLeftWeight; |
1272 | 0 | double dfRightWeight; |
1273 | 0 | double dfTotalWeightFullLine; |
1274 | 0 | }; |
1275 | |
|
1276 | 0 | PrecomputedXValue *pasSrcX = static_cast<PrecomputedXValue *>( |
1277 | 0 | VSI_MALLOC2_VERBOSE(nDstXWidth, sizeof(PrecomputedXValue))); |
1278 | |
|
1279 | 0 | if (pasSrcX == nullptr) |
1280 | 0 | { |
1281 | 0 | return CE_Failure; |
1282 | 0 | } |
1283 | | |
1284 | 0 | std::vector<GDALColorEntry> colorEntries; |
1285 | |
|
1286 | 0 | if (poColorTable) |
1287 | 0 | { |
1288 | 0 | int nTransparentIdx = -1; |
1289 | 0 | colorEntries = ReadColorTable(*poColorTable, nTransparentIdx); |
1290 | | |
1291 | | // Force c4 of nodata entry to 0 so that GDALFindBestEntry() identifies |
1292 | | // it as nodata value |
1293 | 0 | if (bHasNoData && dfNoDataValue >= 0.0 && |
1294 | 0 | tNoDataValue < colorEntries.size()) |
1295 | 0 | colorEntries[static_cast<int>(tNoDataValue)].c4 = 0; |
1296 | | |
1297 | | // Or if we have no explicit nodata, but a color table entry that is |
1298 | | // transparent, consider it as the nodata value |
1299 | 0 | else if (!bHasNoData && nTransparentIdx >= 0) |
1300 | 0 | { |
1301 | 0 | bHasNoData = true; |
1302 | 0 | tNoDataValue = static_cast<T>(nTransparentIdx); |
1303 | 0 | } |
1304 | 0 | } |
1305 | | |
1306 | | /* ==================================================================== */ |
1307 | | /* Precompute inner loop constants. */ |
1308 | | /* ==================================================================== */ |
1309 | 0 | bool bSrcXSpacingIsTwo = true; |
1310 | 0 | int nLastSrcXOff2 = -1; |
1311 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
1312 | 0 | { |
1313 | 0 | const double dfSrcXOff = dfSrcXDelta + iDstPixel * dfXRatioDstToSrc; |
1314 | | // Apply some epsilon to avoid numerical precision issues |
1315 | 0 | int nSrcXOff = static_cast<int>(dfSrcXOff + 1e-8); |
1316 | 0 | const double dfSrcXOff2 = |
1317 | 0 | dfSrcXDelta + (iDstPixel + 1) * dfXRatioDstToSrc; |
1318 | 0 | int nSrcXOff2 = static_cast<int>(ceil(dfSrcXOff2 - 1e-8)); |
1319 | |
|
1320 | 0 | if (nSrcXOff < nChunkXOff) |
1321 | 0 | nSrcXOff = nChunkXOff; |
1322 | 0 | if (nSrcXOff2 == nSrcXOff) |
1323 | 0 | nSrcXOff2++; |
1324 | 0 | if (nSrcXOff2 > nChunkRightXOff) |
1325 | 0 | nSrcXOff2 = nChunkRightXOff; |
1326 | |
|
1327 | 0 | pasSrcX[iDstPixel - nDstXOff].nLeftXOffShifted = nSrcXOff - nChunkXOff; |
1328 | 0 | pasSrcX[iDstPixel - nDstXOff].nRightXOffShifted = |
1329 | 0 | nSrcXOff2 - nChunkXOff; |
1330 | 0 | pasSrcX[iDstPixel - nDstXOff].dfLeftWeight = |
1331 | 0 | (nSrcXOff2 == nSrcXOff + 1) ? 1.0 : 1 - (dfSrcXOff - nSrcXOff); |
1332 | 0 | pasSrcX[iDstPixel - nDstXOff].dfRightWeight = |
1333 | 0 | 1 - (nSrcXOff2 - dfSrcXOff2); |
1334 | 0 | pasSrcX[iDstPixel - nDstXOff].dfTotalWeightFullLine = |
1335 | 0 | pasSrcX[iDstPixel - nDstXOff].dfLeftWeight; |
1336 | 0 | if (nSrcXOff + 1 < nSrcXOff2) |
1337 | 0 | { |
1338 | 0 | pasSrcX[iDstPixel - nDstXOff].dfTotalWeightFullLine += |
1339 | 0 | nSrcXOff2 - nSrcXOff - 2; |
1340 | 0 | pasSrcX[iDstPixel - nDstXOff].dfTotalWeightFullLine += |
1341 | 0 | pasSrcX[iDstPixel - nDstXOff].dfRightWeight; |
1342 | 0 | } |
1343 | |
|
1344 | 0 | if (nSrcXOff2 - nSrcXOff != 2 || |
1345 | 0 | (nLastSrcXOff2 >= 0 && nLastSrcXOff2 != nSrcXOff)) |
1346 | 0 | { |
1347 | 0 | bSrcXSpacingIsTwo = false; |
1348 | 0 | } |
1349 | 0 | nLastSrcXOff2 = nSrcXOff2; |
1350 | 0 | } |
1351 | | |
1352 | | /* ==================================================================== */ |
1353 | | /* Loop over destination scanlines. */ |
1354 | | /* ==================================================================== */ |
1355 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
1356 | 0 | { |
1357 | 0 | const double dfSrcYOff = dfSrcYDelta + iDstLine * dfYRatioDstToSrc; |
1358 | 0 | int nSrcYOff = static_cast<int>(dfSrcYOff + 1e-8); |
1359 | 0 | if (nSrcYOff < nChunkYOff) |
1360 | 0 | nSrcYOff = nChunkYOff; |
1361 | |
|
1362 | 0 | const double dfSrcYOff2 = |
1363 | 0 | dfSrcYDelta + (iDstLine + 1) * dfYRatioDstToSrc; |
1364 | 0 | int nSrcYOff2 = static_cast<int>(ceil(dfSrcYOff2 - 1e-8)); |
1365 | 0 | if (nSrcYOff2 == nSrcYOff) |
1366 | 0 | ++nSrcYOff2; |
1367 | 0 | if (nSrcYOff2 > nChunkBottomYOff) |
1368 | 0 | nSrcYOff2 = nChunkBottomYOff; |
1369 | |
|
1370 | 0 | T *const pDstScanline = |
1371 | 0 | pDstBuffer + static_cast<size_t>(iDstLine - nDstYOff) * nDstXWidth; |
1372 | | |
1373 | | /* -------------------------------------------------------------------- |
1374 | | */ |
1375 | | /* Loop over destination pixels */ |
1376 | | /* -------------------------------------------------------------------- |
1377 | | */ |
1378 | 0 | if (poColorTable == nullptr) |
1379 | 0 | { |
1380 | 0 | if (bSrcXSpacingIsTwo && nSrcYOff2 == nSrcYOff + 2 && |
1381 | 0 | pabyChunkNodataMask == nullptr) |
1382 | 0 | { |
1383 | | if constexpr (eWrkDataType == GDT_Byte || |
1384 | | eWrkDataType == GDT_UInt16) |
1385 | 0 | { |
1386 | | // Optimized case : no nodata, overview by a factor of 2 and |
1387 | | // regular x and y src spacing. |
1388 | 0 | const T *pSrcScanlineShifted = |
1389 | 0 | pChunk + pasSrcX[0].nLeftXOffShifted + |
1390 | 0 | static_cast<size_t>(nSrcYOff - nChunkYOff) * |
1391 | 0 | nChunkXSize; |
1392 | 0 | int iDstPixel = 0; |
1393 | 0 | #ifdef USE_SSE2 |
1394 | | if constexpr (eWrkDataType == GDT_Byte) |
1395 | 0 | { |
1396 | | if constexpr (bQuadraticMean) |
1397 | 0 | { |
1398 | 0 | iDstPixel = QuadraticMeanByteSSE2OrAVX2( |
1399 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1400 | 0 | pDstScanline); |
1401 | | } |
1402 | | else |
1403 | 0 | { |
1404 | 0 | iDstPixel = AverageByteSSE2OrAVX2( |
1405 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1406 | 0 | pDstScanline); |
1407 | 0 | } |
1408 | | } |
1409 | | else |
1410 | 0 | { |
1411 | 0 | static_assert(eWrkDataType == GDT_UInt16); |
1412 | | if constexpr (bQuadraticMean) |
1413 | 0 | { |
1414 | 0 | iDstPixel = QuadraticMeanUInt16SSE2( |
1415 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1416 | 0 | pDstScanline); |
1417 | | } |
1418 | | else |
1419 | 0 | { |
1420 | 0 | iDstPixel = AverageUInt16SSE2( |
1421 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1422 | 0 | pDstScanline); |
1423 | 0 | } |
1424 | 0 | } |
1425 | 0 | #endif |
1426 | 0 | for (; iDstPixel < nDstXWidth; ++iDstPixel) |
1427 | 0 | { |
1428 | 0 | Tsum nTotal = 0; |
1429 | 0 | T nVal; |
1430 | | if constexpr (bQuadraticMean) |
1431 | 0 | nTotal = |
1432 | 0 | SQUARE<Tsum>(pSrcScanlineShifted[0]) + |
1433 | 0 | SQUARE<Tsum>(pSrcScanlineShifted[1]) + |
1434 | 0 | SQUARE<Tsum>(pSrcScanlineShifted[nChunkXSize]) + |
1435 | 0 | SQUARE<Tsum>( |
1436 | | pSrcScanlineShifted[1 + nChunkXSize]); |
1437 | | else |
1438 | 0 | nTotal = pSrcScanlineShifted[0] + |
1439 | 0 | pSrcScanlineShifted[1] + |
1440 | 0 | pSrcScanlineShifted[nChunkXSize] + |
1441 | 0 | pSrcScanlineShifted[1 + nChunkXSize]; |
1442 | |
|
1443 | 0 | constexpr int nTotalWeight = 4; |
1444 | | if constexpr (bQuadraticMean) |
1445 | 0 | nVal = ComputeIntegerRMS_4values<T>(nTotal); |
1446 | | else |
1447 | 0 | nVal = static_cast<T>((nTotal + nTotalWeight / 2) / |
1448 | 0 | nTotalWeight); |
1449 | | |
1450 | | // No need to compare nVal against tNoDataValue as we |
1451 | | // are in a case where pabyChunkNodataMask == nullptr |
1452 | | // implies the absence of nodata value. |
1453 | 0 | pDstScanline[iDstPixel] = nVal; |
1454 | 0 | pSrcScanlineShifted += 2; |
1455 | 0 | } |
1456 | | } |
1457 | | else |
1458 | 0 | { |
1459 | 0 | static_assert(eWrkDataType == GDT_Float32 || |
1460 | 0 | eWrkDataType == GDT_Float64); |
1461 | 0 | const T *pSrcScanlineShifted = |
1462 | 0 | pChunk + pasSrcX[0].nLeftXOffShifted + |
1463 | 0 | static_cast<size_t>(nSrcYOff - nChunkYOff) * |
1464 | 0 | nChunkXSize; |
1465 | 0 | int iDstPixel = 0; |
1466 | 0 | #if defined(USE_SSE2) && !defined(ARM_V7) |
1467 | | if constexpr (eWrkDataType == GDT_Float32) |
1468 | 0 | { |
1469 | 0 | static_assert(std::is_same_v<T, float>); |
1470 | | if constexpr (bQuadraticMean) |
1471 | 0 | { |
1472 | 0 | iDstPixel = QuadraticMeanFloatSSE2( |
1473 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1474 | 0 | pDstScanline); |
1475 | | } |
1476 | | else |
1477 | 0 | { |
1478 | 0 | iDstPixel = AverageFloatSSE2( |
1479 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1480 | 0 | pDstScanline); |
1481 | 0 | } |
1482 | | } |
1483 | | else |
1484 | 0 | { |
1485 | | if constexpr (!bQuadraticMean) |
1486 | 0 | { |
1487 | 0 | iDstPixel = AverageDoubleSSE2( |
1488 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1489 | 0 | pDstScanline); |
1490 | 0 | } |
1491 | 0 | } |
1492 | 0 | #endif |
1493 | |
|
1494 | 0 | for (; iDstPixel < nDstXWidth; ++iDstPixel) |
1495 | 0 | { |
1496 | 0 | T nVal; |
1497 | |
|
1498 | | if constexpr (bQuadraticMean) |
1499 | 0 | { |
1500 | | // Avoid issues with large values by renormalizing |
1501 | 0 | const auto max = std::max( |
1502 | 0 | {std::fabs(pSrcScanlineShifted[0]), |
1503 | 0 | std::fabs(pSrcScanlineShifted[1]), |
1504 | 0 | std::fabs(pSrcScanlineShifted[nChunkXSize]), |
1505 | 0 | std::fabs( |
1506 | 0 | pSrcScanlineShifted[1 + nChunkXSize])}); |
1507 | 0 | if (max == 0) |
1508 | 0 | { |
1509 | 0 | nVal = 0; |
1510 | 0 | } |
1511 | 0 | else if (std::isinf(max)) |
1512 | 0 | { |
1513 | | // If there is at least one infinity value, |
1514 | | // then just summing, and taking the abs |
1515 | | // value will give the expected result: |
1516 | | // * +inf if all values are +inf |
1517 | | // * +inf if all values are -inf |
1518 | | // * NaN otherwise |
1519 | 0 | nVal = std::fabs( |
1520 | 0 | pSrcScanlineShifted[0] + |
1521 | 0 | pSrcScanlineShifted[1] + |
1522 | 0 | pSrcScanlineShifted[nChunkXSize] + |
1523 | 0 | pSrcScanlineShifted[1 + nChunkXSize]); |
1524 | 0 | } |
1525 | 0 | else |
1526 | 0 | { |
1527 | 0 | const auto inv_max = static_cast<T>(1.0) / max; |
1528 | 0 | nVal = |
1529 | 0 | max * |
1530 | 0 | std::sqrt( |
1531 | 0 | static_cast<T>(0.25) * |
1532 | 0 | (SQUARE(pSrcScanlineShifted[0] * |
1533 | 0 | inv_max) + |
1534 | 0 | SQUARE(pSrcScanlineShifted[1] * |
1535 | 0 | inv_max) + |
1536 | 0 | SQUARE( |
1537 | 0 | pSrcScanlineShifted[nChunkXSize] * |
1538 | 0 | inv_max) + |
1539 | 0 | SQUARE( |
1540 | 0 | pSrcScanlineShifted[1 + |
1541 | 0 | nChunkXSize] * |
1542 | 0 | inv_max))); |
1543 | 0 | } |
1544 | | } |
1545 | | else |
1546 | 0 | { |
1547 | 0 | constexpr auto weight = static_cast<T>(0.25); |
1548 | | // Multiply each value by weight to avoid |
1549 | | // potential overflow |
1550 | 0 | nVal = |
1551 | 0 | (weight * pSrcScanlineShifted[0] + |
1552 | 0 | weight * pSrcScanlineShifted[1] + |
1553 | 0 | weight * pSrcScanlineShifted[nChunkXSize] + |
1554 | 0 | weight * pSrcScanlineShifted[1 + nChunkXSize]); |
1555 | 0 | } |
1556 | | |
1557 | | // No need to compare nVal against tNoDataValue as we |
1558 | | // are in a case where pabyChunkNodataMask == nullptr |
1559 | | // implies the absence of nodata value. |
1560 | 0 | pDstScanline[iDstPixel] = nVal; |
1561 | 0 | pSrcScanlineShifted += 2; |
1562 | 0 | } |
1563 | 0 | } |
1564 | 0 | } |
1565 | 0 | else |
1566 | 0 | { |
1567 | 0 | const double dfBottomWeight = |
1568 | 0 | (nSrcYOff + 1 == nSrcYOff2) ? 1.0 |
1569 | 0 | : 1.0 - (dfSrcYOff - nSrcYOff); |
1570 | 0 | const double dfTopWeight = 1.0 - (nSrcYOff2 - dfSrcYOff2); |
1571 | 0 | nSrcYOff -= nChunkYOff; |
1572 | 0 | nSrcYOff2 -= nChunkYOff; |
1573 | |
|
1574 | 0 | double dfTotalWeightFullColumn = dfBottomWeight; |
1575 | 0 | if (nSrcYOff + 1 < nSrcYOff2) |
1576 | 0 | { |
1577 | 0 | dfTotalWeightFullColumn += nSrcYOff2 - nSrcYOff - 2; |
1578 | 0 | dfTotalWeightFullColumn += dfTopWeight; |
1579 | 0 | } |
1580 | |
|
1581 | 0 | for (int iDstPixel = 0; iDstPixel < nDstXWidth; ++iDstPixel) |
1582 | 0 | { |
1583 | 0 | const int nSrcXOff = pasSrcX[iDstPixel].nLeftXOffShifted; |
1584 | 0 | const int nSrcXOff2 = pasSrcX[iDstPixel].nRightXOffShifted; |
1585 | |
|
1586 | 0 | double dfTotal = 0; |
1587 | 0 | double dfTotalWeight = 0; |
1588 | 0 | [[maybe_unused]] double dfMulFactor = 1.0; |
1589 | 0 | [[maybe_unused]] double dfInvMulFactor = 1.0; |
1590 | 0 | constexpr bool bUseMulFactor = |
1591 | 0 | (eWrkDataType == GDT_Float32 || |
1592 | 0 | eWrkDataType == GDT_Float64); |
1593 | 0 | if (pabyChunkNodataMask == nullptr) |
1594 | 0 | { |
1595 | | if constexpr (bUseMulFactor) |
1596 | 0 | { |
1597 | | if constexpr (bQuadraticMean) |
1598 | 0 | { |
1599 | 0 | T mulFactor = 0; |
1600 | 0 | auto pChunkShifted = |
1601 | 0 | pChunk + |
1602 | 0 | static_cast<size_t>(nSrcYOff) * nChunkXSize; |
1603 | |
|
1604 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; |
1605 | 0 | ++iY, pChunkShifted += nChunkXSize) |
1606 | 0 | { |
1607 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; |
1608 | 0 | ++iX) |
1609 | 0 | mulFactor = std::max( |
1610 | 0 | mulFactor, |
1611 | 0 | std::fabs(pChunkShifted[iX])); |
1612 | 0 | } |
1613 | 0 | dfMulFactor = double(mulFactor); |
1614 | 0 | dfInvMulFactor = |
1615 | 0 | dfMulFactor > 0 && |
1616 | 0 | std::isfinite(dfMulFactor) |
1617 | 0 | ? 1.0 / dfMulFactor |
1618 | 0 | : 1.0; |
1619 | | } |
1620 | | else |
1621 | 0 | { |
1622 | 0 | dfMulFactor = (nSrcYOff2 - nSrcYOff) * |
1623 | 0 | (nSrcXOff2 - nSrcXOff); |
1624 | 0 | dfInvMulFactor = 1.0 / dfMulFactor; |
1625 | 0 | } |
1626 | 0 | } |
1627 | |
|
1628 | 0 | auto pChunkShifted = |
1629 | 0 | pChunk + |
1630 | 0 | static_cast<size_t>(nSrcYOff) * nChunkXSize; |
1631 | 0 | int nCounterY = nSrcYOff2 - nSrcYOff - 1; |
1632 | 0 | double dfWeightY = dfBottomWeight; |
1633 | 0 | while (true) |
1634 | 0 | { |
1635 | 0 | double dfTotalLine; |
1636 | | if constexpr (bQuadraticMean) |
1637 | 0 | { |
1638 | | // Left pixel |
1639 | 0 | { |
1640 | 0 | const T val = pChunkShifted[nSrcXOff]; |
1641 | 0 | dfTotalLine = |
1642 | 0 | SQUARE(double(val) * dfInvMulFactor) * |
1643 | 0 | pasSrcX[iDstPixel].dfLeftWeight; |
1644 | 0 | } |
1645 | |
|
1646 | 0 | if (nSrcXOff + 1 < nSrcXOff2) |
1647 | 0 | { |
1648 | | // Middle pixels |
1649 | 0 | for (int iX = nSrcXOff + 1; |
1650 | 0 | iX < nSrcXOff2 - 1; ++iX) |
1651 | 0 | { |
1652 | 0 | const T val = pChunkShifted[iX]; |
1653 | 0 | dfTotalLine += SQUARE(double(val) * |
1654 | 0 | dfInvMulFactor); |
1655 | 0 | } |
1656 | | |
1657 | | // Right pixel |
1658 | 0 | { |
1659 | 0 | const T val = |
1660 | 0 | pChunkShifted[nSrcXOff2 - 1]; |
1661 | 0 | dfTotalLine += |
1662 | 0 | SQUARE(double(val) * |
1663 | 0 | dfInvMulFactor) * |
1664 | 0 | pasSrcX[iDstPixel].dfRightWeight; |
1665 | 0 | } |
1666 | 0 | } |
1667 | | } |
1668 | | else |
1669 | 0 | { |
1670 | | // Left pixel |
1671 | 0 | { |
1672 | 0 | const T val = pChunkShifted[nSrcXOff]; |
1673 | 0 | dfTotalLine = |
1674 | 0 | double(val) * dfInvMulFactor * |
1675 | 0 | pasSrcX[iDstPixel].dfLeftWeight; |
1676 | 0 | } |
1677 | |
|
1678 | 0 | if (nSrcXOff + 1 < nSrcXOff2) |
1679 | 0 | { |
1680 | | // Middle pixels |
1681 | 0 | for (int iX = nSrcXOff + 1; |
1682 | 0 | iX < nSrcXOff2 - 1; ++iX) |
1683 | 0 | { |
1684 | 0 | const T val = pChunkShifted[iX]; |
1685 | 0 | dfTotalLine += |
1686 | 0 | double(val) * dfInvMulFactor; |
1687 | 0 | } |
1688 | | |
1689 | | // Right pixel |
1690 | 0 | { |
1691 | 0 | const T val = |
1692 | 0 | pChunkShifted[nSrcXOff2 - 1]; |
1693 | 0 | dfTotalLine += |
1694 | 0 | double(val) * dfInvMulFactor * |
1695 | 0 | pasSrcX[iDstPixel].dfRightWeight; |
1696 | 0 | } |
1697 | 0 | } |
1698 | 0 | } |
1699 | |
|
1700 | 0 | dfTotal += dfTotalLine * dfWeightY; |
1701 | 0 | --nCounterY; |
1702 | 0 | if (nCounterY < 0) |
1703 | 0 | break; |
1704 | 0 | pChunkShifted += nChunkXSize; |
1705 | 0 | dfWeightY = (nCounterY == 0) ? dfTopWeight : 1.0; |
1706 | 0 | } |
1707 | |
|
1708 | 0 | dfTotalWeight = |
1709 | 0 | pasSrcX[iDstPixel].dfTotalWeightFullLine * |
1710 | 0 | dfTotalWeightFullColumn; |
1711 | 0 | } |
1712 | 0 | else |
1713 | 0 | { |
1714 | 0 | size_t nCount = 0; |
1715 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
1716 | 0 | { |
1717 | 0 | const auto pChunkShifted = |
1718 | 0 | pChunk + static_cast<size_t>(iY) * nChunkXSize; |
1719 | |
|
1720 | 0 | double dfTotalLine = 0; |
1721 | 0 | double dfTotalWeightLine = 0; |
1722 | | // Left pixel |
1723 | 0 | { |
1724 | 0 | const int iX = nSrcXOff; |
1725 | 0 | const T val = pChunkShifted[iX]; |
1726 | 0 | if (pabyChunkNodataMask |
1727 | 0 | [iX + |
1728 | 0 | static_cast<size_t>(iY) * nChunkXSize]) |
1729 | 0 | { |
1730 | 0 | nCount++; |
1731 | 0 | const double dfWeightX = |
1732 | 0 | pasSrcX[iDstPixel].dfLeftWeight; |
1733 | 0 | dfTotalWeightLine = dfWeightX; |
1734 | | if constexpr (bQuadraticMean) |
1735 | 0 | dfTotalLine = |
1736 | | SQUARE(double(val)) * dfWeightX; |
1737 | | else |
1738 | 0 | dfTotalLine = double(val) * dfWeightX; |
1739 | 0 | } |
1740 | 0 | } |
1741 | |
|
1742 | 0 | if (nSrcXOff < nSrcXOff2 - 1) |
1743 | 0 | { |
1744 | | // Middle pixels |
1745 | 0 | for (int iX = nSrcXOff + 1; iX < nSrcXOff2 - 1; |
1746 | 0 | ++iX) |
1747 | 0 | { |
1748 | 0 | const T val = pChunkShifted[iX]; |
1749 | 0 | if (pabyChunkNodataMask |
1750 | 0 | [iX + static_cast<size_t>(iY) * |
1751 | 0 | nChunkXSize]) |
1752 | 0 | { |
1753 | 0 | nCount++; |
1754 | 0 | dfTotalWeightLine += 1; |
1755 | | if constexpr (bQuadraticMean) |
1756 | 0 | dfTotalLine += SQUARE(double(val)); |
1757 | | else |
1758 | 0 | dfTotalLine += double(val); |
1759 | 0 | } |
1760 | 0 | } |
1761 | | |
1762 | | // Right pixel |
1763 | 0 | { |
1764 | 0 | const int iX = nSrcXOff2 - 1; |
1765 | 0 | const T val = pChunkShifted[iX]; |
1766 | 0 | if (pabyChunkNodataMask |
1767 | 0 | [iX + static_cast<size_t>(iY) * |
1768 | 0 | nChunkXSize]) |
1769 | 0 | { |
1770 | 0 | nCount++; |
1771 | 0 | const double dfWeightX = |
1772 | 0 | pasSrcX[iDstPixel].dfRightWeight; |
1773 | 0 | dfTotalWeightLine += dfWeightX; |
1774 | | if constexpr (bQuadraticMean) |
1775 | 0 | dfTotalLine += |
1776 | | SQUARE(double(val)) * dfWeightX; |
1777 | | else |
1778 | 0 | dfTotalLine += |
1779 | 0 | double(val) * dfWeightX; |
1780 | 0 | } |
1781 | 0 | } |
1782 | 0 | } |
1783 | |
|
1784 | 0 | const double dfWeightY = |
1785 | 0 | (iY == nSrcYOff) ? dfBottomWeight |
1786 | 0 | : (iY + 1 == nSrcYOff2) ? dfTopWeight |
1787 | 0 | : 1.0; |
1788 | 0 | dfTotal += dfTotalLine * dfWeightY; |
1789 | 0 | dfTotalWeight += dfTotalWeightLine * dfWeightY; |
1790 | 0 | } |
1791 | |
|
1792 | 0 | if (nCount == 0 || |
1793 | 0 | (bPropagateNoData && |
1794 | 0 | nCount < |
1795 | 0 | static_cast<size_t>(nSrcYOff2 - nSrcYOff) * |
1796 | 0 | (nSrcXOff2 - nSrcXOff))) |
1797 | 0 | { |
1798 | 0 | pDstScanline[iDstPixel] = tNoDataValue; |
1799 | 0 | continue; |
1800 | 0 | } |
1801 | 0 | } |
1802 | | if constexpr (eWrkDataType == GDT_Byte) |
1803 | 0 | { |
1804 | 0 | T nVal; |
1805 | | if constexpr (bQuadraticMean) |
1806 | 0 | nVal = ComputeIntegerRMS<T, int>(dfTotal, |
1807 | | dfTotalWeight); |
1808 | | else |
1809 | 0 | nVal = |
1810 | 0 | static_cast<T>(dfTotal / dfTotalWeight + 0.5); |
1811 | 0 | if (bHasNoData && nVal == tNoDataValue) |
1812 | 0 | nVal = tReplacementVal; |
1813 | 0 | pDstScanline[iDstPixel] = nVal; |
1814 | | } |
1815 | | else if constexpr (eWrkDataType == GDT_UInt16) |
1816 | 0 | { |
1817 | 0 | T nVal; |
1818 | | if constexpr (bQuadraticMean) |
1819 | 0 | nVal = ComputeIntegerRMS<T, uint64_t>( |
1820 | | dfTotal, dfTotalWeight); |
1821 | | else |
1822 | 0 | nVal = |
1823 | 0 | static_cast<T>(dfTotal / dfTotalWeight + 0.5); |
1824 | 0 | if (bHasNoData && nVal == tNoDataValue) |
1825 | 0 | nVal = tReplacementVal; |
1826 | 0 | pDstScanline[iDstPixel] = nVal; |
1827 | | } |
1828 | | else |
1829 | 0 | { |
1830 | 0 | T nVal; |
1831 | | if constexpr (bQuadraticMean) |
1832 | 0 | { |
1833 | | if constexpr (bUseMulFactor) |
1834 | 0 | nVal = static_cast<T>( |
1835 | 0 | dfMulFactor * |
1836 | | sqrt(dfTotal / dfTotalWeight)); |
1837 | | else |
1838 | | nVal = static_cast<T>( |
1839 | | sqrt(dfTotal / dfTotalWeight)); |
1840 | | } |
1841 | | else |
1842 | 0 | { |
1843 | | if constexpr (bUseMulFactor) |
1844 | 0 | nVal = static_cast<T>( |
1845 | | dfMulFactor * (dfTotal / dfTotalWeight)); |
1846 | | else |
1847 | | nVal = static_cast<T>(dfTotal / dfTotalWeight); |
1848 | 0 | } |
1849 | 0 | if (bHasNoData && nVal == tNoDataValue) |
1850 | 0 | nVal = tReplacementVal; |
1851 | 0 | pDstScanline[iDstPixel] = nVal; |
1852 | 0 | } |
1853 | 0 | } |
1854 | 0 | } |
1855 | 0 | } |
1856 | 0 | else |
1857 | 0 | { |
1858 | 0 | nSrcYOff -= nChunkYOff; |
1859 | 0 | nSrcYOff2 -= nChunkYOff; |
1860 | |
|
1861 | 0 | for (int iDstPixel = 0; iDstPixel < nDstXWidth; ++iDstPixel) |
1862 | 0 | { |
1863 | 0 | const int nSrcXOff = pasSrcX[iDstPixel].nLeftXOffShifted; |
1864 | 0 | const int nSrcXOff2 = pasSrcX[iDstPixel].nRightXOffShifted; |
1865 | |
|
1866 | 0 | uint64_t nTotalR = 0; |
1867 | 0 | uint64_t nTotalG = 0; |
1868 | 0 | uint64_t nTotalB = 0; |
1869 | 0 | size_t nCount = 0; |
1870 | |
|
1871 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
1872 | 0 | { |
1873 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
1874 | 0 | { |
1875 | 0 | const T val = |
1876 | 0 | pChunk[iX + static_cast<size_t>(iY) * nChunkXSize]; |
1877 | | // cppcheck-suppress unsignedLessThanZero |
1878 | 0 | if (val < 0 || val >= colorEntries.size()) |
1879 | 0 | continue; |
1880 | 0 | const size_t idx = static_cast<size_t>(val); |
1881 | 0 | const auto &entry = colorEntries[idx]; |
1882 | 0 | if (entry.c4) |
1883 | 0 | { |
1884 | | if constexpr (bQuadraticMean) |
1885 | 0 | { |
1886 | 0 | nTotalR += SQUARE<int>(entry.c1); |
1887 | 0 | nTotalG += SQUARE<int>(entry.c2); |
1888 | 0 | nTotalB += SQUARE<int>(entry.c3); |
1889 | 0 | ++nCount; |
1890 | | } |
1891 | | else |
1892 | 0 | { |
1893 | 0 | nTotalR += entry.c1; |
1894 | 0 | nTotalG += entry.c2; |
1895 | 0 | nTotalB += entry.c3; |
1896 | 0 | ++nCount; |
1897 | 0 | } |
1898 | 0 | } |
1899 | 0 | } |
1900 | 0 | } |
1901 | |
|
1902 | 0 | if (nCount == 0 || |
1903 | 0 | (bPropagateNoData && |
1904 | 0 | nCount < static_cast<size_t>(nSrcYOff2 - nSrcYOff) * |
1905 | 0 | (nSrcXOff2 - nSrcXOff))) |
1906 | 0 | { |
1907 | 0 | pDstScanline[iDstPixel] = tNoDataValue; |
1908 | 0 | } |
1909 | 0 | else |
1910 | 0 | { |
1911 | 0 | GDALColorEntry color; |
1912 | | if constexpr (bQuadraticMean) |
1913 | 0 | { |
1914 | 0 | color.c1 = |
1915 | 0 | static_cast<short>(sqrt(nTotalR / nCount) + 0.5); |
1916 | 0 | color.c2 = |
1917 | 0 | static_cast<short>(sqrt(nTotalG / nCount) + 0.5); |
1918 | 0 | color.c3 = |
1919 | 0 | static_cast<short>(sqrt(nTotalB / nCount) + 0.5); |
1920 | | } |
1921 | | else |
1922 | 0 | { |
1923 | 0 | color.c1 = |
1924 | 0 | static_cast<short>((nTotalR + nCount / 2) / nCount); |
1925 | 0 | color.c2 = |
1926 | 0 | static_cast<short>((nTotalG + nCount / 2) / nCount); |
1927 | 0 | color.c3 = |
1928 | 0 | static_cast<short>((nTotalB + nCount / 2) / nCount); |
1929 | 0 | } |
1930 | 0 | pDstScanline[iDstPixel] = |
1931 | 0 | static_cast<T>(BestColorEntry(colorEntries, color)); |
1932 | 0 | } |
1933 | 0 | } |
1934 | 0 | } |
1935 | 0 | } |
1936 | |
|
1937 | 0 | CPLFree(pasSrcX); |
1938 | |
|
1939 | 0 | return CE_None; |
1940 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<unsigned char, int, (GDALDataType)1, true>(GDALOverviewResampleArgs const&, unsigned char const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<unsigned short, double, (GDALDataType)2, true>(GDALOverviewResampleArgs const&, unsigned short const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<float, double, (GDALDataType)6, true>(GDALOverviewResampleArgs const&, float const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<double, double, (GDALDataType)7, true>(GDALOverviewResampleArgs const&, double const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<unsigned char, int, (GDALDataType)1, false>(GDALOverviewResampleArgs const&, unsigned char const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<unsigned short, unsigned int, (GDALDataType)2, false>(GDALOverviewResampleArgs const&, unsigned short const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<float, double, (GDALDataType)6, false>(GDALOverviewResampleArgs const&, float const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<double, double, (GDALDataType)7, false>(GDALOverviewResampleArgs const&, double const*, void**) |
1941 | | |
1942 | | template <bool bQuadraticMean> |
1943 | | static CPLErr |
1944 | | GDALResampleChunk_AverageOrRMSInternal(const GDALOverviewResampleArgs &args, |
1945 | | const void *pChunk, void **ppDstBuffer, |
1946 | | GDALDataType *peDstBufferDataType) |
1947 | 0 | { |
1948 | 0 | *peDstBufferDataType = args.eWrkDataType; |
1949 | 0 | switch (args.eWrkDataType) |
1950 | 0 | { |
1951 | 0 | case GDT_Byte: |
1952 | 0 | { |
1953 | 0 | return GDALResampleChunk_AverageOrRMS_T<GByte, int, GDT_Byte, |
1954 | 0 | bQuadraticMean>( |
1955 | 0 | args, static_cast<const GByte *>(pChunk), ppDstBuffer); |
1956 | 0 | } |
1957 | | |
1958 | 0 | case GDT_UInt16: |
1959 | 0 | { |
1960 | | if constexpr (bQuadraticMean) |
1961 | 0 | { |
1962 | | // Use double as accumulation type, because UInt32 could overflow |
1963 | 0 | return GDALResampleChunk_AverageOrRMS_T< |
1964 | 0 | GUInt16, double, GDT_UInt16, bQuadraticMean>( |
1965 | 0 | args, static_cast<const GUInt16 *>(pChunk), ppDstBuffer); |
1966 | | } |
1967 | | else |
1968 | 0 | { |
1969 | 0 | return GDALResampleChunk_AverageOrRMS_T< |
1970 | 0 | GUInt16, GUInt32, GDT_UInt16, bQuadraticMean>( |
1971 | 0 | args, static_cast<const GUInt16 *>(pChunk), ppDstBuffer); |
1972 | 0 | } |
1973 | 0 | } |
1974 | | |
1975 | 0 | case GDT_Float32: |
1976 | 0 | { |
1977 | 0 | return GDALResampleChunk_AverageOrRMS_T<float, double, GDT_Float32, |
1978 | 0 | bQuadraticMean>( |
1979 | 0 | args, static_cast<const float *>(pChunk), ppDstBuffer); |
1980 | 0 | } |
1981 | | |
1982 | 0 | case GDT_Float64: |
1983 | 0 | { |
1984 | 0 | return GDALResampleChunk_AverageOrRMS_T<double, double, GDT_Float64, |
1985 | 0 | bQuadraticMean>( |
1986 | 0 | args, static_cast<const double *>(pChunk), ppDstBuffer); |
1987 | 0 | } |
1988 | | |
1989 | 0 | default: |
1990 | 0 | break; |
1991 | 0 | } |
1992 | | |
1993 | 0 | CPLAssert(false); |
1994 | 0 | return CE_Failure; |
1995 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMSInternal<true>(GDALOverviewResampleArgs const&, void const*, void**, GDALDataType*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMSInternal<false>(GDALOverviewResampleArgs const&, void const*, void**, GDALDataType*) |
1996 | | |
1997 | | static CPLErr |
1998 | | GDALResampleChunk_AverageOrRMS(const GDALOverviewResampleArgs &args, |
1999 | | const void *pChunk, void **ppDstBuffer, |
2000 | | GDALDataType *peDstBufferDataType) |
2001 | 0 | { |
2002 | 0 | if (EQUAL(args.pszResampling, "RMS")) |
2003 | 0 | return GDALResampleChunk_AverageOrRMSInternal<true>( |
2004 | 0 | args, pChunk, ppDstBuffer, peDstBufferDataType); |
2005 | 0 | else |
2006 | 0 | return GDALResampleChunk_AverageOrRMSInternal<false>( |
2007 | 0 | args, pChunk, ppDstBuffer, peDstBufferDataType); |
2008 | 0 | } |
2009 | | |
2010 | | /************************************************************************/ |
2011 | | /* GDALResampleChunk_Gauss() */ |
2012 | | /************************************************************************/ |
2013 | | |
2014 | | static CPLErr GDALResampleChunk_Gauss(const GDALOverviewResampleArgs &args, |
2015 | | const void *pChunk, void **ppDstBuffer, |
2016 | | GDALDataType *peDstBufferDataType) |
2017 | | |
2018 | 0 | { |
2019 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
2020 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
2021 | 0 | const GByte *pabyChunkNodataMask = args.pabyChunkNodataMask; |
2022 | 0 | const int nChunkXOff = args.nChunkXOff; |
2023 | 0 | const int nChunkXSize = args.nChunkXSize; |
2024 | 0 | const int nChunkYOff = args.nChunkYOff; |
2025 | 0 | const int nChunkYSize = args.nChunkYSize; |
2026 | 0 | const int nDstXOff = args.nDstXOff; |
2027 | 0 | const int nDstXOff2 = args.nDstXOff2; |
2028 | 0 | const int nDstYOff = args.nDstYOff; |
2029 | 0 | const int nDstYOff2 = args.nDstYOff2; |
2030 | 0 | const bool bHasNoData = args.bHasNoData; |
2031 | 0 | double dfNoDataValue = args.dfNoDataValue; |
2032 | 0 | const GDALColorTable *poColorTable = args.poColorTable; |
2033 | |
|
2034 | 0 | const double *const padfChunk = static_cast<const double *>(pChunk); |
2035 | |
|
2036 | 0 | *ppDstBuffer = |
2037 | 0 | VSI_MALLOC3_VERBOSE(nDstXOff2 - nDstXOff, nDstYOff2 - nDstYOff, |
2038 | 0 | GDALGetDataTypeSizeBytes(GDT_Float64)); |
2039 | 0 | if (*ppDstBuffer == nullptr) |
2040 | 0 | { |
2041 | 0 | return CE_Failure; |
2042 | 0 | } |
2043 | 0 | *peDstBufferDataType = GDT_Float64; |
2044 | 0 | double *const padfDstBuffer = static_cast<double *>(*ppDstBuffer); |
2045 | | |
2046 | | /* -------------------------------------------------------------------- */ |
2047 | | /* Create the filter kernel and allocate scanline buffer. */ |
2048 | | /* -------------------------------------------------------------------- */ |
2049 | 0 | int nGaussMatrixDim = 3; |
2050 | 0 | const int *panGaussMatrix; |
2051 | 0 | constexpr int anGaussMatrix3x3[] = {1, 2, 1, 2, 4, 2, 1, 2, 1}; |
2052 | 0 | constexpr int anGaussMatrix5x5[] = {1, 4, 6, 4, 1, 4, 16, 24, 16, |
2053 | 0 | 4, 6, 24, 36, 24, 6, 4, 16, 24, |
2054 | 0 | 16, 4, 1, 4, 6, 4, 1}; |
2055 | 0 | constexpr int anGaussMatrix7x7[] = { |
2056 | 0 | 1, 6, 15, 20, 15, 6, 1, 6, 36, 90, 120, 90, 36, |
2057 | 0 | 6, 15, 90, 225, 300, 225, 90, 15, 20, 120, 300, 400, 300, |
2058 | 0 | 120, 20, 15, 90, 225, 300, 225, 90, 15, 6, 36, 90, 120, |
2059 | 0 | 90, 36, 6, 1, 6, 15, 20, 15, 6, 1}; |
2060 | |
|
2061 | 0 | const int nOXSize = args.nOvrXSize; |
2062 | 0 | const int nOYSize = args.nOvrYSize; |
2063 | 0 | const int nResYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc); |
2064 | | |
2065 | | // matrix for gauss filter |
2066 | 0 | if (nResYFactor <= 2) |
2067 | 0 | { |
2068 | 0 | panGaussMatrix = anGaussMatrix3x3; |
2069 | 0 | nGaussMatrixDim = 3; |
2070 | 0 | } |
2071 | 0 | else if (nResYFactor <= 4) |
2072 | 0 | { |
2073 | 0 | panGaussMatrix = anGaussMatrix5x5; |
2074 | 0 | nGaussMatrixDim = 5; |
2075 | 0 | } |
2076 | 0 | else |
2077 | 0 | { |
2078 | 0 | panGaussMatrix = anGaussMatrix7x7; |
2079 | 0 | nGaussMatrixDim = 7; |
2080 | 0 | } |
2081 | |
|
2082 | | #ifdef DEBUG_OUT_OF_BOUND_ACCESS |
2083 | | int *panGaussMatrixDup = static_cast<int *>( |
2084 | | CPLMalloc(sizeof(int) * nGaussMatrixDim * nGaussMatrixDim)); |
2085 | | memcpy(panGaussMatrixDup, panGaussMatrix, |
2086 | | sizeof(int) * nGaussMatrixDim * nGaussMatrixDim); |
2087 | | panGaussMatrix = panGaussMatrixDup; |
2088 | | #endif |
2089 | |
|
2090 | 0 | if (!bHasNoData) |
2091 | 0 | dfNoDataValue = 0.0; |
2092 | |
|
2093 | 0 | std::vector<GDALColorEntry> colorEntries; |
2094 | 0 | int nTransparentIdx = -1; |
2095 | 0 | if (poColorTable) |
2096 | 0 | colorEntries = ReadColorTable(*poColorTable, nTransparentIdx); |
2097 | | |
2098 | | // Force c4 of nodata entry to 0 so that GDALFindBestEntry() identifies |
2099 | | // it as nodata value. |
2100 | 0 | if (bHasNoData && dfNoDataValue >= 0.0 && |
2101 | 0 | dfNoDataValue < colorEntries.size()) |
2102 | 0 | colorEntries[static_cast<int>(dfNoDataValue)].c4 = 0; |
2103 | | |
2104 | | // Or if we have no explicit nodata, but a color table entry that is |
2105 | | // transparent, consider it as the nodata value. |
2106 | 0 | else if (!bHasNoData && nTransparentIdx >= 0) |
2107 | 0 | { |
2108 | 0 | dfNoDataValue = nTransparentIdx; |
2109 | 0 | } |
2110 | |
|
2111 | 0 | const int nChunkRightXOff = nChunkXOff + nChunkXSize; |
2112 | 0 | const int nChunkBottomYOff = nChunkYOff + nChunkYSize; |
2113 | 0 | const int nDstXWidth = nDstXOff2 - nDstXOff; |
2114 | | |
2115 | | /* ==================================================================== */ |
2116 | | /* Loop over destination scanlines. */ |
2117 | | /* ==================================================================== */ |
2118 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
2119 | 0 | { |
2120 | 0 | int nSrcYOff = static_cast<int>(0.5 + iDstLine * dfYRatioDstToSrc); |
2121 | 0 | int nSrcYOff2 = |
2122 | 0 | static_cast<int>(0.5 + (iDstLine + 1) * dfYRatioDstToSrc) + 1; |
2123 | |
|
2124 | 0 | if (nSrcYOff < nChunkYOff) |
2125 | 0 | { |
2126 | 0 | nSrcYOff = nChunkYOff; |
2127 | 0 | nSrcYOff2++; |
2128 | 0 | } |
2129 | |
|
2130 | 0 | const int iSizeY = nSrcYOff2 - nSrcYOff; |
2131 | 0 | nSrcYOff = nSrcYOff + iSizeY / 2 - nGaussMatrixDim / 2; |
2132 | 0 | nSrcYOff2 = nSrcYOff + nGaussMatrixDim; |
2133 | |
|
2134 | 0 | if (nSrcYOff2 > nChunkBottomYOff || |
2135 | 0 | (dfYRatioDstToSrc > 1 && iDstLine == nOYSize - 1)) |
2136 | 0 | { |
2137 | 0 | nSrcYOff2 = std::min(nChunkBottomYOff, nSrcYOff + nGaussMatrixDim); |
2138 | 0 | } |
2139 | |
|
2140 | 0 | int nYShiftGaussMatrix = 0; |
2141 | 0 | if (nSrcYOff < nChunkYOff) |
2142 | 0 | { |
2143 | 0 | nYShiftGaussMatrix = -(nSrcYOff - nChunkYOff); |
2144 | 0 | nSrcYOff = nChunkYOff; |
2145 | 0 | } |
2146 | |
|
2147 | 0 | const double *const padfSrcScanline = |
2148 | 0 | padfChunk + ((nSrcYOff - nChunkYOff) * nChunkXSize); |
2149 | 0 | const GByte *pabySrcScanlineNodataMask = nullptr; |
2150 | 0 | if (pabyChunkNodataMask != nullptr) |
2151 | 0 | pabySrcScanlineNodataMask = |
2152 | 0 | pabyChunkNodataMask + ((nSrcYOff - nChunkYOff) * nChunkXSize); |
2153 | | |
2154 | | /* -------------------------------------------------------------------- |
2155 | | */ |
2156 | | /* Loop over destination pixels */ |
2157 | | /* -------------------------------------------------------------------- |
2158 | | */ |
2159 | 0 | double *const padfDstScanline = |
2160 | 0 | padfDstBuffer + (iDstLine - nDstYOff) * nDstXWidth; |
2161 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
2162 | 0 | { |
2163 | 0 | int nSrcXOff = static_cast<int>(0.5 + iDstPixel * dfXRatioDstToSrc); |
2164 | 0 | int nSrcXOff2 = |
2165 | 0 | static_cast<int>(0.5 + (iDstPixel + 1) * dfXRatioDstToSrc) + 1; |
2166 | |
|
2167 | 0 | if (nSrcXOff < nChunkXOff) |
2168 | 0 | { |
2169 | 0 | nSrcXOff = nChunkXOff; |
2170 | 0 | nSrcXOff2++; |
2171 | 0 | } |
2172 | |
|
2173 | 0 | const int iSizeX = nSrcXOff2 - nSrcXOff; |
2174 | 0 | nSrcXOff = nSrcXOff + iSizeX / 2 - nGaussMatrixDim / 2; |
2175 | 0 | nSrcXOff2 = nSrcXOff + nGaussMatrixDim; |
2176 | |
|
2177 | 0 | if (nSrcXOff2 > nChunkRightXOff || |
2178 | 0 | (dfXRatioDstToSrc > 1 && iDstPixel == nOXSize - 1)) |
2179 | 0 | { |
2180 | 0 | nSrcXOff2 = |
2181 | 0 | std::min(nChunkRightXOff, nSrcXOff + nGaussMatrixDim); |
2182 | 0 | } |
2183 | |
|
2184 | 0 | int nXShiftGaussMatrix = 0; |
2185 | 0 | if (nSrcXOff < nChunkXOff) |
2186 | 0 | { |
2187 | 0 | nXShiftGaussMatrix = -(nSrcXOff - nChunkXOff); |
2188 | 0 | nSrcXOff = nChunkXOff; |
2189 | 0 | } |
2190 | |
|
2191 | 0 | if (poColorTable == nullptr) |
2192 | 0 | { |
2193 | 0 | double dfTotal = 0.0; |
2194 | 0 | GInt64 nCount = 0; |
2195 | 0 | const int *panLineWeight = |
2196 | 0 | panGaussMatrix + nYShiftGaussMatrix * nGaussMatrixDim + |
2197 | 0 | nXShiftGaussMatrix; |
2198 | |
|
2199 | 0 | for (int j = 0, iY = nSrcYOff; iY < nSrcYOff2; |
2200 | 0 | ++iY, ++j, panLineWeight += nGaussMatrixDim) |
2201 | 0 | { |
2202 | 0 | for (int i = 0, iX = nSrcXOff; iX < nSrcXOff2; ++iX, ++i) |
2203 | 0 | { |
2204 | 0 | const double val = |
2205 | 0 | padfSrcScanline[iX - nChunkXOff + |
2206 | 0 | static_cast<GPtrDiff_t>(iY - |
2207 | 0 | nSrcYOff) * |
2208 | 0 | nChunkXSize]; |
2209 | 0 | if (pabySrcScanlineNodataMask == nullptr || |
2210 | 0 | pabySrcScanlineNodataMask[iX - nChunkXOff + |
2211 | 0 | static_cast<GPtrDiff_t>( |
2212 | 0 | iY - nSrcYOff) * |
2213 | 0 | nChunkXSize]) |
2214 | 0 | { |
2215 | 0 | const int nWeight = panLineWeight[i]; |
2216 | 0 | dfTotal += val * nWeight; |
2217 | 0 | nCount += nWeight; |
2218 | 0 | } |
2219 | 0 | } |
2220 | 0 | } |
2221 | |
|
2222 | 0 | if (nCount == 0) |
2223 | 0 | { |
2224 | 0 | padfDstScanline[iDstPixel - nDstXOff] = dfNoDataValue; |
2225 | 0 | } |
2226 | 0 | else |
2227 | 0 | { |
2228 | 0 | padfDstScanline[iDstPixel - nDstXOff] = dfTotal / nCount; |
2229 | 0 | } |
2230 | 0 | } |
2231 | 0 | else |
2232 | 0 | { |
2233 | 0 | GInt64 nTotalR = 0; |
2234 | 0 | GInt64 nTotalG = 0; |
2235 | 0 | GInt64 nTotalB = 0; |
2236 | 0 | GInt64 nTotalWeight = 0; |
2237 | 0 | const int *panLineWeight = |
2238 | 0 | panGaussMatrix + nYShiftGaussMatrix * nGaussMatrixDim + |
2239 | 0 | nXShiftGaussMatrix; |
2240 | |
|
2241 | 0 | for (int j = 0, iY = nSrcYOff; iY < nSrcYOff2; |
2242 | 0 | ++iY, ++j, panLineWeight += nGaussMatrixDim) |
2243 | 0 | { |
2244 | 0 | for (int i = 0, iX = nSrcXOff; iX < nSrcXOff2; ++iX, ++i) |
2245 | 0 | { |
2246 | 0 | const double val = |
2247 | 0 | padfSrcScanline[iX - nChunkXOff + |
2248 | 0 | static_cast<GPtrDiff_t>(iY - |
2249 | 0 | nSrcYOff) * |
2250 | 0 | nChunkXSize]; |
2251 | 0 | if (val < 0 || val >= colorEntries.size()) |
2252 | 0 | continue; |
2253 | | |
2254 | 0 | size_t idx = static_cast<size_t>(val); |
2255 | 0 | if (colorEntries[idx].c4) |
2256 | 0 | { |
2257 | 0 | const int nWeight = panLineWeight[i]; |
2258 | 0 | nTotalR += |
2259 | 0 | static_cast<GInt64>(colorEntries[idx].c1) * |
2260 | 0 | nWeight; |
2261 | 0 | nTotalG += |
2262 | 0 | static_cast<GInt64>(colorEntries[idx].c2) * |
2263 | 0 | nWeight; |
2264 | 0 | nTotalB += |
2265 | 0 | static_cast<GInt64>(colorEntries[idx].c3) * |
2266 | 0 | nWeight; |
2267 | 0 | nTotalWeight += nWeight; |
2268 | 0 | } |
2269 | 0 | } |
2270 | 0 | } |
2271 | |
|
2272 | 0 | if (nTotalWeight == 0) |
2273 | 0 | { |
2274 | 0 | padfDstScanline[iDstPixel - nDstXOff] = dfNoDataValue; |
2275 | 0 | } |
2276 | 0 | else |
2277 | 0 | { |
2278 | 0 | GDALColorEntry color; |
2279 | |
|
2280 | 0 | color.c1 = static_cast<short>((nTotalR + nTotalWeight / 2) / |
2281 | 0 | nTotalWeight); |
2282 | 0 | color.c2 = static_cast<short>((nTotalG + nTotalWeight / 2) / |
2283 | 0 | nTotalWeight); |
2284 | 0 | color.c3 = static_cast<short>((nTotalB + nTotalWeight / 2) / |
2285 | 0 | nTotalWeight); |
2286 | 0 | padfDstScanline[iDstPixel - nDstXOff] = |
2287 | 0 | BestColorEntry(colorEntries, color); |
2288 | 0 | } |
2289 | 0 | } |
2290 | 0 | } |
2291 | 0 | } |
2292 | |
|
2293 | | #ifdef DEBUG_OUT_OF_BOUND_ACCESS |
2294 | | CPLFree(panGaussMatrixDup); |
2295 | | #endif |
2296 | |
|
2297 | 0 | return CE_None; |
2298 | 0 | } |
2299 | | |
2300 | | /************************************************************************/ |
2301 | | /* GDALResampleChunk_Mode() */ |
2302 | | /************************************************************************/ |
2303 | | |
/** Generic value comparison: true when a and b compare equal with ==. */
template <typename T> static inline bool IsSame(T a, T b)
{
    const bool bEqual = (a == b);
    return bEqual;
}
2308 | | |
2309 | | template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b) |
2310 | 0 | { |
2311 | 0 | return a == b || (CPLIsNan(a) && CPLIsNan(b)); |
2312 | 0 | } |
2313 | | |
2314 | | template <> bool IsSame<float>(float a, float b) |
2315 | 0 | { |
2316 | 0 | return a == b || (std::isnan(a) && std::isnan(b)); |
2317 | 0 | } |
2318 | | |
2319 | | template <> bool IsSame<double>(double a, double b) |
2320 | 0 | { |
2321 | 0 | return a == b || (std::isnan(a) && std::isnan(b)); |
2322 | 0 | } |
2323 | | |
2324 | | namespace |
2325 | | { |
2326 | | struct ComplexFloat16 |
2327 | | { |
2328 | | GFloat16 r; |
2329 | | GFloat16 i; |
2330 | | }; |
2331 | | } // namespace |
2332 | | |
2333 | | template <> bool IsSame<ComplexFloat16>(ComplexFloat16 a, ComplexFloat16 b) |
2334 | 0 | { |
2335 | 0 | return (a.r == b.r && a.i == b.i) || |
2336 | 0 | (CPLIsNan(a.r) && CPLIsNan(a.i) && CPLIsNan(b.r) && CPLIsNan(b.i)); |
2337 | 0 | } |
2338 | | |
2339 | | template <> |
2340 | | bool IsSame<std::complex<float>>(std::complex<float> a, std::complex<float> b) |
2341 | 0 | { |
2342 | 0 | return a == b || (std::isnan(a.real()) && std::isnan(a.imag()) && |
2343 | 0 | std::isnan(b.real()) && std::isnan(b.imag())); |
2344 | 0 | } |
2345 | | |
2346 | | template <> |
2347 | | bool IsSame<std::complex<double>>(std::complex<double> a, |
2348 | | std::complex<double> b) |
2349 | 0 | { |
2350 | 0 | return a == b || (std::isnan(a.real()) && std::isnan(a.imag()) && |
2351 | 0 | std::isnan(b.real()) && std::isnan(b.imag())); |
2352 | 0 | } |
2353 | | |
2354 | | template <class T> |
2355 | | static CPLErr GDALResampleChunk_ModeT(const GDALOverviewResampleArgs &args, |
2356 | | const T *pChunk, T *const pDstBuffer) |
2357 | | |
2358 | 0 | { |
2359 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
2360 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
2361 | 0 | const double dfSrcXDelta = args.dfSrcXDelta; |
2362 | 0 | const double dfSrcYDelta = args.dfSrcYDelta; |
2363 | 0 | const GByte *pabyChunkNodataMask = args.pabyChunkNodataMask; |
2364 | 0 | const int nChunkXOff = args.nChunkXOff; |
2365 | 0 | const int nChunkXSize = args.nChunkXSize; |
2366 | 0 | const int nChunkYOff = args.nChunkYOff; |
2367 | 0 | const int nChunkYSize = args.nChunkYSize; |
2368 | 0 | const int nDstXOff = args.nDstXOff; |
2369 | 0 | const int nDstXOff2 = args.nDstXOff2; |
2370 | 0 | const int nDstYOff = args.nDstYOff; |
2371 | 0 | const int nDstYOff2 = args.nDstYOff2; |
2372 | 0 | const bool bHasNoData = args.bHasNoData; |
2373 | 0 | const GDALColorTable *poColorTable = args.poColorTable; |
2374 | 0 | const int nDstXSize = nDstXOff2 - nDstXOff; |
2375 | |
|
2376 | 0 | T tNoDataValue; |
2377 | | if constexpr (std::is_same<T, ComplexFloat16>::value) |
2378 | 0 | { |
2379 | 0 | tNoDataValue.r = cpl::NumericLimits<GFloat16>::quiet_NaN(); |
2380 | 0 | tNoDataValue.i = cpl::NumericLimits<GFloat16>::quiet_NaN(); |
2381 | | } |
2382 | | else if constexpr (std::is_same<T, std::complex<float>>::value || |
2383 | | std::is_same<T, std::complex<double>>::value) |
2384 | 0 | { |
2385 | 0 | using BaseT = typename T::value_type; |
2386 | 0 | tNoDataValue = |
2387 | 0 | std::complex<BaseT>(std::numeric_limits<BaseT>::quiet_NaN(), |
2388 | 0 | std::numeric_limits<BaseT>::quiet_NaN()); |
2389 | | } |
2390 | 0 | else if (!bHasNoData || !GDALIsValueInRange<T>(args.dfNoDataValue)) |
2391 | 0 | tNoDataValue = 0; |
2392 | 0 | else |
2393 | 0 | tNoDataValue = static_cast<T>(args.dfNoDataValue); |
2394 | |
|
2395 | 0 | using CountType = uint32_t; |
2396 | 0 | CountType nMaxNumPx = 0; |
2397 | 0 | T *paVals = nullptr; |
2398 | 0 | CountType *panCounts = nullptr; |
2399 | |
|
2400 | 0 | const int nChunkRightXOff = nChunkXOff + nChunkXSize; |
2401 | 0 | const int nChunkBottomYOff = nChunkYOff + nChunkYSize; |
2402 | 0 | std::vector<int> anVals(256, 0); |
2403 | | |
2404 | | /* ==================================================================== */ |
2405 | | /* Loop over destination scanlines. */ |
2406 | | /* ==================================================================== */ |
2407 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
2408 | 0 | { |
2409 | 0 | const double dfSrcYOff = dfSrcYDelta + iDstLine * dfYRatioDstToSrc; |
2410 | 0 | int nSrcYOff = static_cast<int>(dfSrcYOff + 1e-8); |
2411 | | #ifdef only_pixels_with_more_than_10_pct_participation |
2412 | | // When oversampling, don't take into account pixels that have a tiny |
2413 | | // participation in the resulting pixel |
2414 | | if (dfYRatioDstToSrc > 1 && dfSrcYOff - nSrcYOff > 0.9 && |
2415 | | nSrcYOff < nChunkBottomYOff) |
2416 | | nSrcYOff++; |
2417 | | #endif |
2418 | 0 | if (nSrcYOff < nChunkYOff) |
2419 | 0 | nSrcYOff = nChunkYOff; |
2420 | |
|
2421 | 0 | const double dfSrcYOff2 = |
2422 | 0 | dfSrcYDelta + (iDstLine + 1) * dfYRatioDstToSrc; |
2423 | 0 | int nSrcYOff2 = static_cast<int>(ceil(dfSrcYOff2 - 1e-8)); |
2424 | | #ifdef only_pixels_with_more_than_10_pct_participation |
2425 | | // When oversampling, don't take into account pixels that have a tiny |
2426 | | // participation in the resulting pixel |
2427 | | if (dfYRatioDstToSrc > 1 && nSrcYOff2 - dfSrcYOff2 > 0.9 && |
2428 | | nSrcYOff2 > nChunkYOff) |
2429 | | nSrcYOff2--; |
2430 | | #endif |
2431 | 0 | if (nSrcYOff2 == nSrcYOff) |
2432 | 0 | ++nSrcYOff2; |
2433 | 0 | if (nSrcYOff2 > nChunkBottomYOff) |
2434 | 0 | nSrcYOff2 = nChunkBottomYOff; |
2435 | |
|
2436 | 0 | const T *const paSrcScanline = |
2437 | 0 | pChunk + |
2438 | 0 | (static_cast<GPtrDiff_t>(nSrcYOff - nChunkYOff) * nChunkXSize); |
2439 | 0 | const GByte *pabySrcScanlineNodataMask = nullptr; |
2440 | 0 | if (pabyChunkNodataMask != nullptr) |
2441 | 0 | pabySrcScanlineNodataMask = |
2442 | 0 | pabyChunkNodataMask + |
2443 | 0 | static_cast<GPtrDiff_t>(nSrcYOff - nChunkYOff) * nChunkXSize; |
2444 | |
|
2445 | 0 | T *const paDstScanline = pDstBuffer + (iDstLine - nDstYOff) * nDstXSize; |
2446 | | /* -------------------------------------------------------------------- |
2447 | | */ |
2448 | | /* Loop over destination pixels */ |
2449 | | /* -------------------------------------------------------------------- |
2450 | | */ |
2451 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
2452 | 0 | { |
2453 | 0 | const double dfSrcXOff = dfSrcXDelta + iDstPixel * dfXRatioDstToSrc; |
2454 | | // Apply some epsilon to avoid numerical precision issues |
2455 | 0 | int nSrcXOff = static_cast<int>(dfSrcXOff + 1e-8); |
2456 | | #ifdef only_pixels_with_more_than_10_pct_participation |
2457 | | // When oversampling, don't take into account pixels that have a |
2458 | | // tiny participation in the resulting pixel |
2459 | | if (dfXRatioDstToSrc > 1 && dfSrcXOff - nSrcXOff > 0.9 && |
2460 | | nSrcXOff < nChunkRightXOff) |
2461 | | nSrcXOff++; |
2462 | | #endif |
2463 | 0 | if (nSrcXOff < nChunkXOff) |
2464 | 0 | nSrcXOff = nChunkXOff; |
2465 | |
|
2466 | 0 | const double dfSrcXOff2 = |
2467 | 0 | dfSrcXDelta + (iDstPixel + 1) * dfXRatioDstToSrc; |
2468 | 0 | int nSrcXOff2 = static_cast<int>(ceil(dfSrcXOff2 - 1e-8)); |
2469 | | #ifdef only_pixels_with_more_than_10_pct_participation |
2470 | | // When oversampling, don't take into account pixels that have a |
2471 | | // tiny participation in the resulting pixel |
2472 | | if (dfXRatioDstToSrc > 1 && nSrcXOff2 - dfSrcXOff2 > 0.9 && |
2473 | | nSrcXOff2 > nChunkXOff) |
2474 | | nSrcXOff2--; |
2475 | | #endif |
2476 | 0 | if (nSrcXOff2 == nSrcXOff) |
2477 | 0 | nSrcXOff2++; |
2478 | 0 | if (nSrcXOff2 > nChunkRightXOff) |
2479 | 0 | nSrcXOff2 = nChunkRightXOff; |
2480 | |
|
2481 | 0 | bool bRegularProcessing = false; |
2482 | | if constexpr (!std::is_same<T, GByte>::value) |
2483 | 0 | bRegularProcessing = true; |
2484 | 0 | else if (poColorTable && poColorTable->GetColorEntryCount() > 256) |
2485 | 0 | bRegularProcessing = true; |
2486 | |
|
2487 | 0 | if (bRegularProcessing) |
2488 | 0 | { |
2489 | | // Sanity check to make sure the allocation of paVals and |
2490 | | // panCounts don't overflow. |
2491 | 0 | static_assert(sizeof(CountType) <= sizeof(size_t)); |
2492 | 0 | if (nSrcYOff2 - nSrcYOff <= 0 || nSrcXOff2 - nSrcXOff <= 0 || |
2493 | 0 | static_cast<CountType>(nSrcYOff2 - nSrcYOff) > |
2494 | 0 | (std::numeric_limits<CountType>::max() / |
2495 | 0 | std::max(sizeof(T), sizeof(CountType))) / |
2496 | 0 | static_cast<CountType>(nSrcXOff2 - nSrcXOff)) |
2497 | 0 | { |
2498 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
2499 | 0 | "Too big downsampling factor"); |
2500 | 0 | CPLFree(paVals); |
2501 | 0 | CPLFree(panCounts); |
2502 | 0 | return CE_Failure; |
2503 | 0 | } |
2504 | 0 | const CountType nNumPx = |
2505 | 0 | static_cast<CountType>(nSrcYOff2 - nSrcYOff) * |
2506 | 0 | (nSrcXOff2 - nSrcXOff); |
2507 | 0 | CountType iMaxInd = 0; |
2508 | 0 | CountType iMaxVal = 0; |
2509 | |
|
2510 | 0 | if (paVals == nullptr || nNumPx > nMaxNumPx) |
2511 | 0 | { |
2512 | 0 | T *paValsNew = static_cast<T *>( |
2513 | 0 | VSI_REALLOC_VERBOSE(paVals, nNumPx * sizeof(T))); |
2514 | 0 | CountType *panCountsNew = |
2515 | 0 | static_cast<CountType *>(VSI_REALLOC_VERBOSE( |
2516 | 0 | panCounts, nNumPx * sizeof(CountType))); |
2517 | 0 | if (paValsNew != nullptr) |
2518 | 0 | paVals = paValsNew; |
2519 | 0 | if (panCountsNew != nullptr) |
2520 | 0 | panCounts = panCountsNew; |
2521 | 0 | if (paValsNew == nullptr || panCountsNew == nullptr) |
2522 | 0 | { |
2523 | 0 | CPLFree(paVals); |
2524 | 0 | CPLFree(panCounts); |
2525 | 0 | return CE_Failure; |
2526 | 0 | } |
2527 | 0 | nMaxNumPx = nNumPx; |
2528 | 0 | } |
2529 | | |
2530 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
2531 | 0 | { |
2532 | 0 | const GPtrDiff_t iTotYOff = |
2533 | 0 | static_cast<GPtrDiff_t>(iY - nSrcYOff) * nChunkXSize - |
2534 | 0 | nChunkXOff; |
2535 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
2536 | 0 | { |
2537 | 0 | if (pabySrcScanlineNodataMask == nullptr || |
2538 | 0 | pabySrcScanlineNodataMask[iX + iTotYOff]) |
2539 | 0 | { |
2540 | 0 | const T val = paSrcScanline[iX + iTotYOff]; |
2541 | 0 | CountType i = 0; // Used after for. |
2542 | | |
2543 | | // Check array for existing entry. |
2544 | 0 | for (; i < iMaxInd; ++i) |
2545 | 0 | { |
2546 | 0 | if (IsSame(paVals[i], val)) |
2547 | 0 | { |
2548 | 0 | if (++panCounts[i] > panCounts[iMaxVal]) |
2549 | 0 | { |
2550 | 0 | iMaxVal = i; |
2551 | 0 | } |
2552 | 0 | break; |
2553 | 0 | } |
2554 | 0 | } |
2555 | | |
2556 | | // Add to arr if entry not already there. |
2557 | 0 | if (i == iMaxInd) |
2558 | 0 | { |
2559 | 0 | paVals[iMaxInd] = val; |
2560 | 0 | panCounts[iMaxInd] = 1; |
2561 | |
|
2562 | 0 | if (iMaxInd == 0) |
2563 | 0 | { |
2564 | 0 | iMaxVal = iMaxInd; |
2565 | 0 | } |
2566 | |
|
2567 | 0 | ++iMaxInd; |
2568 | 0 | } |
2569 | 0 | } |
2570 | 0 | } |
2571 | 0 | } |
2572 | |
|
2573 | 0 | if (iMaxInd == 0) |
2574 | 0 | paDstScanline[iDstPixel - nDstXOff] = tNoDataValue; |
2575 | 0 | else |
2576 | 0 | paDstScanline[iDstPixel - nDstXOff] = paVals[iMaxVal]; |
2577 | 0 | } |
2578 | | else if constexpr (std::is_same<T, GByte>::value) |
2579 | | // ( eSrcDataType == GDT_Byte && nEntryCount < 256 ) |
2580 | 0 | { |
2581 | | // So we go here for a paletted or non-paletted byte band. |
2582 | | // The input values are then between 0 and 255. |
2583 | 0 | int nMaxVal = 0; |
2584 | 0 | int iMaxInd = -1; |
2585 | | |
2586 | | // The cost of this zeroing might be high. Perhaps we should |
2587 | | // just use the above generic case, and go to this one if the |
2588 | | // number of source pixels is large enough |
2589 | 0 | std::fill(anVals.begin(), anVals.end(), 0); |
2590 | |
|
2591 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
2592 | 0 | { |
2593 | 0 | const GPtrDiff_t iTotYOff = |
2594 | 0 | static_cast<GPtrDiff_t>(iY - nSrcYOff) * nChunkXSize - |
2595 | 0 | nChunkXOff; |
2596 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
2597 | 0 | { |
2598 | 0 | const T val = paSrcScanline[iX + iTotYOff]; |
2599 | 0 | if (!bHasNoData || val != tNoDataValue) |
2600 | 0 | { |
2601 | 0 | int nVal = static_cast<int>(val); |
2602 | 0 | if (++anVals[nVal] > nMaxVal) |
2603 | 0 | { |
2604 | | // Sum the density. |
2605 | | // Is it the most common value so far? |
2606 | 0 | iMaxInd = nVal; |
2607 | 0 | nMaxVal = anVals[nVal]; |
2608 | 0 | } |
2609 | 0 | } |
2610 | 0 | } |
2611 | 0 | } |
2612 | |
|
2613 | 0 | if (iMaxInd == -1) |
2614 | 0 | paDstScanline[iDstPixel - nDstXOff] = tNoDataValue; |
2615 | 0 | else |
2616 | 0 | paDstScanline[iDstPixel - nDstXOff] = |
2617 | 0 | static_cast<T>(iMaxInd); |
2618 | 0 | } |
2619 | 0 | } |
2620 | 0 | } |
2621 | | |
2622 | 0 | CPLFree(paVals); |
2623 | 0 | CPLFree(panCounts); |
2624 | |
|
2625 | 0 | return CE_None; |
2626 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<unsigned char>(GDALOverviewResampleArgs const&, unsigned char const*, unsigned char*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<signed char>(GDALOverviewResampleArgs const&, signed char const*, signed char*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<unsigned short>(GDALOverviewResampleArgs const&, unsigned short const*, unsigned short*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<unsigned int>(GDALOverviewResampleArgs const&, unsigned int const*, unsigned int*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<unsigned long>(GDALOverviewResampleArgs const&, unsigned long const*, unsigned long*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<cpl::Float16>(GDALOverviewResampleArgs const&, cpl::Float16 const*, cpl::Float16*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<float>(GDALOverviewResampleArgs const&, float const*, float*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<double>(GDALOverviewResampleArgs const&, double const*, double*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<(anonymous namespace)::ComplexFloat16>(GDALOverviewResampleArgs const&, (anonymous namespace)::ComplexFloat16 const*, (anonymous namespace)::ComplexFloat16*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<std::__1::complex<float> >(GDALOverviewResampleArgs const&, std::__1::complex<float> const*, std::__1::complex<float>*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<std::__1::complex<double> >(GDALOverviewResampleArgs const&, std::__1::complex<double> const*, std::__1::complex<double>*) |
2627 | | |
2628 | | static CPLErr GDALResampleChunk_Mode(const GDALOverviewResampleArgs &args, |
2629 | | const void *pChunk, void **ppDstBuffer, |
2630 | | GDALDataType *peDstBufferDataType) |
2631 | 0 | { |
2632 | 0 | *ppDstBuffer = VSI_MALLOC3_VERBOSE( |
2633 | 0 | args.nDstXOff2 - args.nDstXOff, args.nDstYOff2 - args.nDstYOff, |
2634 | 0 | GDALGetDataTypeSizeBytes(args.eWrkDataType)); |
2635 | 0 | if (*ppDstBuffer == nullptr) |
2636 | 0 | { |
2637 | 0 | return CE_Failure; |
2638 | 0 | } |
2639 | | |
2640 | 0 | CPLAssert(args.eSrcDataType == args.eWrkDataType); |
2641 | | |
2642 | 0 | *peDstBufferDataType = args.eWrkDataType; |
2643 | 0 | switch (args.eWrkDataType) |
2644 | 0 | { |
2645 | | // For mode resampling, as no computation is done, only the |
2646 | | // size of the data type matters... except for Byte where we have |
2647 | | // special processing. And for floating point values |
2648 | 0 | case GDT_Byte: |
2649 | 0 | { |
2650 | 0 | return GDALResampleChunk_ModeT(args, |
2651 | 0 | static_cast<const GByte *>(pChunk), |
2652 | 0 | static_cast<GByte *>(*ppDstBuffer)); |
2653 | 0 | } |
2654 | | |
2655 | 0 | case GDT_Int8: |
2656 | 0 | { |
2657 | 0 | return GDALResampleChunk_ModeT(args, |
2658 | 0 | static_cast<const int8_t *>(pChunk), |
2659 | 0 | static_cast<int8_t *>(*ppDstBuffer)); |
2660 | 0 | } |
2661 | | |
2662 | 0 | case GDT_Int16: |
2663 | 0 | case GDT_UInt16: |
2664 | 0 | { |
2665 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 2); |
2666 | 0 | return GDALResampleChunk_ModeT( |
2667 | 0 | args, static_cast<const uint16_t *>(pChunk), |
2668 | 0 | static_cast<uint16_t *>(*ppDstBuffer)); |
2669 | 0 | } |
2670 | | |
2671 | 0 | case GDT_CInt16: |
2672 | 0 | case GDT_Int32: |
2673 | 0 | case GDT_UInt32: |
2674 | 0 | { |
2675 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 4); |
2676 | 0 | return GDALResampleChunk_ModeT( |
2677 | 0 | args, static_cast<const uint32_t *>(pChunk), |
2678 | 0 | static_cast<uint32_t *>(*ppDstBuffer)); |
2679 | 0 | } |
2680 | | |
2681 | 0 | case GDT_CInt32: |
2682 | 0 | case GDT_Int64: |
2683 | 0 | case GDT_UInt64: |
2684 | 0 | { |
2685 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 8); |
2686 | 0 | return GDALResampleChunk_ModeT( |
2687 | 0 | args, static_cast<const uint64_t *>(pChunk), |
2688 | 0 | static_cast<uint64_t *>(*ppDstBuffer)); |
2689 | 0 | } |
2690 | | |
2691 | 0 | case GDT_Float16: |
2692 | 0 | { |
2693 | 0 | return GDALResampleChunk_ModeT( |
2694 | 0 | args, static_cast<const GFloat16 *>(pChunk), |
2695 | 0 | static_cast<GFloat16 *>(*ppDstBuffer)); |
2696 | 0 | } |
2697 | | |
2698 | 0 | case GDT_Float32: |
2699 | 0 | { |
2700 | 0 | return GDALResampleChunk_ModeT(args, |
2701 | 0 | static_cast<const float *>(pChunk), |
2702 | 0 | static_cast<float *>(*ppDstBuffer)); |
2703 | 0 | } |
2704 | | |
2705 | 0 | case GDT_Float64: |
2706 | 0 | { |
2707 | 0 | return GDALResampleChunk_ModeT(args, |
2708 | 0 | static_cast<const double *>(pChunk), |
2709 | 0 | static_cast<double *>(*ppDstBuffer)); |
2710 | 0 | } |
2711 | | |
2712 | 0 | case GDT_CFloat16: |
2713 | 0 | { |
2714 | 0 | return GDALResampleChunk_ModeT( |
2715 | 0 | args, static_cast<const ComplexFloat16 *>(pChunk), |
2716 | 0 | static_cast<ComplexFloat16 *>(*ppDstBuffer)); |
2717 | 0 | } |
2718 | | |
2719 | 0 | case GDT_CFloat32: |
2720 | 0 | { |
2721 | 0 | return GDALResampleChunk_ModeT( |
2722 | 0 | args, static_cast<const std::complex<float> *>(pChunk), |
2723 | 0 | static_cast<std::complex<float> *>(*ppDstBuffer)); |
2724 | 0 | } |
2725 | | |
2726 | 0 | case GDT_CFloat64: |
2727 | 0 | { |
2728 | 0 | return GDALResampleChunk_ModeT( |
2729 | 0 | args, static_cast<const std::complex<double> *>(pChunk), |
2730 | 0 | static_cast<std::complex<double> *>(*ppDstBuffer)); |
2731 | 0 | } |
2732 | | |
2733 | 0 | case GDT_Unknown: |
2734 | 0 | case GDT_TypeCount: |
2735 | 0 | break; |
2736 | 0 | } |
2737 | | |
2738 | 0 | CPLAssert(false); |
2739 | 0 | return CE_Failure; |
2740 | 0 | } |
2741 | | |
2742 | | /************************************************************************/ |
2743 | | /* GDALResampleConvolutionHorizontal() */ |
2744 | | /************************************************************************/ |
2745 | | |
/** Weighted sum of nSrcPixelCount consecutive samples.
 *
 * Returns sum(pChunk[k] * padfWeights[k]) for k in [0, nSrcPixelCount),
 * accumulated in double precision with two partial sums over groups of four
 * samples, followed by a scalar loop for the remaining ones.
 */
template <class T>
static inline double
GDALResampleConvolutionHorizontal(const T *pChunk, const double *padfWeights,
                                  int nSrcPixelCount)
{
    double dfAccA = 0.0;
    double dfAccB = 0.0;
    int iPx = 0;  // Shared by both loops.
    // Intel Compiler 2024.0.2.29 (maybe other versions?) crashes on this
    // manually (untypical) unrolled loop in -O2 and -O3:
    // https://github.com/OSGeo/gdal/issues/9508
#if !defined(__INTEL_CLANG_COMPILER)
    while (iPx < nSrcPixelCount - 3)
    {
        const T *const pSrc = pChunk + iPx;
        const double *const pW = padfWeights + iPx;
        dfAccA += static_cast<double>(pSrc[0]) * pW[0];
        dfAccA += static_cast<double>(pSrc[1]) * pW[1];
        dfAccB += static_cast<double>(pSrc[2]) * pW[2];
        dfAccB += static_cast<double>(pSrc[3]) * pW[3];
        iPx += 4;
    }
#endif
    // Remaining samples (all of them when the unrolled loop is disabled).
    while (iPx < nSrcPixelCount)
    {
        dfAccA += static_cast<double>(pChunk[iPx]) * padfWeights[iPx];
        ++iPx;
    }
    return dfAccA + dfAccB;
}
2772 | | |
2773 | | template <class T> |
2774 | | static inline void GDALResampleConvolutionHorizontalWithMask( |
2775 | | const T *pChunk, const GByte *pabyMask, const double *padfWeights, |
2776 | | int nSrcPixelCount, double &dfVal, double &dfWeightSum) |
2777 | 0 | { |
2778 | 0 | dfVal = 0; |
2779 | 0 | dfWeightSum = 0; |
2780 | 0 | int i = 0; |
2781 | 0 | for (; i < nSrcPixelCount - 3; i += 4) |
2782 | 0 | { |
2783 | 0 | const double dfWeight0 = padfWeights[i] * pabyMask[i]; |
2784 | 0 | const double dfWeight1 = padfWeights[i + 1] * pabyMask[i + 1]; |
2785 | 0 | const double dfWeight2 = padfWeights[i + 2] * pabyMask[i + 2]; |
2786 | 0 | const double dfWeight3 = padfWeights[i + 3] * pabyMask[i + 3]; |
2787 | 0 | dfVal += double(pChunk[i + 0]) * dfWeight0; |
2788 | 0 | dfVal += double(pChunk[i + 1]) * dfWeight1; |
2789 | 0 | dfVal += double(pChunk[i + 2]) * dfWeight2; |
2790 | 0 | dfVal += double(pChunk[i + 3]) * dfWeight3; |
2791 | 0 | dfWeightSum += dfWeight0 + dfWeight1 + dfWeight2 + dfWeight3; |
2792 | 0 | } |
2793 | 0 | for (; i < nSrcPixelCount; ++i) |
2794 | 0 | { |
2795 | 0 | const double dfWeight = padfWeights[i] * pabyMask[i]; |
2796 | 0 | dfVal += double(pChunk[i]) * dfWeight; |
2797 | 0 | dfWeightSum += dfWeight; |
2798 | 0 | } |
2799 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalWithMask<float>(float const*, unsigned char const*, double const*, int, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalWithMask<double>(double const*, unsigned char const*, double const*, int, double&, double&) |
2800 | | |
/** Weighted sums of nSrcPixelCount consecutive samples for three rows at once.
 *
 * The same weights are applied to the three rows. dfRes1, dfRes2 and dfRes3
 * receive the results for pChunkRow1, pChunkRow2 and pChunkRow3 respectively.
 * Each row uses two partial sums over groups of four samples, followed by a
 * scalar loop for the remaining ones.
 */
template <class T>
static inline void GDALResampleConvolutionHorizontal_3rows(
    const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3,
    const double *padfWeights, int nSrcPixelCount, double &dfRes1,
    double &dfRes2, double &dfRes3)
{
    // Two partial sums (A/B) per row.
    double dfRow1A = 0.0;
    double dfRow1B = 0.0;
    double dfRow2A = 0.0;
    double dfRow2B = 0.0;
    double dfRow3A = 0.0;
    double dfRow3B = 0.0;

    int iPx = 0;  // Shared by both loops.
    while (iPx < nSrcPixelCount - 3)
    {
        const double dfW0 = padfWeights[iPx + 0];
        const double dfW1 = padfWeights[iPx + 1];
        const double dfW2 = padfWeights[iPx + 2];
        const double dfW3 = padfWeights[iPx + 3];

        dfRow1A += static_cast<double>(pChunkRow1[iPx + 0]) * dfW0;
        dfRow1A += static_cast<double>(pChunkRow1[iPx + 1]) * dfW1;
        dfRow1B += static_cast<double>(pChunkRow1[iPx + 2]) * dfW2;
        dfRow1B += static_cast<double>(pChunkRow1[iPx + 3]) * dfW3;

        dfRow2A += static_cast<double>(pChunkRow2[iPx + 0]) * dfW0;
        dfRow2A += static_cast<double>(pChunkRow2[iPx + 1]) * dfW1;
        dfRow2B += static_cast<double>(pChunkRow2[iPx + 2]) * dfW2;
        dfRow2B += static_cast<double>(pChunkRow2[iPx + 3]) * dfW3;

        dfRow3A += static_cast<double>(pChunkRow3[iPx + 0]) * dfW0;
        dfRow3A += static_cast<double>(pChunkRow3[iPx + 1]) * dfW1;
        dfRow3B += static_cast<double>(pChunkRow3[iPx + 2]) * dfW2;
        dfRow3B += static_cast<double>(pChunkRow3[iPx + 3]) * dfW3;

        iPx += 4;
    }
    while (iPx < nSrcPixelCount)
    {
        const double dfW = padfWeights[iPx];
        dfRow1A += static_cast<double>(pChunkRow1[iPx]) * dfW;
        dfRow2A += static_cast<double>(pChunkRow2[iPx]) * dfW;
        dfRow3A += static_cast<double>(pChunkRow3[iPx]) * dfW;
        ++iPx;
    }

    dfRes1 = dfRow1A + dfRow1B;
    dfRes2 = dfRow2A + dfRow2B;
    dfRes3 = dfRow3A + dfRow3B;
}
2839 | | |
// Generic implementation of the "pixel count less than 8" 3-row variant: it
// has no dedicated code path and simply forwards every argument to
// GDALResampleConvolutionHorizontal_3rows().
template <class T>
static inline void GDALResampleConvolutionHorizontalPixelCountLess8_3rows(
    const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3,
    const double *padfWeights, int nSrcPixelCount, double &dfRes1,
    double &dfRes2, double &dfRes3)
{
    GDALResampleConvolutionHorizontal_3rows(
        pChunkRow1, pChunkRow2, pChunkRow3,  // source rows
        padfWeights, nSrcPixelCount,         // shared weights
        dfRes1, dfRes2, dfRes3);             // one result per row
}
2850 | | |
// Generic implementation of the "exactly 4 pixels" 3-row variant: it forwards
// to GDALResampleConvolutionHorizontal_3rows() with a fixed pixel count of 4.
template <class T>
static inline void GDALResampleConvolutionHorizontalPixelCount4_3rows(
    const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3,
    const double *padfWeights, double &dfRes1, double &dfRes2, double &dfRes3)
{
    GDALResampleConvolutionHorizontal_3rows(
        pChunkRow1, pChunkRow2, pChunkRow3,  // source rows
        padfWeights, 4,                      // shared weights, 4 pixels
        dfRes1, dfRes2, dfRes3);             // one result per row
}
2860 | | |
2861 | | /************************************************************************/ |
2862 | | /* GDALResampleConvolutionVertical() */ |
2863 | | /************************************************************************/ |
2864 | | |
/** Weighted sum of nSrcLineCount samples taken down one column.
 *
 * Returns sum(pChunk[k * nStride] * padfWeights[k]) for k in
 * [0, nSrcLineCount), using two partial sums over groups of four lines,
 * followed by a scalar loop for the remaining ones.
 */
template <class T>
static inline double
GDALResampleConvolutionVertical(const T *pChunk, size_t nStride,
                                const double *padfWeights, int nSrcLineCount)
{
    double dfAccA = 0.0;
    double dfAccB = 0.0;
    int iLine = 0;      // Index into the weights.
    size_t nOffset = 0;  // Index into pChunk, always iLine * nStride.
    while (iLine < nSrcLineCount - 3)
    {
        dfAccA += pChunk[nOffset + 0 * nStride] * padfWeights[iLine + 0];
        dfAccA += pChunk[nOffset + 1 * nStride] * padfWeights[iLine + 1];
        dfAccB += pChunk[nOffset + 2 * nStride] * padfWeights[iLine + 2];
        dfAccB += pChunk[nOffset + 3 * nStride] * padfWeights[iLine + 3];
        iLine += 4;
        nOffset += 4 * nStride;
    }
    while (iLine < nSrcLineCount)
    {
        dfAccA += pChunk[nOffset] * padfWeights[iLine];
        ++iLine;
        nOffset += nStride;
    }
    return dfAccA + dfAccB;
}
2887 | | |
/** Weighted sums of nSrcLineCount samples down two adjacent columns.
 *
 * dfRes1 receives sum(pChunk[k * nStride] * padfWeights[k]) and dfRes2
 * receives sum(pChunk[k * nStride + 1] * padfWeights[k]), for k in
 * [0, nSrcLineCount). Each column uses two partial sums over groups of four
 * lines, followed by a scalar loop for the remaining ones.
 */
template <class T>
static inline void GDALResampleConvolutionVertical_2cols(
    const T *pChunk, size_t nStride, const double *padfWeights,
    int nSrcLineCount, double &dfRes1, double &dfRes2)
{
    // Two partial sums (A/B) per column.
    double dfCol0A = 0.0;
    double dfCol0B = 0.0;
    double dfCol1A = 0.0;
    double dfCol1B = 0.0;

    int iLine = 0;      // Index into the weights.
    size_t nOffset = 0;  // Index into pChunk, always iLine * nStride.
    while (iLine < nSrcLineCount - 3)
    {
        dfCol0A += pChunk[nOffset + 0 + 0 * nStride] * padfWeights[iLine + 0];
        dfCol1A += pChunk[nOffset + 1 + 0 * nStride] * padfWeights[iLine + 0];
        dfCol0A += pChunk[nOffset + 0 + 1 * nStride] * padfWeights[iLine + 1];
        dfCol1A += pChunk[nOffset + 1 + 1 * nStride] * padfWeights[iLine + 1];
        dfCol0B += pChunk[nOffset + 0 + 2 * nStride] * padfWeights[iLine + 2];
        dfCol1B += pChunk[nOffset + 1 + 2 * nStride] * padfWeights[iLine + 2];
        dfCol0B += pChunk[nOffset + 0 + 3 * nStride] * padfWeights[iLine + 3];
        dfCol1B += pChunk[nOffset + 1 + 3 * nStride] * padfWeights[iLine + 3];
        iLine += 4;
        nOffset += 4 * nStride;
    }
    while (iLine < nSrcLineCount)
    {
        dfCol0A += pChunk[nOffset + 0] * padfWeights[iLine];
        dfCol1A += pChunk[nOffset + 1] * padfWeights[iLine];
        ++iLine;
        nOffset += nStride;
    }

    dfRes1 = dfCol0A + dfCol0B;
    dfRes2 = dfCol1A + dfCol1B;
}
2918 | | |
2919 | | #ifdef USE_SSE2 |
2920 | | |
2921 | | #ifdef __AVX__ |
2922 | | /************************************************************************/ |
2923 | | /* GDALResampleConvolutionVertical_16cols<T> */ |
2924 | | /************************************************************************/ |
2925 | | |
2926 | | template <class T> |
2927 | | static inline void |
2928 | | GDALResampleConvolutionVertical_16cols(const T *pChunk, size_t nStride, |
2929 | | const double *padfWeights, |
2930 | | int nSrcLineCount, float *afDest) |
2931 | | { |
2932 | | int i = 0; |
2933 | | size_t j = 0; |
2934 | | XMMReg4Double v_acc0 = XMMReg4Double::Zero(); |
2935 | | XMMReg4Double v_acc1 = XMMReg4Double::Zero(); |
2936 | | XMMReg4Double v_acc2 = XMMReg4Double::Zero(); |
2937 | | XMMReg4Double v_acc3 = XMMReg4Double::Zero(); |
2938 | | for (; i < nSrcLineCount - 3; i += 4, j += 4 * nStride) |
2939 | | { |
2940 | | XMMReg4Double w0 = |
2941 | | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 0); |
2942 | | XMMReg4Double w1 = |
2943 | | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 1); |
2944 | | XMMReg4Double w2 = |
2945 | | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 2); |
2946 | | XMMReg4Double w3 = |
2947 | | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 3); |
2948 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 0 * nStride) * w0; |
2949 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 0 * nStride) * w0; |
2950 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8 + 0 * nStride) * w0; |
2951 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12 + 0 * nStride) * w0; |
2952 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 1 * nStride) * w1; |
2953 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 1 * nStride) * w1; |
2954 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8 + 1 * nStride) * w1; |
2955 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12 + 1 * nStride) * w1; |
2956 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 2 * nStride) * w2; |
2957 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 2 * nStride) * w2; |
2958 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8 + 2 * nStride) * w2; |
2959 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12 + 2 * nStride) * w2; |
2960 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 3 * nStride) * w3; |
2961 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 3 * nStride) * w3; |
2962 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8 + 3 * nStride) * w3; |
2963 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12 + 3 * nStride) * w3; |
2964 | | } |
2965 | | for (; i < nSrcLineCount; ++i, j += nStride) |
2966 | | { |
2967 | | XMMReg4Double w = XMMReg4Double::Load1ValHighAndLow(padfWeights + i); |
2968 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0) * w; |
2969 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4) * w; |
2970 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8) * w; |
2971 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12) * w; |
2972 | | } |
2973 | | v_acc0.Store4Val(afDest); |
2974 | | v_acc1.Store4Val(afDest + 4); |
2975 | | v_acc2.Store4Val(afDest + 8); |
2976 | | v_acc3.Store4Val(afDest + 12); |
2977 | | } |
2978 | | |
2979 | | template <class T> |
2980 | | static inline void GDALResampleConvolutionVertical_16cols(const T *, int, |
2981 | | const double *, int, |
2982 | | double *) |
2983 | | { |
2984 | | // Cannot be reached |
2985 | | CPLAssert(false); |
2986 | | } |
2987 | | |
2988 | | #else |
2989 | | |
2990 | | /************************************************************************/ |
2991 | | /* GDALResampleConvolutionVertical_8cols<T> */ |
2992 | | /************************************************************************/ |
2993 | | |
2994 | | template <class T> |
2995 | | static inline void |
2996 | | GDALResampleConvolutionVertical_8cols(const T *pChunk, size_t nStride, |
2997 | | const double *padfWeights, |
2998 | | int nSrcLineCount, float *afDest) |
2999 | 0 | { |
3000 | 0 | int i = 0; |
3001 | 0 | size_t j = 0; |
3002 | 0 | XMMReg4Double v_acc0 = XMMReg4Double::Zero(); |
3003 | 0 | XMMReg4Double v_acc1 = XMMReg4Double::Zero(); |
3004 | 0 | for (; i < nSrcLineCount - 3; i += 4, j += 4 * nStride) |
3005 | 0 | { |
3006 | 0 | XMMReg4Double w0 = |
3007 | 0 | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 0); |
3008 | 0 | XMMReg4Double w1 = |
3009 | 0 | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 1); |
3010 | 0 | XMMReg4Double w2 = |
3011 | 0 | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 2); |
3012 | 0 | XMMReg4Double w3 = |
3013 | 0 | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 3); |
3014 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 0 * nStride) * w0; |
3015 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 0 * nStride) * w0; |
3016 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 1 * nStride) * w1; |
3017 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 1 * nStride) * w1; |
3018 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 2 * nStride) * w2; |
3019 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 2 * nStride) * w2; |
3020 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 3 * nStride) * w3; |
3021 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 3 * nStride) * w3; |
3022 | 0 | } |
3023 | 0 | for (; i < nSrcLineCount; ++i, j += nStride) |
3024 | 0 | { |
3025 | 0 | XMMReg4Double w = XMMReg4Double::Load1ValHighAndLow(padfWeights + i); |
3026 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0) * w; |
3027 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4) * w; |
3028 | 0 | } |
3029 | 0 | v_acc0.Store4Val(afDest); |
3030 | 0 | v_acc1.Store4Val(afDest + 4); |
3031 | 0 | } |
3032 | | |
3033 | | template <class T> |
3034 | | static inline void GDALResampleConvolutionVertical_8cols(const T *, int, |
3035 | | const double *, int, |
3036 | | double *) |
3037 | | { |
3038 | | // Cannot be reached |
3039 | | CPLAssert(false); |
3040 | | } |
3041 | | |
3042 | | #endif // __AVX__ |
3043 | | |
3044 | | /************************************************************************/ |
3045 | | /* GDALResampleConvolutionHorizontalSSE2<T> */ |
3046 | | /************************************************************************/ |
3047 | | |
3048 | | template <class T> |
3049 | | static inline double GDALResampleConvolutionHorizontalSSE2( |
3050 | | const T *pChunk, const double *padfWeightsAligned, int nSrcPixelCount) |
3051 | 0 | { |
3052 | 0 | XMMReg4Double v_acc1 = XMMReg4Double::Zero(); |
3053 | 0 | XMMReg4Double v_acc2 = XMMReg4Double::Zero(); |
3054 | 0 | int i = 0; // Used after for. |
3055 | 0 | for (; i < nSrcPixelCount - 7; i += 8) |
3056 | 0 | { |
3057 | | // Retrieve the pixel & accumulate |
3058 | 0 | const XMMReg4Double v_pixels1 = XMMReg4Double::Load4Val(pChunk + i); |
3059 | 0 | const XMMReg4Double v_pixels2 = XMMReg4Double::Load4Val(pChunk + i + 4); |
3060 | 0 | const XMMReg4Double v_weight1 = |
3061 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i); |
3062 | 0 | const XMMReg4Double v_weight2 = |
3063 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i + 4); |
3064 | |
|
3065 | 0 | v_acc1 += v_pixels1 * v_weight1; |
3066 | 0 | v_acc2 += v_pixels2 * v_weight2; |
3067 | 0 | } |
3068 | |
|
3069 | 0 | v_acc1 += v_acc2; |
3070 | |
|
3071 | 0 | double dfVal = v_acc1.GetHorizSum(); |
3072 | 0 | for (; i < nSrcPixelCount; ++i) |
3073 | 0 | { |
3074 | 0 | dfVal += pChunk[i] * padfWeightsAligned[i]; |
3075 | 0 | } |
3076 | 0 | return dfVal; |
3077 | 0 | } Unexecuted instantiation: overview.cpp:double GDALResampleConvolutionHorizontalSSE2<unsigned char>(unsigned char const*, double const*, int) Unexecuted instantiation: overview.cpp:double GDALResampleConvolutionHorizontalSSE2<unsigned short>(unsigned short const*, double const*, int) |
3078 | | |
3079 | | /************************************************************************/ |
3080 | | /* GDALResampleConvolutionHorizontal<GByte> */ |
3081 | | /************************************************************************/ |
3082 | | |
3083 | | template <> |
3084 | | inline double GDALResampleConvolutionHorizontal<GByte>( |
3085 | | const GByte *pChunk, const double *padfWeightsAligned, int nSrcPixelCount) |
3086 | 0 | { |
3087 | 0 | return GDALResampleConvolutionHorizontalSSE2(pChunk, padfWeightsAligned, |
3088 | 0 | nSrcPixelCount); |
3089 | 0 | } |
3090 | | |
3091 | | template <> |
3092 | | inline double GDALResampleConvolutionHorizontal<GUInt16>( |
3093 | | const GUInt16 *pChunk, const double *padfWeightsAligned, int nSrcPixelCount) |
3094 | 0 | { |
3095 | 0 | return GDALResampleConvolutionHorizontalSSE2(pChunk, padfWeightsAligned, |
3096 | 0 | nSrcPixelCount); |
3097 | 0 | } |
3098 | | |
3099 | | /************************************************************************/ |
3100 | | /* GDALResampleConvolutionHorizontalWithMaskSSE2<T> */ |
3101 | | /************************************************************************/ |
3102 | | |
3103 | | template <class T> |
3104 | | static inline void GDALResampleConvolutionHorizontalWithMaskSSE2( |
3105 | | const T *pChunk, const GByte *pabyMask, const double *padfWeightsAligned, |
3106 | | int nSrcPixelCount, double &dfVal, double &dfWeightSum) |
3107 | 0 | { |
3108 | 0 | int i = 0; // Used after for. |
3109 | 0 | XMMReg4Double v_acc = XMMReg4Double::Zero(); |
3110 | 0 | XMMReg4Double v_acc_weight = XMMReg4Double::Zero(); |
3111 | 0 | for (; i < nSrcPixelCount - 3; i += 4) |
3112 | 0 | { |
3113 | 0 | const XMMReg4Double v_pixels = XMMReg4Double::Load4Val(pChunk + i); |
3114 | 0 | const XMMReg4Double v_mask = XMMReg4Double::Load4Val(pabyMask + i); |
3115 | 0 | XMMReg4Double v_weight = |
3116 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i); |
3117 | 0 | v_weight *= v_mask; |
3118 | 0 | v_acc += v_pixels * v_weight; |
3119 | 0 | v_acc_weight += v_weight; |
3120 | 0 | } |
3121 | |
|
3122 | 0 | dfVal = v_acc.GetHorizSum(); |
3123 | 0 | dfWeightSum = v_acc_weight.GetHorizSum(); |
3124 | 0 | for (; i < nSrcPixelCount; ++i) |
3125 | 0 | { |
3126 | 0 | const double dfWeight = padfWeightsAligned[i] * pabyMask[i]; |
3127 | 0 | dfVal += pChunk[i] * dfWeight; |
3128 | 0 | dfWeightSum += dfWeight; |
3129 | 0 | } |
3130 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalWithMaskSSE2<unsigned char>(unsigned char const*, unsigned char const*, double const*, int, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalWithMaskSSE2<unsigned short>(unsigned short const*, unsigned char const*, double const*, int, double&, double&) |
3131 | | |
3132 | | /************************************************************************/ |
3133 | | /* GDALResampleConvolutionHorizontalWithMask<GByte> */ |
3134 | | /************************************************************************/ |
3135 | | |
3136 | | template <> |
3137 | | inline void GDALResampleConvolutionHorizontalWithMask<GByte>( |
3138 | | const GByte *pChunk, const GByte *pabyMask, |
3139 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfVal, |
3140 | | double &dfWeightSum) |
3141 | 0 | { |
3142 | 0 | GDALResampleConvolutionHorizontalWithMaskSSE2( |
3143 | 0 | pChunk, pabyMask, padfWeightsAligned, nSrcPixelCount, dfVal, |
3144 | 0 | dfWeightSum); |
3145 | 0 | } |
3146 | | |
3147 | | template <> |
3148 | | inline void GDALResampleConvolutionHorizontalWithMask<GUInt16>( |
3149 | | const GUInt16 *pChunk, const GByte *pabyMask, |
3150 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfVal, |
3151 | | double &dfWeightSum) |
3152 | 0 | { |
3153 | 0 | GDALResampleConvolutionHorizontalWithMaskSSE2( |
3154 | 0 | pChunk, pabyMask, padfWeightsAligned, nSrcPixelCount, dfVal, |
3155 | 0 | dfWeightSum); |
3156 | 0 | } |
3157 | | |
3158 | | /************************************************************************/ |
3159 | | /* GDALResampleConvolutionHorizontal_3rows_SSE2<T> */ |
3160 | | /************************************************************************/ |
3161 | | |
3162 | | template <class T> |
3163 | | static inline void GDALResampleConvolutionHorizontal_3rows_SSE2( |
3164 | | const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3, |
3165 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfRes1, |
3166 | | double &dfRes2, double &dfRes3) |
3167 | 0 | { |
3168 | 0 | XMMReg4Double v_acc1 = XMMReg4Double::Zero(), |
3169 | 0 | v_acc2 = XMMReg4Double::Zero(), |
3170 | 0 | v_acc3 = XMMReg4Double::Zero(); |
3171 | 0 | int i = 0; |
3172 | 0 | for (; i < nSrcPixelCount - 7; i += 8) |
3173 | 0 | { |
3174 | | // Retrieve the pixel & accumulate. |
3175 | 0 | XMMReg4Double v_pixels1 = XMMReg4Double::Load4Val(pChunkRow1 + i); |
3176 | 0 | XMMReg4Double v_pixels2 = XMMReg4Double::Load4Val(pChunkRow1 + i + 4); |
3177 | 0 | const XMMReg4Double v_weight1 = |
3178 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i); |
3179 | 0 | const XMMReg4Double v_weight2 = |
3180 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i + 4); |
3181 | |
|
3182 | 0 | v_acc1 += v_pixels1 * v_weight1; |
3183 | 0 | v_acc1 += v_pixels2 * v_weight2; |
3184 | |
|
3185 | 0 | v_pixels1 = XMMReg4Double::Load4Val(pChunkRow2 + i); |
3186 | 0 | v_pixels2 = XMMReg4Double::Load4Val(pChunkRow2 + i + 4); |
3187 | 0 | v_acc2 += v_pixels1 * v_weight1; |
3188 | 0 | v_acc2 += v_pixels2 * v_weight2; |
3189 | |
|
3190 | 0 | v_pixels1 = XMMReg4Double::Load4Val(pChunkRow3 + i); |
3191 | 0 | v_pixels2 = XMMReg4Double::Load4Val(pChunkRow3 + i + 4); |
3192 | 0 | v_acc3 += v_pixels1 * v_weight1; |
3193 | 0 | v_acc3 += v_pixels2 * v_weight2; |
3194 | 0 | } |
3195 | |
|
3196 | 0 | dfRes1 = v_acc1.GetHorizSum(); |
3197 | 0 | dfRes2 = v_acc2.GetHorizSum(); |
3198 | 0 | dfRes3 = v_acc3.GetHorizSum(); |
3199 | 0 | for (; i < nSrcPixelCount; ++i) |
3200 | 0 | { |
3201 | 0 | dfRes1 += pChunkRow1[i] * padfWeightsAligned[i]; |
3202 | 0 | dfRes2 += pChunkRow2[i] * padfWeightsAligned[i]; |
3203 | 0 | dfRes3 += pChunkRow3[i] * padfWeightsAligned[i]; |
3204 | 0 | } |
3205 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontal_3rows_SSE2<unsigned char>(unsigned char const*, unsigned char const*, unsigned char const*, double const*, int, double&, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontal_3rows_SSE2<unsigned short>(unsigned short const*, unsigned short const*, unsigned short const*, double const*, int, double&, double&, double&) |
3206 | | |
3207 | | /************************************************************************/ |
3208 | | /* GDALResampleConvolutionHorizontal_3rows<GByte> */ |
3209 | | /************************************************************************/ |
3210 | | |
3211 | | template <> |
3212 | | inline void GDALResampleConvolutionHorizontal_3rows<GByte>( |
3213 | | const GByte *pChunkRow1, const GByte *pChunkRow2, const GByte *pChunkRow3, |
3214 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfRes1, |
3215 | | double &dfRes2, double &dfRes3) |
3216 | 0 | { |
3217 | 0 | GDALResampleConvolutionHorizontal_3rows_SSE2( |
3218 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, nSrcPixelCount, |
3219 | 0 | dfRes1, dfRes2, dfRes3); |
3220 | 0 | } |
3221 | | |
3222 | | template <> |
3223 | | inline void GDALResampleConvolutionHorizontal_3rows<GUInt16>( |
3224 | | const GUInt16 *pChunkRow1, const GUInt16 *pChunkRow2, |
3225 | | const GUInt16 *pChunkRow3, const double *padfWeightsAligned, |
3226 | | int nSrcPixelCount, double &dfRes1, double &dfRes2, double &dfRes3) |
3227 | 0 | { |
3228 | 0 | GDALResampleConvolutionHorizontal_3rows_SSE2( |
3229 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, nSrcPixelCount, |
3230 | 0 | dfRes1, dfRes2, dfRes3); |
3231 | 0 | } |
3232 | | |
3233 | | /************************************************************************/ |
3234 | | /* GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2<T> */ |
3235 | | /************************************************************************/ |
3236 | | |
3237 | | template <class T> |
3238 | | static inline void GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2( |
3239 | | const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3, |
3240 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfRes1, |
3241 | | double &dfRes2, double &dfRes3) |
3242 | 0 | { |
3243 | 0 | XMMReg4Double v_acc1 = XMMReg4Double::Zero(); |
3244 | 0 | XMMReg4Double v_acc2 = XMMReg4Double::Zero(); |
3245 | 0 | XMMReg4Double v_acc3 = XMMReg4Double::Zero(); |
3246 | 0 | int i = 0; // Use after for. |
3247 | 0 | for (; i < nSrcPixelCount - 3; i += 4) |
3248 | 0 | { |
3249 | | // Retrieve the pixel & accumulate. |
3250 | 0 | const XMMReg4Double v_pixels1 = XMMReg4Double::Load4Val(pChunkRow1 + i); |
3251 | 0 | const XMMReg4Double v_pixels2 = XMMReg4Double::Load4Val(pChunkRow2 + i); |
3252 | 0 | const XMMReg4Double v_pixels3 = XMMReg4Double::Load4Val(pChunkRow3 + i); |
3253 | 0 | const XMMReg4Double v_weight = |
3254 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i); |
3255 | |
|
3256 | 0 | v_acc1 += v_pixels1 * v_weight; |
3257 | 0 | v_acc2 += v_pixels2 * v_weight; |
3258 | 0 | v_acc3 += v_pixels3 * v_weight; |
3259 | 0 | } |
3260 | |
|
3261 | 0 | dfRes1 = v_acc1.GetHorizSum(); |
3262 | 0 | dfRes2 = v_acc2.GetHorizSum(); |
3263 | 0 | dfRes3 = v_acc3.GetHorizSum(); |
3264 | |
|
3265 | 0 | for (; i < nSrcPixelCount; ++i) |
3266 | 0 | { |
3267 | 0 | dfRes1 += pChunkRow1[i] * padfWeightsAligned[i]; |
3268 | 0 | dfRes2 += pChunkRow2[i] * padfWeightsAligned[i]; |
3269 | 0 | dfRes3 += pChunkRow3[i] * padfWeightsAligned[i]; |
3270 | 0 | } |
3271 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2<unsigned char>(unsigned char const*, unsigned char const*, unsigned char const*, double const*, int, double&, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2<unsigned short>(unsigned short const*, unsigned short const*, unsigned short const*, double const*, int, double&, double&, double&) |
3272 | | |
3273 | | /************************************************************************/ |
3274 | | /* GDALResampleConvolutionHorizontalPixelCountLess8_3rows<GByte> */ |
3275 | | /************************************************************************/ |
3276 | | |
3277 | | template <> |
3278 | | inline void GDALResampleConvolutionHorizontalPixelCountLess8_3rows<GByte>( |
3279 | | const GByte *pChunkRow1, const GByte *pChunkRow2, const GByte *pChunkRow3, |
3280 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfRes1, |
3281 | | double &dfRes2, double &dfRes3) |
3282 | 0 | { |
3283 | 0 | GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2( |
3284 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, nSrcPixelCount, |
3285 | 0 | dfRes1, dfRes2, dfRes3); |
3286 | 0 | } |
3287 | | |
3288 | | template <> |
3289 | | inline void GDALResampleConvolutionHorizontalPixelCountLess8_3rows<GUInt16>( |
3290 | | const GUInt16 *pChunkRow1, const GUInt16 *pChunkRow2, |
3291 | | const GUInt16 *pChunkRow3, const double *padfWeightsAligned, |
3292 | | int nSrcPixelCount, double &dfRes1, double &dfRes2, double &dfRes3) |
3293 | 0 | { |
3294 | 0 | GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2( |
3295 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, nSrcPixelCount, |
3296 | 0 | dfRes1, dfRes2, dfRes3); |
3297 | 0 | } |
3298 | | |
3299 | | /************************************************************************/ |
3300 | | /* GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2<T> */ |
3301 | | /************************************************************************/ |
3302 | | |
3303 | | template <class T> |
3304 | | static inline void GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2( |
3305 | | const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3, |
3306 | | const double *padfWeightsAligned, double &dfRes1, double &dfRes2, |
3307 | | double &dfRes3) |
3308 | 0 | { |
3309 | 0 | const XMMReg4Double v_weight = |
3310 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned); |
3311 | | |
3312 | | // Retrieve the pixel & accumulate. |
3313 | 0 | const XMMReg4Double v_pixels1 = XMMReg4Double::Load4Val(pChunkRow1); |
3314 | 0 | const XMMReg4Double v_pixels2 = XMMReg4Double::Load4Val(pChunkRow2); |
3315 | 0 | const XMMReg4Double v_pixels3 = XMMReg4Double::Load4Val(pChunkRow3); |
3316 | |
|
3317 | 0 | XMMReg4Double v_acc1 = v_pixels1 * v_weight; |
3318 | 0 | XMMReg4Double v_acc2 = v_pixels2 * v_weight; |
3319 | 0 | XMMReg4Double v_acc3 = v_pixels3 * v_weight; |
3320 | |
|
3321 | 0 | dfRes1 = v_acc1.GetHorizSum(); |
3322 | 0 | dfRes2 = v_acc2.GetHorizSum(); |
3323 | 0 | dfRes3 = v_acc3.GetHorizSum(); |
3324 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2<unsigned char>(unsigned char const*, unsigned char const*, unsigned char const*, double const*, double&, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2<unsigned short>(unsigned short const*, unsigned short const*, unsigned short const*, double const*, double&, double&, double&) |
3325 | | |
3326 | | /************************************************************************/ |
3327 | | /* GDALResampleConvolutionHorizontalPixelCount4_3rows<GByte> */ |
3328 | | /************************************************************************/ |
3329 | | |
3330 | | template <> |
3331 | | inline void GDALResampleConvolutionHorizontalPixelCount4_3rows<GByte>( |
3332 | | const GByte *pChunkRow1, const GByte *pChunkRow2, const GByte *pChunkRow3, |
3333 | | const double *padfWeightsAligned, double &dfRes1, double &dfRes2, |
3334 | | double &dfRes3) |
3335 | 0 | { |
3336 | 0 | GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2( |
3337 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, dfRes1, dfRes2, |
3338 | 0 | dfRes3); |
3339 | 0 | } |
3340 | | |
3341 | | template <> |
3342 | | inline void GDALResampleConvolutionHorizontalPixelCount4_3rows<GUInt16>( |
3343 | | const GUInt16 *pChunkRow1, const GUInt16 *pChunkRow2, |
3344 | | const GUInt16 *pChunkRow3, const double *padfWeightsAligned, double &dfRes1, |
3345 | | double &dfRes2, double &dfRes3) |
3346 | 0 | { |
3347 | 0 | GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2( |
3348 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, dfRes1, dfRes2, |
3349 | 0 | dfRes3); |
3350 | 0 | } |
3351 | | |
3352 | | #endif // USE_SSE2 |
3353 | | |
3354 | | /************************************************************************/ |
3355 | | /* GDALResampleChunk_Convolution() */ |
3356 | | /************************************************************************/ |
3357 | | |
3358 | | template <class T, class Twork, GDALDataType eWrkDataType, |
3359 | | bool bKernelWithNegativeWeights, bool bNeedRescale> |
3360 | | static CPLErr GDALResampleChunk_ConvolutionT( |
3361 | | const GDALOverviewResampleArgs &args, const T *pChunk, void *pDstBuffer, |
3362 | | FilterFuncType pfnFilterFunc, FilterFunc4ValuesType pfnFilterFunc4Values, |
3363 | | int nKernelRadius, float fMaxVal) |
3364 | | |
3365 | 0 | { |
3366 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
3367 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
3368 | 0 | const double dfSrcXDelta = args.dfSrcXDelta; |
3369 | 0 | const double dfSrcYDelta = args.dfSrcYDelta; |
3370 | 0 | constexpr int nBands = 1; |
3371 | 0 | const GByte *pabyChunkNodataMask = args.pabyChunkNodataMask; |
3372 | 0 | const int nChunkXOff = args.nChunkXOff; |
3373 | 0 | const int nChunkXSize = args.nChunkXSize; |
3374 | 0 | const int nChunkYOff = args.nChunkYOff; |
3375 | 0 | const int nChunkYSize = args.nChunkYSize; |
3376 | 0 | const int nDstXOff = args.nDstXOff; |
3377 | 0 | const int nDstXOff2 = args.nDstXOff2; |
3378 | 0 | const int nDstYOff = args.nDstYOff; |
3379 | 0 | const int nDstYOff2 = args.nDstYOff2; |
3380 | 0 | const bool bHasNoData = args.bHasNoData; |
3381 | 0 | double dfNoDataValue = args.dfNoDataValue; |
3382 | |
|
3383 | 0 | if (!bHasNoData) |
3384 | 0 | dfNoDataValue = 0.0; |
3385 | 0 | const auto dstDataType = args.eOvrDataType; |
3386 | 0 | const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(dstDataType); |
3387 | 0 | const double dfReplacementVal = |
3388 | 0 | bHasNoData ? GDALGetNoDataReplacementValue(dstDataType, dfNoDataValue) |
3389 | 0 | : dfNoDataValue; |
3390 | | // cppcheck-suppress unreadVariable |
3391 | 0 | const int isIntegerDT = GDALDataTypeIsInteger(dstDataType); |
3392 | 0 | const bool bNoDataValueInt64Valid = |
3393 | 0 | isIntegerDT && GDALIsValueExactAs<GInt64>(dfNoDataValue); |
3394 | 0 | const auto nNodataValueInt64 = |
3395 | 0 | bNoDataValueInt64Valid ? static_cast<GInt64>(dfNoDataValue) : 0; |
3396 | 0 | constexpr int nWrkDataTypeSize = static_cast<int>(sizeof(Twork)); |
3397 | | |
3398 | | // TODO: we should have some generic function to do this. |
3399 | 0 | Twork fDstMin = cpl::NumericLimits<Twork>::lowest(); |
3400 | 0 | Twork fDstMax = cpl::NumericLimits<Twork>::max(); |
3401 | 0 | if (dstDataType == GDT_Byte) |
3402 | 0 | { |
3403 | 0 | fDstMin = std::numeric_limits<GByte>::min(); |
3404 | 0 | fDstMax = std::numeric_limits<GByte>::max(); |
3405 | 0 | } |
3406 | 0 | else if (dstDataType == GDT_Int8) |
3407 | 0 | { |
3408 | 0 | fDstMin = std::numeric_limits<GInt8>::min(); |
3409 | 0 | fDstMax = std::numeric_limits<GInt8>::max(); |
3410 | 0 | } |
3411 | 0 | else if (dstDataType == GDT_UInt16) |
3412 | 0 | { |
3413 | 0 | fDstMin = std::numeric_limits<GUInt16>::min(); |
3414 | 0 | fDstMax = std::numeric_limits<GUInt16>::max(); |
3415 | 0 | } |
3416 | 0 | else if (dstDataType == GDT_Int16) |
3417 | 0 | { |
3418 | 0 | fDstMin = std::numeric_limits<GInt16>::min(); |
3419 | 0 | fDstMax = std::numeric_limits<GInt16>::max(); |
3420 | 0 | } |
3421 | 0 | else if (dstDataType == GDT_UInt32) |
3422 | 0 | { |
3423 | 0 | fDstMin = static_cast<Twork>(std::numeric_limits<GUInt32>::min()); |
3424 | 0 | fDstMax = static_cast<Twork>(std::numeric_limits<GUInt32>::max()); |
3425 | 0 | } |
3426 | 0 | else if (dstDataType == GDT_Int32) |
3427 | 0 | { |
3428 | | // cppcheck-suppress unreadVariable |
3429 | 0 | fDstMin = static_cast<Twork>(std::numeric_limits<GInt32>::min()); |
3430 | | // cppcheck-suppress unreadVariable |
3431 | 0 | fDstMax = static_cast<Twork>(std::numeric_limits<GInt32>::max()); |
3432 | 0 | } |
3433 | 0 | else if (dstDataType == GDT_UInt64) |
3434 | 0 | { |
3435 | | // cppcheck-suppress unreadVariable |
3436 | 0 | fDstMin = static_cast<Twork>(std::numeric_limits<uint64_t>::min()); |
3437 | | // cppcheck-suppress unreadVariable |
3438 | | // (1 << 64) - 2048: largest uint64 value a double can hold |
3439 | 0 | fDstMax = static_cast<Twork>(18446744073709549568ULL); |
3440 | 0 | } |
3441 | 0 | else if (dstDataType == GDT_Int64) |
3442 | 0 | { |
3443 | | // cppcheck-suppress unreadVariable |
3444 | 0 | fDstMin = static_cast<Twork>(std::numeric_limits<int64_t>::min()); |
3445 | | // cppcheck-suppress unreadVariable |
3446 | | // (1 << 63) - 1024: largest int64 that a double can hold |
3447 | 0 | fDstMax = static_cast<Twork>(9223372036854774784LL); |
3448 | 0 | } |
3449 | |
|
3450 | 0 | auto replaceValIfNodata = [bHasNoData, isIntegerDT, fDstMin, fDstMax, |
3451 | 0 | bNoDataValueInt64Valid, nNodataValueInt64, |
3452 | 0 | dfNoDataValue, dfReplacementVal](Twork fVal) |
3453 | 0 | { |
3454 | 0 | if (!bHasNoData) |
3455 | 0 | return fVal; |
3456 | | |
3457 | | // Clamp value before comparing to nodata: this is only needed for |
3458 | | // kernels with negative weights (Lanczos) |
3459 | 0 | Twork fClamped = fVal; |
3460 | 0 | if (fClamped < fDstMin) |
3461 | 0 | fClamped = fDstMin; |
3462 | 0 | else if (fClamped > fDstMax) |
3463 | 0 | fClamped = fDstMax; |
3464 | 0 | if (isIntegerDT) |
3465 | 0 | { |
3466 | 0 | if (bNoDataValueInt64Valid) |
3467 | 0 | { |
3468 | 0 | const double fClampedRounded = double(std::round(fClamped)); |
3469 | 0 | if (fClampedRounded >= |
3470 | 0 | static_cast<double>(static_cast<Twork>( |
3471 | 0 | std::numeric_limits<int64_t>::min())) && |
3472 | 0 | fClampedRounded <= static_cast<double>(static_cast<Twork>( |
3473 | 0 | 9223372036854774784LL)) && |
3474 | 0 | nNodataValueInt64 == |
3475 | 0 | static_cast<GInt64>(std::round(fClamped))) |
3476 | 0 | { |
3477 | | // Do not use the nodata value |
3478 | 0 | return static_cast<Twork>(dfReplacementVal); |
3479 | 0 | } |
3480 | 0 | } |
3481 | 0 | } |
3482 | 0 | else if (dfNoDataValue == static_cast<double>(fClamped)) |
3483 | 0 | { |
3484 | | // Do not use the nodata value |
3485 | 0 | return static_cast<Twork>(dfReplacementVal); |
3486 | 0 | } |
3487 | 0 | return fClamped; |
3488 | 0 | }; Unexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<unsigned char, float, (GDALDataType)6, true, true>(GDALOverviewResampleArgs const&, unsigned char const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(float)#1}::operator()(float) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<unsigned short, float, (GDALDataType)6, true, true>(GDALOverviewResampleArgs const&, unsigned short const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(float)#1}::operator()(float) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, true, true>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(float)#1}::operator()(float) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, true, true>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double)#1}::operator()(double) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<unsigned char, float, (GDALDataType)6, false, true>(GDALOverviewResampleArgs const&, unsigned char const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(float)#1}::operator()(float) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<unsigned short, float, (GDALDataType)6, false, true>(GDALOverviewResampleArgs const&, unsigned short const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(float)#1}::operator()(float) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, false, true>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(float)#1}::operator()(float) constUnexecuted instantiation: 
overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, false, true>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double)#1}::operator()(double) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<unsigned char, float, (GDALDataType)6, false, false>(GDALOverviewResampleArgs const&, unsigned char const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(float)#1}::operator()(float) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<unsigned short, float, (GDALDataType)6, false, false>(GDALOverviewResampleArgs const&, unsigned short const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(float)#1}::operator()(float) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, false, false>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(float)#1}::operator()(float) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, false, false>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double)#1}::operator()(double) const |
3489 | | |
3490 | | /* -------------------------------------------------------------------- */ |
3491 | | /* Allocate work buffers. */ |
3492 | | /* -------------------------------------------------------------------- */ |
3493 | 0 | const int nDstXSize = nDstXOff2 - nDstXOff; |
3494 | 0 | Twork *pafWrkScanline = nullptr; |
3495 | 0 | if (dstDataType != eWrkDataType) |
3496 | 0 | { |
3497 | 0 | pafWrkScanline = |
3498 | 0 | static_cast<Twork *>(VSI_MALLOC2_VERBOSE(nDstXSize, sizeof(Twork))); |
3499 | 0 | if (pafWrkScanline == nullptr) |
3500 | 0 | return CE_Failure; |
3501 | 0 | } |
3502 | | |
3503 | 0 | const double dfXScale = 1.0 / dfXRatioDstToSrc; |
3504 | 0 | const double dfXScaleWeight = (dfXScale >= 1.0) ? 1.0 : dfXScale; |
3505 | 0 | const double dfXScaledRadius = nKernelRadius / dfXScaleWeight; |
3506 | 0 | const double dfYScale = 1.0 / dfYRatioDstToSrc; |
3507 | 0 | const double dfYScaleWeight = (dfYScale >= 1.0) ? 1.0 : dfYScale; |
3508 | 0 | const double dfYScaledRadius = nKernelRadius / dfYScaleWeight; |
3509 | | |
3510 | | // Temporary array to store result of horizontal filter. |
3511 | 0 | double *const padfHorizontalFiltered = static_cast<double *>( |
3512 | 0 | VSI_MALLOC3_VERBOSE(nChunkYSize, nDstXSize, sizeof(double) * nBands)); |
3513 | | |
3514 | | // To store convolution coefficients. |
3515 | 0 | double *const padfWeights = |
3516 | 0 | static_cast<double *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE( |
3517 | 0 | static_cast<int>( |
3518 | 0 | 2 + 2 * std::max(dfXScaledRadius, dfYScaledRadius) + 0.5) * |
3519 | 0 | sizeof(double))); |
3520 | |
|
3521 | 0 | GByte *pabyChunkNodataMaskHorizontalFiltered = nullptr; |
3522 | 0 | if (pabyChunkNodataMask) |
3523 | 0 | pabyChunkNodataMaskHorizontalFiltered = |
3524 | 0 | static_cast<GByte *>(VSI_MALLOC2_VERBOSE(nChunkYSize, nDstXSize)); |
3525 | 0 | if (padfHorizontalFiltered == nullptr || padfWeights == nullptr || |
3526 | 0 | (pabyChunkNodataMask != nullptr && |
3527 | 0 | pabyChunkNodataMaskHorizontalFiltered == nullptr)) |
3528 | 0 | { |
3529 | 0 | VSIFree(pafWrkScanline); |
3530 | 0 | VSIFree(padfHorizontalFiltered); |
3531 | 0 | VSIFreeAligned(padfWeights); |
3532 | 0 | VSIFree(pabyChunkNodataMaskHorizontalFiltered); |
3533 | 0 | return CE_Failure; |
3534 | 0 | } |
3535 | | |
3536 | | /* ==================================================================== */ |
3537 | | /* First pass: horizontal filter */ |
3538 | | /* ==================================================================== */ |
3539 | 0 | const int nChunkRightXOff = nChunkXOff + nChunkXSize; |
3540 | 0 | #ifdef USE_SSE2 |
3541 | 0 | const bool bSrcPixelCountLess8 = dfXScaledRadius < 4; |
3542 | 0 | #endif |
3543 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
3544 | 0 | { |
3545 | 0 | const double dfSrcPixel = |
3546 | 0 | (iDstPixel + 0.5) * dfXRatioDstToSrc + dfSrcXDelta; |
3547 | 0 | int nSrcPixelStart = |
3548 | 0 | static_cast<int>(floor(dfSrcPixel - dfXScaledRadius + 0.5)); |
3549 | 0 | if (nSrcPixelStart < nChunkXOff) |
3550 | 0 | nSrcPixelStart = nChunkXOff; |
3551 | 0 | int nSrcPixelStop = |
3552 | 0 | static_cast<int>(dfSrcPixel + dfXScaledRadius + 0.5); |
3553 | 0 | if (nSrcPixelStop > nChunkRightXOff) |
3554 | 0 | nSrcPixelStop = nChunkRightXOff; |
3555 | | #if 0 |
3556 | | if( nSrcPixelStart < nChunkXOff && nChunkXOff > 0 ) |
3557 | | { |
3558 | | printf( "truncated iDstPixel = %d\n", iDstPixel );/*ok*/ |
3559 | | } |
3560 | | if( nSrcPixelStop > nChunkRightXOff && nChunkRightXOff < nSrcWidth ) |
3561 | | { |
3562 | | printf( "truncated iDstPixel = %d\n", iDstPixel );/*ok*/ |
3563 | | } |
3564 | | #endif |
3565 | 0 | const int nSrcPixelCount = nSrcPixelStop - nSrcPixelStart; |
3566 | 0 | double dfWeightSum = 0.0; |
3567 | | |
3568 | | // Compute convolution coefficients. |
3569 | 0 | int nSrcPixel = nSrcPixelStart; |
3570 | 0 | double dfX = dfXScaleWeight * (nSrcPixel - dfSrcPixel + 0.5); |
3571 | 0 | for (; nSrcPixel < nSrcPixelStop - 3; nSrcPixel += 4) |
3572 | 0 | { |
3573 | 0 | padfWeights[nSrcPixel - nSrcPixelStart] = dfX; |
3574 | 0 | dfX += dfXScaleWeight; |
3575 | 0 | padfWeights[nSrcPixel + 1 - nSrcPixelStart] = dfX; |
3576 | 0 | dfX += dfXScaleWeight; |
3577 | 0 | padfWeights[nSrcPixel + 2 - nSrcPixelStart] = dfX; |
3578 | 0 | dfX += dfXScaleWeight; |
3579 | 0 | padfWeights[nSrcPixel + 3 - nSrcPixelStart] = dfX; |
3580 | 0 | dfX += dfXScaleWeight; |
3581 | 0 | dfWeightSum += |
3582 | 0 | pfnFilterFunc4Values(padfWeights + nSrcPixel - nSrcPixelStart); |
3583 | 0 | } |
3584 | 0 | for (; nSrcPixel < nSrcPixelStop; ++nSrcPixel, dfX += dfXScaleWeight) |
3585 | 0 | { |
3586 | 0 | const double dfWeight = pfnFilterFunc(dfX); |
3587 | 0 | padfWeights[nSrcPixel - nSrcPixelStart] = dfWeight; |
3588 | 0 | dfWeightSum += dfWeight; |
3589 | 0 | } |
3590 | |
|
3591 | 0 | const int nHeight = nChunkYSize * nBands; |
3592 | 0 | if (pabyChunkNodataMask == nullptr) |
3593 | 0 | { |
3594 | | // For floating-point data types, we must scale down a bit values |
3595 | | // if input values are close to +/- std::numeric_limits<T>::max() |
3596 | | #ifdef OLD_CPPCHECK |
3597 | | constexpr double mulFactor = 1; |
3598 | | #else |
3599 | 0 | constexpr double mulFactor = |
3600 | 0 | (bNeedRescale && |
3601 | 0 | (std::is_same_v<T, float> || std::is_same_v<T, double>)) |
3602 | 0 | ? 2 |
3603 | 0 | : 1; |
3604 | 0 | #endif |
3605 | |
|
3606 | 0 | if (dfWeightSum != 0) |
3607 | 0 | { |
3608 | 0 | const double dfInvWeightSum = 1.0 / (mulFactor * dfWeightSum); |
3609 | 0 | for (int i = 0; i < nSrcPixelCount; ++i) |
3610 | 0 | { |
3611 | 0 | padfWeights[i] *= dfInvWeightSum; |
3612 | 0 | } |
3613 | 0 | } |
3614 | |
|
3615 | 0 | const auto ScaleValue = [ |
3616 | | #ifdef _MSC_VER |
3617 | | mulFactor |
3618 | | #endif |
3619 | 0 | ](double dfVal, [[maybe_unused]] const T *inputValues, |
3620 | 0 | [[maybe_unused]] int nInputValues) |
3621 | 0 | { |
3622 | 0 | constexpr bool isFloat = |
3623 | 0 | std::is_same_v<T, float> || std::is_same_v<T, double>; |
3624 | | if constexpr (isFloat) |
3625 | 0 | { |
3626 | 0 | if (std::isfinite(dfVal)) |
3627 | 0 | { |
3628 | 0 | return std::clamp(dfVal, |
3629 | 0 | -std::numeric_limits<double>::max() / |
3630 | 0 | mulFactor, |
3631 | 0 | std::numeric_limits<double>::max() / |
3632 | 0 | mulFactor) * |
3633 | 0 | mulFactor; |
3634 | 0 | } |
3635 | | else if constexpr (bKernelWithNegativeWeights) |
3636 | 0 | { |
3637 | 0 | if (std::isnan(dfVal)) |
3638 | 0 | { |
3639 | | // Either one of the input value is NaN or they are +/-Inf |
3640 | 0 | const bool isPositive = inputValues[0] >= 0; |
3641 | 0 | for (int i = 0; i < nInputValues; ++i) |
3642 | 0 | { |
3643 | 0 | if (std::isnan(inputValues[i])) |
3644 | 0 | return dfVal; |
3645 | | // cppcheck-suppress knownConditionTrueFalse |
3646 | 0 | if ((inputValues[i] >= 0) != isPositive) |
3647 | 0 | return dfVal; |
3648 | 0 | } |
3649 | | // All values are positive or negative infinity |
3650 | 0 | return static_cast<double>(inputValues[0]); |
3651 | 0 | } |
3652 | 0 | } |
3653 | 0 | } |
3654 | 0 | return dfVal; |
3655 | 0 | }; Unexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, true, true>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, float const*, int)#1}::operator()(double, float const*, int) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, true, true>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, double const*, int)#1}::operator()(double, double const*, int) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, false, true>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, float const*, int)#1}::operator()(double, float const*, int) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, false, true>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, double const*, int)#1}::operator()(double, double const*, int) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, false, false>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, float const*, int)#1}::operator()(double, float const*, int) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, false, false>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, double const*, int)#1}::operator()(double, double const*, int) const |
3656 | |
|
3657 | 0 | int iSrcLineOff = 0; |
3658 | 0 | #ifdef USE_SSE2 |
3659 | 0 | if (nSrcPixelCount == 4) |
3660 | 0 | { |
3661 | 0 | for (; iSrcLineOff < nHeight - 2; iSrcLineOff += 3) |
3662 | 0 | { |
3663 | 0 | const size_t j = |
3664 | 0 | static_cast<size_t>(iSrcLineOff) * nChunkXSize + |
3665 | 0 | (nSrcPixelStart - nChunkXOff); |
3666 | 0 | double dfVal1 = 0.0; |
3667 | 0 | double dfVal2 = 0.0; |
3668 | 0 | double dfVal3 = 0.0; |
3669 | 0 | GDALResampleConvolutionHorizontalPixelCount4_3rows( |
3670 | 0 | pChunk + j, pChunk + j + nChunkXSize, |
3671 | 0 | pChunk + j + 2 * nChunkXSize, padfWeights, dfVal1, |
3672 | 0 | dfVal2, dfVal3); |
3673 | 0 | padfHorizontalFiltered[static_cast<size_t>(iSrcLineOff) * |
3674 | 0 | nDstXSize + |
3675 | 0 | iDstPixel - nDstXOff] = |
3676 | 0 | ScaleValue(dfVal1, pChunk + j, 4); |
3677 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3678 | 0 | 1) * |
3679 | 0 | nDstXSize + |
3680 | 0 | iDstPixel - nDstXOff] = |
3681 | 0 | ScaleValue(dfVal2, pChunk + j + nChunkXSize, 4); |
3682 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3683 | 0 | 2) * |
3684 | 0 | nDstXSize + |
3685 | 0 | iDstPixel - nDstXOff] = |
3686 | 0 | ScaleValue(dfVal3, pChunk + j + 2 * nChunkXSize, 4); |
3687 | 0 | } |
3688 | 0 | } |
3689 | 0 | else if (bSrcPixelCountLess8) |
3690 | 0 | { |
3691 | 0 | for (; iSrcLineOff < nHeight - 2; iSrcLineOff += 3) |
3692 | 0 | { |
3693 | 0 | const size_t j = |
3694 | 0 | static_cast<size_t>(iSrcLineOff) * nChunkXSize + |
3695 | 0 | (nSrcPixelStart - nChunkXOff); |
3696 | 0 | double dfVal1 = 0.0; |
3697 | 0 | double dfVal2 = 0.0; |
3698 | 0 | double dfVal3 = 0.0; |
3699 | 0 | GDALResampleConvolutionHorizontalPixelCountLess8_3rows( |
3700 | 0 | pChunk + j, pChunk + j + nChunkXSize, |
3701 | 0 | pChunk + j + 2 * nChunkXSize, padfWeights, |
3702 | 0 | nSrcPixelCount, dfVal1, dfVal2, dfVal3); |
3703 | 0 | padfHorizontalFiltered[static_cast<size_t>(iSrcLineOff) * |
3704 | 0 | nDstXSize + |
3705 | 0 | iDstPixel - nDstXOff] = |
3706 | 0 | ScaleValue(dfVal1, pChunk + j, nSrcPixelCount); |
3707 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3708 | 0 | 1) * |
3709 | 0 | nDstXSize + |
3710 | 0 | iDstPixel - nDstXOff] = |
3711 | 0 | ScaleValue(dfVal2, pChunk + j + nChunkXSize, |
3712 | 0 | nSrcPixelCount); |
3713 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3714 | 0 | 2) * |
3715 | 0 | nDstXSize + |
3716 | 0 | iDstPixel - nDstXOff] = |
3717 | 0 | ScaleValue(dfVal3, pChunk + j + 2 * nChunkXSize, |
3718 | 0 | nSrcPixelCount); |
3719 | 0 | } |
3720 | 0 | } |
3721 | 0 | else |
3722 | 0 | #endif |
3723 | 0 | { |
3724 | 0 | for (; iSrcLineOff < nHeight - 2; iSrcLineOff += 3) |
3725 | 0 | { |
3726 | 0 | const size_t j = |
3727 | 0 | static_cast<size_t>(iSrcLineOff) * nChunkXSize + |
3728 | 0 | (nSrcPixelStart - nChunkXOff); |
3729 | 0 | double dfVal1 = 0.0; |
3730 | 0 | double dfVal2 = 0.0; |
3731 | 0 | double dfVal3 = 0.0; |
3732 | 0 | GDALResampleConvolutionHorizontal_3rows( |
3733 | 0 | pChunk + j, pChunk + j + nChunkXSize, |
3734 | 0 | pChunk + j + 2 * nChunkXSize, padfWeights, |
3735 | 0 | nSrcPixelCount, dfVal1, dfVal2, dfVal3); |
3736 | 0 | padfHorizontalFiltered[static_cast<size_t>(iSrcLineOff) * |
3737 | 0 | nDstXSize + |
3738 | 0 | iDstPixel - nDstXOff] = |
3739 | 0 | ScaleValue(dfVal1, pChunk + j, nSrcPixelCount); |
3740 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3741 | 0 | 1) * |
3742 | 0 | nDstXSize + |
3743 | 0 | iDstPixel - nDstXOff] = |
3744 | 0 | ScaleValue(dfVal2, pChunk + j + nChunkXSize, |
3745 | 0 | nSrcPixelCount); |
3746 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3747 | 0 | 2) * |
3748 | 0 | nDstXSize + |
3749 | 0 | iDstPixel - nDstXOff] = |
3750 | 0 | ScaleValue(dfVal3, pChunk + j + 2 * nChunkXSize, |
3751 | 0 | nSrcPixelCount); |
3752 | 0 | } |
3753 | 0 | } |
3754 | 0 | for (; iSrcLineOff < nHeight; ++iSrcLineOff) |
3755 | 0 | { |
3756 | 0 | const size_t j = |
3757 | 0 | static_cast<size_t>(iSrcLineOff) * nChunkXSize + |
3758 | 0 | (nSrcPixelStart - nChunkXOff); |
3759 | 0 | const double dfVal = GDALResampleConvolutionHorizontal( |
3760 | 0 | pChunk + j, padfWeights, nSrcPixelCount); |
3761 | 0 | padfHorizontalFiltered[static_cast<size_t>(iSrcLineOff) * |
3762 | 0 | nDstXSize + |
3763 | 0 | iDstPixel - nDstXOff] = |
3764 | 0 | ScaleValue(dfVal, pChunk + j, nSrcPixelCount); |
3765 | 0 | } |
3766 | 0 | } |
3767 | 0 | else |
3768 | 0 | { |
3769 | 0 | for (int iSrcLineOff = 0; iSrcLineOff < nHeight; ++iSrcLineOff) |
3770 | 0 | { |
3771 | 0 | const size_t j = |
3772 | 0 | static_cast<size_t>(iSrcLineOff) * nChunkXSize + |
3773 | 0 | (nSrcPixelStart - nChunkXOff); |
3774 | |
|
3775 | 0 | if (bKernelWithNegativeWeights) |
3776 | 0 | { |
3777 | 0 | int nConsecutiveValid = 0; |
3778 | 0 | int nMaxConsecutiveValid = 0; |
3779 | 0 | for (int k = 0; k < nSrcPixelCount; k++) |
3780 | 0 | { |
3781 | 0 | if (pabyChunkNodataMask[j + k]) |
3782 | 0 | nConsecutiveValid++; |
3783 | 0 | else if (nConsecutiveValid) |
3784 | 0 | { |
3785 | 0 | nMaxConsecutiveValid = std::max( |
3786 | 0 | nMaxConsecutiveValid, nConsecutiveValid); |
3787 | 0 | nConsecutiveValid = 0; |
3788 | 0 | } |
3789 | 0 | } |
3790 | 0 | nMaxConsecutiveValid = |
3791 | 0 | std::max(nMaxConsecutiveValid, nConsecutiveValid); |
3792 | 0 | if (nMaxConsecutiveValid < nSrcPixelCount / 2) |
3793 | 0 | { |
3794 | 0 | const size_t nTempOffset = |
3795 | 0 | static_cast<size_t>(iSrcLineOff) * nDstXSize + |
3796 | 0 | iDstPixel - nDstXOff; |
3797 | 0 | padfHorizontalFiltered[nTempOffset] = 0.0; |
3798 | 0 | pabyChunkNodataMaskHorizontalFiltered[nTempOffset] = 0; |
3799 | 0 | continue; |
3800 | 0 | } |
3801 | 0 | } |
3802 | | |
3803 | 0 | double dfVal = 0.0; |
3804 | 0 | GDALResampleConvolutionHorizontalWithMask( |
3805 | 0 | pChunk + j, pabyChunkNodataMask + j, padfWeights, |
3806 | 0 | nSrcPixelCount, dfVal, dfWeightSum); |
3807 | 0 | const size_t nTempOffset = |
3808 | 0 | static_cast<size_t>(iSrcLineOff) * nDstXSize + iDstPixel - |
3809 | 0 | nDstXOff; |
3810 | 0 | if (dfWeightSum > 0.0) |
3811 | 0 | { |
3812 | 0 | padfHorizontalFiltered[nTempOffset] = dfVal / dfWeightSum; |
3813 | 0 | pabyChunkNodataMaskHorizontalFiltered[nTempOffset] = 1; |
3814 | 0 | } |
3815 | 0 | else |
3816 | 0 | { |
3817 | 0 | padfHorizontalFiltered[nTempOffset] = 0.0; |
3818 | 0 | pabyChunkNodataMaskHorizontalFiltered[nTempOffset] = 0; |
3819 | 0 | } |
3820 | 0 | } |
3821 | 0 | } |
3822 | 0 | } |
3823 | | |
3824 | | /* ==================================================================== */ |
3825 | | /* Second pass: vertical filter */ |
3826 | | /* ==================================================================== */ |
3827 | 0 | const int nChunkBottomYOff = nChunkYOff + nChunkYSize; |
3828 | |
|
3829 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
3830 | 0 | { |
3831 | 0 | Twork *const pafDstScanline = |
3832 | 0 | pafWrkScanline |
3833 | 0 | ? pafWrkScanline |
3834 | 0 | : static_cast<Twork *>(pDstBuffer) + |
3835 | 0 | static_cast<size_t>(iDstLine - nDstYOff) * nDstXSize; |
3836 | |
|
3837 | 0 | const double dfSrcLine = |
3838 | 0 | (iDstLine + 0.5) * dfYRatioDstToSrc + dfSrcYDelta; |
3839 | 0 | int nSrcLineStart = |
3840 | 0 | static_cast<int>(floor(dfSrcLine - dfYScaledRadius + 0.5)); |
3841 | 0 | int nSrcLineStop = static_cast<int>(dfSrcLine + dfYScaledRadius + 0.5); |
3842 | 0 | if (nSrcLineStart < nChunkYOff) |
3843 | 0 | nSrcLineStart = nChunkYOff; |
3844 | 0 | if (nSrcLineStop > nChunkBottomYOff) |
3845 | 0 | nSrcLineStop = nChunkBottomYOff; |
3846 | | #if 0 |
3847 | | if( nSrcLineStart < nChunkYOff && |
3848 | | nChunkYOff > 0 ) |
3849 | | { |
3850 | | printf( "truncated iDstLine = %d\n", iDstLine );/*ok*/ |
3851 | | } |
3852 | | if( nSrcLineStop > nChunkBottomYOff && nChunkBottomYOff < nSrcHeight ) |
3853 | | { |
3854 | | printf( "truncated iDstLine = %d\n", iDstLine );/*ok*/ |
3855 | | } |
3856 | | #endif |
3857 | 0 | const int nSrcLineCount = nSrcLineStop - nSrcLineStart; |
3858 | 0 | double dfWeightSum = 0.0; |
3859 | | |
3860 | | // Compute convolution coefficients. |
3861 | 0 | int nSrcLine = nSrcLineStart; // Used after for. |
3862 | 0 | double dfY = dfYScaleWeight * (nSrcLine - dfSrcLine + 0.5); |
3863 | 0 | for (; nSrcLine < nSrcLineStop - 3; |
3864 | 0 | nSrcLine += 4, dfY += 4 * dfYScaleWeight) |
3865 | 0 | { |
3866 | 0 | padfWeights[nSrcLine - nSrcLineStart] = dfY; |
3867 | 0 | padfWeights[nSrcLine + 1 - nSrcLineStart] = dfY + dfYScaleWeight; |
3868 | 0 | padfWeights[nSrcLine + 2 - nSrcLineStart] = |
3869 | 0 | dfY + 2 * dfYScaleWeight; |
3870 | 0 | padfWeights[nSrcLine + 3 - nSrcLineStart] = |
3871 | 0 | dfY + 3 * dfYScaleWeight; |
3872 | 0 | dfWeightSum += |
3873 | 0 | pfnFilterFunc4Values(padfWeights + nSrcLine - nSrcLineStart); |
3874 | 0 | } |
3875 | 0 | for (; nSrcLine < nSrcLineStop; ++nSrcLine, dfY += dfYScaleWeight) |
3876 | 0 | { |
3877 | 0 | const double dfWeight = pfnFilterFunc(dfY); |
3878 | 0 | padfWeights[nSrcLine - nSrcLineStart] = dfWeight; |
3879 | 0 | dfWeightSum += dfWeight; |
3880 | 0 | } |
3881 | |
|
3882 | 0 | if (pabyChunkNodataMask == nullptr) |
3883 | 0 | { |
3884 | | // For floating-point data types, we must scale down a bit values |
3885 | | // if input values are close to +/- std::numeric_limits<T>::max() |
3886 | | #ifdef OLD_CPPCHECK |
3887 | | constexpr double mulFactor = 1; |
3888 | | #else |
3889 | 0 | constexpr double mulFactor = |
3890 | 0 | (bNeedRescale && |
3891 | 0 | (std::is_same_v<T, float> || std::is_same_v<T, double>)) |
3892 | 0 | ? 2 |
3893 | 0 | : 1; |
3894 | 0 | #endif |
3895 | |
|
3896 | 0 | if (dfWeightSum != 0) |
3897 | 0 | { |
3898 | 0 | const double dfInvWeightSum = 1.0 / (mulFactor * dfWeightSum); |
3899 | 0 | for (int i = 0; i < nSrcLineCount; ++i) |
3900 | 0 | padfWeights[i] *= dfInvWeightSum; |
3901 | 0 | } |
3902 | |
|
3903 | 0 | int iFilteredPixelOff = 0; // Used after for. |
3904 | | // j used after for. |
3905 | 0 | size_t j = |
3906 | 0 | (nSrcLineStart - nChunkYOff) * static_cast<size_t>(nDstXSize); |
3907 | 0 | #ifdef USE_SSE2 |
3908 | | if constexpr ((!bNeedRescale || |
3909 | | !std::is_same_v<T, float>)&&eWrkDataType == |
3910 | | GDT_Float32) |
3911 | 0 | { |
3912 | | #ifdef __AVX__ |
3913 | | for (; iFilteredPixelOff < nDstXSize - 15; |
3914 | | iFilteredPixelOff += 16, j += 16) |
3915 | | { |
3916 | | GDALResampleConvolutionVertical_16cols( |
3917 | | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
3918 | | nSrcLineCount, pafDstScanline + iFilteredPixelOff); |
3919 | | if (bHasNoData) |
3920 | | { |
3921 | | for (int k = 0; k < 16; k++) |
3922 | | { |
3923 | | pafDstScanline[iFilteredPixelOff + k] = |
3924 | | replaceValIfNodata( |
3925 | | pafDstScanline[iFilteredPixelOff + k]); |
3926 | | } |
3927 | | } |
3928 | | } |
3929 | | #else |
3930 | 0 | for (; iFilteredPixelOff < nDstXSize - 7; |
3931 | 0 | iFilteredPixelOff += 8, j += 8) |
3932 | 0 | { |
3933 | 0 | GDALResampleConvolutionVertical_8cols( |
3934 | 0 | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
3935 | 0 | nSrcLineCount, pafDstScanline + iFilteredPixelOff); |
3936 | 0 | if (bHasNoData) |
3937 | 0 | { |
3938 | 0 | for (int k = 0; k < 8; k++) |
3939 | 0 | { |
3940 | 0 | pafDstScanline[iFilteredPixelOff + k] = |
3941 | 0 | replaceValIfNodata( |
3942 | 0 | pafDstScanline[iFilteredPixelOff + k]); |
3943 | 0 | } |
3944 | 0 | } |
3945 | 0 | } |
3946 | 0 | #endif |
3947 | |
|
3948 | 0 | for (; iFilteredPixelOff < nDstXSize; iFilteredPixelOff++, j++) |
3949 | 0 | { |
3950 | 0 | const Twork fVal = |
3951 | 0 | static_cast<Twork>(GDALResampleConvolutionVertical( |
3952 | 0 | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
3953 | 0 | nSrcLineCount)); |
3954 | 0 | pafDstScanline[iFilteredPixelOff] = |
3955 | 0 | replaceValIfNodata(fVal); |
3956 | 0 | } |
3957 | | } |
3958 | | else |
3959 | | #endif |
3960 | 0 | { |
3961 | 0 | const auto ScaleValue = [ |
3962 | | #ifdef _MSC_VER |
3963 | | mulFactor |
3964 | | #endif |
3965 | 0 | ](double dfVal, [[maybe_unused]] const double *inputValues, |
3966 | 0 | [[maybe_unused]] int nStride, |
3967 | 0 | [[maybe_unused]] int nInputValues) |
3968 | 0 | { |
3969 | 0 | constexpr bool isFloat = |
3970 | 0 | std::is_same_v<T, float> || std::is_same_v<T, double>; |
3971 | | if constexpr (isFloat) |
3972 | 0 | { |
3973 | 0 | if (std::isfinite(dfVal)) |
3974 | 0 | { |
3975 | 0 | return std::clamp( |
3976 | 0 | dfVal, |
3977 | 0 | static_cast<double>( |
3978 | 0 | -std::numeric_limits<Twork>::max()) / |
3979 | 0 | mulFactor, |
3980 | 0 | static_cast<double>( |
3981 | 0 | std::numeric_limits<Twork>::max()) / |
3982 | 0 | mulFactor) * |
3983 | 0 | mulFactor; |
3984 | 0 | } |
3985 | | else if constexpr (bKernelWithNegativeWeights) |
3986 | 0 | { |
3987 | 0 | if (std::isnan(dfVal)) |
3988 | 0 | { |
3989 | | // Either one of the input value is NaN or they are +/-Inf |
3990 | 0 | const bool isPositive = inputValues[0] >= 0; |
3991 | 0 | for (int i = 0; i < nInputValues; ++i) |
3992 | 0 | { |
3993 | 0 | if (std::isnan(inputValues[i * nStride])) |
3994 | 0 | return dfVal; |
3995 | | // cppcheck-suppress knownConditionTrueFalse |
3996 | 0 | if ((inputValues[i] >= 0) != isPositive) |
3997 | 0 | return dfVal; |
3998 | 0 | } |
3999 | | // All values are positive or negative infinity |
4000 | 0 | return inputValues[0]; |
4001 | 0 | } |
4002 | 0 | } |
4003 | 0 | } |
4004 | | |
4005 | 0 | return dfVal; |
4006 | 0 | }; Unexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, true, true>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, double const*, int, int)#1}::operator()(double, double const*, int, int) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, true, true>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, double const*, int, int)#1}::operator()(double, double const*, int, int) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, false, true>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, double const*, int, int)#1}::operator()(double, double const*, int, int) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, false, true>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, double const*, int, int)#1}::operator()(double, double const*, int, int) constUnexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, false, false>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float)::{lambda(double, double const*, int, int)#1}::operator()(double, double const*, int, int) const |
4007 | |
|
4008 | 0 | for (; iFilteredPixelOff < nDstXSize - 1; |
4009 | 0 | iFilteredPixelOff += 2, j += 2) |
4010 | 0 | { |
4011 | 0 | double dfVal1 = 0.0; |
4012 | 0 | double dfVal2 = 0.0; |
4013 | 0 | GDALResampleConvolutionVertical_2cols( |
4014 | 0 | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
4015 | 0 | nSrcLineCount, dfVal1, dfVal2); |
4016 | 0 | pafDstScanline[iFilteredPixelOff] = |
4017 | 0 | replaceValIfNodata(static_cast<Twork>( |
4018 | 0 | ScaleValue(dfVal1, padfHorizontalFiltered + j, |
4019 | 0 | nDstXSize, nSrcLineCount))); |
4020 | 0 | pafDstScanline[iFilteredPixelOff + 1] = |
4021 | 0 | replaceValIfNodata(static_cast<Twork>( |
4022 | 0 | ScaleValue(dfVal2, padfHorizontalFiltered + j + 1, |
4023 | 0 | nDstXSize, nSrcLineCount))); |
4024 | 0 | } |
4025 | 0 | if (iFilteredPixelOff < nDstXSize) |
4026 | 0 | { |
4027 | 0 | const double dfVal = GDALResampleConvolutionVertical( |
4028 | 0 | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
4029 | 0 | nSrcLineCount); |
4030 | 0 | pafDstScanline[iFilteredPixelOff] = |
4031 | 0 | replaceValIfNodata(static_cast<Twork>( |
4032 | 0 | ScaleValue(dfVal, padfHorizontalFiltered + j, |
4033 | 0 | nDstXSize, nSrcLineCount))); |
4034 | 0 | } |
4035 | 0 | } |
4036 | 0 | } |
4037 | 0 | else |
4038 | 0 | { |
4039 | 0 | for (int iFilteredPixelOff = 0; iFilteredPixelOff < nDstXSize; |
4040 | 0 | ++iFilteredPixelOff) |
4041 | 0 | { |
4042 | 0 | double dfVal = 0.0; |
4043 | 0 | dfWeightSum = 0.0; |
4044 | 0 | size_t j = (nSrcLineStart - nChunkYOff) * |
4045 | 0 | static_cast<size_t>(nDstXSize) + |
4046 | 0 | iFilteredPixelOff; |
4047 | 0 | if (bKernelWithNegativeWeights) |
4048 | 0 | { |
4049 | 0 | int nConsecutiveValid = 0; |
4050 | 0 | int nMaxConsecutiveValid = 0; |
4051 | 0 | for (int i = 0; i < nSrcLineCount; ++i, j += nDstXSize) |
4052 | 0 | { |
4053 | 0 | const double dfWeight = |
4054 | 0 | padfWeights[i] * |
4055 | 0 | pabyChunkNodataMaskHorizontalFiltered[j]; |
4056 | 0 | if (pabyChunkNodataMaskHorizontalFiltered[j]) |
4057 | 0 | { |
4058 | 0 | nConsecutiveValid++; |
4059 | 0 | } |
4060 | 0 | else if (nConsecutiveValid) |
4061 | 0 | { |
4062 | 0 | nMaxConsecutiveValid = std::max( |
4063 | 0 | nMaxConsecutiveValid, nConsecutiveValid); |
4064 | 0 | nConsecutiveValid = 0; |
4065 | 0 | } |
4066 | 0 | dfVal += padfHorizontalFiltered[j] * dfWeight; |
4067 | 0 | dfWeightSum += dfWeight; |
4068 | 0 | } |
4069 | 0 | nMaxConsecutiveValid = |
4070 | 0 | std::max(nMaxConsecutiveValid, nConsecutiveValid); |
4071 | 0 | if (nMaxConsecutiveValid < nSrcLineCount / 2) |
4072 | 0 | { |
4073 | 0 | pafDstScanline[iFilteredPixelOff] = |
4074 | 0 | static_cast<Twork>(dfNoDataValue); |
4075 | 0 | continue; |
4076 | 0 | } |
4077 | 0 | } |
4078 | 0 | else |
4079 | 0 | { |
4080 | 0 | for (int i = 0; i < nSrcLineCount; ++i, j += nDstXSize) |
4081 | 0 | { |
4082 | 0 | const double dfWeight = |
4083 | 0 | padfWeights[i] * |
4084 | 0 | pabyChunkNodataMaskHorizontalFiltered[j]; |
4085 | 0 | dfVal += padfHorizontalFiltered[j] * dfWeight; |
4086 | 0 | dfWeightSum += dfWeight; |
4087 | 0 | } |
4088 | 0 | } |
4089 | 0 | if (dfWeightSum > 0.0) |
4090 | 0 | { |
4091 | 0 | pafDstScanline[iFilteredPixelOff] = replaceValIfNodata( |
4092 | 0 | static_cast<Twork>(dfVal / dfWeightSum)); |
4093 | 0 | } |
4094 | 0 | else |
4095 | 0 | { |
4096 | 0 | pafDstScanline[iFilteredPixelOff] = |
4097 | 0 | static_cast<Twork>(dfNoDataValue); |
4098 | 0 | } |
4099 | 0 | } |
4100 | 0 | } |
4101 | |
|
4102 | 0 | if (fMaxVal != 0.0f) |
4103 | 0 | { |
4104 | | if constexpr (std::is_same_v<T, double>) |
4105 | 0 | { |
4106 | 0 | for (int i = 0; i < nDstXSize; ++i) |
4107 | 0 | { |
4108 | 0 | if (pafDstScanline[i] > static_cast<double>(fMaxVal)) |
4109 | 0 | pafDstScanline[i] = static_cast<double>(fMaxVal); |
4110 | 0 | } |
4111 | | } |
4112 | | else |
4113 | 0 | { |
4114 | 0 | for (int i = 0; i < nDstXSize; ++i) |
4115 | 0 | { |
4116 | 0 | if (pafDstScanline[i] > fMaxVal) |
4117 | 0 | pafDstScanline[i] = fMaxVal; |
4118 | 0 | } |
4119 | 0 | } |
4120 | 0 | } |
4121 | |
|
4122 | 0 | if (pafWrkScanline) |
4123 | 0 | { |
4124 | 0 | GDALCopyWords64(pafWrkScanline, eWrkDataType, nWrkDataTypeSize, |
4125 | 0 | static_cast<GByte *>(pDstBuffer) + |
4126 | 0 | static_cast<size_t>(iDstLine - nDstYOff) * |
4127 | 0 | nDstXSize * nDstDataTypeSize, |
4128 | 0 | dstDataType, nDstDataTypeSize, nDstXSize); |
4129 | 0 | } |
4130 | 0 | } |
4131 | |
|
4132 | 0 | VSIFree(pafWrkScanline); |
4133 | 0 | VSIFreeAligned(padfWeights); |
4134 | 0 | VSIFree(padfHorizontalFiltered); |
4135 | 0 | VSIFree(pabyChunkNodataMaskHorizontalFiltered); |
4136 | |
|
4137 | 0 | return CE_None; |
4138 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<unsigned char, float, (GDALDataType)6, true, true>(GDALOverviewResampleArgs const&, unsigned char const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<unsigned short, float, (GDALDataType)6, true, true>(GDALOverviewResampleArgs const&, unsigned short const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, true, true>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, true, true>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<unsigned char, float, (GDALDataType)6, false, true>(GDALOverviewResampleArgs const&, unsigned char const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<unsigned short, float, (GDALDataType)6, false, true>(GDALOverviewResampleArgs const&, unsigned short const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, false, true>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, false, true>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<unsigned char, 
float, (GDALDataType)6, false, false>(GDALOverviewResampleArgs const&, unsigned char const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<unsigned short, float, (GDALDataType)6, false, false>(GDALOverviewResampleArgs const&, unsigned short const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6, false, false>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7, false, false>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, float) |
4139 | | |
4140 | | template <bool bKernelWithNegativeWeights, bool bNeedRescale> |
4141 | | static CPLErr |
4142 | | GDALResampleChunk_ConvolutionInternal(const GDALOverviewResampleArgs &args, |
4143 | | const void *pChunk, void **ppDstBuffer, |
4144 | | GDALDataType *peDstBufferDataType) |
4145 | 0 | { |
4146 | 0 | GDALResampleAlg eResample; |
4147 | 0 | if (EQUAL(args.pszResampling, "BILINEAR")) |
4148 | 0 | eResample = GRA_Bilinear; |
4149 | 0 | else if (EQUAL(args.pszResampling, "CUBIC")) |
4150 | 0 | eResample = GRA_Cubic; |
4151 | 0 | else if (EQUAL(args.pszResampling, "CUBICSPLINE")) |
4152 | 0 | eResample = GRA_CubicSpline; |
4153 | 0 | else if (EQUAL(args.pszResampling, "LANCZOS")) |
4154 | 0 | eResample = GRA_Lanczos; |
4155 | 0 | else |
4156 | 0 | { |
4157 | 0 | CPLAssert(false); |
4158 | 0 | return CE_Failure; |
4159 | 0 | } |
4160 | 0 | const int nKernelRadius = GWKGetFilterRadius(eResample); |
4161 | 0 | FilterFuncType pfnFilterFunc = GWKGetFilterFunc(eResample); |
4162 | 0 | const FilterFunc4ValuesType pfnFilterFunc4Values = |
4163 | 0 | GWKGetFilterFunc4Values(eResample); |
4164 | |
|
4165 | 0 | float fMaxVal = 0.f; |
4166 | | // Cubic, etc... can have overshoots, so make sure we clamp values to the |
4167 | | // maximum value if NBITS is set. |
4168 | 0 | if (eResample != GRA_Bilinear && args.nOvrNBITS > 0 && |
4169 | 0 | (args.eOvrDataType == GDT_Byte || args.eOvrDataType == GDT_UInt16 || |
4170 | 0 | args.eOvrDataType == GDT_UInt32)) |
4171 | 0 | { |
4172 | 0 | int nBits = args.nOvrNBITS; |
4173 | 0 | if (nBits == GDALGetDataTypeSizeBits(args.eOvrDataType)) |
4174 | 0 | nBits = 0; |
4175 | 0 | if (nBits > 0 && nBits < 32) |
4176 | 0 | fMaxVal = static_cast<float>((1U << nBits) - 1); |
4177 | 0 | } |
4178 | |
|
4179 | 0 | *ppDstBuffer = VSI_MALLOC3_VERBOSE( |
4180 | 0 | args.nDstXOff2 - args.nDstXOff, args.nDstYOff2 - args.nDstYOff, |
4181 | 0 | GDALGetDataTypeSizeBytes(args.eOvrDataType)); |
4182 | 0 | if (*ppDstBuffer == nullptr) |
4183 | 0 | { |
4184 | 0 | return CE_Failure; |
4185 | 0 | } |
4186 | 0 | *peDstBufferDataType = args.eOvrDataType; |
4187 | |
|
4188 | 0 | switch (args.eWrkDataType) |
4189 | 0 | { |
4190 | 0 | case GDT_Byte: |
4191 | 0 | { |
4192 | 0 | return GDALResampleChunk_ConvolutionT<GByte, float, GDT_Float32, |
4193 | 0 | bKernelWithNegativeWeights, |
4194 | 0 | bNeedRescale>( |
4195 | 0 | args, static_cast<const GByte *>(pChunk), *ppDstBuffer, |
4196 | 0 | pfnFilterFunc, pfnFilterFunc4Values, nKernelRadius, fMaxVal); |
4197 | 0 | } |
4198 | | |
4199 | 0 | case GDT_UInt16: |
4200 | 0 | { |
4201 | 0 | return GDALResampleChunk_ConvolutionT<GUInt16, float, GDT_Float32, |
4202 | 0 | bKernelWithNegativeWeights, |
4203 | 0 | bNeedRescale>( |
4204 | 0 | args, static_cast<const GUInt16 *>(pChunk), *ppDstBuffer, |
4205 | 0 | pfnFilterFunc, pfnFilterFunc4Values, nKernelRadius, fMaxVal); |
4206 | 0 | } |
4207 | | |
4208 | 0 | case GDT_Float32: |
4209 | 0 | { |
4210 | 0 | return GDALResampleChunk_ConvolutionT<float, float, GDT_Float32, |
4211 | 0 | bKernelWithNegativeWeights, |
4212 | 0 | bNeedRescale>( |
4213 | 0 | args, static_cast<const float *>(pChunk), *ppDstBuffer, |
4214 | 0 | pfnFilterFunc, pfnFilterFunc4Values, nKernelRadius, fMaxVal); |
4215 | 0 | } |
4216 | | |
4217 | 0 | case GDT_Float64: |
4218 | 0 | { |
4219 | 0 | return GDALResampleChunk_ConvolutionT<double, double, GDT_Float64, |
4220 | 0 | bKernelWithNegativeWeights, |
4221 | 0 | bNeedRescale>( |
4222 | 0 | args, static_cast<const double *>(pChunk), *ppDstBuffer, |
4223 | 0 | pfnFilterFunc, pfnFilterFunc4Values, nKernelRadius, fMaxVal); |
4224 | 0 | } |
4225 | | |
4226 | 0 | default: |
4227 | 0 | break; |
4228 | 0 | } |
4229 | | |
4230 | 0 | CPLAssert(false); |
4231 | 0 | return CE_Failure; |
4232 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionInternal<true, true>(GDALOverviewResampleArgs const&, void const*, void**, GDALDataType*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionInternal<false, true>(GDALOverviewResampleArgs const&, void const*, void**, GDALDataType*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionInternal<false, false>(GDALOverviewResampleArgs const&, void const*, void**, GDALDataType*) |
4233 | | |
4234 | | static CPLErr |
4235 | | GDALResampleChunk_Convolution(const GDALOverviewResampleArgs &args, |
4236 | | const void *pChunk, void **ppDstBuffer, |
4237 | | GDALDataType *peDstBufferDataType) |
4238 | 0 | { |
4239 | 0 | if (EQUAL(args.pszResampling, "CUBIC") || |
4240 | 0 | EQUAL(args.pszResampling, "LANCZOS")) |
4241 | 0 | return GDALResampleChunk_ConvolutionInternal< |
4242 | 0 | /* bKernelWithNegativeWeights=*/true, /* bNeedRescale = */ true>( |
4243 | 0 | args, pChunk, ppDstBuffer, peDstBufferDataType); |
4244 | 0 | else if (EQUAL(args.pszResampling, "CUBICSPLINE")) |
4245 | 0 | return GDALResampleChunk_ConvolutionInternal<false, true>( |
4246 | 0 | args, pChunk, ppDstBuffer, peDstBufferDataType); |
4247 | 0 | else |
4248 | 0 | return GDALResampleChunk_ConvolutionInternal<false, false>( |
4249 | 0 | args, pChunk, ppDstBuffer, peDstBufferDataType); |
4250 | 0 | } |
4251 | | |
4252 | | /************************************************************************/ |
4253 | | /* GDALResampleChunkC32R() */ |
4254 | | /************************************************************************/ |
4255 | | |
4256 | | static CPLErr GDALResampleChunkC32R(const int nSrcWidth, const int nSrcHeight, |
4257 | | const float *pafChunk, const int nChunkYOff, |
4258 | | const int nChunkYSize, const int nDstYOff, |
4259 | | const int nDstYOff2, const int nOvrXSize, |
4260 | | const int nOvrYSize, void **ppDstBuffer, |
4261 | | GDALDataType *peDstBufferDataType, |
4262 | | const char *pszResampling) |
4263 | | |
4264 | 0 | { |
4265 | 0 | enum Method |
4266 | 0 | { |
4267 | 0 | NEAR, |
4268 | 0 | AVERAGE, |
4269 | 0 | AVERAGE_MAGPHASE, |
4270 | 0 | RMS, |
4271 | 0 | }; |
4272 | |
|
4273 | 0 | Method eMethod = NEAR; |
4274 | 0 | if (STARTS_WITH_CI(pszResampling, "NEAR")) |
4275 | 0 | { |
4276 | 0 | eMethod = NEAR; |
4277 | 0 | } |
4278 | 0 | else if (EQUAL(pszResampling, "AVERAGE_MAGPHASE")) |
4279 | 0 | { |
4280 | 0 | eMethod = AVERAGE_MAGPHASE; |
4281 | 0 | } |
4282 | 0 | else if (EQUAL(pszResampling, "RMS")) |
4283 | 0 | { |
4284 | 0 | eMethod = RMS; |
4285 | 0 | } |
4286 | 0 | else if (STARTS_WITH_CI(pszResampling, "AVER")) |
4287 | 0 | { |
4288 | 0 | eMethod = AVERAGE; |
4289 | 0 | } |
4290 | 0 | else |
4291 | 0 | { |
4292 | 0 | CPLError( |
4293 | 0 | CE_Failure, CPLE_NotSupported, |
4294 | 0 | "Resampling method %s is not supported for complex data types. " |
4295 | 0 | "Only NEAREST, AVERAGE, AVERAGE_MAGPHASE and RMS are supported", |
4296 | 0 | pszResampling); |
4297 | 0 | return CE_Failure; |
4298 | 0 | } |
4299 | | |
4300 | 0 | const int nOXSize = nOvrXSize; |
4301 | 0 | *ppDstBuffer = VSI_MALLOC3_VERBOSE(nOXSize, nDstYOff2 - nDstYOff, |
4302 | 0 | GDALGetDataTypeSizeBytes(GDT_CFloat32)); |
4303 | 0 | if (*ppDstBuffer == nullptr) |
4304 | 0 | { |
4305 | 0 | return CE_Failure; |
4306 | 0 | } |
4307 | 0 | float *const pafDstBuffer = static_cast<float *>(*ppDstBuffer); |
4308 | 0 | *peDstBufferDataType = GDT_CFloat32; |
4309 | |
|
4310 | 0 | const int nOYSize = nOvrYSize; |
4311 | 0 | const double dfXRatioDstToSrc = static_cast<double>(nSrcWidth) / nOXSize; |
4312 | 0 | const double dfYRatioDstToSrc = static_cast<double>(nSrcHeight) / nOYSize; |
4313 | | |
4314 | | /* ==================================================================== */ |
4315 | | /* Loop over destination scanlines. */ |
4316 | | /* ==================================================================== */ |
4317 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
4318 | 0 | { |
4319 | 0 | int nSrcYOff = static_cast<int>(0.5 + iDstLine * dfYRatioDstToSrc); |
4320 | 0 | if (nSrcYOff < nChunkYOff) |
4321 | 0 | nSrcYOff = nChunkYOff; |
4322 | |
|
4323 | 0 | int nSrcYOff2 = |
4324 | 0 | static_cast<int>(0.5 + (iDstLine + 1) * dfYRatioDstToSrc); |
4325 | 0 | if (nSrcYOff2 == nSrcYOff) |
4326 | 0 | nSrcYOff2++; |
4327 | |
|
4328 | 0 | if (nSrcYOff2 > nSrcHeight || iDstLine == nOYSize - 1) |
4329 | 0 | { |
4330 | 0 | if (nSrcYOff == nSrcHeight && nSrcHeight - 1 >= nChunkYOff) |
4331 | 0 | nSrcYOff = nSrcHeight - 1; |
4332 | 0 | nSrcYOff2 = nSrcHeight; |
4333 | 0 | } |
4334 | 0 | if (nSrcYOff2 > nChunkYOff + nChunkYSize) |
4335 | 0 | nSrcYOff2 = nChunkYOff + nChunkYSize; |
4336 | |
|
4337 | 0 | const float *const pafSrcScanline = |
4338 | 0 | pafChunk + |
4339 | 0 | (static_cast<size_t>(nSrcYOff - nChunkYOff) * nSrcWidth) * 2; |
4340 | 0 | float *const pafDstScanline = |
4341 | 0 | pafDstBuffer + |
4342 | 0 | static_cast<size_t>(iDstLine - nDstYOff) * 2 * nOXSize; |
4343 | | |
4344 | | /* -------------------------------------------------------------------- |
4345 | | */ |
4346 | | /* Loop over destination pixels */ |
4347 | | /* -------------------------------------------------------------------- |
4348 | | */ |
4349 | 0 | for (int iDstPixel = 0; iDstPixel < nOXSize; ++iDstPixel) |
4350 | 0 | { |
4351 | 0 | const size_t iDstPixelSZ = static_cast<size_t>(iDstPixel); |
4352 | 0 | int nSrcXOff = static_cast<int>(0.5 + iDstPixel * dfXRatioDstToSrc); |
4353 | 0 | int nSrcXOff2 = |
4354 | 0 | static_cast<int>(0.5 + (iDstPixel + 1) * dfXRatioDstToSrc); |
4355 | 0 | if (nSrcXOff2 == nSrcXOff) |
4356 | 0 | nSrcXOff2++; |
4357 | 0 | if (nSrcXOff2 > nSrcWidth || iDstPixel == nOXSize - 1) |
4358 | 0 | { |
4359 | 0 | if (nSrcXOff == nSrcWidth && nSrcWidth - 1 >= 0) |
4360 | 0 | nSrcXOff = nSrcWidth - 1; |
4361 | 0 | nSrcXOff2 = nSrcWidth; |
4362 | 0 | } |
4363 | 0 | const size_t nSrcXOffSZ = static_cast<size_t>(nSrcXOff); |
4364 | |
|
4365 | 0 | if (eMethod == NEAR) |
4366 | 0 | { |
4367 | 0 | pafDstScanline[iDstPixelSZ * 2] = |
4368 | 0 | pafSrcScanline[nSrcXOffSZ * 2]; |
4369 | 0 | pafDstScanline[iDstPixelSZ * 2 + 1] = |
4370 | 0 | pafSrcScanline[nSrcXOffSZ * 2 + 1]; |
4371 | 0 | } |
4372 | 0 | else if (eMethod == AVERAGE_MAGPHASE) |
4373 | 0 | { |
4374 | 0 | double dfTotalR = 0.0; |
4375 | 0 | double dfTotalI = 0.0; |
4376 | 0 | double dfTotalM = 0.0; |
4377 | 0 | size_t nCount = 0; |
4378 | |
|
4379 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
4380 | 0 | { |
4381 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
4382 | 0 | { |
4383 | 0 | const double dfR = double( |
4384 | 0 | pafSrcScanline[static_cast<size_t>(iX) * 2 + |
4385 | 0 | static_cast<size_t>(iY - nSrcYOff) * |
4386 | 0 | nSrcWidth * 2]); |
4387 | 0 | const double dfI = double( |
4388 | 0 | pafSrcScanline[static_cast<size_t>(iX) * 2 + |
4389 | 0 | static_cast<size_t>(iY - nSrcYOff) * |
4390 | 0 | nSrcWidth * 2 + |
4391 | 0 | 1]); |
4392 | 0 | dfTotalR += dfR; |
4393 | 0 | dfTotalI += dfI; |
4394 | 0 | dfTotalM += std::hypot(dfR, dfI); |
4395 | 0 | ++nCount; |
4396 | 0 | } |
4397 | 0 | } |
4398 | |
|
4399 | 0 | CPLAssert(nCount > 0); |
4400 | 0 | if (nCount == 0) |
4401 | 0 | { |
4402 | 0 | pafDstScanline[iDstPixelSZ * 2] = 0.0; |
4403 | 0 | pafDstScanline[iDstPixelSZ * 2 + 1] = 0.0; |
4404 | 0 | } |
4405 | 0 | else |
4406 | 0 | { |
4407 | 0 | pafDstScanline[iDstPixelSZ * 2] = static_cast<float>( |
4408 | 0 | dfTotalR / static_cast<double>(nCount)); |
4409 | 0 | pafDstScanline[iDstPixelSZ * 2 + 1] = static_cast<float>( |
4410 | 0 | dfTotalI / static_cast<double>(nCount)); |
4411 | 0 | const double dfM = |
4412 | 0 | double(std::hypot(pafDstScanline[iDstPixelSZ * 2], |
4413 | 0 | pafDstScanline[iDstPixelSZ * 2 + 1])); |
4414 | 0 | const double dfDesiredM = |
4415 | 0 | dfTotalM / static_cast<double>(nCount); |
4416 | 0 | double dfRatio = 1.0; |
4417 | 0 | if (dfM != 0.0) |
4418 | 0 | dfRatio = dfDesiredM / dfM; |
4419 | |
|
4420 | 0 | pafDstScanline[iDstPixelSZ * 2] *= |
4421 | 0 | static_cast<float>(dfRatio); |
4422 | 0 | pafDstScanline[iDstPixelSZ * 2 + 1] *= |
4423 | 0 | static_cast<float>(dfRatio); |
4424 | 0 | } |
4425 | 0 | } |
4426 | 0 | else if (eMethod == RMS) |
4427 | 0 | { |
4428 | 0 | double dfTotalR = 0.0; |
4429 | 0 | double dfTotalI = 0.0; |
4430 | 0 | size_t nCount = 0; |
4431 | |
|
4432 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
4433 | 0 | { |
4434 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
4435 | 0 | { |
4436 | 0 | const double dfR = double( |
4437 | 0 | pafSrcScanline[static_cast<size_t>(iX) * 2 + |
4438 | 0 | static_cast<size_t>(iY - nSrcYOff) * |
4439 | 0 | nSrcWidth * 2]); |
4440 | 0 | const double dfI = double( |
4441 | 0 | pafSrcScanline[static_cast<size_t>(iX) * 2 + |
4442 | 0 | static_cast<size_t>(iY - nSrcYOff) * |
4443 | 0 | nSrcWidth * 2 + |
4444 | 0 | 1]); |
4445 | |
|
4446 | 0 | dfTotalR += SQUARE(dfR); |
4447 | 0 | dfTotalI += SQUARE(dfI); |
4448 | |
|
4449 | 0 | ++nCount; |
4450 | 0 | } |
4451 | 0 | } |
4452 | |
|
4453 | 0 | CPLAssert(nCount > 0); |
4454 | 0 | if (nCount == 0) |
4455 | 0 | { |
4456 | 0 | pafDstScanline[iDstPixelSZ * 2] = 0.0; |
4457 | 0 | pafDstScanline[iDstPixelSZ * 2 + 1] = 0.0; |
4458 | 0 | } |
4459 | 0 | else |
4460 | 0 | { |
4461 | | /* compute RMS */ |
4462 | 0 | pafDstScanline[iDstPixelSZ * 2] = static_cast<float>( |
4463 | 0 | sqrt(dfTotalR / static_cast<double>(nCount))); |
4464 | 0 | pafDstScanline[iDstPixelSZ * 2 + 1] = static_cast<float>( |
4465 | 0 | sqrt(dfTotalI / static_cast<double>(nCount))); |
4466 | 0 | } |
4467 | 0 | } |
4468 | 0 | else if (eMethod == AVERAGE) |
4469 | 0 | { |
4470 | 0 | double dfTotalR = 0.0; |
4471 | 0 | double dfTotalI = 0.0; |
4472 | 0 | size_t nCount = 0; |
4473 | |
|
4474 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
4475 | 0 | { |
4476 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
4477 | 0 | { |
4478 | | // TODO(schwehr): Maybe use std::complex? |
4479 | 0 | dfTotalR += double( |
4480 | 0 | pafSrcScanline[static_cast<size_t>(iX) * 2 + |
4481 | 0 | static_cast<size_t>(iY - nSrcYOff) * |
4482 | 0 | nSrcWidth * 2]); |
4483 | 0 | dfTotalI += double( |
4484 | 0 | pafSrcScanline[static_cast<size_t>(iX) * 2 + |
4485 | 0 | static_cast<size_t>(iY - nSrcYOff) * |
4486 | 0 | nSrcWidth * 2 + |
4487 | 0 | 1]); |
4488 | 0 | ++nCount; |
4489 | 0 | } |
4490 | 0 | } |
4491 | |
|
4492 | 0 | CPLAssert(nCount > 0); |
4493 | 0 | if (nCount == 0) |
4494 | 0 | { |
4495 | 0 | pafDstScanline[iDstPixelSZ * 2] = 0.0; |
4496 | 0 | pafDstScanline[iDstPixelSZ * 2 + 1] = 0.0; |
4497 | 0 | } |
4498 | 0 | else |
4499 | 0 | { |
4500 | 0 | pafDstScanline[iDstPixelSZ * 2] = static_cast<float>( |
4501 | 0 | dfTotalR / static_cast<double>(nCount)); |
4502 | 0 | pafDstScanline[iDstPixelSZ * 2 + 1] = static_cast<float>( |
4503 | 0 | dfTotalI / static_cast<double>(nCount)); |
4504 | 0 | } |
4505 | 0 | } |
4506 | 0 | } |
4507 | 0 | } |
4508 | | |
4509 | 0 | return CE_None; |
4510 | 0 | } |
4511 | | |
4512 | | /************************************************************************/ |
4513 | | /* GDALRegenerateCascadingOverviews() */ |
4514 | | /* */ |
4515 | | /* Generate a list of overviews in order from largest to */ |
4516 | | /* smallest, computing each from the next larger. */ |
4517 | | /************************************************************************/ |
4518 | | |
4519 | | static CPLErr GDALRegenerateCascadingOverviews( |
4520 | | GDALRasterBand *poSrcBand, int nOverviews, GDALRasterBand **papoOvrBands, |
4521 | | const char *pszResampling, GDALProgressFunc pfnProgress, |
4522 | | void *pProgressData, CSLConstList papszOptions) |
4523 | | |
4524 | 0 | { |
4525 | | /* -------------------------------------------------------------------- */ |
4526 | | /* First, we must put the overviews in order from largest to */ |
4527 | | /* smallest. */ |
4528 | | /* -------------------------------------------------------------------- */ |
4529 | 0 | for (int i = 0; i < nOverviews - 1; ++i) |
4530 | 0 | { |
4531 | 0 | for (int j = 0; j < nOverviews - i - 1; ++j) |
4532 | 0 | { |
4533 | 0 | if (papoOvrBands[j]->GetXSize() * |
4534 | 0 | static_cast<float>(papoOvrBands[j]->GetYSize()) < |
4535 | 0 | papoOvrBands[j + 1]->GetXSize() * |
4536 | 0 | static_cast<float>(papoOvrBands[j + 1]->GetYSize())) |
4537 | 0 | { |
4538 | 0 | GDALRasterBand *poTempBand = papoOvrBands[j]; |
4539 | 0 | papoOvrBands[j] = papoOvrBands[j + 1]; |
4540 | 0 | papoOvrBands[j + 1] = poTempBand; |
4541 | 0 | } |
4542 | 0 | } |
4543 | 0 | } |
4544 | | |
4545 | | /* -------------------------------------------------------------------- */ |
4546 | | /* Count total pixels so we can prepare appropriate scaled */ |
4547 | | /* progress functions. */ |
4548 | | /* -------------------------------------------------------------------- */ |
4549 | 0 | double dfTotalPixels = 0.0; |
4550 | |
|
4551 | 0 | for (int i = 0; i < nOverviews; ++i) |
4552 | 0 | { |
4553 | 0 | dfTotalPixels += papoOvrBands[i]->GetXSize() * |
4554 | 0 | static_cast<double>(papoOvrBands[i]->GetYSize()); |
4555 | 0 | } |
4556 | | |
4557 | | /* -------------------------------------------------------------------- */ |
4558 | | /* Generate all the bands. */ |
4559 | | /* -------------------------------------------------------------------- */ |
4560 | 0 | double dfPixelsProcessed = 0.0; |
4561 | |
|
4562 | 0 | for (int i = 0; i < nOverviews; ++i) |
4563 | 0 | { |
4564 | 0 | GDALRasterBand *poBaseBand = poSrcBand; |
4565 | 0 | if (i != 0) |
4566 | 0 | poBaseBand = papoOvrBands[i - 1]; |
4567 | |
|
4568 | 0 | double dfPixels = papoOvrBands[i]->GetXSize() * |
4569 | 0 | static_cast<double>(papoOvrBands[i]->GetYSize()); |
4570 | |
|
4571 | 0 | void *pScaledProgressData = GDALCreateScaledProgress( |
4572 | 0 | dfPixelsProcessed / dfTotalPixels, |
4573 | 0 | (dfPixelsProcessed + dfPixels) / dfTotalPixels, pfnProgress, |
4574 | 0 | pProgressData); |
4575 | |
|
4576 | 0 | const CPLErr eErr = GDALRegenerateOverviewsEx( |
4577 | 0 | poBaseBand, 1, |
4578 | 0 | reinterpret_cast<GDALRasterBandH *>(papoOvrBands) + i, |
4579 | 0 | pszResampling, GDALScaledProgress, pScaledProgressData, |
4580 | 0 | papszOptions); |
4581 | 0 | GDALDestroyScaledProgress(pScaledProgressData); |
4582 | |
|
4583 | 0 | if (eErr != CE_None) |
4584 | 0 | return eErr; |
4585 | | |
4586 | 0 | dfPixelsProcessed += dfPixels; |
4587 | | |
4588 | | // Only do the bit2grayscale promotion on the base band. |
4589 | 0 | if (STARTS_WITH_CI(pszResampling, |
4590 | 0 | "AVERAGE_BIT2G" /* AVERAGE_BIT2GRAYSCALE */)) |
4591 | 0 | pszResampling = "AVERAGE"; |
4592 | 0 | } |
4593 | | |
4594 | 0 | return CE_None; |
4595 | 0 | } |
4596 | | |
4597 | | /************************************************************************/ |
4598 | | /* GDALGetResampleFunction() */ |
4599 | | /************************************************************************/ |
4600 | | |
4601 | | GDALResampleFunction GDALGetResampleFunction(const char *pszResampling, |
4602 | | int *pnRadius) |
4603 | 0 | { |
4604 | 0 | if (pnRadius) |
4605 | 0 | *pnRadius = 0; |
4606 | 0 | if (STARTS_WITH_CI(pszResampling, "NEAR")) |
4607 | 0 | return GDALResampleChunk_Near; |
4608 | 0 | else if (STARTS_WITH_CI(pszResampling, "AVER") || |
4609 | 0 | EQUAL(pszResampling, "RMS")) |
4610 | 0 | return GDALResampleChunk_AverageOrRMS; |
4611 | 0 | else if (EQUAL(pszResampling, "GAUSS")) |
4612 | 0 | { |
4613 | 0 | if (pnRadius) |
4614 | 0 | *pnRadius = 1; |
4615 | 0 | return GDALResampleChunk_Gauss; |
4616 | 0 | } |
4617 | 0 | else if (EQUAL(pszResampling, "MODE")) |
4618 | 0 | return GDALResampleChunk_Mode; |
4619 | 0 | else if (EQUAL(pszResampling, "CUBIC")) |
4620 | 0 | { |
4621 | 0 | if (pnRadius) |
4622 | 0 | *pnRadius = GWKGetFilterRadius(GRA_Cubic); |
4623 | 0 | return GDALResampleChunk_Convolution; |
4624 | 0 | } |
4625 | 0 | else if (EQUAL(pszResampling, "CUBICSPLINE")) |
4626 | 0 | { |
4627 | 0 | if (pnRadius) |
4628 | 0 | *pnRadius = GWKGetFilterRadius(GRA_CubicSpline); |
4629 | 0 | return GDALResampleChunk_Convolution; |
4630 | 0 | } |
4631 | 0 | else if (EQUAL(pszResampling, "LANCZOS")) |
4632 | 0 | { |
4633 | 0 | if (pnRadius) |
4634 | 0 | *pnRadius = GWKGetFilterRadius(GRA_Lanczos); |
4635 | 0 | return GDALResampleChunk_Convolution; |
4636 | 0 | } |
4637 | 0 | else if (EQUAL(pszResampling, "BILINEAR")) |
4638 | 0 | { |
4639 | 0 | if (pnRadius) |
4640 | 0 | *pnRadius = GWKGetFilterRadius(GRA_Bilinear); |
4641 | 0 | return GDALResampleChunk_Convolution; |
4642 | 0 | } |
4643 | 0 | else |
4644 | 0 | { |
4645 | 0 | CPLError( |
4646 | 0 | CE_Failure, CPLE_AppDefined, |
4647 | 0 | "GDALGetResampleFunction: Unsupported resampling method \"%s\".", |
4648 | 0 | pszResampling); |
4649 | 0 | return nullptr; |
4650 | 0 | } |
4651 | 0 | } |
4652 | | |
4653 | | /************************************************************************/ |
4654 | | /* GDALGetOvrWorkDataType() */ |
4655 | | /************************************************************************/ |
4656 | | |
4657 | | GDALDataType GDALGetOvrWorkDataType(const char *pszResampling, |
4658 | | GDALDataType eSrcDataType) |
4659 | 0 | { |
4660 | 0 | if (STARTS_WITH_CI(pszResampling, "NEAR") || EQUAL(pszResampling, "MODE")) |
4661 | 0 | { |
4662 | 0 | return eSrcDataType; |
4663 | 0 | } |
4664 | 0 | else if (eSrcDataType == GDT_Byte && |
4665 | 0 | (STARTS_WITH_CI(pszResampling, "AVER") || |
4666 | 0 | EQUAL(pszResampling, "RMS") || EQUAL(pszResampling, "CUBIC") || |
4667 | 0 | EQUAL(pszResampling, "CUBICSPLINE") || |
4668 | 0 | EQUAL(pszResampling, "LANCZOS") || |
4669 | 0 | EQUAL(pszResampling, "BILINEAR") || EQUAL(pszResampling, "MODE"))) |
4670 | 0 | { |
4671 | 0 | return GDT_Byte; |
4672 | 0 | } |
4673 | 0 | else if (eSrcDataType == GDT_UInt16 && |
4674 | 0 | (STARTS_WITH_CI(pszResampling, "AVER") || |
4675 | 0 | EQUAL(pszResampling, "RMS") || EQUAL(pszResampling, "CUBIC") || |
4676 | 0 | EQUAL(pszResampling, "CUBICSPLINE") || |
4677 | 0 | EQUAL(pszResampling, "LANCZOS") || |
4678 | 0 | EQUAL(pszResampling, "BILINEAR") || EQUAL(pszResampling, "MODE"))) |
4679 | 0 | { |
4680 | 0 | return GDT_UInt16; |
4681 | 0 | } |
4682 | 0 | else if (EQUAL(pszResampling, "GAUSS")) |
4683 | 0 | return GDT_Float64; |
4684 | | |
4685 | 0 | if (eSrcDataType == GDT_Byte || eSrcDataType == GDT_Int8 || |
4686 | 0 | eSrcDataType == GDT_UInt16 || eSrcDataType == GDT_Int16 || |
4687 | 0 | eSrcDataType == GDT_Float32) |
4688 | 0 | { |
4689 | 0 | return GDT_Float32; |
4690 | 0 | } |
4691 | 0 | return GDT_Float64; |
4692 | 0 | } |
4693 | | |
4694 | | namespace |
4695 | | { |
4696 | | // Structure to hold a pointer to free with CPLFree() |
4697 | | struct PointerHolder |
4698 | | { |
4699 | | void *ptr = nullptr; |
4700 | | |
4701 | 0 | explicit PointerHolder(void *ptrIn) : ptr(ptrIn) |
4702 | 0 | { |
4703 | 0 | } |
4704 | | |
4705 | | ~PointerHolder() |
4706 | 0 | { |
4707 | 0 | CPLFree(ptr); |
4708 | 0 | } |
4709 | | |
4710 | | PointerHolder(const PointerHolder &) = delete; |
4711 | | PointerHolder &operator=(const PointerHolder &) = delete; |
4712 | | }; |
4713 | | } // namespace |
4714 | | |
4715 | | /************************************************************************/ |
4716 | | /* GDALRegenerateOverviews() */ |
4717 | | /************************************************************************/ |
4718 | | |
4719 | | /** |
4720 | | * \brief Generate downsampled overviews. |
4721 | | * |
4722 | | * This function will generate one or more overview images from a base image |
4723 | | * using the requested downsampling algorithm. Its primary use is for |
4724 | | * generating overviews via GDALDataset::BuildOverviews(), but it can also be |
4725 | | * used to generate downsampled images in one file from another outside the |
4726 | | * overview architecture. |
4727 | | * |
4728 | | * The output bands need to exist in advance. |
4729 | | * |
4730 | | * The full set of resampling algorithms is documented in |
4731 | | * GDALDataset::BuildOverviews(). |
4732 | | * |
4733 | | * This function will honour properly NODATA_VALUES tuples (special dataset |
4734 | | * metadata) so that only a given RGB triplet (in case of a RGB image) will be |
4735 | | * considered as the nodata value and not each value of the triplet |
4736 | | * independently per band. |
4737 | | * |
4738 | | * Starting with GDAL 3.2, the GDAL_NUM_THREADS configuration option can be set |
4739 | | * to "ALL_CPUS" or a integer value to specify the number of threads to use for |
4740 | | * overview computation. |
4741 | | * |
4742 | | * @param hSrcBand the source (base level) band. |
4743 | | * @param nOverviewCount the number of downsampled bands being generated. |
4744 | | * @param pahOvrBands the list of downsampled bands to be generated. |
4745 | | * @param pszResampling Resampling algorithm (e.g. "AVERAGE"). |
4746 | | * @param pfnProgress progress report function. |
4747 | | * @param pProgressData progress function callback data. |
4748 | | * @return CE_None on success or CE_Failure on failure. |
4749 | | */ |
4750 | | CPLErr GDALRegenerateOverviews(GDALRasterBandH hSrcBand, int nOverviewCount, |
4751 | | GDALRasterBandH *pahOvrBands, |
4752 | | const char *pszResampling, |
4753 | | GDALProgressFunc pfnProgress, |
4754 | | void *pProgressData) |
4755 | | |
4756 | 0 | { |
4757 | 0 | return GDALRegenerateOverviewsEx(hSrcBand, nOverviewCount, pahOvrBands, |
4758 | 0 | pszResampling, pfnProgress, pProgressData, |
4759 | 0 | nullptr); |
4760 | 0 | } |
4761 | | |
4762 | | /************************************************************************/ |
4763 | | /* GDALRegenerateOverviewsEx() */ |
4764 | | /************************************************************************/ |
4765 | | |
// Factor of 2 applied, in GDALRegenerateOverviewsEx(), to the maximum overview
// factor and to the kernel radius when sizing the height of the source chunks.
constexpr int RADIUS_TO_DIAMETER{2};
4767 | | |
4768 | | /** |
4769 | | * \brief Generate downsampled overviews. |
4770 | | * |
4771 | | * This function will generate one or more overview images from a base image |
4772 | | * using the requested downsampling algorithm. Its primary use is for |
4773 | | * generating overviews via GDALDataset::BuildOverviews(), but it can also be |
4774 | | * used to generate downsampled images in one file from another outside the |
4775 | | * overview architecture. |
4776 | | * |
4777 | | * The output bands need to exist in advance. |
4778 | | * |
4779 | | * The full set of resampling algorithms is documented in |
4780 | | * GDALDataset::BuildOverviews(). |
4781 | | * |
4782 | | * This function will honour properly NODATA_VALUES tuples (special dataset |
4783 | | * metadata) so that only a given RGB triplet (in case of a RGB image) will be |
4784 | | * considered as the nodata value and not each value of the triplet |
4785 | | * independently per band. |
4786 | | * |
 * Starting with GDAL 3.2, the GDAL_NUM_THREADS configuration option can be
 * set to "ALL_CPUS" or an integer value to specify the number of threads to
 * use for overview computation.
4790 | | * |
4791 | | * @param hSrcBand the source (base level) band. |
4792 | | * @param nOverviewCount the number of downsampled bands being generated. |
4793 | | * @param pahOvrBands the list of downsampled bands to be generated. |
4794 | | * @param pszResampling Resampling algorithm (e.g. "AVERAGE"). |
4795 | | * @param pfnProgress progress report function. |
4796 | | * @param pProgressData progress function callback data. |
4797 | | * @param papszOptions NULL terminated list of options as key=value pairs, or |
4798 | | * NULL |
4799 | | * @return CE_None on success or CE_Failure on failure. |
4800 | | * @since GDAL 3.6 |
4801 | | */ |
4802 | | CPLErr GDALRegenerateOverviewsEx(GDALRasterBandH hSrcBand, int nOverviewCount, |
4803 | | GDALRasterBandH *pahOvrBands, |
4804 | | const char *pszResampling, |
4805 | | GDALProgressFunc pfnProgress, |
4806 | | void *pProgressData, CSLConstList papszOptions) |
4807 | | |
4808 | 0 | { |
4809 | 0 | GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand); |
4810 | 0 | GDALRasterBand **papoOvrBands = |
4811 | 0 | reinterpret_cast<GDALRasterBand **>(pahOvrBands); |
4812 | |
|
4813 | 0 | if (pfnProgress == nullptr) |
4814 | 0 | pfnProgress = GDALDummyProgress; |
4815 | |
|
4816 | 0 | if (EQUAL(pszResampling, "NONE")) |
4817 | 0 | return CE_None; |
4818 | | |
4819 | 0 | int nKernelRadius = 0; |
4820 | 0 | GDALResampleFunction pfnResampleFn = |
4821 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius); |
4822 | |
|
4823 | 0 | if (pfnResampleFn == nullptr) |
4824 | 0 | return CE_Failure; |
4825 | | |
4826 | | /* -------------------------------------------------------------------- */ |
4827 | | /* Check color tables... */ |
4828 | | /* -------------------------------------------------------------------- */ |
4829 | 0 | GDALColorTable *poColorTable = nullptr; |
4830 | |
|
4831 | 0 | if ((STARTS_WITH_CI(pszResampling, "AVER") || EQUAL(pszResampling, "RMS") || |
4832 | 0 | EQUAL(pszResampling, "MODE") || EQUAL(pszResampling, "GAUSS")) && |
4833 | 0 | poSrcBand->GetColorInterpretation() == GCI_PaletteIndex) |
4834 | 0 | { |
4835 | 0 | poColorTable = poSrcBand->GetColorTable(); |
4836 | 0 | if (poColorTable != nullptr) |
4837 | 0 | { |
4838 | 0 | if (poColorTable->GetPaletteInterpretation() != GPI_RGB) |
4839 | 0 | { |
4840 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
4841 | 0 | "Computing overviews on palette index raster bands " |
4842 | 0 | "with a palette whose color interpretation is not RGB " |
4843 | 0 | "will probably lead to unexpected results."); |
4844 | 0 | poColorTable = nullptr; |
4845 | 0 | } |
4846 | 0 | else if (poColorTable->IsIdentity()) |
4847 | 0 | { |
4848 | 0 | poColorTable = nullptr; |
4849 | 0 | } |
4850 | 0 | } |
4851 | 0 | else |
4852 | 0 | { |
4853 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
4854 | 0 | "Computing overviews on palette index raster bands " |
4855 | 0 | "without a palette will probably lead to unexpected " |
4856 | 0 | "results."); |
4857 | 0 | } |
4858 | 0 | } |
4859 | | // Not ready yet |
4860 | 0 | else if ((EQUAL(pszResampling, "CUBIC") || |
4861 | 0 | EQUAL(pszResampling, "CUBICSPLINE") || |
4862 | 0 | EQUAL(pszResampling, "LANCZOS") || |
4863 | 0 | EQUAL(pszResampling, "BILINEAR")) && |
4864 | 0 | poSrcBand->GetColorInterpretation() == GCI_PaletteIndex) |
4865 | 0 | { |
4866 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
4867 | 0 | "Computing %s overviews on palette index raster bands " |
4868 | 0 | "will probably lead to unexpected results.", |
4869 | 0 | pszResampling); |
4870 | 0 | } |
4871 | | |
4872 | | // If we have a nodata mask and we are doing something more complicated |
4873 | | // than nearest neighbouring, we have to fetch to nodata mask. |
4874 | |
|
4875 | 0 | GDALRasterBand *poMaskBand = nullptr; |
4876 | 0 | bool bUseNoDataMask = false; |
4877 | 0 | bool bCanUseCascaded = true; |
4878 | |
|
4879 | 0 | if (!STARTS_WITH_CI(pszResampling, "NEAR")) |
4880 | 0 | { |
4881 | | // Special case if we are an alpha/mask band. We want it to be |
4882 | | // considered as the mask band to avoid alpha=0 to be taken into account |
4883 | | // in average computation. |
4884 | 0 | if (poSrcBand->IsMaskBand()) |
4885 | 0 | { |
4886 | 0 | poMaskBand = poSrcBand; |
4887 | 0 | bUseNoDataMask = true; |
4888 | 0 | } |
4889 | 0 | else |
4890 | 0 | { |
4891 | 0 | poMaskBand = poSrcBand->GetMaskBand(); |
4892 | 0 | const int nMaskFlags = poSrcBand->GetMaskFlags(); |
4893 | 0 | bCanUseCascaded = |
4894 | 0 | (nMaskFlags == GMF_NODATA || nMaskFlags == GMF_ALL_VALID); |
4895 | 0 | bUseNoDataMask = (nMaskFlags & GMF_ALL_VALID) == 0; |
4896 | 0 | } |
4897 | 0 | } |
4898 | | |
4899 | | /* -------------------------------------------------------------------- */ |
4900 | | /* If we are operating on multiple overviews, and using */ |
4901 | | /* averaging, lets do them in cascading order to reduce the */ |
4902 | | /* amount of computation. */ |
4903 | | /* -------------------------------------------------------------------- */ |
4904 | | |
4905 | | // In case the mask made be computed from another band of the dataset, |
4906 | | // we can't use cascaded generation, as the computation of the overviews |
4907 | | // of the band used for the mask band may not have yet occurred (#3033). |
4908 | 0 | if ((STARTS_WITH_CI(pszResampling, "AVER") || |
4909 | 0 | EQUAL(pszResampling, "GAUSS") || EQUAL(pszResampling, "RMS") || |
4910 | 0 | EQUAL(pszResampling, "CUBIC") || EQUAL(pszResampling, "CUBICSPLINE") || |
4911 | 0 | EQUAL(pszResampling, "LANCZOS") || EQUAL(pszResampling, "BILINEAR") || |
4912 | 0 | EQUAL(pszResampling, "MODE")) && |
4913 | 0 | nOverviewCount > 1 && bCanUseCascaded) |
4914 | 0 | return GDALRegenerateCascadingOverviews( |
4915 | 0 | poSrcBand, nOverviewCount, papoOvrBands, pszResampling, pfnProgress, |
4916 | 0 | pProgressData, papszOptions); |
4917 | | |
4918 | | /* -------------------------------------------------------------------- */ |
4919 | | /* Setup one horizontal swath to read from the raw buffer. */ |
4920 | | /* -------------------------------------------------------------------- */ |
4921 | 0 | int nFRXBlockSize = 0; |
4922 | 0 | int nFRYBlockSize = 0; |
4923 | 0 | poSrcBand->GetBlockSize(&nFRXBlockSize, &nFRYBlockSize); |
4924 | |
|
4925 | 0 | const GDALDataType eSrcDataType = poSrcBand->GetRasterDataType(); |
4926 | 0 | const bool bUseGenericResampleFn = STARTS_WITH_CI(pszResampling, "NEAR") || |
4927 | 0 | EQUAL(pszResampling, "MODE") || |
4928 | 0 | !GDALDataTypeIsComplex(eSrcDataType); |
4929 | 0 | const GDALDataType eWrkDataType = |
4930 | 0 | bUseGenericResampleFn |
4931 | 0 | ? GDALGetOvrWorkDataType(pszResampling, eSrcDataType) |
4932 | 0 | : GDT_CFloat32; |
4933 | |
|
4934 | 0 | const int nWidth = poSrcBand->GetXSize(); |
4935 | 0 | const int nHeight = poSrcBand->GetYSize(); |
4936 | |
|
4937 | 0 | int nMaxOvrFactor = 1; |
4938 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; ++iOverview) |
4939 | 0 | { |
4940 | 0 | const int nDstWidth = papoOvrBands[iOverview]->GetXSize(); |
4941 | 0 | const int nDstHeight = papoOvrBands[iOverview]->GetYSize(); |
4942 | 0 | nMaxOvrFactor = std::max( |
4943 | 0 | nMaxOvrFactor, |
4944 | 0 | static_cast<int>(static_cast<double>(nWidth) / nDstWidth + 0.5)); |
4945 | 0 | nMaxOvrFactor = std::max( |
4946 | 0 | nMaxOvrFactor, |
4947 | 0 | static_cast<int>(static_cast<double>(nHeight) / nDstHeight + 0.5)); |
4948 | 0 | } |
4949 | |
|
4950 | 0 | int nFullResYChunk = nFRYBlockSize; |
4951 | 0 | int nMaxChunkYSizeQueried = 0; |
4952 | |
|
4953 | 0 | const auto UpdateChunkHeightAndGetChunkSize = |
4954 | 0 | [&nFullResYChunk, &nMaxChunkYSizeQueried, nKernelRadius, nMaxOvrFactor, |
4955 | 0 | eWrkDataType, nWidth]() |
4956 | 0 | { |
4957 | | // Make sure that round(nChunkYOff / nMaxOvrFactor) < round((nChunkYOff |
4958 | | // + nFullResYChunk) / nMaxOvrFactor) |
4959 | 0 | if (nMaxOvrFactor > INT_MAX / RADIUS_TO_DIAMETER) |
4960 | 0 | { |
4961 | 0 | return GINTBIG_MAX; |
4962 | 0 | } |
4963 | 0 | nFullResYChunk = |
4964 | 0 | std::max(nFullResYChunk, RADIUS_TO_DIAMETER * nMaxOvrFactor); |
4965 | 0 | if ((nKernelRadius > 0 && |
4966 | 0 | nMaxOvrFactor > INT_MAX / (RADIUS_TO_DIAMETER * nKernelRadius)) || |
4967 | 0 | nFullResYChunk > |
4968 | 0 | INT_MAX - RADIUS_TO_DIAMETER * nKernelRadius * nMaxOvrFactor) |
4969 | 0 | { |
4970 | 0 | return GINTBIG_MAX; |
4971 | 0 | } |
4972 | 0 | nMaxChunkYSizeQueried = |
4973 | 0 | nFullResYChunk + RADIUS_TO_DIAMETER * nKernelRadius * nMaxOvrFactor; |
4974 | 0 | if (GDALGetDataTypeSizeBytes(eWrkDataType) > |
4975 | 0 | std::numeric_limits<int64_t>::max() / |
4976 | 0 | (static_cast<int64_t>(nMaxChunkYSizeQueried) * nWidth)) |
4977 | 0 | { |
4978 | 0 | return GINTBIG_MAX; |
4979 | 0 | } |
4980 | 0 | return static_cast<GIntBig>(GDALGetDataTypeSizeBytes(eWrkDataType)) * |
4981 | 0 | nMaxChunkYSizeQueried * nWidth; |
4982 | 0 | }; |
4983 | |
|
4984 | 0 | const char *pszChunkYSize = |
4985 | 0 | CPLGetConfigOption("GDAL_OVR_CHUNKYSIZE", nullptr); |
4986 | 0 | #ifndef __COVERITY__ |
4987 | | // Only configurable for debug / testing |
4988 | 0 | if (pszChunkYSize) |
4989 | 0 | { |
4990 | 0 | nFullResYChunk = atoi(pszChunkYSize); |
4991 | 0 | } |
4992 | 0 | #endif |
4993 | | |
4994 | | // Only configurable for debug / testing |
4995 | 0 | const int nChunkMaxSize = |
4996 | 0 | atoi(CPLGetConfigOption("GDAL_OVR_CHUNK_MAX_SIZE", "10485760")); |
4997 | |
|
4998 | 0 | auto nChunkSize = UpdateChunkHeightAndGetChunkSize(); |
4999 | 0 | if (nChunkSize > nChunkMaxSize) |
5000 | 0 | { |
5001 | 0 | if (poColorTable == nullptr && nFRXBlockSize < nWidth && |
5002 | 0 | !GDALDataTypeIsComplex(eSrcDataType) && |
5003 | 0 | (!STARTS_WITH_CI(pszResampling, "AVER") || |
5004 | 0 | EQUAL(pszResampling, "AVERAGE"))) |
5005 | 0 | { |
5006 | | // If this is tiled, then use GDALRegenerateOverviewsMultiBand() |
5007 | | // which use a block based strategy, which is much less memory |
5008 | | // hungry. |
5009 | 0 | return GDALRegenerateOverviewsMultiBand( |
5010 | 0 | 1, &poSrcBand, nOverviewCount, &papoOvrBands, pszResampling, |
5011 | 0 | pfnProgress, pProgressData, papszOptions); |
5012 | 0 | } |
5013 | 0 | else if (nOverviewCount > 1 && STARTS_WITH_CI(pszResampling, "NEAR")) |
5014 | 0 | { |
5015 | 0 | return GDALRegenerateCascadingOverviews( |
5016 | 0 | poSrcBand, nOverviewCount, papoOvrBands, pszResampling, |
5017 | 0 | pfnProgress, pProgressData, papszOptions); |
5018 | 0 | } |
5019 | 0 | } |
5020 | 0 | else if (pszChunkYSize == nullptr) |
5021 | 0 | { |
5022 | | // Try to get as close as possible to nChunkMaxSize |
5023 | 0 | while (nChunkSize < nChunkMaxSize / 2) |
5024 | 0 | { |
5025 | 0 | nFullResYChunk *= 2; |
5026 | 0 | nChunkSize = UpdateChunkHeightAndGetChunkSize(); |
5027 | 0 | } |
5028 | 0 | } |
5029 | | |
5030 | 0 | int nHasNoData = 0; |
5031 | 0 | const double dfNoDataValue = poSrcBand->GetNoDataValue(&nHasNoData); |
5032 | 0 | const bool bHasNoData = CPL_TO_BOOL(nHasNoData); |
5033 | 0 | const bool bPropagateNoData = |
5034 | 0 | CPLTestBool(CPLGetConfigOption("GDAL_OVR_PROPAGATE_NODATA", "NO")); |
5035 | | |
5036 | | // Structure describing a resampling job: the resampling of one source chunk into one destination band, plus the flag / mutex / condition variable used to signal its completion.
5037 | 0 | struct OvrJob
5038 | 0 | {
5039 | | // Buffers to free when job is finished. NOTE(review): presumably PointerHolder releases the wrapped pointer on destruction - confirm against its definition.
5040 | 0 | std::shared_ptr<PointerHolder> oSrcMaskBufferHolder{}; // assigned through SetSrcMaskBufferHolder()
5041 | 0 | std::shared_ptr<PointerHolder> oSrcBufferHolder{}; // assigned through SetSrcBufferHolder()
5042 | 0 | std::unique_ptr<PointerHolder> oDstBufferHolder{}; // exclusively owned by this job
5043 | 

5044 | 0 | GDALRasterBand *poDstBand = nullptr; // band the resampled rows are destined for
5045 | | 
5046 | | // Input parameters of pfnResampleFn (args and pChunk), plus the source / destination sizes kept alongside them
5047 | 0 | GDALResampleFunction pfnResampleFn = nullptr;
5048 | 0 | int nSrcWidth = 0;
5049 | 0 | int nSrcHeight = 0;
5050 | 0 | int nDstWidth = 0;
5051 | 0 | GDALOverviewResampleArgs args{};
5052 | 0 | const void *pChunk = nullptr; // source chunk, not modified through this pointer
5053 | 0 | bool bUseGenericResampleFn = false;
5054 | | 
5055 | | // Output values of resampling function
5056 | 0 | CPLErr eErr = CE_Failure; // stays CE_Failure until a result is stored here
5057 | 0 | void *pDstBuffer = nullptr;
5058 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown;
5059 | 

5060 | 0 | void SetSrcMaskBufferHolder( // Copies the shared_ptr, so the job co-owns the mask buffer holder.
5061 | 0 | const std::shared_ptr<PointerHolder> &oSrcMaskBufferHolderIn)
5062 | 0 | {
5063 | 0 | oSrcMaskBufferHolder = oSrcMaskBufferHolderIn;
5064 | 0 | }
5065 | 

5066 | 0 | void SetSrcBufferHolder( // Copies the shared_ptr, so the job co-owns the source buffer holder.
5067 | 0 | const std::shared_ptr<PointerHolder> &oSrcBufferHolderIn)
5068 | 0 | {
5069 | 0 | oSrcBufferHolder = oSrcBufferHolderIn;
5070 | 0 | }
5071 | 

5072 | 0 | void NotifyFinished() // Sets bFinished under the mutex and wakes one thread blocked in WaitFinished().
5073 | 0 | {
5074 | 0 | std::lock_guard guard(mutex);
5075 | 0 | bFinished = true;
5076 | 0 | cv.notify_one();
5077 | 0 | }
5078 | 

5079 | 0 | bool IsFinished() // Non-blocking: returns the current value of bFinished, read under the mutex.
5080 | 0 | {
5081 | 0 | std::lock_guard guard(mutex);
5082 | 0 | return bFinished;
5083 | 0 | }
5084 | 

5085 | 0 | void WaitFinished() // Blocks the caller until NotifyFinished() has set bFinished.
5086 | 0 | {
5087 | 0 | std::unique_lock oGuard(mutex);
5088 | 0 | while (!bFinished) // re-check the flag after every wake-up; only bFinished ends the wait
5089 | 0 | {
5090 | 0 | cv.wait(oGuard);
5091 | 0 | }
5092 | 0 | }
5093 | 

5094 | 0 | private:
5095 | | // Synchronization: bFinished is only read or written while holding mutex
5096 | 0 | bool bFinished = false;
5097 | 0 | std::mutex mutex{};
5098 | 0 | std::condition_variable cv{};
5099 | 0 | };
5100 | | |
5101 | | // Thread function to resample: pData must point to an OvrJob; the job's inputs are read, its output fields are filled in, and completion is signalled last.
5102 | 0 | const auto JobResampleFunc = [](void *pData)
5103 | 0 | {
5104 | 0 | OvrJob *poJob = static_cast<OvrJob *>(pData);
5105 | 

5106 | 0 | if (poJob->bUseGenericResampleFn)
5107 | 0 | {
5108 | 0 | poJob->eErr = poJob->pfnResampleFn(poJob->args, poJob->pChunk, // generic path: everything else travels in args
5109 | 0 | &(poJob->pDstBuffer),
5110 | 0 | &(poJob->eDstBufferDataType));
5111 | 0 | }
5112 | 0 | else
5113 | 0 | {
5114 | 0 | poJob->eErr = GDALResampleChunkC32R( // NOTE(review): presumably the complex (CFloat32) path - confirm where bUseGenericResampleFn is computed
5115 | 0 | poJob->nSrcWidth, poJob->nSrcHeight,
5116 | 0 | static_cast<const float *>(poJob->pChunk),
5117 | 0 | poJob->args.nChunkYOff, poJob->args.nChunkYSize,
5118 | 0 | poJob->args.nDstYOff, poJob->args.nDstYOff2,
5119 | 0 | poJob->args.nOvrXSize, poJob->args.nOvrYSize,
5120 | 0 | &(poJob->pDstBuffer), &(poJob->eDstBufferDataType),
5121 | 0 | poJob->args.pszResampling);
5122 | 0 | }
5123 | 

5124 | 0 | poJob->oDstBufferHolder = // hand the output buffer to a holder owned by the job, whatever eErr is
5125 | 0 | std::make_unique<PointerHolder>(poJob->pDstBuffer);
5126 | 

5127 | 0 | poJob->NotifyFinished(); // last step: a thread blocked in WaitFinished() may proceed from here on
5128 | 0 | };
5129 | | |
5130 | | // Function to write resample data to target band: writes rows [nDstYOff, nDstYOff2) of the job's destination buffer and returns the RasterIO() result.
5131 | 0 | const auto WriteJobData = [](const OvrJob *poJob)
5132 | 0 | {
5133 | 0 | return poJob->poDstBand->RasterIO(
5134 | 0 | GF_Write, 0, poJob->args.nDstYOff, poJob->nDstWidth, // window starts at column 0 and is nDstWidth wide
5135 | 0 | poJob->args.nDstYOff2 - poJob->args.nDstYOff, poJob->pDstBuffer,
5136 | 0 | poJob->nDstWidth, poJob->args.nDstYOff2 - poJob->args.nDstYOff, // buffer size equals the window size
5137 | 0 | poJob->eDstBufferDataType, 0, 0, nullptr);
5138 | 0 | };
5139 | | |
5140 | | // Wait for completion of oldest job and serialize it. jobList must not be empty (front() is used unchecked). Returns the job's error, or the write result if resampling succeeded.
5141 | 0 | const auto WaitAndFinalizeOldestJob =
5142 | 0 | [WriteJobData](std::list<std::unique_ptr<OvrJob>> &jobList)
5143 | 0 | {
5144 | 0 | auto poOldestJob = jobList.front().get();
5145 | 0 | poOldestJob->WaitFinished(); // blocks until the job has called NotifyFinished()
5146 | 0 | CPLErr l_eErr = poOldestJob->eErr;
5147 | 0 | if (l_eErr == CE_None) // only write the result of a successful resampling
5148 | 0 | {
5149 | 0 | l_eErr = WriteJobData(poOldestJob);
5150 | 0 | }
5151 | 

5152 | 0 | jobList.pop_front(); // destroys the job; poOldestJob is dangling from here on
5153 | 0 | return l_eErr;
5154 | 0 | };
5155 | | |
5156 | | // Queue of jobs |
5157 | 0 | std::list<std::unique_ptr<OvrJob>> jobList; |
5158 | |
|
5159 | 0 | GByte *pabyChunkNodataMask = nullptr; |
5160 | 0 | void *pChunk = nullptr; |
5161 | |
|
5162 | 0 | const char *pszThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1"); |
5163 | 0 | const int nThreads = std::max(1, std::min(128, EQUAL(pszThreads, "ALL_CPUS") |
5164 | 0 | ? CPLGetNumCPUs() |
5165 | 0 | : atoi(pszThreads))); |
5166 | 0 | auto poThreadPool = |
5167 | 0 | nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr; |
5168 | 0 | auto poJobQueue = poThreadPool ? poThreadPool->CreateJobQueue() |
5169 | 0 | : std::unique_ptr<CPLJobQueue>(nullptr); |
5170 | | |
5171 | | /* -------------------------------------------------------------------- */ |
5172 | | /* Loop over image operating on chunks. */ |
5173 | | /* -------------------------------------------------------------------- */ |
5174 | 0 | int nChunkYOff = 0; |
5175 | 0 | CPLErr eErr = CE_None; |
5176 | |
|
5177 | 0 | for (nChunkYOff = 0; nChunkYOff < nHeight && eErr == CE_None; |
5178 | 0 | nChunkYOff += nFullResYChunk) |
5179 | 0 | { |
5180 | 0 | if (!pfnProgress(nChunkYOff / static_cast<double>(nHeight), nullptr, |
5181 | 0 | pProgressData)) |
5182 | 0 | { |
5183 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
5184 | 0 | eErr = CE_Failure; |
5185 | 0 | } |
5186 | |
|
5187 | 0 | if (nFullResYChunk + nChunkYOff > nHeight) |
5188 | 0 | nFullResYChunk = nHeight - nChunkYOff; |
5189 | |
|
5190 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nMaxOvrFactor; |
5191 | 0 | int nChunkYSizeQueried = |
5192 | 0 | nFullResYChunk + 2 * nKernelRadius * nMaxOvrFactor; |
5193 | 0 | if (nChunkYOffQueried < 0) |
5194 | 0 | { |
5195 | 0 | nChunkYSizeQueried += nChunkYOffQueried; |
5196 | 0 | nChunkYOffQueried = 0; |
5197 | 0 | } |
5198 | 0 | if (nChunkYOffQueried + nChunkYSizeQueried > nHeight) |
5199 | 0 | nChunkYSizeQueried = nHeight - nChunkYOffQueried; |
5200 | | |
5201 | | // Avoid accumulating too many tasks and exhausting RAM
5202 | | // Try to complete already finished jobs |
5203 | 0 | while (eErr == CE_None && !jobList.empty()) |
5204 | 0 | { |
5205 | 0 | auto poOldestJob = jobList.front().get(); |
5206 | 0 | if (!poOldestJob->IsFinished()) |
5207 | 0 | break; |
5208 | 0 | eErr = poOldestJob->eErr; |
5209 | 0 | if (eErr == CE_None) |
5210 | 0 | { |
5211 | 0 | eErr = WriteJobData(poOldestJob); |
5212 | 0 | } |
5213 | |
|
5214 | 0 | jobList.pop_front(); |
5215 | 0 | } |
5216 | | |
5217 | | // And in case we have saturated the number of threads, |
5218 | | // wait for completion of tasks to go below the threshold. |
5219 | 0 | while (eErr == CE_None && |
5220 | 0 | jobList.size() >= static_cast<size_t>(nThreads)) |
5221 | 0 | { |
5222 | 0 | eErr = WaitAndFinalizeOldestJob(jobList); |
5223 | 0 | } |
5224 | | |
5225 | | // (Re)allocate buffers if needed |
5226 | 0 | if (pChunk == nullptr) |
5227 | 0 | { |
5228 | 0 | pChunk = VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType), |
5229 | 0 | nMaxChunkYSizeQueried, nWidth); |
5230 | 0 | } |
5231 | 0 | if (bUseNoDataMask && pabyChunkNodataMask == nullptr) |
5232 | 0 | { |
5233 | 0 | pabyChunkNodataMask = static_cast<GByte *>( |
5234 | 0 | VSI_MALLOC2_VERBOSE(nMaxChunkYSizeQueried, nWidth)); |
5235 | 0 | } |
5236 | |
|
5237 | 0 | if (pChunk == nullptr || |
5238 | 0 | (bUseNoDataMask && pabyChunkNodataMask == nullptr)) |
5239 | 0 | { |
5240 | 0 | CPLFree(pChunk); |
5241 | 0 | CPLFree(pabyChunkNodataMask); |
5242 | 0 | return CE_Failure; |
5243 | 0 | } |
5244 | | |
5245 | | // Read chunk. |
5246 | 0 | if (eErr == CE_None) |
5247 | 0 | eErr = poSrcBand->RasterIO(GF_Read, 0, nChunkYOffQueried, nWidth, |
5248 | 0 | nChunkYSizeQueried, pChunk, nWidth, |
5249 | 0 | nChunkYSizeQueried, eWrkDataType, 0, 0, |
5250 | 0 | nullptr); |
5251 | 0 | if (eErr == CE_None && bUseNoDataMask) |
5252 | 0 | eErr = poMaskBand->RasterIO(GF_Read, 0, nChunkYOffQueried, nWidth, |
5253 | 0 | nChunkYSizeQueried, pabyChunkNodataMask, |
5254 | 0 | nWidth, nChunkYSizeQueried, GDT_Byte, 0, |
5255 | 0 | 0, nullptr); |
5256 | | |
5257 | | // Special case to promote 1bit data to 8bit 0/255 values. |
5258 | 0 | if (EQUAL(pszResampling, "AVERAGE_BIT2GRAYSCALE")) |
5259 | 0 | { |
5260 | 0 | if (eWrkDataType == GDT_Float32) |
5261 | 0 | { |
5262 | 0 | float *pafChunk = static_cast<float *>(pChunk); |
5263 | 0 | for (size_t i = 0; |
5264 | 0 | i < static_cast<size_t>(nChunkYSizeQueried) * nWidth; i++) |
5265 | 0 | { |
5266 | 0 | if (pafChunk[i] == 1.0f) |
5267 | 0 | pafChunk[i] = 255.0f; |
5268 | 0 | } |
5269 | 0 | } |
5270 | 0 | else if (eWrkDataType == GDT_Byte) |
5271 | 0 | { |
5272 | 0 | GByte *pabyChunk = static_cast<GByte *>(pChunk); |
5273 | 0 | for (size_t i = 0; |
5274 | 0 | i < static_cast<size_t>(nChunkYSizeQueried) * nWidth; i++) |
5275 | 0 | { |
5276 | 0 | if (pabyChunk[i] == 1) |
5277 | 0 | pabyChunk[i] = 255; |
5278 | 0 | } |
5279 | 0 | } |
5280 | 0 | else if (eWrkDataType == GDT_UInt16) |
5281 | 0 | { |
5282 | 0 | GUInt16 *pasChunk = static_cast<GUInt16 *>(pChunk); |
5283 | 0 | for (size_t i = 0; |
5284 | 0 | i < static_cast<size_t>(nChunkYSizeQueried) * nWidth; i++) |
5285 | 0 | { |
5286 | 0 | if (pasChunk[i] == 1) |
5287 | 0 | pasChunk[i] = 255; |
5288 | 0 | } |
5289 | 0 | } |
5290 | 0 | else if (eWrkDataType == GDT_Float64) |
5291 | 0 | { |
5292 | 0 | double *padfChunk = static_cast<double *>(pChunk); |
5293 | 0 | for (size_t i = 0; |
5294 | 0 | i < static_cast<size_t>(nChunkYSizeQueried) * nWidth; i++) |
5295 | 0 | { |
5296 | 0 | if (padfChunk[i] == 1.0) |
5297 | 0 | padfChunk[i] = 255.0; |
5298 | 0 | } |
5299 | 0 | } |
5300 | 0 | else |
5301 | 0 | { |
5302 | 0 | CPLAssert(false); |
5303 | 0 | } |
5304 | 0 | } |
5305 | 0 | else if (EQUAL(pszResampling, "AVERAGE_BIT2GRAYSCALE_MINISWHITE")) |
5306 | 0 | { |
5307 | 0 | if (eWrkDataType == GDT_Float32) |
5308 | 0 | { |
5309 | 0 | float *pafChunk = static_cast<float *>(pChunk); |
5310 | 0 | for (size_t i = 0; |
5311 | 0 | i < static_cast<size_t>(nChunkYSizeQueried) * nWidth; i++) |
5312 | 0 | { |
5313 | 0 | if (pafChunk[i] == 1.0f) |
5314 | 0 | pafChunk[i] = 0.0f; |
5315 | 0 | else if (pafChunk[i] == 0.0f) |
5316 | 0 | pafChunk[i] = 255.0f; |
5317 | 0 | } |
5318 | 0 | } |
5319 | 0 | else if (eWrkDataType == GDT_Byte) |
5320 | 0 | { |
5321 | 0 | GByte *pabyChunk = static_cast<GByte *>(pChunk); |
5322 | 0 | for (size_t i = 0; |
5323 | 0 | i < static_cast<size_t>(nChunkYSizeQueried) * nWidth; i++) |
5324 | 0 | { |
5325 | 0 | if (pabyChunk[i] == 1) |
5326 | 0 | pabyChunk[i] = 0; |
5327 | 0 | else if (pabyChunk[i] == 0) |
5328 | 0 | pabyChunk[i] = 255; |
5329 | 0 | } |
5330 | 0 | } |
5331 | 0 | else if (eWrkDataType == GDT_UInt16) |
5332 | 0 | { |
5333 | 0 | GUInt16 *pasChunk = static_cast<GUInt16 *>(pChunk); |
5334 | 0 | for (size_t i = 0; |
5335 | 0 | i < static_cast<size_t>(nChunkYSizeQueried) * nWidth; i++) |
5336 | 0 | { |
5337 | 0 | if (pasChunk[i] == 1) |
5338 | 0 | pasChunk[i] = 0; |
5339 | 0 | else if (pasChunk[i] == 0) |
5340 | 0 | pasChunk[i] = 255; |
5341 | 0 | } |
5342 | 0 | } |
5343 | 0 | else if (eWrkDataType == GDT_Float64) |
5344 | 0 | { |
5345 | 0 | double *padfChunk = static_cast<double *>(pChunk); |
5346 | 0 | for (size_t i = 0; |
5347 | 0 | i < static_cast<size_t>(nChunkYSizeQueried) * nWidth; i++) |
5348 | 0 | { |
5349 | 0 | if (padfChunk[i] == 1.0) |
5350 | 0 | padfChunk[i] = 0.0; |
5351 | 0 | else if (padfChunk[i] == 0.0) |
5352 | 0 | padfChunk[i] = 255.0; |
5353 | 0 | } |
5354 | 0 | } |
5355 | 0 | else |
5356 | 0 | { |
5357 | 0 | CPLAssert(false); |
5358 | 0 | } |
5359 | 0 | } |
5360 | | |
5361 | 0 | auto oSrcBufferHolder = |
5362 | 0 | std::make_shared<PointerHolder>(poJobQueue ? pChunk : nullptr); |
5363 | 0 | auto oSrcMaskBufferHolder = std::make_shared<PointerHolder>( |
5364 | 0 | poJobQueue ? pabyChunkNodataMask : nullptr); |
5365 | |
|
5366 | 0 | for (int iOverview = 0; iOverview < nOverviewCount && eErr == CE_None; |
5367 | 0 | ++iOverview) |
5368 | 0 | { |
5369 | 0 | GDALRasterBand *poDstBand = papoOvrBands[iOverview]; |
5370 | 0 | const int nDstWidth = poDstBand->GetXSize(); |
5371 | 0 | const int nDstHeight = poDstBand->GetYSize(); |
5372 | |
|
5373 | 0 | const double dfXRatioDstToSrc = |
5374 | 0 | static_cast<double>(nWidth) / nDstWidth; |
5375 | 0 | const double dfYRatioDstToSrc = |
5376 | 0 | static_cast<double>(nHeight) / nDstHeight; |
5377 | | |
5378 | | /* -------------------------------------------------------------------- |
5379 | | */ |
5380 | | /* Figure out the line to start writing to, and the first line |
5381 | | */ |
5382 | | /* to not write to. In theory this approach should ensure that |
5383 | | */ |
5384 | | /* every output line will be written if all input chunks are */ |
5385 | | /* processed. */ |
5386 | | /* -------------------------------------------------------------------- |
5387 | | */ |
5388 | 0 | int nDstYOff = |
5389 | 0 | static_cast<int>(0.5 + nChunkYOff / dfYRatioDstToSrc); |
5390 | 0 | if (nDstYOff == nDstHeight) |
5391 | 0 | continue; |
5392 | 0 | int nDstYOff2 = static_cast<int>( |
5393 | 0 | 0.5 + (nChunkYOff + nFullResYChunk) / dfYRatioDstToSrc); |
5394 | |
|
5395 | 0 | if (nChunkYOff + nFullResYChunk == nHeight) |
5396 | 0 | nDstYOff2 = nDstHeight; |
5397 | | #if DEBUG_VERBOSE |
5398 | | CPLDebug("GDAL", |
5399 | | "Reading (%dx%d -> %dx%d) for output (%dx%d -> %dx%d)", 0, |
5400 | | nChunkYOffQueried, nWidth, nChunkYSizeQueried, 0, nDstYOff, |
5401 | | nDstWidth, nDstYOff2 - nDstYOff); |
5402 | | #endif |
5403 | |
|
5404 | 0 | auto poJob = std::make_unique<OvrJob>(); |
5405 | 0 | poJob->pfnResampleFn = pfnResampleFn; |
5406 | 0 | poJob->bUseGenericResampleFn = bUseGenericResampleFn; |
5407 | 0 | poJob->args.eOvrDataType = poDstBand->GetRasterDataType(); |
5408 | 0 | poJob->args.nOvrXSize = poDstBand->GetXSize(); |
5409 | 0 | poJob->args.nOvrYSize = poDstBand->GetYSize(); |
5410 | 0 | const char *pszNBITS = |
5411 | 0 | poDstBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE"); |
5412 | 0 | poJob->args.nOvrNBITS = pszNBITS ? atoi(pszNBITS) : 0; |
5413 | 0 | poJob->args.dfXRatioDstToSrc = dfXRatioDstToSrc; |
5414 | 0 | poJob->args.dfYRatioDstToSrc = dfYRatioDstToSrc; |
5415 | 0 | poJob->args.eWrkDataType = eWrkDataType; |
5416 | 0 | poJob->pChunk = pChunk; |
5417 | 0 | poJob->args.pabyChunkNodataMask = pabyChunkNodataMask; |
5418 | 0 | poJob->nSrcWidth = nWidth; |
5419 | 0 | poJob->nSrcHeight = nHeight; |
5420 | 0 | poJob->args.nChunkXOff = 0; |
5421 | 0 | poJob->args.nChunkXSize = nWidth; |
5422 | 0 | poJob->args.nChunkYOff = nChunkYOffQueried; |
5423 | 0 | poJob->args.nChunkYSize = nChunkYSizeQueried; |
5424 | 0 | poJob->nDstWidth = nDstWidth; |
5425 | 0 | poJob->args.nDstXOff = 0; |
5426 | 0 | poJob->args.nDstXOff2 = nDstWidth; |
5427 | 0 | poJob->args.nDstYOff = nDstYOff; |
5428 | 0 | poJob->args.nDstYOff2 = nDstYOff2; |
5429 | 0 | poJob->poDstBand = poDstBand; |
5430 | 0 | poJob->args.pszResampling = pszResampling; |
5431 | 0 | poJob->args.bHasNoData = bHasNoData; |
5432 | 0 | poJob->args.dfNoDataValue = dfNoDataValue; |
5433 | 0 | poJob->args.poColorTable = poColorTable; |
5434 | 0 | poJob->args.eSrcDataType = eSrcDataType; |
5435 | 0 | poJob->args.bPropagateNoData = bPropagateNoData; |
5436 | |
|
5437 | 0 | if (poJobQueue) |
5438 | 0 | { |
5439 | 0 | poJob->SetSrcMaskBufferHolder(oSrcMaskBufferHolder); |
5440 | 0 | poJob->SetSrcBufferHolder(oSrcBufferHolder); |
5441 | 0 | poJobQueue->SubmitJob(JobResampleFunc, poJob.get()); |
5442 | 0 | jobList.emplace_back(std::move(poJob)); |
5443 | 0 | } |
5444 | 0 | else |
5445 | 0 | { |
5446 | 0 | JobResampleFunc(poJob.get()); |
5447 | 0 | eErr = poJob->eErr; |
5448 | 0 | if (eErr == CE_None) |
5449 | 0 | { |
5450 | 0 | eErr = WriteJobData(poJob.get()); |
5451 | 0 | } |
5452 | 0 | } |
5453 | 0 | } |
5454 | |
|
5455 | 0 | if (poJobQueue) |
5456 | 0 | { |
5457 | 0 | pChunk = nullptr; |
5458 | 0 | pabyChunkNodataMask = nullptr; |
5459 | 0 | } |
5460 | 0 | } |
5461 | | |
5462 | 0 | VSIFree(pChunk); |
5463 | 0 | VSIFree(pabyChunkNodataMask); |
5464 | | |
5465 | | // Wait for all pending jobs to complete |
5466 | 0 | while (!jobList.empty()) |
5467 | 0 | { |
5468 | 0 | const auto l_eErr = WaitAndFinalizeOldestJob(jobList); |
5469 | 0 | if (l_eErr != CE_None && eErr == CE_None) |
5470 | 0 | eErr = l_eErr; |
5471 | 0 | } |
5472 | | |
5473 | | /* -------------------------------------------------------------------- */ |
5474 | | /* Renormalize overview mean / stddev if needed. */
5475 | | /* -------------------------------------------------------------------- */ |
5476 | 0 | if (eErr == CE_None && EQUAL(pszResampling, "AVERAGE_MP")) |
5477 | 0 | { |
5478 | 0 | GDALOverviewMagnitudeCorrection( |
5479 | 0 | poSrcBand, nOverviewCount, |
5480 | 0 | reinterpret_cast<GDALRasterBandH *>(papoOvrBands), |
5481 | 0 | GDALDummyProgress, nullptr); |
5482 | 0 | } |
5483 | | |
5484 | | /* -------------------------------------------------------------------- */ |
5485 | | /* It can be important to flush out data to overviews. */ |
5486 | | /* -------------------------------------------------------------------- */ |
5487 | 0 | for (int iOverview = 0; eErr == CE_None && iOverview < nOverviewCount; |
5488 | 0 | ++iOverview) |
5489 | 0 | { |
5490 | 0 | eErr = papoOvrBands[iOverview]->FlushCache(false); |
5491 | 0 | } |
5492 | |
|
5493 | 0 | if (eErr == CE_None) |
5494 | 0 | pfnProgress(1.0, nullptr, pProgressData); |
5495 | |
|
5496 | 0 | return eErr; |
5497 | 0 | } |
5498 | | |
5499 | | /************************************************************************/ |
5500 | | /* GDALRegenerateOverviewsMultiBand() */ |
5501 | | /************************************************************************/ |
5502 | | |
5503 | | /** |
5504 | | * \brief Variant of GDALRegenerateOverviews, specially dedicated for generating |
5505 | | * compressed pixel-interleaved overviews (JPEG-IN-TIFF for example) |
5506 | | * |
5507 | | * This function will generate one or more overview images from a base |
5508 | | * image using the requested downsampling algorithm. Its primary use |
5509 | | * is for generating overviews via GDALDataset::BuildOverviews(), but it |
5510 | | * can also be used to generate downsampled images in one file from another |
5511 | | * outside the overview architecture. |
5512 | | * |
5513 | | * The output bands need to exist in advance and share the same characteristics |
5514 | | * (type, dimensions) |
5515 | | * |
5516 | | * The resampling algorithms supported for the moment are "NEAREST", "AVERAGE",
5517 | | * "RMS", "GAUSS", "CUBIC", "CUBICSPLINE", "LANCZOS", "BILINEAR" and "MODE"
5518 | | * |
5519 | | * It does not support color tables or complex data types. |
5520 | | * |
5521 | | * The pseudo-algorithm used by the function is : |
5522 | | * for each overview |
5523 | | * iterate on lines of the source by a step of deltay |
5524 | | * iterate on columns of the source by a step of deltax |
5525 | | * read the source data of size deltax * deltay for all the bands |
5526 | | * generate the corresponding overview block for all the bands |
5527 | | * |
5528 | | * This function will properly honour NODATA_VALUES tuples (special dataset
5529 | | * metadata) so that only a given RGB triplet (in case of an RGB image) will be
5530 | | * considered as the nodata value and not each value of the triplet
5531 | | * independently per band.
5532 | | * |
5533 | | * Starting with GDAL 3.2, the GDAL_NUM_THREADS configuration option can be set
5534 | | * to "ALL_CPUS" or an integer value to specify the number of threads to use for
5535 | | * overview computation.
5536 | | * |
5537 | | * @param nBands the number of bands, size of papoSrcBands and size of |
5538 | | * first dimension of papapoOverviewBands |
5539 | | * @param papoSrcBands the list of source bands to downsample |
5540 | | * @param nOverviews the number of downsampled overview levels being generated. |
5541 | | * @param papapoOverviewBands two-dimensional array of bands. First dimension is
5542 | | * indexed by nBands. Second dimension is indexed by
5543 | | * nOverviews.
5544 | | * @param pszResampling Resampling algorithm ("NEAREST", "AVERAGE", "RMS",
5545 | | * "GAUSS", "CUBIC", "CUBICSPLINE", "LANCZOS", "BILINEAR" or "MODE").
5546 | | * @param pfnProgress progress report function. |
5547 | | * @param pProgressData progress function callback data. |
5548 | | * @param papszOptions (GDAL >= 3.6) NULL terminated list of options as |
5549 | | * key=value pairs, or NULL |
5550 | | * Starting with GDAL 3.8, the XOFF, YOFF, XSIZE and YSIZE |
5551 | | * options can be specified to express that overviews should |
5552 | | * be regenerated only in the specified subset of the source |
5553 | | * dataset. |
5554 | | * @return CE_None on success or CE_Failure on failure. |
5555 | | */ |
5556 | | |
5557 | | CPLErr GDALRegenerateOverviewsMultiBand( |
5558 | | int nBands, GDALRasterBand *const *papoSrcBands, int nOverviews, |
5559 | | GDALRasterBand *const *const *papapoOverviewBands, |
5560 | | const char *pszResampling, GDALProgressFunc pfnProgress, |
5561 | | void *pProgressData, CSLConstList papszOptions) |
5562 | 0 | { |
5563 | 0 | CPL_IGNORE_RET_VAL(papszOptions); |
5564 | |
|
5565 | 0 | if (pfnProgress == nullptr) |
5566 | 0 | pfnProgress = GDALDummyProgress; |
5567 | |
|
5568 | 0 | if (EQUAL(pszResampling, "NONE") || nBands == 0 || nOverviews == 0) |
5569 | 0 | return CE_None; |
5570 | | |
5571 | | // Sanity checks. |
5572 | 0 | if (!STARTS_WITH_CI(pszResampling, "NEAR") && |
5573 | 0 | !EQUAL(pszResampling, "RMS") && !EQUAL(pszResampling, "AVERAGE") && |
5574 | 0 | !EQUAL(pszResampling, "GAUSS") && !EQUAL(pszResampling, "CUBIC") && |
5575 | 0 | !EQUAL(pszResampling, "CUBICSPLINE") && |
5576 | 0 | !EQUAL(pszResampling, "LANCZOS") && !EQUAL(pszResampling, "BILINEAR") && |
5577 | 0 | !EQUAL(pszResampling, "MODE")) |
5578 | 0 | { |
5579 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
5580 | 0 | "GDALRegenerateOverviewsMultiBand: pszResampling='%s' " |
5581 | 0 | "not supported", |
5582 | 0 | pszResampling); |
5583 | 0 | return CE_Failure; |
5584 | 0 | } |
5585 | | |
5586 | 0 | int nKernelRadius = 0; |
5587 | 0 | GDALResampleFunction pfnResampleFn = |
5588 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius); |
5589 | 0 | if (pfnResampleFn == nullptr) |
5590 | 0 | return CE_Failure; |
5591 | | |
5592 | 0 | const int nToplevelSrcWidth = papoSrcBands[0]->GetXSize(); |
5593 | 0 | const int nToplevelSrcHeight = papoSrcBands[0]->GetYSize(); |
5594 | 0 | if (nToplevelSrcWidth <= 0 || nToplevelSrcHeight <= 0) |
5595 | 0 | return CE_None; |
5596 | 0 | GDALDataType eDataType = papoSrcBands[0]->GetRasterDataType(); |
5597 | 0 | for (int iBand = 1; iBand < nBands; ++iBand) |
5598 | 0 | { |
5599 | 0 | if (papoSrcBands[iBand]->GetXSize() != nToplevelSrcWidth || |
5600 | 0 | papoSrcBands[iBand]->GetYSize() != nToplevelSrcHeight) |
5601 | 0 | { |
5602 | 0 | CPLError( |
5603 | 0 | CE_Failure, CPLE_NotSupported, |
5604 | 0 | "GDALRegenerateOverviewsMultiBand: all the source bands must " |
5605 | 0 | "have the same dimensions"); |
5606 | 0 | return CE_Failure; |
5607 | 0 | } |
5608 | 0 | if (papoSrcBands[iBand]->GetRasterDataType() != eDataType) |
5609 | 0 | { |
5610 | 0 | CPLError( |
5611 | 0 | CE_Failure, CPLE_NotSupported, |
5612 | 0 | "GDALRegenerateOverviewsMultiBand: all the source bands must " |
5613 | 0 | "have the same data type"); |
5614 | 0 | return CE_Failure; |
5615 | 0 | } |
5616 | 0 | } |
5617 | | |
5618 | 0 | for (int iOverview = 0; iOverview < nOverviews; ++iOverview) |
5619 | 0 | { |
5620 | 0 | const auto poOvrFirstBand = papapoOverviewBands[0][iOverview]; |
5621 | 0 | const int nDstWidth = poOvrFirstBand->GetXSize(); |
5622 | 0 | const int nDstHeight = poOvrFirstBand->GetYSize(); |
5623 | 0 | for (int iBand = 1; iBand < nBands; ++iBand) |
5624 | 0 | { |
5625 | 0 | const auto poOvrBand = papapoOverviewBands[iBand][iOverview]; |
5626 | 0 | if (poOvrBand->GetXSize() != nDstWidth || |
5627 | 0 | poOvrBand->GetYSize() != nDstHeight) |
5628 | 0 | { |
5629 | 0 | CPLError( |
5630 | 0 | CE_Failure, CPLE_NotSupported, |
5631 | 0 | "GDALRegenerateOverviewsMultiBand: all the overviews bands " |
5632 | 0 | "of the same level must have the same dimensions"); |
5633 | 0 | return CE_Failure; |
5634 | 0 | } |
5635 | 0 | if (poOvrBand->GetRasterDataType() != eDataType) |
5636 | 0 | { |
5637 | 0 | CPLError( |
5638 | 0 | CE_Failure, CPLE_NotSupported, |
5639 | 0 | "GDALRegenerateOverviewsMultiBand: all the overviews bands " |
5640 | 0 | "must have the same data type as the source bands"); |
5641 | 0 | return CE_Failure; |
5642 | 0 | } |
5643 | 0 | } |
5644 | 0 | } |
5645 | | |
5646 | | // First pass to compute the total number of pixels to write. |
5647 | 0 | double dfTotalPixelCount = 0; |
5648 | 0 | const int nSrcXOff = atoi(CSLFetchNameValueDef(papszOptions, "XOFF", "0")); |
5649 | 0 | const int nSrcYOff = atoi(CSLFetchNameValueDef(papszOptions, "YOFF", "0")); |
5650 | 0 | const int nSrcXSize = atoi(CSLFetchNameValueDef( |
5651 | 0 | papszOptions, "XSIZE", CPLSPrintf("%d", nToplevelSrcWidth))); |
5652 | 0 | const int nSrcYSize = atoi(CSLFetchNameValueDef( |
5653 | 0 | papszOptions, "YSIZE", CPLSPrintf("%d", nToplevelSrcHeight))); |
5654 | 0 | for (int iOverview = 0; iOverview < nOverviews; ++iOverview) |
5655 | 0 | { |
5656 | 0 | dfTotalPixelCount += |
5657 | 0 | static_cast<double>(nSrcXSize) / nToplevelSrcWidth * |
5658 | 0 | papapoOverviewBands[0][iOverview]->GetXSize() * |
5659 | 0 | static_cast<double>(nSrcYSize) / nToplevelSrcHeight * |
5660 | 0 | papapoOverviewBands[0][iOverview]->GetYSize(); |
5661 | 0 | } |
5662 | |
|
5663 | 0 | const GDALDataType eWrkDataType = |
5664 | 0 | GDALGetOvrWorkDataType(pszResampling, eDataType); |
5665 | 0 | const int nWrkDataTypeSize = |
5666 | 0 | std::max(1, GDALGetDataTypeSizeBytes(eWrkDataType)); |
5667 | |
|
5668 | 0 | const bool bIsMask = papoSrcBands[0]->IsMaskBand(); |
5669 | | |
5670 | | // If we have a nodata mask and we are doing something more complicated
5671 | | // than nearest neighbour, we have to fetch the nodata mask.
5672 | 0 | const bool bUseNoDataMask = |
5673 | 0 | !STARTS_WITH_CI(pszResampling, "NEAR") && |
5674 | 0 | (bIsMask || (papoSrcBands[0]->GetMaskFlags() & GMF_ALL_VALID) == 0); |
5675 | |
|
5676 | 0 | std::vector<bool> abHasNoData(nBands); |
5677 | 0 | std::vector<double> adfNoDataValue(nBands); |
5678 | |
|
5679 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
5680 | 0 | { |
5681 | 0 | int nHasNoData = 0; |
5682 | 0 | adfNoDataValue[iBand] = |
5683 | 0 | papoSrcBands[iBand]->GetNoDataValue(&nHasNoData); |
5684 | 0 | abHasNoData[iBand] = CPL_TO_BOOL(nHasNoData); |
5685 | 0 | } |
5686 | 0 | const bool bPropagateNoData = |
5687 | 0 | CPLTestBool(CPLGetConfigOption("GDAL_OVR_PROPAGATE_NODATA", "NO")); |
5688 | |
|
5689 | 0 | const char *pszThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1"); |
5690 | 0 | const int nThreads = std::max(1, std::min(128, EQUAL(pszThreads, "ALL_CPUS") |
5691 | 0 | ? CPLGetNumCPUs() |
5692 | 0 | : atoi(pszThreads))); |
5693 | 0 | auto poThreadPool = |
5694 | 0 | nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr; |
5695 | 0 | auto poJobQueue = poThreadPool ? poThreadPool->CreateJobQueue() |
5696 | 0 | : std::unique_ptr<CPLJobQueue>(nullptr); |
5697 | | |
5698 | | // Only configurable for debug / testing |
5699 | 0 | const GIntBig nChunkMaxSize = []() -> GIntBig |
5700 | 0 | { |
5701 | 0 | const char *pszVal = |
5702 | 0 | CPLGetConfigOption("GDAL_OVR_CHUNK_MAX_SIZE", nullptr); |
5703 | 0 | if (pszVal) |
5704 | 0 | { |
5705 | 0 | GIntBig nRet = 0; |
5706 | 0 | CPLParseMemorySize(pszVal, &nRet, nullptr); |
5707 | 0 | return std::max<GIntBig>(100, nRet); |
5708 | 0 | } |
5709 | 0 | return 10 * 1024 * 1024; |
5710 | 0 | }(); |
5711 | | |
5712 | | // Only configurable for debug / testing |
5713 | 0 | const GIntBig nChunkMaxSizeForTempFile = []() -> GIntBig |
5714 | 0 | { |
5715 | 0 | const char *pszVal = CPLGetConfigOption( |
5716 | 0 | "GDAL_OVR_CHUNK_MAX_SIZE_FOR_TEMP_FILE", nullptr); |
5717 | 0 | if (pszVal) |
5718 | 0 | { |
5719 | 0 | GIntBig nRet = 0; |
5720 | 0 | CPLParseMemorySize(pszVal, &nRet, nullptr); |
5721 | 0 | return std::max<GIntBig>(100, nRet); |
5722 | 0 | } |
5723 | 0 | const auto nUsableRAM = CPLGetUsablePhysicalRAM(); |
5724 | 0 | if (nUsableRAM > 0) |
5725 | 0 | return nUsableRAM / 10; |
5726 | | // Select a value to be able to at least downsample by 2 for a RGB |
5727 | | // 1024x1024 tiled output: (2 * 1024 + 2) * (2 * 1024 + 2) * 3 = 12 MB |
5728 | 0 | return 100 * 1024 * 1024; |
5729 | 0 | }(); |
5730 | | |
5731 | | // Second pass to do the real job. |
5732 | 0 | double dfCurPixelCount = 0; |
5733 | 0 | CPLErr eErr = CE_None; |
5734 | 0 | for (int iOverview = 0; iOverview < nOverviews && eErr == CE_None; |
5735 | 0 | ++iOverview) |
5736 | 0 | { |
5737 | 0 | int iSrcOverview = -1; // -1 means the source bands. |
5738 | |
|
5739 | 0 | const int nDstTotalWidth = |
5740 | 0 | papapoOverviewBands[0][iOverview]->GetXSize(); |
5741 | 0 | const int nDstTotalHeight = |
5742 | 0 | papapoOverviewBands[0][iOverview]->GetYSize(); |
5743 | | |
5744 | | // Compute the coordinates of the target region to refresh |
5745 | 0 | constexpr double EPS = 1e-8; |
5746 | 0 | const int nDstXOffStart = static_cast<int>( |
5747 | 0 | static_cast<double>(nSrcXOff) / nToplevelSrcWidth * nDstTotalWidth + |
5748 | 0 | EPS); |
5749 | 0 | const int nDstXOffEnd = |
5750 | 0 | std::min(static_cast<int>( |
5751 | 0 | std::ceil(static_cast<double>(nSrcXOff + nSrcXSize) / |
5752 | 0 | nToplevelSrcWidth * nDstTotalWidth - |
5753 | 0 | EPS)), |
5754 | 0 | nDstTotalWidth); |
5755 | 0 | const int nDstWidth = nDstXOffEnd - nDstXOffStart; |
5756 | 0 | const int nDstYOffStart = |
5757 | 0 | static_cast<int>(static_cast<double>(nSrcYOff) / |
5758 | 0 | nToplevelSrcHeight * nDstTotalHeight + |
5759 | 0 | EPS); |
5760 | 0 | const int nDstYOffEnd = |
5761 | 0 | std::min(static_cast<int>( |
5762 | 0 | std::ceil(static_cast<double>(nSrcYOff + nSrcYSize) / |
5763 | 0 | nToplevelSrcHeight * nDstTotalHeight - |
5764 | 0 | EPS)), |
5765 | 0 | nDstTotalHeight); |
5766 | 0 | const int nDstHeight = nDstYOffEnd - nDstYOffStart; |
5767 | | |
5768 | | // Try to use previous level of overview as the source to compute |
5769 | | // the next level. |
5770 | 0 | int nSrcWidth = nToplevelSrcWidth; |
5771 | 0 | int nSrcHeight = nToplevelSrcHeight; |
5772 | 0 | if (iOverview > 0 && |
5773 | 0 | papapoOverviewBands[0][iOverview - 1]->GetXSize() > nDstTotalWidth) |
5774 | 0 | { |
5775 | 0 | nSrcWidth = papapoOverviewBands[0][iOverview - 1]->GetXSize(); |
5776 | 0 | nSrcHeight = papapoOverviewBands[0][iOverview - 1]->GetYSize(); |
5777 | 0 | iSrcOverview = iOverview - 1; |
5778 | 0 | } |
5779 | |
|
5780 | 0 | const double dfXRatioDstToSrc = |
5781 | 0 | static_cast<double>(nSrcWidth) / nDstTotalWidth; |
5782 | 0 | const double dfYRatioDstToSrc = |
5783 | 0 | static_cast<double>(nSrcHeight) / nDstTotalHeight; |
5784 | |
|
5785 | 0 | const int nOvrFactor = |
5786 | 0 | std::max(1, std::max(static_cast<int>(0.5 + dfXRatioDstToSrc), |
5787 | 0 | static_cast<int>(0.5 + dfYRatioDstToSrc))); |
5788 | |
|
5789 | 0 | int nDstChunkXSize = 0; |
5790 | 0 | int nDstChunkYSize = 0; |
5791 | 0 | papapoOverviewBands[0][iOverview]->GetBlockSize(&nDstChunkXSize, |
5792 | 0 | &nDstChunkYSize); |
5793 | |
|
5794 | 0 | constexpr int PIXEL_MARGIN = 2; |
5795 | | // Try to extend the chunk size so that the memory needed to acquire |
5796 | | // source pixels goes up to 10 MB. |
5797 | | // This can help for drivers that support multi-threaded reading |
5798 | 0 | const int nFullResYChunk = static_cast<int>(std::min<double>( |
5799 | 0 | nSrcHeight, PIXEL_MARGIN + nDstChunkYSize * dfYRatioDstToSrc)); |
5800 | 0 | const int nFullResYChunkQueried = static_cast<int>(std::min<int64_t>( |
5801 | 0 | nSrcHeight, |
5802 | 0 | nFullResYChunk + static_cast<int64_t>(RADIUS_TO_DIAMETER) * |
5803 | 0 | nKernelRadius * nOvrFactor)); |
5804 | 0 | while (nDstChunkXSize < nDstWidth) |
5805 | 0 | { |
5806 | 0 | constexpr int INCREASE_FACTOR = 2; |
5807 | |
|
5808 | 0 | const int nFullResXChunk = static_cast<int>(std::min<double>( |
5809 | 0 | nSrcWidth, PIXEL_MARGIN + INCREASE_FACTOR * nDstChunkXSize * |
5810 | 0 | dfXRatioDstToSrc)); |
5811 | |
|
5812 | 0 | const int nFullResXChunkQueried = |
5813 | 0 | static_cast<int>(std::min<int64_t>( |
5814 | 0 | nSrcWidth, |
5815 | 0 | nFullResXChunk + static_cast<int64_t>(RADIUS_TO_DIAMETER) * |
5816 | 0 | nKernelRadius * nOvrFactor)); |
5817 | |
|
5818 | 0 | if (nBands > nChunkMaxSize / nFullResXChunkQueried / |
5819 | 0 | nFullResYChunkQueried / nWrkDataTypeSize) |
5820 | 0 | { |
5821 | 0 | break; |
5822 | 0 | } |
5823 | | |
5824 | 0 | nDstChunkXSize *= INCREASE_FACTOR; |
5825 | 0 | } |
5826 | 0 | nDstChunkXSize = std::min(nDstChunkXSize, nDstWidth); |
5827 | |
|
5828 | 0 | const int nFullResXChunk = static_cast<int>(std::min<double>( |
5829 | 0 | nSrcWidth, PIXEL_MARGIN + nDstChunkXSize * dfXRatioDstToSrc)); |
5830 | 0 | const int nFullResXChunkQueried = static_cast<int>(std::min<int64_t>( |
5831 | 0 | nSrcWidth, |
5832 | 0 | nFullResXChunk + static_cast<int64_t>(RADIUS_TO_DIAMETER) * |
5833 | 0 | nKernelRadius * nOvrFactor)); |
5834 | | |
5835 | | // Make sure that the RAM requirements to acquire the source data does |
5836 | | // not exceed nChunkMaxSizeForTempFile |
5837 | | // If so, reduce the destination chunk size, generate overviews in a |
5838 | | // temporary dataset, and copy that temporary dataset over the target |
5839 | | // overview bands (to avoid issues with lossy compression) |
5840 | 0 | const bool bOverflowFullResXChunkYChunkQueried = |
5841 | 0 | nBands > std::numeric_limits<int64_t>::max() / |
5842 | 0 | nFullResXChunkQueried / nFullResYChunkQueried / |
5843 | 0 | nWrkDataTypeSize; |
5844 | |
|
5845 | 0 | const auto nMemRequirement = |
5846 | 0 | bOverflowFullResXChunkYChunkQueried |
5847 | 0 | ? 0 |
5848 | 0 | : static_cast<GIntBig>(nFullResXChunkQueried) * |
5849 | 0 | nFullResYChunkQueried * nBands * nWrkDataTypeSize; |
5850 | | // Use a temporary dataset with a smaller destination chunk size |
5851 | 0 | const auto nOverShootFactor = |
5852 | 0 | nMemRequirement / nChunkMaxSizeForTempFile; |
5853 | |
|
5854 | 0 | constexpr int MIN_OVERSHOOT_FACTOR = 4; |
5855 | 0 | const auto nSqrtOverShootFactor = std::max<GIntBig>( |
5856 | 0 | MIN_OVERSHOOT_FACTOR, static_cast<GIntBig>(std::ceil(std::sqrt( |
5857 | 0 | static_cast<double>(nOverShootFactor))))); |
5858 | 0 | constexpr int DEFAULT_CHUNK_SIZE = 256; |
5859 | 0 | constexpr int GTIFF_BLOCK_SIZE_MULTIPLE = 16; |
5860 | 0 | const int nReducedDstChunkXSize = |
5861 | 0 | bOverflowFullResXChunkYChunkQueried |
5862 | 0 | ? DEFAULT_CHUNK_SIZE |
5863 | 0 | : std::max(1, static_cast<int>(nDstChunkXSize / |
5864 | 0 | nSqrtOverShootFactor) & |
5865 | 0 | ~(GTIFF_BLOCK_SIZE_MULTIPLE - 1)); |
5866 | 0 | const int nReducedDstChunkYSize = |
5867 | 0 | bOverflowFullResXChunkYChunkQueried |
5868 | 0 | ? DEFAULT_CHUNK_SIZE |
5869 | 0 | : std::max(1, static_cast<int>(nDstChunkYSize / |
5870 | 0 | nSqrtOverShootFactor) & |
5871 | 0 | ~(GTIFF_BLOCK_SIZE_MULTIPLE - 1)); |
5872 | |
|
5873 | 0 | if (bOverflowFullResXChunkYChunkQueried || |
5874 | 0 | nMemRequirement > nChunkMaxSizeForTempFile) |
5875 | 0 | { |
5876 | 0 | const auto nDTSize = |
5877 | 0 | std::max(1, GDALGetDataTypeSizeBytes(eDataType)); |
5878 | 0 | const bool bTmpDSMemRequirementOverflow = |
5879 | 0 | nBands > std::numeric_limits<int64_t>::max() / nDstWidth / |
5880 | 0 | nDstHeight / nDTSize; |
5881 | 0 | const auto nTmpDSMemRequirement = |
5882 | 0 | bTmpDSMemRequirementOverflow |
5883 | 0 | ? 0 |
5884 | 0 | : static_cast<GIntBig>(nDstWidth) * nDstHeight * nBands * |
5885 | 0 | nDTSize; |
5886 | | |
5887 | | // make sure that one band buffer doesn't overflow size_t |
5888 | 0 | const bool bChunkSizeOverflow = |
5889 | 0 | static_cast<size_t>(nDTSize) > |
5890 | 0 | std::numeric_limits<size_t>::max() / nDstWidth / nDstHeight; |
5891 | 0 | const size_t nChunkSize = |
5892 | 0 | bChunkSizeOverflow |
5893 | 0 | ? 0 |
5894 | 0 | : static_cast<size_t>(nDstWidth) * nDstHeight * nDTSize; |
5895 | |
|
5896 | 0 | const auto CreateVRT = |
5897 | 0 | [nBands, nSrcWidth, nSrcHeight, nDstTotalWidth, nDstTotalHeight, |
5898 | 0 | pszResampling, eWrkDataType, papoSrcBands, papapoOverviewBands, |
5899 | 0 | iSrcOverview, &abHasNoData, |
5900 | 0 | &adfNoDataValue](int nVRTBlockXSize, int nVRTBlockYSize) |
5901 | 0 | { |
5902 | 0 | auto poVRTDS = std::make_unique<VRTDataset>( |
5903 | 0 | nDstTotalWidth, nDstTotalHeight, nVRTBlockXSize, |
5904 | 0 | nVRTBlockYSize); |
5905 | |
|
5906 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
5907 | 0 | { |
5908 | 0 | auto poVRTSrc = std::make_unique<VRTSimpleSource>(); |
5909 | 0 | poVRTSrc->SetResampling(pszResampling); |
5910 | 0 | poVRTDS->AddBand(eWrkDataType); |
5911 | 0 | auto poVRTBand = static_cast<VRTSourcedRasterBand *>( |
5912 | 0 | poVRTDS->GetRasterBand(iBand + 1)); |
5913 | |
|
5914 | 0 | auto poSrcBand = papoSrcBands[iBand]; |
5915 | 0 | if (iSrcOverview != -1) |
5916 | 0 | poSrcBand = papapoOverviewBands[iBand][iSrcOverview]; |
5917 | 0 | poVRTBand->ConfigureSource( |
5918 | 0 | poVRTSrc.get(), poSrcBand, false, 0, 0, nSrcWidth, |
5919 | 0 | nSrcHeight, 0, 0, nDstTotalWidth, nDstTotalHeight); |
5920 | | // Add the source to the band; release() hands the raw pointer over, so the unique_ptr no longer owns (or frees) the source |
5921 | 0 | poVRTBand->AddSource(poVRTSrc.release()); |
5922 | 0 | if (abHasNoData[iBand]) |
5923 | 0 | poVRTBand->SetNoDataValue(adfNoDataValue[iBand]); |
5924 | 0 | } |
5925 | |
|
5926 | 0 | if (papoSrcBands[0]->GetMaskFlags() == GMF_PER_DATASET && |
5927 | 0 | poVRTDS->CreateMaskBand(GMF_PER_DATASET) == CE_None) |
5928 | 0 | { |
5929 | 0 | VRTSourcedRasterBand *poMaskVRTBand = |
5930 | 0 | cpl::down_cast<VRTSourcedRasterBand *>( |
5931 | 0 | poVRTDS->GetRasterBand(1)->GetMaskBand()); |
5932 | 0 | auto poSrcBand = papoSrcBands[0]; |
5933 | 0 | if (iSrcOverview != -1) |
5934 | 0 | poSrcBand = papapoOverviewBands[0][iSrcOverview]; |
5935 | 0 | poMaskVRTBand->AddMaskBandSource( |
5936 | 0 | poSrcBand->GetMaskBand(), 0, 0, nSrcWidth, nSrcHeight, |
5937 | 0 | 0, 0, nDstTotalWidth, nDstTotalHeight); |
5938 | 0 | } |
5939 | |
|
5940 | 0 | return poVRTDS; |
5941 | 0 | }; |
5942 | | |
5943 | | // If the overview accommodates chunking, do so and recurse |
5944 | | // to avoid generating full size temporary files |
5945 | 0 | if (!bOverflowFullResXChunkYChunkQueried && |
5946 | 0 | !bTmpDSMemRequirementOverflow && !bChunkSizeOverflow && |
5947 | 0 | (nDstChunkXSize < nDstWidth || nDstChunkYSize < nDstHeight)) |
5948 | 0 | { |
5949 | | // Create a VRT with the smaller chunk to do the scaling |
5950 | 0 | auto poVRTDS = |
5951 | 0 | CreateVRT(nReducedDstChunkXSize, nReducedDstChunkYSize); |
5952 | |
|
5953 | 0 | std::vector<GDALRasterBand *> apoVRTBand(nBands); |
5954 | 0 | std::vector<GDALRasterBand *> apoDstBand(nBands); |
5955 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
5956 | 0 | { |
5957 | 0 | apoDstBand[iBand] = papapoOverviewBands[iBand][iOverview]; |
5958 | 0 | apoVRTBand[iBand] = poVRTDS->GetRasterBand(iBand + 1); |
5959 | 0 | } |
5960 | | |
5961 | | // Use a flag to avoid reading from the overview being built |
5962 | 0 | GDALRasterIOExtraArg sExtraArg; |
5963 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
5964 | 0 | if (iSrcOverview == -1) |
5965 | 0 | sExtraArg.bUseOnlyThisScale = true; |
5966 | | |
5967 | | // A single band buffer for data transfer to the overview |
5968 | 0 | std::vector<GByte> abyChunk; |
5969 | 0 | try |
5970 | 0 | { |
5971 | 0 | abyChunk.resize(nChunkSize); |
5972 | 0 | } |
5973 | 0 | catch (const std::exception &) |
5974 | 0 | { |
5975 | 0 | CPLError(CE_Failure, CPLE_OutOfMemory, |
5976 | 0 | "Out of memory allocating temporary buffer"); |
5977 | 0 | return CE_Failure; |
5978 | 0 | } |
5979 | | |
5980 | | // Loop over output height, in chunks |
5981 | 0 | for (int nDstYOff = nDstYOffStart; |
5982 | 0 | nDstYOff < nDstYOffEnd && eErr == CE_None; |
5983 | 0 | /* */) |
5984 | 0 | { |
5985 | 0 | const int nDstYCount = |
5986 | 0 | std::min(nDstChunkYSize, nDstYOffEnd - nDstYOff); |
5987 | | // Loop over output width, in output chunks |
5988 | 0 | for (int nDstXOff = nDstXOffStart; |
5989 | 0 | nDstXOff < nDstXOffEnd && eErr == CE_None; |
5990 | 0 | /* */) |
5991 | 0 | { |
5992 | 0 | const int nDstXCount = |
5993 | 0 | std::min(nDstChunkXSize, nDstXOffEnd - nDstXOff); |
5994 | | // Read and transfer the chunk to the overview |
5995 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; |
5996 | 0 | ++iBand) |
5997 | 0 | { |
5998 | 0 | eErr = apoVRTBand[iBand]->RasterIO( |
5999 | 0 | GF_Read, nDstXOff, nDstYOff, nDstXCount, |
6000 | 0 | nDstYCount, abyChunk.data(), nDstXCount, |
6001 | 0 | nDstYCount, eDataType, 0, 0, &sExtraArg); |
6002 | 0 | if (eErr == CE_None) |
6003 | 0 | { |
6004 | 0 | eErr = apoDstBand[iBand]->RasterIO( |
6005 | 0 | GF_Write, nDstXOff, nDstYOff, nDstXCount, |
6006 | 0 | nDstYCount, abyChunk.data(), nDstXCount, |
6007 | 0 | nDstYCount, eDataType, 0, 0, nullptr); |
6008 | 0 | } |
6009 | 0 | } |
6010 | |
|
6011 | 0 | dfCurPixelCount += |
6012 | 0 | static_cast<double>(nDstXCount) * nDstYCount; |
6013 | |
|
6014 | 0 | nDstXOff += nDstXCount; |
6015 | 0 | } // width |
6016 | |
|
6017 | 0 | if (!pfnProgress(dfCurPixelCount / dfTotalPixelCount, |
6018 | 0 | nullptr, pProgressData)) |
6019 | 0 | { |
6020 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
6021 | 0 | "User terminated"); |
6022 | 0 | eErr = CE_Failure; |
6023 | 0 | } |
6024 | |
|
6025 | 0 | nDstYOff += nDstYCount; |
6026 | 0 | } // height |
6027 | |
|
6028 | 0 | if (CE_None != eErr) |
6029 | 0 | { |
6030 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
6031 | 0 | "Error while writing overview"); |
6032 | 0 | return CE_Failure; |
6033 | 0 | } |
6034 | | |
6035 | 0 | pfnProgress(1.0, nullptr, pProgressData); |
6036 | | // Flush the overviews we just generated |
6037 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
6038 | 0 | apoDstBand[iBand]->FlushCache(false); |
6039 | |
|
6040 | 0 | continue; // Next overview |
6041 | 0 | } // chunking via temporary dataset |
6042 | | |
6043 | 0 | std::unique_ptr<GDALDataset> poTmpDS; |
6044 | | // Config option mostly/only for autotest purposes |
6045 | 0 | const char *pszGDAL_OVR_TEMP_DRIVER = |
6046 | 0 | CPLGetConfigOption("GDAL_OVR_TEMP_DRIVER", ""); |
6047 | 0 | if ((!bTmpDSMemRequirementOverflow && |
6048 | 0 | nTmpDSMemRequirement <= nChunkMaxSizeForTempFile && |
6049 | 0 | !EQUAL(pszGDAL_OVR_TEMP_DRIVER, "GTIFF")) || |
6050 | 0 | EQUAL(pszGDAL_OVR_TEMP_DRIVER, "MEM")) |
6051 | 0 | { |
6052 | 0 | auto poTmpDrv = GetGDALDriverManager()->GetDriverByName("MEM"); |
6053 | 0 | if (!poTmpDrv) |
6054 | 0 | { |
6055 | 0 | eErr = CE_Failure; |
6056 | 0 | break; |
6057 | 0 | } |
6058 | 0 | poTmpDS.reset(poTmpDrv->Create("", nDstTotalWidth, |
6059 | 0 | nDstTotalHeight, nBands, |
6060 | 0 | eDataType, nullptr)); |
6061 | 0 | } |
6062 | 0 | else |
6063 | 0 | { |
6064 | | // Create a temporary file for the overview |
6065 | 0 | auto poTmpDrv = |
6066 | 0 | GetGDALDriverManager()->GetDriverByName("GTiff"); |
6067 | 0 | if (!poTmpDrv) |
6068 | 0 | { |
6069 | 0 | eErr = CE_Failure; |
6070 | 0 | break; |
6071 | 0 | } |
6072 | 0 | std::string osTmpFilename; |
6073 | 0 | auto poDstDS = papapoOverviewBands[0][0]->GetDataset(); |
6074 | 0 | if (poDstDS) |
6075 | 0 | { |
6076 | 0 | osTmpFilename = poDstDS->GetDescription(); |
6077 | 0 | VSIStatBufL sStatBuf; |
6078 | 0 | if (!osTmpFilename.empty() && |
6079 | 0 | VSIStatL(osTmpFilename.c_str(), &sStatBuf) == 0) |
6080 | 0 | osTmpFilename += "_tmp_ovr.tif"; |
6081 | 0 | } |
6082 | 0 | if (osTmpFilename.empty()) |
6083 | 0 | { |
6084 | 0 | osTmpFilename = CPLGenerateTempFilenameSafe(nullptr); |
6085 | 0 | osTmpFilename += ".tif"; |
6086 | 0 | } |
6087 | 0 | CPLDebug("GDAL", "Creating temporary file %s of %d x %d x %d", |
6088 | 0 | osTmpFilename.c_str(), nDstWidth, nDstHeight, nBands); |
6089 | 0 | CPLStringList aosCO; |
6090 | 0 | if (0 == ((nReducedDstChunkXSize % GTIFF_BLOCK_SIZE_MULTIPLE) | |
6091 | 0 | (nReducedDstChunkYSize % GTIFF_BLOCK_SIZE_MULTIPLE))) |
6092 | 0 | { |
6093 | 0 | aosCO.SetNameValue("TILED", "YES"); |
6094 | 0 | aosCO.SetNameValue("BLOCKXSIZE", |
6095 | 0 | CPLSPrintf("%d", nReducedDstChunkXSize)); |
6096 | 0 | aosCO.SetNameValue("BLOCKYSIZE", |
6097 | 0 | CPLSPrintf("%d", nReducedDstChunkYSize)); |
6098 | 0 | } |
6099 | 0 | if (const char *pszCOList = |
6100 | 0 | poTmpDrv->GetMetadataItem(GDAL_DMD_CREATIONOPTIONLIST)) |
6101 | 0 | { |
6102 | 0 | aosCO.SetNameValue( |
6103 | 0 | "COMPRESS", strstr(pszCOList, "ZSTD") ? "ZSTD" : "LZW"); |
6104 | 0 | } |
6105 | 0 | poTmpDS.reset(poTmpDrv->Create(osTmpFilename.c_str(), nDstWidth, |
6106 | 0 | nDstHeight, nBands, eDataType, |
6107 | 0 | aosCO.List())); |
6108 | 0 | if (poTmpDS) |
6109 | 0 | { |
6110 | 0 | poTmpDS->MarkSuppressOnClose(); |
6111 | 0 | VSIUnlink(osTmpFilename.c_str()); |
6112 | 0 | } |
6113 | 0 | } |
6114 | 0 | if (!poTmpDS) |
6115 | 0 | { |
6116 | 0 | eErr = CE_Failure; |
6117 | 0 | break; |
6118 | 0 | } |
6119 | | |
6120 | | // Create a full size VRT to do the resampling without edge effects |
6121 | 0 | auto poVRTDS = |
6122 | 0 | CreateVRT(nReducedDstChunkXSize, nReducedDstChunkYSize); |
6123 | | |
6124 | | // Allocate a band buffer with the overview chunk size |
6125 | 0 | std::unique_ptr<void, VSIFreeReleaser> pDstBuffer( |
6126 | 0 | VSI_MALLOC3_VERBOSE(size_t(nWrkDataTypeSize), nDstChunkXSize, |
6127 | 0 | nDstChunkYSize)); |
6128 | 0 | if (pDstBuffer == nullptr) |
6129 | 0 | { |
6130 | 0 | eErr = CE_Failure; |
6131 | 0 | break; |
6132 | 0 | } |
6133 | | |
6134 | | // Use a flag to avoid reading the overview being built |
6135 | 0 | GDALRasterIOExtraArg sExtraArg; |
6136 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
6137 | 0 | if (iSrcOverview == -1) |
6138 | 0 | sExtraArg.bUseOnlyThisScale = true; |
6139 | | |
6140 | | // Scale and copy data from the VRT to the temp file |
6141 | 0 | for (int nDstYOff = nDstYOffStart; |
6142 | 0 | nDstYOff < nDstYOffEnd && eErr == CE_None; |
6143 | 0 | /* */) |
6144 | 0 | { |
6145 | 0 | const int nDstYCount = |
6146 | 0 | std::min(nReducedDstChunkYSize, nDstYOffEnd - nDstYOff); |
6147 | 0 | for (int nDstXOff = nDstXOffStart; |
6148 | 0 | nDstXOff < nDstXOffEnd && eErr == CE_None; |
6149 | 0 | /* */) |
6150 | 0 | { |
6151 | 0 | const int nDstXCount = |
6152 | 0 | std::min(nReducedDstChunkXSize, nDstXOffEnd - nDstXOff); |
6153 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; |
6154 | 0 | ++iBand) |
6155 | 0 | { |
6156 | 0 | auto poSrcBand = poVRTDS->GetRasterBand(iBand + 1); |
6157 | 0 | eErr = poSrcBand->RasterIO( |
6158 | 0 | GF_Read, nDstXOff, nDstYOff, nDstXCount, nDstYCount, |
6159 | 0 | pDstBuffer.get(), nDstXCount, nDstYCount, |
6160 | 0 | eWrkDataType, 0, 0, &sExtraArg); |
6161 | 0 | if (eErr == CE_None) |
6162 | 0 | { |
6163 | | // Write to the temporary dataset, shifted |
6164 | 0 | auto poOvrBand = poTmpDS->GetRasterBand(iBand + 1); |
6165 | 0 | eErr = poOvrBand->RasterIO( |
6166 | 0 | GF_Write, nDstXOff - nDstXOffStart, |
6167 | 0 | nDstYOff - nDstYOffStart, nDstXCount, |
6168 | 0 | nDstYCount, pDstBuffer.get(), nDstXCount, |
6169 | 0 | nDstYCount, eWrkDataType, 0, 0, nullptr); |
6170 | 0 | } |
6171 | 0 | } |
6172 | 0 | nDstXOff += nDstXCount; |
6173 | 0 | } |
6174 | 0 | nDstYOff += nDstYCount; |
6175 | 0 | } |
6176 | | |
6177 | | // Copy from the temporary to the overview |
6178 | 0 | for (int nDstYOff = nDstYOffStart; |
6179 | 0 | nDstYOff < nDstYOffEnd && eErr == CE_None; |
6180 | 0 | /* */) |
6181 | 0 | { |
6182 | 0 | const int nDstYCount = |
6183 | 0 | std::min(nDstChunkYSize, nDstYOffEnd - nDstYOff); |
6184 | 0 | for (int nDstXOff = nDstXOffStart; |
6185 | 0 | nDstXOff < nDstXOffEnd && eErr == CE_None; |
6186 | 0 | /* */) |
6187 | 0 | { |
6188 | 0 | const int nDstXCount = |
6189 | 0 | std::min(nDstChunkXSize, nDstXOffEnd - nDstXOff); |
6190 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; |
6191 | 0 | ++iBand) |
6192 | 0 | { |
6193 | 0 | auto poSrcBand = poTmpDS->GetRasterBand(iBand + 1); |
6194 | 0 | eErr = poSrcBand->RasterIO( |
6195 | 0 | GF_Read, nDstXOff - nDstXOffStart, |
6196 | 0 | nDstYOff - nDstYOffStart, nDstXCount, nDstYCount, |
6197 | 0 | pDstBuffer.get(), nDstXCount, nDstYCount, |
6198 | 0 | eWrkDataType, 0, 0, nullptr); |
6199 | 0 | if (eErr == CE_None) |
6200 | 0 | { |
6201 | | // Write to the destination overview bands |
6202 | 0 | auto poOvrBand = |
6203 | 0 | papapoOverviewBands[iBand][iOverview]; |
6204 | 0 | eErr = poOvrBand->RasterIO( |
6205 | 0 | GF_Write, nDstXOff, nDstYOff, nDstXCount, |
6206 | 0 | nDstYCount, pDstBuffer.get(), nDstXCount, |
6207 | 0 | nDstYCount, eWrkDataType, 0, 0, nullptr); |
6208 | 0 | } |
6209 | 0 | } |
6210 | 0 | nDstXOff += nDstXCount; |
6211 | 0 | } |
6212 | 0 | nDstYOff += nDstYCount; |
6213 | 0 | } |
6214 | |
|
6215 | 0 | if (eErr != CE_None) |
6216 | 0 | { |
6217 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
6218 | 0 | "Failed to write overview %d", iOverview); |
6219 | 0 | return eErr; |
6220 | 0 | } |
6221 | | |
6222 | | // Flush the data to overviews. |
6223 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
6224 | 0 | papapoOverviewBands[iBand][iOverview]->FlushCache(false); |
6225 | |
|
6226 | 0 | continue; |
6227 | 0 | } |
6228 | | |
6229 | | // Structure describing a resampling job: the inputs passed to pfnResampleFn, the outputs it fills in, and a finished flag that NotifyFinished() sets and IsFinished()/WaitFinished() observe |
6230 | 0 | struct OvrJob |
6231 | 0 | { |
6232 | | // Buffers to free when job is finished; each holder is owned through a std::unique_ptr, so it is destroyed together with the job object |
6233 | 0 | std::unique_ptr<PointerHolder> oSrcMaskBufferHolder{}; |
6234 | 0 | std::unique_ptr<PointerHolder> oSrcBufferHolder{}; |
6235 | 0 | std::unique_ptr<PointerHolder> oDstBufferHolder{}; |
6236 | |
|
6237 | 0 | GDALRasterBand *poDstBand = nullptr; |
6238 | |
6239 | | // Input parameters of pfnResampleFn: the resampling callback itself, its argument structure, and the source chunk pointer |
6240 | 0 | GDALResampleFunction pfnResampleFn = nullptr; |
6241 | 0 | GDALOverviewResampleArgs args{}; |
6242 | 0 | const void *pChunk = nullptr; |
6243 | |
6244 | | // Output values of resampling function; eErr defaults to CE_Failure, so a job whose result was never stored reads as failed |
6245 | 0 | CPLErr eErr = CE_Failure; |
6246 | 0 | void *pDstBuffer = nullptr; |
6247 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown; |
6248 | |
|
6249 | 0 | void NotifyFinished() |
6250 | 0 | { |
6251 | 0 | std::lock_guard guard(mutex); |
6252 | 0 | bFinished = true; |
6253 | 0 | cv.notify_one(); |
6254 | 0 | } |
6255 | |
|
6256 | 0 | bool IsFinished() |
6257 | 0 | { |
6258 | 0 | std::lock_guard guard(mutex); |
6259 | 0 | return bFinished; |
6260 | 0 | } |
6261 | |
|
6262 | 0 | void WaitFinished() |
6263 | 0 | { |
6264 | 0 | std::unique_lock oGuard(mutex); |
6265 | 0 | while (!bFinished) |
6266 | 0 | { |
6267 | 0 | cv.wait(oGuard); |
6268 | 0 | } |
6269 | 0 | } |
6270 | |
|
6271 | 0 | private: |
6272 | | // Synchronization: bFinished is only read or written while holding mutex; cv is signalled by NotifyFinished() and waited on (in a loop re-checking bFinished) by WaitFinished() |
6273 | 0 | bool bFinished = false; |
6274 | 0 | std::mutex mutex{}; |
6275 | 0 | std::condition_variable cv{}; |
6276 | 0 | }; |
6277 | | |
6278 | | // Thread function to resample: pData is an OvrJob*; calls the job's pfnResampleFn with its args and source chunk, stores the returned error code, output buffer and output data type in the job, wraps the output buffer in a PointerHolder kept by the job, and only then marks the job as finished |
6279 | 0 | const auto JobResampleFunc = [](void *pData) |
6280 | 0 | { |
6281 | 0 | OvrJob *poJob = static_cast<OvrJob *>(pData); |
6282 | |
|
6283 | 0 | poJob->eErr = poJob->pfnResampleFn(poJob->args, poJob->pChunk, |
6284 | 0 | &(poJob->pDstBuffer), |
6285 | 0 | &(poJob->eDstBufferDataType)); |
6286 | |
|
6287 | 0 | poJob->oDstBufferHolder.reset(new PointerHolder(poJob->pDstBuffer)); |
6288 | |
|
6289 | 0 | poJob->NotifyFinished(); |
6290 | 0 | }; |
6291 | | |
6292 | | // Function to write resample data to target band: writes the job's pDstBuffer (typed eDstBufferDataType) to poDstBand over the window [nDstXOff, nDstXOff2) x [nDstYOff, nDstYOff2), with buffer dimensions equal to the window dimensions, and returns the RasterIO() result |
6293 | 0 | const auto WriteJobData = [](const OvrJob *poJob) |
6294 | 0 | { |
6295 | 0 | return poJob->poDstBand->RasterIO( |
6296 | 0 | GF_Write, poJob->args.nDstXOff, poJob->args.nDstYOff, |
6297 | 0 | poJob->args.nDstXOff2 - poJob->args.nDstXOff, |
6298 | 0 | poJob->args.nDstYOff2 - poJob->args.nDstYOff, poJob->pDstBuffer, |
6299 | 0 | poJob->args.nDstXOff2 - poJob->args.nDstXOff, |
6300 | 0 | poJob->args.nDstYOff2 - poJob->args.nDstYOff, |
6301 | 0 | poJob->eDstBufferDataType, 0, 0, nullptr); |
6302 | 0 | }; |
6303 | | |
6304 | | // Wait for completion of oldest job (the front of jobList) and serialize it: blocks until the job is finished, writes its data with WriteJobData only if the job itself succeeded, then removes the job from the list in all cases and returns the resulting error code. front() is called unconditionally, so jobList must not be empty |
6305 | 0 | const auto WaitAndFinalizeOldestJob = |
6306 | 0 | [WriteJobData](std::list<std::unique_ptr<OvrJob>> &jobList) |
6307 | 0 | { |
6308 | 0 | auto poOldestJob = jobList.front().get(); |
6309 | 0 | poOldestJob->WaitFinished(); |
6310 | 0 | CPLErr l_eErr = poOldestJob->eErr; |
6311 | 0 | if (l_eErr == CE_None) |
6312 | 0 | { |
6313 | 0 | l_eErr = WriteJobData(poOldestJob); |
6314 | 0 | } |
6315 | |
|
6316 | 0 | jobList.pop_front(); |
6317 | 0 | return l_eErr; |
6318 | 0 | }; |
6319 | | |
6320 | | // Queue of jobs |
6321 | 0 | std::list<std::unique_ptr<OvrJob>> jobList; |
6322 | |
|
6323 | 0 | std::vector<std::unique_ptr<void, VSIFreeReleaser>> apaChunk(nBands); |
6324 | 0 | std::vector<std::unique_ptr<GByte, VSIFreeReleaser>> |
6325 | 0 | apabyChunkNoDataMask(nBands); |
6326 | | |
6327 | | // Iterate on destination overview, block by block. |
6328 | 0 | for (int nDstYOff = nDstYOffStart; |
6329 | 0 | nDstYOff < nDstYOffEnd && eErr == CE_None; |
6330 | 0 | nDstYOff += nDstChunkYSize) |
6331 | 0 | { |
6332 | 0 | int nDstYCount; |
6333 | 0 | if (nDstYOff + nDstChunkYSize <= nDstYOffEnd) |
6334 | 0 | nDstYCount = nDstChunkYSize; |
6335 | 0 | else |
6336 | 0 | nDstYCount = nDstYOffEnd - nDstYOff; |
6337 | |
|
6338 | 0 | int nChunkYOff = static_cast<int>(nDstYOff * dfYRatioDstToSrc); |
6339 | 0 | int nChunkYOff2 = static_cast<int>( |
6340 | 0 | ceil((nDstYOff + nDstYCount) * dfYRatioDstToSrc)); |
6341 | 0 | if (nChunkYOff2 > nSrcHeight || |
6342 | 0 | nDstYOff + nDstYCount == nDstTotalHeight) |
6343 | 0 | nChunkYOff2 = nSrcHeight; |
6344 | 0 | int nYCount = nChunkYOff2 - nChunkYOff; |
6345 | 0 | CPLAssert(nYCount <= nFullResYChunk); |
6346 | | |
6347 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor; |
6348 | 0 | int nChunkYSizeQueried = |
6349 | 0 | nYCount + RADIUS_TO_DIAMETER * nKernelRadius * nOvrFactor; |
6350 | 0 | if (nChunkYOffQueried < 0) |
6351 | 0 | { |
6352 | 0 | nChunkYSizeQueried += nChunkYOffQueried; |
6353 | 0 | nChunkYOffQueried = 0; |
6354 | 0 | } |
6355 | 0 | if (nChunkYSizeQueried + nChunkYOffQueried > nSrcHeight) |
6356 | 0 | nChunkYSizeQueried = nSrcHeight - nChunkYOffQueried; |
6357 | 0 | CPLAssert(nChunkYSizeQueried <= nFullResYChunkQueried); |
6358 | | |
6359 | 0 | if (!pfnProgress(std::min(1.0, dfCurPixelCount / dfTotalPixelCount), |
6360 | 0 | nullptr, pProgressData)) |
6361 | 0 | { |
6362 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
6363 | 0 | eErr = CE_Failure; |
6364 | 0 | } |
6365 | | |
6366 | | // Iterate on destination overview, block by block. |
6367 | 0 | for (int nDstXOff = nDstXOffStart; |
6368 | 0 | nDstXOff < nDstXOffEnd && eErr == CE_None; |
6369 | 0 | nDstXOff += nDstChunkXSize) |
6370 | 0 | { |
6371 | 0 | int nDstXCount = 0; |
6372 | 0 | if (nDstXOff + nDstChunkXSize <= nDstXOffEnd) |
6373 | 0 | nDstXCount = nDstChunkXSize; |
6374 | 0 | else |
6375 | 0 | nDstXCount = nDstXOffEnd - nDstXOff; |
6376 | |
|
6377 | 0 | dfCurPixelCount += static_cast<double>(nDstXCount) * nDstYCount; |
6378 | |
|
6379 | 0 | int nChunkXOff = static_cast<int>(nDstXOff * dfXRatioDstToSrc); |
6380 | 0 | int nChunkXOff2 = static_cast<int>( |
6381 | 0 | ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc)); |
6382 | 0 | if (nChunkXOff2 > nSrcWidth || |
6383 | 0 | nDstXOff + nDstXCount == nDstTotalWidth) |
6384 | 0 | nChunkXOff2 = nSrcWidth; |
6385 | 0 | const int nXCount = nChunkXOff2 - nChunkXOff; |
6386 | 0 | CPLAssert(nXCount <= nFullResXChunk); |
6387 | | |
6388 | 0 | int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor; |
6389 | 0 | int nChunkXSizeQueried = |
6390 | 0 | nXCount + RADIUS_TO_DIAMETER * nKernelRadius * nOvrFactor; |
6391 | 0 | if (nChunkXOffQueried < 0) |
6392 | 0 | { |
6393 | 0 | nChunkXSizeQueried += nChunkXOffQueried; |
6394 | 0 | nChunkXOffQueried = 0; |
6395 | 0 | } |
6396 | 0 | if (nChunkXSizeQueried + nChunkXOffQueried > nSrcWidth) |
6397 | 0 | nChunkXSizeQueried = nSrcWidth - nChunkXOffQueried; |
6398 | 0 | CPLAssert(nChunkXSizeQueried <= nFullResXChunkQueried); |
6399 | | #if DEBUG_VERBOSE |
6400 | | CPLDebug("GDAL", |
6401 | | "Reading (%dx%d -> %dx%d) for output (%dx%d -> %dx%d)", |
6402 | | nChunkXOffQueried, nChunkYOffQueried, |
6403 | | nChunkXSizeQueried, nChunkYSizeQueried, nDstXOff, |
6404 | | nDstYOff, nDstXCount, nDstYCount); |
6405 | | #endif |
6406 | | |
6407 | | // Avoid accumulating too many tasks and exhausting RAM |
6408 | | |
6409 | | // Try to complete already finished jobs |
6410 | 0 | while (eErr == CE_None && !jobList.empty()) |
6411 | 0 | { |
6412 | 0 | auto poOldestJob = jobList.front().get(); |
6413 | 0 | if (!poOldestJob->IsFinished()) |
6414 | 0 | break; |
6415 | 0 | eErr = poOldestJob->eErr; |
6416 | 0 | if (eErr == CE_None) |
6417 | 0 | { |
6418 | 0 | eErr = WriteJobData(poOldestJob); |
6419 | 0 | } |
6420 | |
|
6421 | 0 | jobList.pop_front(); |
6422 | 0 | } |
6423 | | |
6424 | | // And in case we have saturated the number of threads, |
6425 | | // wait for completion of tasks to go below the threshold. |
6426 | 0 | while (eErr == CE_None && |
6427 | 0 | jobList.size() >= static_cast<size_t>(nThreads)) |
6428 | 0 | { |
6429 | 0 | eErr = WaitAndFinalizeOldestJob(jobList); |
6430 | 0 | } |
6431 | | |
6432 | | // Read the source buffers for all the bands. |
6433 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; ++iBand) |
6434 | 0 | { |
6435 | | // (Re)allocate buffers if needed |
6436 | 0 | if (apaChunk[iBand] == nullptr) |
6437 | 0 | { |
6438 | 0 | apaChunk[iBand].reset(VSI_MALLOC3_VERBOSE( |
6439 | 0 | nFullResXChunkQueried, nFullResYChunkQueried, |
6440 | 0 | nWrkDataTypeSize)); |
6441 | 0 | if (apaChunk[iBand] == nullptr) |
6442 | 0 | { |
6443 | 0 | eErr = CE_Failure; |
6444 | 0 | } |
6445 | 0 | } |
6446 | 0 | if (bUseNoDataMask && |
6447 | 0 | apabyChunkNoDataMask[iBand] == nullptr) |
6448 | 0 | { |
6449 | 0 | apabyChunkNoDataMask[iBand].reset( |
6450 | 0 | static_cast<GByte *>(VSI_MALLOC2_VERBOSE( |
6451 | 0 | nFullResXChunkQueried, nFullResYChunkQueried))); |
6452 | 0 | if (apabyChunkNoDataMask[iBand] == nullptr) |
6453 | 0 | { |
6454 | 0 | eErr = CE_Failure; |
6455 | 0 | } |
6456 | 0 | } |
6457 | |
|
6458 | 0 | if (eErr == CE_None) |
6459 | 0 | { |
6460 | 0 | GDALRasterBand *poSrcBand = nullptr; |
6461 | 0 | if (iSrcOverview == -1) |
6462 | 0 | poSrcBand = papoSrcBands[iBand]; |
6463 | 0 | else |
6464 | 0 | poSrcBand = |
6465 | 0 | papapoOverviewBands[iBand][iSrcOverview]; |
6466 | 0 | eErr = poSrcBand->RasterIO( |
6467 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
6468 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
6469 | 0 | apaChunk[iBand].get(), nChunkXSizeQueried, |
6470 | 0 | nChunkYSizeQueried, eWrkDataType, 0, 0, nullptr); |
6471 | |
|
6472 | 0 | if (bUseNoDataMask && eErr == CE_None) |
6473 | 0 | { |
6474 | 0 | auto poMaskBand = poSrcBand->IsMaskBand() |
6475 | 0 | ? poSrcBand |
6476 | 0 | : poSrcBand->GetMaskBand(); |
6477 | 0 | eErr = poMaskBand->RasterIO( |
6478 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
6479 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
6480 | 0 | apabyChunkNoDataMask[iBand].get(), |
6481 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
6482 | 0 | GDT_Byte, 0, 0, nullptr); |
6483 | 0 | } |
6484 | 0 | } |
6485 | 0 | } |
6486 | | |
6487 | | // Compute the resulting overview block. |
6488 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; ++iBand) |
6489 | 0 | { |
6490 | 0 | auto poJob = std::make_unique<OvrJob>(); |
6491 | 0 | poJob->pfnResampleFn = pfnResampleFn; |
6492 | 0 | poJob->poDstBand = papapoOverviewBands[iBand][iOverview]; |
6493 | 0 | poJob->args.eOvrDataType = |
6494 | 0 | poJob->poDstBand->GetRasterDataType(); |
6495 | 0 | poJob->args.nOvrXSize = poJob->poDstBand->GetXSize(); |
6496 | 0 | poJob->args.nOvrYSize = poJob->poDstBand->GetYSize(); |
6497 | 0 | const char *pszNBITS = poJob->poDstBand->GetMetadataItem( |
6498 | 0 | "NBITS", "IMAGE_STRUCTURE"); |
6499 | 0 | poJob->args.nOvrNBITS = pszNBITS ? atoi(pszNBITS) : 0; |
6500 | 0 | poJob->args.dfXRatioDstToSrc = dfXRatioDstToSrc; |
6501 | 0 | poJob->args.dfYRatioDstToSrc = dfYRatioDstToSrc; |
6502 | 0 | poJob->args.eWrkDataType = eWrkDataType; |
6503 | 0 | poJob->pChunk = apaChunk[iBand].get(); |
6504 | 0 | poJob->args.pabyChunkNodataMask = |
6505 | 0 | apabyChunkNoDataMask[iBand].get(); |
6506 | 0 | poJob->args.nChunkXOff = nChunkXOffQueried; |
6507 | 0 | poJob->args.nChunkXSize = nChunkXSizeQueried; |
6508 | 0 | poJob->args.nChunkYOff = nChunkYOffQueried; |
6509 | 0 | poJob->args.nChunkYSize = nChunkYSizeQueried; |
6510 | 0 | poJob->args.nDstXOff = nDstXOff; |
6511 | 0 | poJob->args.nDstXOff2 = nDstXOff + nDstXCount; |
6512 | 0 | poJob->args.nDstYOff = nDstYOff; |
6513 | 0 | poJob->args.nDstYOff2 = nDstYOff + nDstYCount; |
6514 | 0 | poJob->args.pszResampling = pszResampling; |
6515 | 0 | poJob->args.bHasNoData = abHasNoData[iBand]; |
6516 | 0 | poJob->args.dfNoDataValue = adfNoDataValue[iBand]; |
6517 | 0 | poJob->args.eSrcDataType = eDataType; |
6518 | 0 | poJob->args.bPropagateNoData = bPropagateNoData; |
6519 | |
|
6520 | 0 | if (poJobQueue) |
6521 | 0 | { |
6522 | 0 | poJob->oSrcMaskBufferHolder.reset(new PointerHolder( |
6523 | 0 | apabyChunkNoDataMask[iBand].release())); |
6524 | |
|
6525 | 0 | poJob->oSrcBufferHolder.reset( |
6526 | 0 | new PointerHolder(apaChunk[iBand].release())); |
6527 | |
|
6528 | 0 | poJobQueue->SubmitJob(JobResampleFunc, poJob.get()); |
6529 | 0 | jobList.emplace_back(std::move(poJob)); |
6530 | 0 | } |
6531 | 0 | else |
6532 | 0 | { |
6533 | 0 | JobResampleFunc(poJob.get()); |
6534 | 0 | eErr = poJob->eErr; |
6535 | 0 | if (eErr == CE_None) |
6536 | 0 | { |
6537 | 0 | eErr = WriteJobData(poJob.get()); |
6538 | 0 | } |
6539 | 0 | } |
6540 | 0 | } |
6541 | 0 | } |
6542 | 0 | } |
6543 | | |
6544 | | // Wait for all pending jobs to complete |
6545 | 0 | while (!jobList.empty()) |
6546 | 0 | { |
6547 | 0 | const auto l_eErr = WaitAndFinalizeOldestJob(jobList); |
6548 | 0 | if (l_eErr != CE_None && eErr == CE_None) |
6549 | 0 | eErr = l_eErr; |
6550 | 0 | } |
6551 | | |
6552 | | // Flush the data to overviews. |
6553 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
6554 | 0 | { |
6555 | 0 | if (papapoOverviewBands[iBand][iOverview]->FlushCache(false) != |
6556 | 0 | CE_None) |
6557 | 0 | eErr = CE_Failure; |
6558 | 0 | } |
6559 | 0 | } |
6560 | | |
6561 | 0 | if (eErr == CE_None) |
6562 | 0 | pfnProgress(1.0, nullptr, pProgressData); |
6563 | |
|
6564 | 0 | return eErr; |
6565 | 0 | } |
6566 | | |
6567 | | /************************************************************************/ |
6568 | | /* GDALRegenerateOverviewsMultiBand() */ |
6569 | | /************************************************************************/ |
6570 | | |
6571 | | /** |
6572 | | * \brief Variant of GDALRegenerateOverviews, specially dedicated for generating |
6573 | | * compressed pixel-interleaved overviews (JPEG-IN-TIFF for example) |
6574 | | * |
6575 | | * This function will generate one or more overview images from a base |
6576 | | * image using the requested downsampling algorithm. Its primary use |
6577 | | * is for generating overviews via GDALDataset::BuildOverviews(), but it |
6578 | | * can also be used to generate downsampled images in one file from another |
6579 | | * outside the overview architecture. |
6580 | | * |
6581 | | * The output bands need to exist in advance and share the same characteristics |
6582 | | * (type, dimensions) |
6583 | | * |
6584 | | * The resampling algorithms supported for the moment are "NEAREST", "AVERAGE", |
6585 | | * "RMS", "GAUSS", "CUBIC", "CUBICSPLINE", "LANCZOS" and "BILINEAR" |
6586 | | * |
6587 | | * It does not support color tables or complex data types. |
6588 | | * |
6589 | | * The pseudo-algorithm used by the function is : |
6590 | | * for each overview |
6591 | | * iterate on lines of the source by a step of deltay |
6592 | | * iterate on columns of the source by a step of deltax |
6593 | | * read the source data of size deltax * deltay for all the bands |
6594 | | * generate the corresponding overview block for all the bands |
6595 | | * |
6596 | | * This function will honour properly NODATA_VALUES tuples (special dataset |
6597 | | * metadata) so that only a given RGB triplet (in case of a RGB image) will be |
6598 | | * considered as the nodata value and not each value of the triplet |
6599 | | * independently per band. |
6600 | | * |
6601 | | * The GDAL_NUM_THREADS configuration option can be set |
6602 | | * to "ALL_CPUS" or a integer value to specify the number of threads to use for |
6603 | | * overview computation. |
6604 | | * |
6605 | | * @param apoSrcBands the list of source bands to downsample |
6606 | | * @param aapoOverviewBands bidimension array of bands. First dimension is |
6607 | | * indexed by bands. Second dimension is indexed by |
6608 | | * overview levels. All aapoOverviewBands[i] arrays |
6609 | | * must have the same size (i.e. same number of |
6610 | | * overviews) |
6611 | | * @param pszResampling Resampling algorithm ("NEAREST", "AVERAGE", "RMS", |
6612 | | * "GAUSS", "CUBIC", "CUBICSPLINE", "LANCZOS" or "BILINEAR"). |
6613 | | * @param pfnProgress progress report function. |
6614 | | * @param pProgressData progress function callback data. |
6615 | | * @param papszOptions NULL terminated list of options as |
6616 | | * key=value pairs, or NULL |
6617 | | * The XOFF, YOFF, XSIZE and YSIZE |
6618 | | * options can be specified to express that overviews should |
6619 | | * be regenerated only in the specified subset of the source |
6620 | | * dataset. |
6621 | | * @return CE_None on success or CE_Failure on failure. |
6622 | | * @since 3.10 |
6623 | | */ |
6624 | | |
6625 | | CPLErr GDALRegenerateOverviewsMultiBand( |
6626 | | const std::vector<GDALRasterBand *> &apoSrcBands, |
6627 | | const std::vector<std::vector<GDALRasterBand *>> &aapoOverviewBands, |
6628 | | const char *pszResampling, GDALProgressFunc pfnProgress, |
6629 | | void *pProgressData, CSLConstList papszOptions) |
6630 | 0 | { |
6631 | 0 | CPLAssert(apoSrcBands.size() == aapoOverviewBands.size()); |
6632 | 0 | for (size_t i = 1; i < aapoOverviewBands.size(); ++i) |
6633 | 0 | { |
6634 | 0 | CPLAssert(aapoOverviewBands[i].size() == aapoOverviewBands[0].size()); |
6635 | 0 | } |
6636 | | |
6637 | 0 | if (aapoOverviewBands.empty()) |
6638 | 0 | return CE_None; |
6639 | | |
6640 | 0 | std::vector<GDALRasterBand **> apapoOverviewBands; |
6641 | 0 | for (auto &apoOverviewBands : aapoOverviewBands) |
6642 | 0 | { |
6643 | 0 | auto papoOverviewBands = static_cast<GDALRasterBand **>( |
6644 | 0 | CPLMalloc(apoOverviewBands.size() * sizeof(GDALRasterBand *))); |
6645 | 0 | for (size_t i = 0; i < apoOverviewBands.size(); ++i) |
6646 | 0 | { |
6647 | 0 | papoOverviewBands[i] = apoOverviewBands[i]; |
6648 | 0 | } |
6649 | 0 | apapoOverviewBands.push_back(papoOverviewBands); |
6650 | 0 | } |
6651 | 0 | const CPLErr eErr = GDALRegenerateOverviewsMultiBand( |
6652 | 0 | static_cast<int>(apoSrcBands.size()), apoSrcBands.data(), |
6653 | 0 | static_cast<int>(aapoOverviewBands[0].size()), |
6654 | 0 | apapoOverviewBands.data(), pszResampling, pfnProgress, pProgressData, |
6655 | 0 | papszOptions); |
6656 | 0 | for (GDALRasterBand **papoOverviewBands : apapoOverviewBands) |
6657 | 0 | CPLFree(papoOverviewBands); |
6658 | 0 | return eErr; |
6659 | 0 | } |
6660 | | |
6661 | | /************************************************************************/ |
6662 | | /* GDALComputeBandStats() */ |
6663 | | /************************************************************************/ |
6664 | | |
6665 | | /** Undocumented |
6666 | | * @param hSrcBand undocumented. |
6667 | | * @param nSampleStep Step between scanlines used to compute statistics. |
6668 | | * When nSampleStep is equal to 1, all scanlines will |
6669 | | * be processed. |
6670 | | * @param pdfMean undocumented. |
6671 | | * @param pdfStdDev undocumented. |
6672 | | * @param pfnProgress undocumented. |
6673 | | * @param pProgressData undocumented. |
6674 | | * @return undocumented |
6675 | | */ |
6676 | | CPLErr CPL_STDCALL GDALComputeBandStats(GDALRasterBandH hSrcBand, |
6677 | | int nSampleStep, double *pdfMean, |
6678 | | double *pdfStdDev, |
6679 | | GDALProgressFunc pfnProgress, |
6680 | | void *pProgressData) |
6681 | | |
6682 | 0 | { |
6683 | 0 | VALIDATE_POINTER1(hSrcBand, "GDALComputeBandStats", CE_Failure); |
6684 | | |
6685 | 0 | GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand); |
6686 | |
|
6687 | 0 | if (pfnProgress == nullptr) |
6688 | 0 | pfnProgress = GDALDummyProgress; |
6689 | |
|
6690 | 0 | const int nWidth = poSrcBand->GetXSize(); |
6691 | 0 | const int nHeight = poSrcBand->GetYSize(); |
6692 | |
|
6693 | 0 | if (nSampleStep >= nHeight || nSampleStep < 1) |
6694 | 0 | nSampleStep = 1; |
6695 | |
|
6696 | 0 | GDALDataType eWrkType = GDT_Unknown; |
6697 | 0 | float *pafData = nullptr; |
6698 | 0 | GDALDataType eType = poSrcBand->GetRasterDataType(); |
6699 | 0 | const bool bComplex = CPL_TO_BOOL(GDALDataTypeIsComplex(eType)); |
6700 | 0 | if (bComplex) |
6701 | 0 | { |
6702 | 0 | pafData = static_cast<float *>( |
6703 | 0 | VSI_MALLOC2_VERBOSE(nWidth, 2 * sizeof(float))); |
6704 | 0 | eWrkType = GDT_CFloat32; |
6705 | 0 | } |
6706 | 0 | else |
6707 | 0 | { |
6708 | 0 | pafData = |
6709 | 0 | static_cast<float *>(VSI_MALLOC2_VERBOSE(nWidth, sizeof(float))); |
6710 | 0 | eWrkType = GDT_Float32; |
6711 | 0 | } |
6712 | |
|
6713 | 0 | if (nWidth == 0 || pafData == nullptr) |
6714 | 0 | { |
6715 | 0 | VSIFree(pafData); |
6716 | 0 | return CE_Failure; |
6717 | 0 | } |
6718 | | |
6719 | | /* -------------------------------------------------------------------- */ |
6720 | | /* Loop over all sample lines. */ |
6721 | | /* -------------------------------------------------------------------- */ |
6722 | 0 | double dfSum = 0.0; |
6723 | 0 | double dfSum2 = 0.0; |
6724 | 0 | int iLine = 0; |
6725 | 0 | GIntBig nSamples = 0; |
6726 | |
|
6727 | 0 | do |
6728 | 0 | { |
6729 | 0 | if (!pfnProgress(iLine / static_cast<double>(nHeight), nullptr, |
6730 | 0 | pProgressData)) |
6731 | 0 | { |
6732 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
6733 | 0 | CPLFree(pafData); |
6734 | 0 | return CE_Failure; |
6735 | 0 | } |
6736 | | |
6737 | 0 | const CPLErr eErr = |
6738 | 0 | poSrcBand->RasterIO(GF_Read, 0, iLine, nWidth, 1, pafData, nWidth, |
6739 | 0 | 1, eWrkType, 0, 0, nullptr); |
6740 | 0 | if (eErr != CE_None) |
6741 | 0 | { |
6742 | 0 | CPLFree(pafData); |
6743 | 0 | return eErr; |
6744 | 0 | } |
6745 | | |
6746 | 0 | for (int iPixel = 0; iPixel < nWidth; ++iPixel) |
6747 | 0 | { |
6748 | 0 | float fValue = 0.0f; |
6749 | |
|
6750 | 0 | if (bComplex) |
6751 | 0 | { |
6752 | | // Compute the magnitude of the complex value. |
6753 | 0 | fValue = |
6754 | 0 | std::hypot(pafData[static_cast<size_t>(iPixel) * 2], |
6755 | 0 | pafData[static_cast<size_t>(iPixel) * 2 + 1]); |
6756 | 0 | } |
6757 | 0 | else |
6758 | 0 | { |
6759 | 0 | fValue = pafData[iPixel]; |
6760 | 0 | } |
6761 | |
|
6762 | 0 | dfSum += static_cast<double>(fValue); |
6763 | 0 | dfSum2 += static_cast<double>(fValue) * static_cast<double>(fValue); |
6764 | 0 | } |
6765 | |
|
6766 | 0 | nSamples += nWidth; |
6767 | 0 | iLine += nSampleStep; |
6768 | 0 | } while (iLine < nHeight); |
6769 | | |
6770 | 0 | if (!pfnProgress(1.0, nullptr, pProgressData)) |
6771 | 0 | { |
6772 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
6773 | 0 | CPLFree(pafData); |
6774 | 0 | return CE_Failure; |
6775 | 0 | } |
6776 | | |
6777 | | /* -------------------------------------------------------------------- */ |
6778 | | /* Produce the result values. */ |
6779 | | /* -------------------------------------------------------------------- */ |
6780 | 0 | if (pdfMean != nullptr) |
6781 | 0 | *pdfMean = dfSum / nSamples; |
6782 | |
|
6783 | 0 | if (pdfStdDev != nullptr) |
6784 | 0 | { |
6785 | 0 | const double dfMean = dfSum / nSamples; |
6786 | |
|
6787 | 0 | *pdfStdDev = sqrt((dfSum2 / nSamples) - (dfMean * dfMean)); |
6788 | 0 | } |
6789 | |
|
6790 | 0 | CPLFree(pafData); |
6791 | |
|
6792 | 0 | return CE_None; |
6793 | 0 | } |
6794 | | |
6795 | | /************************************************************************/ |
6796 | | /* GDALOverviewMagnitudeCorrection() */ |
6797 | | /* */ |
6798 | | /* Correct the mean and standard deviation of the overviews of */ |
6799 | | /* the given band to match the base layer approximately. */ |
6800 | | /************************************************************************/ |
6801 | | |
6802 | | /** Undocumented |
6803 | | * @param hBaseBand undocumented. |
6804 | | * @param nOverviewCount undocumented. |
6805 | | * @param pahOverviews undocumented. |
6806 | | * @param pfnProgress undocumented. |
6807 | | * @param pProgressData undocumented. |
6808 | | * @return undocumented |
6809 | | */ |
6810 | | CPLErr GDALOverviewMagnitudeCorrection(GDALRasterBandH hBaseBand, |
6811 | | int nOverviewCount, |
6812 | | GDALRasterBandH *pahOverviews, |
6813 | | GDALProgressFunc pfnProgress, |
6814 | | void *pProgressData) |
6815 | | |
6816 | 0 | { |
6817 | 0 | VALIDATE_POINTER1(hBaseBand, "GDALOverviewMagnitudeCorrection", CE_Failure); |
6818 | | |
6819 | | /* -------------------------------------------------------------------- */ |
6820 | | /* Compute mean/stddev for source raster. */ |
6821 | | /* -------------------------------------------------------------------- */ |
6822 | 0 | double dfOrigMean = 0.0; |
6823 | 0 | double dfOrigStdDev = 0.0; |
6824 | 0 | { |
6825 | 0 | const CPLErr eErr = |
6826 | 0 | GDALComputeBandStats(hBaseBand, 2, &dfOrigMean, &dfOrigStdDev, |
6827 | 0 | pfnProgress, pProgressData); |
6828 | |
|
6829 | 0 | if (eErr != CE_None) |
6830 | 0 | return eErr; |
6831 | 0 | } |
6832 | | |
6833 | | /* -------------------------------------------------------------------- */ |
6834 | | /* Loop on overview bands. */ |
6835 | | /* -------------------------------------------------------------------- */ |
6836 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; ++iOverview) |
6837 | 0 | { |
6838 | 0 | GDALRasterBand *poOverview = |
6839 | 0 | GDALRasterBand::FromHandle(pahOverviews[iOverview]); |
6840 | 0 | double dfOverviewMean, dfOverviewStdDev; |
6841 | |
|
6842 | 0 | const CPLErr eErr = |
6843 | 0 | GDALComputeBandStats(pahOverviews[iOverview], 1, &dfOverviewMean, |
6844 | 0 | &dfOverviewStdDev, pfnProgress, pProgressData); |
6845 | |
|
6846 | 0 | if (eErr != CE_None) |
6847 | 0 | return eErr; |
6848 | | |
6849 | 0 | double dfGain = 1.0; |
6850 | 0 | if (dfOrigStdDev >= 0.0001) |
6851 | 0 | dfGain = dfOrigStdDev / dfOverviewStdDev; |
6852 | | |
6853 | | /* -------------------------------------------------------------------- |
6854 | | */ |
6855 | | /* Apply gain and offset. */ |
6856 | | /* -------------------------------------------------------------------- |
6857 | | */ |
6858 | 0 | const int nWidth = poOverview->GetXSize(); |
6859 | 0 | const int nHeight = poOverview->GetYSize(); |
6860 | |
|
6861 | 0 | GDALDataType eWrkType = GDT_Unknown; |
6862 | 0 | float *pafData = nullptr; |
6863 | 0 | const GDALDataType eType = poOverview->GetRasterDataType(); |
6864 | 0 | const bool bComplex = CPL_TO_BOOL(GDALDataTypeIsComplex(eType)); |
6865 | 0 | if (bComplex) |
6866 | 0 | { |
6867 | 0 | pafData = static_cast<float *>( |
6868 | 0 | VSI_MALLOC2_VERBOSE(nWidth, 2 * sizeof(float))); |
6869 | 0 | eWrkType = GDT_CFloat32; |
6870 | 0 | } |
6871 | 0 | else |
6872 | 0 | { |
6873 | 0 | pafData = static_cast<float *>( |
6874 | 0 | VSI_MALLOC2_VERBOSE(nWidth, sizeof(float))); |
6875 | 0 | eWrkType = GDT_Float32; |
6876 | 0 | } |
6877 | |
|
6878 | 0 | if (pafData == nullptr) |
6879 | 0 | { |
6880 | 0 | return CE_Failure; |
6881 | 0 | } |
6882 | | |
6883 | 0 | for (int iLine = 0; iLine < nHeight; ++iLine) |
6884 | 0 | { |
6885 | 0 | if (!pfnProgress(iLine / static_cast<double>(nHeight), nullptr, |
6886 | 0 | pProgressData)) |
6887 | 0 | { |
6888 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
6889 | 0 | CPLFree(pafData); |
6890 | 0 | return CE_Failure; |
6891 | 0 | } |
6892 | | |
6893 | 0 | if (poOverview->RasterIO(GF_Read, 0, iLine, nWidth, 1, pafData, |
6894 | 0 | nWidth, 1, eWrkType, 0, 0, |
6895 | 0 | nullptr) != CE_None) |
6896 | 0 | { |
6897 | 0 | CPLFree(pafData); |
6898 | 0 | return CE_Failure; |
6899 | 0 | } |
6900 | | |
6901 | 0 | for (int iPixel = 0; iPixel < nWidth; ++iPixel) |
6902 | 0 | { |
6903 | 0 | if (bComplex) |
6904 | 0 | { |
6905 | 0 | pafData[static_cast<size_t>(iPixel) * 2] *= |
6906 | 0 | static_cast<float>(dfGain); |
6907 | 0 | pafData[static_cast<size_t>(iPixel) * 2 + 1] *= |
6908 | 0 | static_cast<float>(dfGain); |
6909 | 0 | } |
6910 | 0 | else |
6911 | 0 | { |
6912 | 0 | pafData[iPixel] = static_cast<float>( |
6913 | 0 | (double(pafData[iPixel]) - dfOverviewMean) * dfGain + |
6914 | 0 | dfOrigMean); |
6915 | 0 | } |
6916 | 0 | } |
6917 | |
|
6918 | 0 | if (poOverview->RasterIO(GF_Write, 0, iLine, nWidth, 1, pafData, |
6919 | 0 | nWidth, 1, eWrkType, 0, 0, |
6920 | 0 | nullptr) != CE_None) |
6921 | 0 | { |
6922 | 0 | CPLFree(pafData); |
6923 | 0 | return CE_Failure; |
6924 | 0 | } |
6925 | 0 | } |
6926 | | |
6927 | 0 | if (!pfnProgress(1.0, nullptr, pProgressData)) |
6928 | 0 | { |
6929 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
6930 | 0 | CPLFree(pafData); |
6931 | 0 | return CE_Failure; |
6932 | 0 | } |
6933 | | |
6934 | 0 | CPLFree(pafData); |
6935 | 0 | } |
6936 | | |
6937 | 0 | return CE_None; |
6938 | 0 | } |