/src/gdal/gcore/overview.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | /****************************************************************************** |
3 | | * |
4 | | * Project: GDAL Core |
5 | | * Purpose: Helper code to implement overview support in different drivers. |
6 | | * Author: Frank Warmerdam, warmerdam@pobox.com |
7 | | * |
8 | | ****************************************************************************** |
9 | | * Copyright (c) 2000, Frank Warmerdam |
10 | | * Copyright (c) 2007-2010, Even Rouault <even dot rouault at spatialys.com> |
11 | | * |
12 | | * SPDX-License-Identifier: MIT |
13 | | ****************************************************************************/ |
14 | | |
15 | | #include "cpl_port.h" |
16 | | #include "gdal_priv.h" |
17 | | |
18 | | #include <cmath> |
19 | | #include <cstddef> |
20 | | #include <cstdlib> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <complex> |
24 | | #include <condition_variable> |
25 | | #include <limits> |
26 | | #include <list> |
27 | | #include <memory> |
28 | | #include <mutex> |
29 | | #include <vector> |
30 | | |
31 | | #include "cpl_conv.h" |
32 | | #include "cpl_error.h" |
33 | | #include "cpl_float.h" |
34 | | #include "cpl_progress.h" |
35 | | #include "cpl_vsi.h" |
36 | | #include "gdal.h" |
37 | | #include "gdal_thread_pool.h" |
38 | | #include "gdalwarper.h" |
39 | | #include "gdal_vrt.h" |
40 | | #include "vrtdataset.h" |
41 | | |
42 | | #ifdef USE_NEON_OPTIMIZATIONS |
43 | | #include "include_sse2neon.h" |
44 | | #define USE_SSE2 |
45 | | |
46 | | #include "gdalsse_priv.h" |
47 | | |
48 | | // Restrict to 64bit processors because they are guaranteed to have SSE2, |
49 | | // or if __AVX2__ is defined. |
50 | | #elif defined(__x86_64) || defined(_M_X64) || defined(__AVX2__) |
51 | | #define USE_SSE2 |
52 | | |
53 | | #include "gdalsse_priv.h" |
54 | | |
55 | | #ifdef __SSE3__ |
56 | | #include <pmmintrin.h> |
57 | | #endif |
58 | | #ifdef __SSSE3__ |
59 | | #include <tmmintrin.h> |
60 | | #endif |
61 | | #ifdef __SSE4_1__ |
62 | | #include <smmintrin.h> |
63 | | #endif |
64 | | #ifdef __AVX2__ |
65 | | #include <immintrin.h> |
66 | | #endif |
67 | | |
68 | | #endif |
69 | | |
70 | | // To be included after above USE_SSE2 and include gdalsse_priv.h |
71 | | // to avoid build issue on Windows x86 |
72 | | #include "gdal_priv_templates.hpp" |
73 | | |
74 | | /************************************************************************/ |
75 | | /* GDALResampleChunk_Near() */ |
76 | | /************************************************************************/ |
77 | | |
78 | | template <class T> |
79 | | static CPLErr GDALResampleChunk_NearT(const GDALOverviewResampleArgs &args, |
80 | | const T *pChunk, T **ppDstBuffer) |
81 | | |
82 | 0 | { |
83 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
84 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
85 | 0 | const GDALDataType eWrkDataType = args.eWrkDataType; |
86 | 0 | const int nChunkXOff = args.nChunkXOff; |
87 | 0 | const int nChunkXSize = args.nChunkXSize; |
88 | 0 | const int nChunkYOff = args.nChunkYOff; |
89 | 0 | const int nDstXOff = args.nDstXOff; |
90 | 0 | const int nDstXOff2 = args.nDstXOff2; |
91 | 0 | const int nDstYOff = args.nDstYOff; |
92 | 0 | const int nDstYOff2 = args.nDstYOff2; |
93 | 0 | const int nDstXWidth = nDstXOff2 - nDstXOff; |
94 | | |
95 | | /* -------------------------------------------------------------------- */ |
96 | | /* Allocate buffers. */ |
97 | | /* -------------------------------------------------------------------- */ |
98 | 0 | *ppDstBuffer = static_cast<T *>( |
99 | 0 | VSI_MALLOC3_VERBOSE(nDstXWidth, nDstYOff2 - nDstYOff, |
100 | 0 | GDALGetDataTypeSizeBytes(eWrkDataType))); |
101 | 0 | if (*ppDstBuffer == nullptr) |
102 | 0 | { |
103 | 0 | return CE_Failure; |
104 | 0 | } |
105 | 0 | T *const pDstBuffer = *ppDstBuffer; |
106 | |
|
107 | 0 | int *panSrcXOff = |
108 | 0 | static_cast<int *>(VSI_MALLOC2_VERBOSE(nDstXWidth, sizeof(int))); |
109 | |
|
110 | 0 | if (panSrcXOff == nullptr) |
111 | 0 | { |
112 | 0 | return CE_Failure; |
113 | 0 | } |
114 | | |
115 | | /* ==================================================================== */ |
116 | | /* Precompute inner loop constants. */ |
117 | | /* ==================================================================== */ |
118 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
119 | 0 | { |
120 | 0 | int nSrcXOff = static_cast<int>(0.5 + iDstPixel * dfXRatioDstToSrc); |
121 | 0 | if (nSrcXOff < nChunkXOff) |
122 | 0 | nSrcXOff = nChunkXOff; |
123 | |
|
124 | 0 | panSrcXOff[iDstPixel - nDstXOff] = nSrcXOff; |
125 | 0 | } |
126 | | |
127 | | /* ==================================================================== */ |
128 | | /* Loop over destination scanlines. */ |
129 | | /* ==================================================================== */ |
130 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
131 | 0 | { |
132 | 0 | int nSrcYOff = static_cast<int>(0.5 + iDstLine * dfYRatioDstToSrc); |
133 | 0 | if (nSrcYOff < nChunkYOff) |
134 | 0 | nSrcYOff = nChunkYOff; |
135 | |
|
136 | 0 | const T *const pSrcScanline = |
137 | 0 | pChunk + |
138 | 0 | (static_cast<GPtrDiff_t>(nSrcYOff - nChunkYOff) * nChunkXSize) - |
139 | 0 | nChunkXOff; |
140 | | |
141 | | /* -------------------------------------------------------------------- |
142 | | */ |
143 | | /* Loop over destination pixels */ |
144 | | /* -------------------------------------------------------------------- |
145 | | */ |
146 | 0 | T *pDstScanline = pDstBuffer + (iDstLine - nDstYOff) * nDstXWidth; |
147 | 0 | for (int iDstPixel = 0; iDstPixel < nDstXWidth; ++iDstPixel) |
148 | 0 | { |
149 | 0 | pDstScanline[iDstPixel] = pSrcScanline[panSrcXOff[iDstPixel]]; |
150 | 0 | } |
151 | 0 | } |
152 | |
|
153 | 0 | CPLFree(panSrcXOff); |
154 | |
|
155 | 0 | return CE_None; |
156 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<unsigned char>(GDALOverviewResampleArgs const&, unsigned char const*, unsigned char**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<unsigned short>(GDALOverviewResampleArgs const&, unsigned short const*, unsigned short**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<unsigned int>(GDALOverviewResampleArgs const&, unsigned int const*, unsigned int**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<unsigned long>(GDALOverviewResampleArgs const&, unsigned long const*, unsigned long**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_NearT<std::__1::complex<double> >(GDALOverviewResampleArgs const&, std::__1::complex<double> const*, std::__1::complex<double>**) |
157 | | |
158 | | static CPLErr GDALResampleChunk_Near(const GDALOverviewResampleArgs &args, |
159 | | const void *pChunk, void **ppDstBuffer, |
160 | | GDALDataType *peDstBufferDataType) |
161 | 0 | { |
162 | 0 | *peDstBufferDataType = args.eWrkDataType; |
163 | 0 | switch (args.eWrkDataType) |
164 | 0 | { |
165 | | // For nearest resampling, as no computation is done, only the |
166 | | // size of the data type matters. |
167 | 0 | case GDT_Byte: |
168 | 0 | case GDT_Int8: |
169 | 0 | { |
170 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 1); |
171 | 0 | return GDALResampleChunk_NearT( |
172 | 0 | args, static_cast<const uint8_t *>(pChunk), |
173 | 0 | reinterpret_cast<uint8_t **>(ppDstBuffer)); |
174 | 0 | } |
175 | | |
176 | 0 | case GDT_Int16: |
177 | 0 | case GDT_UInt16: |
178 | 0 | case GDT_Float16: |
179 | 0 | { |
180 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 2); |
181 | 0 | return GDALResampleChunk_NearT( |
182 | 0 | args, static_cast<const uint16_t *>(pChunk), |
183 | 0 | reinterpret_cast<uint16_t **>(ppDstBuffer)); |
184 | 0 | } |
185 | | |
186 | 0 | case GDT_CInt16: |
187 | 0 | case GDT_CFloat16: |
188 | 0 | case GDT_Int32: |
189 | 0 | case GDT_UInt32: |
190 | 0 | case GDT_Float32: |
191 | 0 | { |
192 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 4); |
193 | 0 | return GDALResampleChunk_NearT( |
194 | 0 | args, static_cast<const uint32_t *>(pChunk), |
195 | 0 | reinterpret_cast<uint32_t **>(ppDstBuffer)); |
196 | 0 | } |
197 | | |
198 | 0 | case GDT_CInt32: |
199 | 0 | case GDT_CFloat32: |
200 | 0 | case GDT_Int64: |
201 | 0 | case GDT_UInt64: |
202 | 0 | case GDT_Float64: |
203 | 0 | { |
204 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 8); |
205 | 0 | return GDALResampleChunk_NearT( |
206 | 0 | args, static_cast<const uint64_t *>(pChunk), |
207 | 0 | reinterpret_cast<uint64_t **>(ppDstBuffer)); |
208 | 0 | } |
209 | | |
210 | 0 | case GDT_CFloat64: |
211 | 0 | { |
212 | 0 | return GDALResampleChunk_NearT( |
213 | 0 | args, static_cast<const std::complex<double> *>(pChunk), |
214 | 0 | reinterpret_cast<std::complex<double> **>(ppDstBuffer)); |
215 | 0 | } |
216 | | |
217 | 0 | case GDT_Unknown: |
218 | 0 | case GDT_TypeCount: |
219 | 0 | break; |
220 | 0 | } |
221 | 0 | CPLAssert(false); |
222 | 0 | return CE_Failure; |
223 | 0 | } |
224 | | |
225 | | namespace |
226 | | { |
227 | | |
228 | | // Find in the color table the entry whose RGB value is the closest |
229 | | // (using quadratic distance) to the test color, ignoring transparent entries. |
230 | | int BestColorEntry(const std::vector<GDALColorEntry> &entries, |
231 | | const GDALColorEntry &test) |
232 | 0 | { |
233 | 0 | int nMinDist = std::numeric_limits<int>::max(); |
234 | 0 | size_t bestEntry = 0; |
235 | 0 | for (size_t i = 0; i < entries.size(); ++i) |
236 | 0 | { |
237 | 0 | const GDALColorEntry &entry = entries[i]; |
238 | | // Ignore transparent entries |
239 | 0 | if (entry.c4 == 0) |
240 | 0 | continue; |
241 | | |
242 | 0 | int nDist = ((test.c1 - entry.c1) * (test.c1 - entry.c1)) + |
243 | 0 | ((test.c2 - entry.c2) * (test.c2 - entry.c2)) + |
244 | 0 | ((test.c3 - entry.c3) * (test.c3 - entry.c3)); |
245 | 0 | if (nDist < nMinDist) |
246 | 0 | { |
247 | 0 | nMinDist = nDist; |
248 | 0 | bestEntry = i; |
249 | 0 | } |
250 | 0 | } |
251 | 0 | return static_cast<int>(bestEntry); |
252 | 0 | } |
253 | | |
254 | | std::vector<GDALColorEntry> ReadColorTable(const GDALColorTable &table, |
255 | | int &transparentIdx) |
256 | 0 | { |
257 | 0 | std::vector<GDALColorEntry> entries(table.GetColorEntryCount()); |
258 | |
|
259 | 0 | transparentIdx = -1; |
260 | 0 | int i = 0; |
261 | 0 | for (auto &entry : entries) |
262 | 0 | { |
263 | 0 | table.GetColorEntryAsRGB(i, &entry); |
264 | 0 | if (transparentIdx < 0 && entry.c4 == 0) |
265 | 0 | transparentIdx = i; |
266 | 0 | ++i; |
267 | 0 | } |
268 | 0 | return entries; |
269 | 0 | } |
270 | | |
271 | | } // unnamed namespace |
272 | | |
273 | | /************************************************************************/ |
274 | | /* SQUARE() */ |
275 | | /************************************************************************/ |
276 | | |
// Return val * val, with the left operand first converted to Tsquare so that
// the product can be evaluated in a wider type than T.
template <class T, class Tsquare = T> inline Tsquare SQUARE(T val)
{
    const Tsquare widened = static_cast<Tsquare>(val);
    return widened * val;
}
281 | | |
282 | | /************************************************************************/ |
283 | | /* ComputeIntegerRMS() */ |
284 | | /************************************************************************/ |
// Return the integer r that minimizes abs(r**2 - sumSquares / weight), i.e.
// the integer-rounded value of sqrt(sumSquares / weight).
//
// With m = sumSquares / weight and r = floor(sqrt(m)), r + 1 is the better
// candidate when (r+1)**2 - m < m - r**2, which rearranges to
// 2 * r * (r + 1) + 1 < 2 * m. Twork is the integer type in which the left
// side is evaluated.
template <class T, class Twork>
inline T ComputeIntegerRMS(double sumSquares, double weight)
{
    const double dfMeanSquare = sumSquares / weight;
    const T nFloorRoot = static_cast<T>(std::sqrt(dfMeanSquare));

    const auto nTwiceMidpoint =
        static_cast<Twork>(2) * nFloorRoot * (nFloorRoot + 1) + 1;
    const bool bRoundUp =
        2 * dfMeanSquare > static_cast<double>(nTwiceMidpoint);

    return bRoundUp ? static_cast<T>(nFloorRoot + 1) : nFloorRoot;
}
301 | | |
302 | | template <class T, class Tsum> inline T ComputeIntegerRMS_4values(Tsum) |
303 | 0 | { |
304 | 0 | CPLAssert(false); |
305 | 0 | return 0; |
306 | 0 | } |
307 | | |
308 | | template <> inline GByte ComputeIntegerRMS_4values<GByte, int>(int sumSquares) |
309 | 0 | { |
310 | | // It has been verified that given the correction on rms below, using |
311 | | // sqrt((float)((sumSquares + 1)/ 4)) or sqrt((float)sumSquares * 0.25f) |
312 | | // is equivalent, so use the former as it is used twice. |
313 | 0 | const int sumSquaresPlusOneDiv4 = (sumSquares + 1) / 4; |
314 | 0 | const float sumDivWeight = static_cast<float>(sumSquaresPlusOneDiv4); |
315 | 0 | GByte rms = static_cast<GByte>(std::sqrt(sumDivWeight)); |
316 | | |
317 | | // Is rms**2 or (rms+1)**2 closest to sumSquares / weight ? |
318 | | // Naive version: |
319 | | // if( weight * (rms+1)**2 - sumSquares < sumSquares - weight * rms**2 ) |
320 | | // Optimized version for integer case and weight == 4 |
321 | 0 | if (static_cast<int>(rms) * (rms + 1) < sumSquaresPlusOneDiv4) |
322 | 0 | rms += 1; |
323 | 0 | return rms; |
324 | 0 | } |
325 | | |
326 | | template <> |
327 | | inline GUInt16 ComputeIntegerRMS_4values<GUInt16, double>(double sumSquares) |
328 | 0 | { |
329 | 0 | const double sumDivWeight = sumSquares * 0.25; |
330 | 0 | GUInt16 rms = static_cast<GUInt16>(std::sqrt(sumDivWeight)); |
331 | | |
332 | | // Is rms**2 or (rms+1)**2 closest to sumSquares / weight ? |
333 | | // Naive version: |
334 | | // if( weight * (rms+1)**2 - sumSquares < sumSquares - weight * rms**2 ) |
335 | | // Optimized version for integer case and weight == 4 |
336 | 0 | if (static_cast<GUInt32>(rms) * (rms + 1) < |
337 | 0 | static_cast<GUInt32>(sumDivWeight + 0.25)) |
338 | 0 | rms += 1; |
339 | 0 | return rms; |
340 | 0 | } |
341 | | |
342 | | #ifdef USE_SSE2 |
343 | | |
344 | | /************************************************************************/ |
345 | | /* QuadraticMeanByteSSE2OrAVX2() */ |
346 | | /************************************************************************/ |
347 | | |
348 | | #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS) |
349 | | #define sse2_packus_epi32 _mm_packus_epi32 |
350 | | #else |
351 | | inline __m128i sse2_packus_epi32(__m128i a, __m128i b) |
352 | 0 | { |
353 | 0 | const auto minus32768_32 = _mm_set1_epi32(-32768); |
354 | 0 | const auto minus32768_16 = _mm_set1_epi16(-32768); |
355 | 0 | a = _mm_add_epi32(a, minus32768_32); |
356 | 0 | b = _mm_add_epi32(b, minus32768_32); |
357 | 0 | a = _mm_packs_epi32(a, b); |
358 | 0 | a = _mm_sub_epi16(a, minus32768_16); |
359 | 0 | return a; |
360 | 0 | } |
361 | | #endif |
362 | | |
363 | | #if defined(__SSSE3__) || defined(USE_NEON_OPTIMIZATIONS) |
364 | | #define sse2_hadd_epi16 _mm_hadd_epi16 |
365 | | #else |
366 | | inline __m128i sse2_hadd_epi16(__m128i a, __m128i b) |
367 | 0 | { |
368 | | // Horizontal addition of adjacent pairs |
369 | 0 | const auto mask = _mm_set1_epi32(0xFFFF); |
370 | 0 | const auto horizLo = |
371 | 0 | _mm_add_epi32(_mm_and_si128(a, mask), _mm_srli_epi32(a, 16)); |
372 | 0 | const auto horizHi = |
373 | 0 | _mm_add_epi32(_mm_and_si128(b, mask), _mm_srli_epi32(b, 16)); |
374 | | |
375 | | // Recombine low and high parts |
376 | 0 | return _mm_packs_epi32(horizLo, horizHi); |
377 | 0 | } |
378 | | #endif |
379 | | |
380 | | #ifdef __AVX2__ |
381 | | |
382 | | #define DEST_ELTS 16 |
383 | | #define set1_epi16 _mm256_set1_epi16 |
384 | | #define set1_epi32 _mm256_set1_epi32 |
385 | | #define setzero _mm256_setzero_si256 |
386 | | #define set1_ps _mm256_set1_ps |
387 | | #define loadu_int(x) _mm256_loadu_si256(reinterpret_cast<__m256i const *>(x)) |
388 | | #define unpacklo_epi8 _mm256_unpacklo_epi8 |
389 | | #define unpackhi_epi8 _mm256_unpackhi_epi8 |
390 | | #define madd_epi16 _mm256_madd_epi16 |
391 | | #define add_epi32 _mm256_add_epi32 |
392 | | #define mul_ps _mm256_mul_ps |
393 | | #define cvtepi32_ps _mm256_cvtepi32_ps |
394 | | #define sqrt_ps _mm256_sqrt_ps |
395 | | #define cvttps_epi32 _mm256_cvttps_epi32 |
396 | | #define packs_epi32 _mm256_packs_epi32 |
397 | | #define packus_epi32 _mm256_packus_epi32 |
398 | | #define srli_epi32 _mm256_srli_epi32 |
399 | | #define mullo_epi16 _mm256_mullo_epi16 |
400 | | #define srli_epi16 _mm256_srli_epi16 |
401 | | #define cmpgt_epi16 _mm256_cmpgt_epi16 |
402 | | #define add_epi16 _mm256_add_epi16 |
403 | | #define sub_epi16 _mm256_sub_epi16 |
404 | | #define packus_epi16 _mm256_packus_epi16 |
405 | | |
406 | | /* AVX2 operates on 2 separate 128-bit lanes, so we have to do shuffling */ |
407 | | /* to get the lower 128-bit bits of what would be a true 256-bit vector register |
408 | | */ |
409 | | |
410 | | inline __m256i FIXUP_LANES(__m256i x) |
411 | | { |
412 | | return _mm256_permute4x64_epi64(x, _MM_SHUFFLE(3, 1, 2, 0)); |
413 | | } |
414 | | |
415 | | #define store_lo(x, y) \ |
416 | | _mm_storeu_si128(reinterpret_cast<__m128i *>(x), \ |
417 | | _mm256_extracti128_si256(FIXUP_LANES(y), 0)) |
418 | | #define storeu_int(x, y) \ |
419 | | _mm256_storeu_si256(reinterpret_cast<__m256i *>(x), FIXUP_LANES(y)) |
420 | | #define hadd_epi16 _mm256_hadd_epi16 |
421 | | #define zeroupper() _mm256_zeroupper() |
422 | | #else |
423 | 0 | #define DEST_ELTS 8 |
424 | 0 | #define set1_epi16 _mm_set1_epi16 |
425 | 0 | #define set1_epi32 _mm_set1_epi32 |
426 | 0 | #define setzero _mm_setzero_si128 |
427 | | #define set1_ps _mm_set1_ps |
428 | 0 | #define loadu_int(x) _mm_loadu_si128(reinterpret_cast<__m128i const *>(x)) |
429 | 0 | #define unpacklo_epi8 _mm_unpacklo_epi8 |
430 | 0 | #define unpackhi_epi8 _mm_unpackhi_epi8 |
431 | 0 | #define madd_epi16 _mm_madd_epi16 |
432 | 0 | #define add_epi32 _mm_add_epi32 |
433 | | #define mul_ps _mm_mul_ps |
434 | 0 | #define cvtepi32_ps _mm_cvtepi32_ps |
435 | 0 | #define sqrt_ps _mm_sqrt_ps |
436 | 0 | #define cvttps_epi32 _mm_cvttps_epi32 |
437 | 0 | #define packs_epi32 _mm_packs_epi32 |
438 | 0 | #define packus_epi32 sse2_packus_epi32 |
439 | 0 | #define srli_epi32 _mm_srli_epi32 |
440 | 0 | #define mullo_epi16 _mm_mullo_epi16 |
441 | 0 | #define srli_epi16 _mm_srli_epi16 |
442 | 0 | #define cmpgt_epi16 _mm_cmpgt_epi16 |
443 | 0 | #define add_epi16 _mm_add_epi16 |
444 | 0 | #define sub_epi16 _mm_sub_epi16 |
445 | 0 | #define packus_epi16 _mm_packus_epi16 |
446 | 0 | #define store_lo(x, y) _mm_storel_epi64(reinterpret_cast<__m128i *>(x), (y)) |
447 | 0 | #define storeu_int(x, y) _mm_storeu_si128(reinterpret_cast<__m128i *>(x), (y)) |
448 | 0 | #define hadd_epi16 sse2_hadd_epi16 |
449 | 0 | #define zeroupper() (void)0 |
450 | | #endif |
451 | | |
452 | | #if defined(__GNUC__) && defined(__AVX2__) |
453 | | // Disabling inlining works around a bug with gcc 9.3 (Ubuntu 20.04) in |
454 | | // -O2 -mavx2 mode in QuadraticMeanFloatSSE2(), |
455 | | // where the registry that contains minus_zero is correctly |
456 | | // loaded the first time the function is called (looking at the disassembly, |
457 | | // one sees it is loaded much earlier than the function), but gets corrupted |
458 | | // (zeroed) in following iterations. |
459 | | // It appears the bug is due to the explicit zeroupper() call at the end of |
460 | | // the function. |
461 | | // The bug is at least solved in gcc 10.2. |
462 | | // Inlining doesn't bring much here to performance. |
463 | | // This is also needed with gcc 9.3 on QuadraticMeanByteSSE2OrAVX2() in |
464 | | // -O3 -mavx2 mode |
465 | | #define NOINLINE __attribute__((noinline)) |
466 | | #else |
467 | | #define NOINLINE |
468 | | #endif |
469 | | |
470 | | template <class T> |
471 | | static int NOINLINE |
472 | | QuadraticMeanByteSSE2OrAVX2(int nDstXWidth, int nChunkXSize, |
473 | | const T *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
474 | | T *CPL_RESTRICT pDstScanline) |
475 | 0 | { |
476 | | // Optimized implementation for RMS on Byte by |
477 | | // processing by group of 8 output pixels, so as to use |
478 | | // a single _mm_sqrt_ps() call for 4 output pixels |
479 | 0 | const T *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
480 | |
|
481 | 0 | int iDstPixel = 0; |
482 | 0 | const auto one16 = set1_epi16(1); |
483 | 0 | const auto one32 = set1_epi32(1); |
484 | 0 | const auto zero = setzero(); |
485 | 0 | const auto minus32768 = set1_epi16(-32768); |
486 | |
|
487 | 0 | for (; iDstPixel < nDstXWidth - (DEST_ELTS - 1); iDstPixel += DEST_ELTS) |
488 | 0 | { |
489 | | // Load 2 * DEST_ELTS bytes from each line |
490 | 0 | auto firstLine = loadu_int(pSrcScanlineShifted); |
491 | 0 | auto secondLine = loadu_int(pSrcScanlineShifted + nChunkXSize); |
492 | | // Extend those Bytes as UInt16s |
493 | 0 | auto firstLineLo = unpacklo_epi8(firstLine, zero); |
494 | 0 | auto firstLineHi = unpackhi_epi8(firstLine, zero); |
495 | 0 | auto secondLineLo = unpacklo_epi8(secondLine, zero); |
496 | 0 | auto secondLineHi = unpackhi_epi8(secondLine, zero); |
497 | | |
498 | | // Multiplication of 16 bit values and horizontal |
499 | | // addition of 32 bit results |
500 | | // [ src[2*i+0]^2 + src[2*i+1]^2 for i in range(4) ] |
501 | 0 | firstLineLo = madd_epi16(firstLineLo, firstLineLo); |
502 | 0 | firstLineHi = madd_epi16(firstLineHi, firstLineHi); |
503 | 0 | secondLineLo = madd_epi16(secondLineLo, secondLineLo); |
504 | 0 | secondLineHi = madd_epi16(secondLineHi, secondLineHi); |
505 | | |
506 | | // Vertical addition |
507 | 0 | const auto sumSquaresLo = add_epi32(firstLineLo, secondLineLo); |
508 | 0 | const auto sumSquaresHi = add_epi32(firstLineHi, secondLineHi); |
509 | |
|
510 | 0 | const auto sumSquaresPlusOneDiv4Lo = |
511 | 0 | srli_epi32(add_epi32(sumSquaresLo, one32), 2); |
512 | 0 | const auto sumSquaresPlusOneDiv4Hi = |
513 | 0 | srli_epi32(add_epi32(sumSquaresHi, one32), 2); |
514 | | |
515 | | // Take square root and truncate/floor to int32 |
516 | 0 | const auto rmsLo = |
517 | 0 | cvttps_epi32(sqrt_ps(cvtepi32_ps(sumSquaresPlusOneDiv4Lo))); |
518 | 0 | const auto rmsHi = |
519 | 0 | cvttps_epi32(sqrt_ps(cvtepi32_ps(sumSquaresPlusOneDiv4Hi))); |
520 | | |
521 | | // Merge back low and high registers with each RMS value |
522 | | // as a 16 bit value. |
523 | 0 | auto rms = packs_epi32(rmsLo, rmsHi); |
524 | | |
525 | | // Round to upper value if it minimizes the |
526 | | // error |rms^2 - sumSquares/4| |
527 | | // if( 2 * (2 * rms * (rms + 1) + 1) < sumSquares ) |
528 | | // rms += 1; |
529 | | // which is equivalent to: |
530 | | // if( rms * (rms + 1) < (sumSquares+1) / 4 ) |
531 | | // rms += 1; |
532 | | // And both left and right parts fit on 16 (unsigned) bits |
533 | 0 | const auto sumSquaresPlusOneDiv4 = |
534 | 0 | packus_epi32(sumSquaresPlusOneDiv4Lo, sumSquaresPlusOneDiv4Hi); |
535 | | // cmpgt_epi16 operates on signed int16, but here |
536 | | // we have unsigned values, so shift them by -32768 before |
537 | 0 | auto mask = cmpgt_epi16( |
538 | 0 | add_epi16(sumSquaresPlusOneDiv4, minus32768), |
539 | 0 | add_epi16(mullo_epi16(rms, add_epi16(rms, one16)), minus32768)); |
540 | | // The value of the mask will be -1 when the correction needs to be |
541 | | // applied |
542 | 0 | rms = sub_epi16(rms, mask); |
543 | | |
544 | | // Pack each 16 bit RMS value to 8 bits |
545 | 0 | rms = packus_epi16(rms, rms /* could be anything */); |
546 | 0 | store_lo(&pDstScanline[iDstPixel], rms); |
547 | 0 | pSrcScanlineShifted += 2 * DEST_ELTS; |
548 | 0 | } |
549 | 0 | zeroupper(); |
550 | |
|
551 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
552 | 0 | return iDstPixel; |
553 | 0 | } |
554 | | |
555 | | /************************************************************************/ |
556 | | /* AverageByteSSE2OrAVX2() */ |
557 | | /************************************************************************/ |
558 | | |
559 | | template <class T> |
560 | | static int |
561 | | AverageByteSSE2OrAVX2(int nDstXWidth, int nChunkXSize, |
562 | | const T *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
563 | | T *CPL_RESTRICT pDstScanline) |
564 | 0 | { |
565 | | // Optimized implementation for average on Byte by |
566 | | // processing by group of 16 output pixels for SSE2, or 32 for AVX2 |
567 | |
|
568 | 0 | const auto zero = setzero(); |
569 | 0 | const auto two16 = set1_epi16(2); |
570 | 0 | const T *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
571 | |
|
572 | 0 | int iDstPixel = 0; |
573 | 0 | for (; iDstPixel < nDstXWidth - (2 * DEST_ELTS - 1); |
574 | 0 | iDstPixel += 2 * DEST_ELTS) |
575 | 0 | { |
576 | 0 | decltype(setzero()) average0; |
577 | 0 | { |
578 | | // Load 2 * DEST_ELTS bytes from each line |
579 | 0 | const auto firstLine = loadu_int(pSrcScanlineShifted); |
580 | 0 | const auto secondLine = |
581 | 0 | loadu_int(pSrcScanlineShifted + nChunkXSize); |
582 | | // Extend those Bytes as UInt16s |
583 | 0 | const auto firstLineLo = unpacklo_epi8(firstLine, zero); |
584 | 0 | const auto firstLineHi = unpackhi_epi8(firstLine, zero); |
585 | 0 | const auto secondLineLo = unpacklo_epi8(secondLine, zero); |
586 | 0 | const auto secondLineHi = unpackhi_epi8(secondLine, zero); |
587 | | |
588 | | // Vertical addition |
589 | 0 | const auto sumLo = add_epi16(firstLineLo, secondLineLo); |
590 | 0 | const auto sumHi = add_epi16(firstLineHi, secondLineHi); |
591 | | |
592 | | // Horizontal addition of adjacent pairs, and recombine low and high |
593 | | // parts |
594 | 0 | const auto sum = hadd_epi16(sumLo, sumHi); |
595 | | |
596 | | // average = (sum + 2) / 4 |
597 | 0 | average0 = srli_epi16(add_epi16(sum, two16), 2); |
598 | |
|
599 | 0 | pSrcScanlineShifted += 2 * DEST_ELTS; |
600 | 0 | } |
601 | |
|
602 | 0 | decltype(setzero()) average1; |
603 | 0 | { |
604 | | // Load 2 * DEST_ELTS bytes from each line |
605 | 0 | const auto firstLine = loadu_int(pSrcScanlineShifted); |
606 | 0 | const auto secondLine = |
607 | 0 | loadu_int(pSrcScanlineShifted + nChunkXSize); |
608 | | // Extend those Bytes as UInt16s |
609 | 0 | const auto firstLineLo = unpacklo_epi8(firstLine, zero); |
610 | 0 | const auto firstLineHi = unpackhi_epi8(firstLine, zero); |
611 | 0 | const auto secondLineLo = unpacklo_epi8(secondLine, zero); |
612 | 0 | const auto secondLineHi = unpackhi_epi8(secondLine, zero); |
613 | | |
614 | | // Vertical addition |
615 | 0 | const auto sumLo = add_epi16(firstLineLo, secondLineLo); |
616 | 0 | const auto sumHi = add_epi16(firstLineHi, secondLineHi); |
617 | | |
618 | | // Horizontal addition of adjacent pairs, and recombine low and high |
619 | | // parts |
620 | 0 | const auto sum = hadd_epi16(sumLo, sumHi); |
621 | | |
622 | | // average = (sum + 2) / 4 |
623 | 0 | average1 = srli_epi16(add_epi16(sum, two16), 2); |
624 | |
|
625 | 0 | pSrcScanlineShifted += 2 * DEST_ELTS; |
626 | 0 | } |
627 | | |
628 | | // Pack each 16 bit average value to 8 bits |
629 | 0 | const auto average = packus_epi16(average0, average1); |
630 | 0 | storeu_int(&pDstScanline[iDstPixel], average); |
631 | 0 | } |
632 | 0 | zeroupper(); |
633 | |
|
634 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
635 | 0 | return iDstPixel; |
636 | 0 | } |
637 | | |
638 | | /************************************************************************/ |
639 | | /* QuadraticMeanUInt16SSE2() */ |
640 | | /************************************************************************/ |
641 | | |
642 | | #ifdef __SSE3__ |
643 | | #define sse2_hadd_pd _mm_hadd_pd |
644 | | #else |
645 | | inline __m128d sse2_hadd_pd(__m128d a, __m128d b) |
646 | 0 | { |
647 | 0 | auto aLo_bLo = |
648 | 0 | _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(a), _mm_castpd_ps(b))); |
649 | 0 | auto aHi_bHi = |
650 | 0 | _mm_castps_pd(_mm_movehl_ps(_mm_castpd_ps(b), _mm_castpd_ps(a))); |
651 | 0 | return _mm_add_pd(aLo_bLo, aHi_bHi); // (aLo + aHi, bLo + bHi) |
652 | 0 | } |
653 | | #endif |
654 | | |
655 | | inline __m128d SQUARE_PD(__m128d x) |
656 | 0 | { |
657 | 0 | return _mm_mul_pd(x, x); |
658 | 0 | } |
659 | | |
660 | | #ifdef __AVX2__ |
661 | | |
662 | | inline __m256d SQUARE_PD(__m256d x) |
663 | | { |
664 | | return _mm256_mul_pd(x, x); |
665 | | } |
666 | | |
667 | | inline __m256d FIXUP_LANES(__m256d x) |
668 | | { |
669 | | return _mm256_permute4x64_pd(x, _MM_SHUFFLE(3, 1, 2, 0)); |
670 | | } |
671 | | |
672 | | inline __m256 FIXUP_LANES(__m256 x) |
673 | | { |
674 | | return _mm256_castpd_ps(FIXUP_LANES(_mm256_castps_pd(x))); |
675 | | } |
676 | | |
677 | | #endif |
678 | | |
679 | | template <class T> |
680 | | static int |
681 | | QuadraticMeanUInt16SSE2(int nDstXWidth, int nChunkXSize, |
682 | | const T *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
683 | | T *CPL_RESTRICT pDstScanline) |
684 | 0 | { |
685 | | // Optimized implementation for RMS on UInt16 by |
686 | | // processing by group of 4 output pixels. |
687 | 0 | const T *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
688 | |
|
689 | 0 | int iDstPixel = 0; |
690 | 0 | const auto zero = _mm_setzero_si128(); |
691 | |
|
692 | | #ifdef __AVX2__ |
693 | | const auto zeroDot25 = _mm256_set1_pd(0.25); |
694 | | const auto zeroDot5 = _mm256_set1_pd(0.5); |
695 | | |
696 | | // The first four 0's could be anything, as we only take the bottom |
697 | | // 128 bits. |
698 | | const auto permutation = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0); |
699 | | #else |
700 | 0 | const auto zeroDot25 = _mm_set1_pd(0.25); |
701 | 0 | const auto zeroDot5 = _mm_set1_pd(0.5); |
702 | 0 | #endif |
703 | |
|
704 | 0 | for (; iDstPixel < nDstXWidth - 3; iDstPixel += 4) |
705 | 0 | { |
706 | | // Load 8 UInt16 from each line |
707 | 0 | const auto firstLine = _mm_loadu_si128( |
708 | 0 | reinterpret_cast<__m128i const *>(pSrcScanlineShifted)); |
709 | 0 | const auto secondLine = |
710 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>( |
711 | 0 | pSrcScanlineShifted + nChunkXSize)); |
712 | | |
713 | | // Detect if all of the source values fit in 14 bits. |
714 | | // because if x < 2^14, then 4 * x^2 < 2^30 which fits in a signed int32 |
715 | | // and we can do a much faster implementation. |
716 | 0 | const auto maskTmp = |
717 | 0 | _mm_srli_epi16(_mm_or_si128(firstLine, secondLine), 14); |
718 | | #if defined(__i386__) || defined(_M_IX86) |
719 | | uint64_t nMaskFitsIn14Bits = 0; |
720 | | _mm_storel_epi64( |
721 | | reinterpret_cast<__m128i *>(&nMaskFitsIn14Bits), |
722 | | _mm_packus_epi16(maskTmp, maskTmp /* could be anything */)); |
723 | | #else |
724 | 0 | const auto nMaskFitsIn14Bits = _mm_cvtsi128_si64( |
725 | 0 | _mm_packus_epi16(maskTmp, maskTmp /* could be anything */)); |
726 | 0 | #endif |
727 | 0 | if (nMaskFitsIn14Bits == 0) |
728 | 0 | { |
729 | | // Multiplication of 16 bit values and horizontal |
730 | | // addition of 32 bit results |
731 | 0 | const auto firstLineHSumSquare = |
732 | 0 | _mm_madd_epi16(firstLine, firstLine); |
733 | 0 | const auto secondLineHSumSquare = |
734 | 0 | _mm_madd_epi16(secondLine, secondLine); |
735 | | // Vertical addition |
736 | 0 | const auto sumSquares = |
737 | 0 | _mm_add_epi32(firstLineHSumSquare, secondLineHSumSquare); |
738 | | // In theory we should take sqrt(sumSquares * 0.25f) |
739 | | // but given the rounding we do, this is equivalent to |
740 | | // sqrt((sumSquares + 1)/4). This has been verified exhaustively for |
741 | | // sumSquares <= 4 * 16383^2 |
742 | 0 | const auto one32 = _mm_set1_epi32(1); |
743 | 0 | const auto sumSquaresPlusOneDiv4 = |
744 | 0 | _mm_srli_epi32(_mm_add_epi32(sumSquares, one32), 2); |
745 | | // Take square root and truncate/floor to int32 |
746 | 0 | auto rms = _mm_cvttps_epi32( |
747 | 0 | _mm_sqrt_ps(_mm_cvtepi32_ps(sumSquaresPlusOneDiv4))); |
748 | | |
749 | | // Round to upper value if it minimizes the |
750 | | // error |rms^2 - sumSquares/4| |
751 | | // if( 2 * (2 * rms * (rms + 1) + 1) < sumSquares ) |
752 | | // rms += 1; |
753 | | // which is equivalent to: |
754 | | // if( rms * rms + rms < (sumSquares+1) / 4 ) |
755 | | // rms += 1; |
756 | 0 | auto mask = |
757 | 0 | _mm_cmpgt_epi32(sumSquaresPlusOneDiv4, |
758 | 0 | _mm_add_epi32(_mm_madd_epi16(rms, rms), rms)); |
759 | 0 | rms = _mm_sub_epi32(rms, mask); |
760 | | // Pack each 32 bit RMS value to 16 bits |
761 | 0 | rms = _mm_packs_epi32(rms, rms /* could be anything */); |
762 | 0 | _mm_storel_epi64( |
763 | 0 | reinterpret_cast<__m128i *>(&pDstScanline[iDstPixel]), rms); |
764 | 0 | pSrcScanlineShifted += 8; |
765 | 0 | continue; |
766 | 0 | } |
767 | | |
768 | | // An approach using _mm_mullo_epi16, _mm_mulhi_epu16 before extending |
769 | | // to 32 bit would result in 4 multiplications instead of 8, but |
770 | | // mullo/mulhi have a worse throughput than mul_pd. |
771 | | |
772 | | // Extend those UInt16s as UInt32s |
773 | 0 | const auto firstLineLo = _mm_unpacklo_epi16(firstLine, zero); |
774 | 0 | const auto firstLineHi = _mm_unpackhi_epi16(firstLine, zero); |
775 | 0 | const auto secondLineLo = _mm_unpacklo_epi16(secondLine, zero); |
776 | 0 | const auto secondLineHi = _mm_unpackhi_epi16(secondLine, zero); |
777 | |
|
778 | | #ifdef __AVX2__ |
779 | | // Multiplication of 32 bit values previously converted to 64 bit double |
780 | | const auto firstLineLoDbl = SQUARE_PD(_mm256_cvtepi32_pd(firstLineLo)); |
781 | | const auto firstLineHiDbl = SQUARE_PD(_mm256_cvtepi32_pd(firstLineHi)); |
782 | | const auto secondLineLoDbl = |
783 | | SQUARE_PD(_mm256_cvtepi32_pd(secondLineLo)); |
784 | | const auto secondLineHiDbl = |
785 | | SQUARE_PD(_mm256_cvtepi32_pd(secondLineHi)); |
786 | | |
787 | | // Vertical addition of squares |
788 | | const auto sumSquaresLo = |
789 | | _mm256_add_pd(firstLineLoDbl, secondLineLoDbl); |
790 | | const auto sumSquaresHi = |
791 | | _mm256_add_pd(firstLineHiDbl, secondLineHiDbl); |
792 | | |
793 | | // Horizontal addition of squares |
794 | | const auto sumSquares = |
795 | | FIXUP_LANES(_mm256_hadd_pd(sumSquaresLo, sumSquaresHi)); |
796 | | |
797 | | const auto sumDivWeight = _mm256_mul_pd(sumSquares, zeroDot25); |
798 | | |
799 | | // Take square root and truncate/floor to int32 |
800 | | auto rms = _mm256_cvttpd_epi32(_mm256_sqrt_pd(sumDivWeight)); |
801 | | const auto rmsDouble = _mm256_cvtepi32_pd(rms); |
802 | | const auto right = _mm256_sub_pd( |
803 | | sumDivWeight, _mm256_add_pd(SQUARE_PD(rmsDouble), rmsDouble)); |
804 | | |
805 | | auto mask = |
806 | | _mm256_castpd_ps(_mm256_cmp_pd(zeroDot5, right, _CMP_LT_OS)); |
807 | | // Extract 32-bit from each of the 4 64-bit masks |
808 | | // mask = FIXUP_LANES(_mm256_shuffle_ps(mask, mask, |
809 | | // _MM_SHUFFLE(2,0,2,0))); |
810 | | mask = _mm256_permutevar8x32_ps(mask, permutation); |
811 | | const auto maskI = _mm_castps_si128(_mm256_extractf128_ps(mask, 0)); |
812 | | |
813 | | // Apply the correction |
814 | | rms = _mm_sub_epi32(rms, maskI); |
815 | | |
816 | | // Pack each 32 bit RMS value to 16 bits |
817 | | rms = _mm_packus_epi32(rms, rms /* could be anything */); |
818 | | #else |
819 | | // Multiplication of 32 bit values previously converted to 64 bit double |
820 | 0 | const auto firstLineLoLo = SQUARE_PD(_mm_cvtepi32_pd(firstLineLo)); |
821 | 0 | const auto firstLineLoHi = |
822 | 0 | SQUARE_PD(_mm_cvtepi32_pd(_mm_srli_si128(firstLineLo, 8))); |
823 | 0 | const auto firstLineHiLo = SQUARE_PD(_mm_cvtepi32_pd(firstLineHi)); |
824 | 0 | const auto firstLineHiHi = |
825 | 0 | SQUARE_PD(_mm_cvtepi32_pd(_mm_srli_si128(firstLineHi, 8))); |
826 | |
|
827 | 0 | const auto secondLineLoLo = SQUARE_PD(_mm_cvtepi32_pd(secondLineLo)); |
828 | 0 | const auto secondLineLoHi = |
829 | 0 | SQUARE_PD(_mm_cvtepi32_pd(_mm_srli_si128(secondLineLo, 8))); |
830 | 0 | const auto secondLineHiLo = SQUARE_PD(_mm_cvtepi32_pd(secondLineHi)); |
831 | 0 | const auto secondLineHiHi = |
832 | 0 | SQUARE_PD(_mm_cvtepi32_pd(_mm_srli_si128(secondLineHi, 8))); |
833 | | |
834 | | // Vertical addition of squares |
835 | 0 | const auto sumSquaresLoLo = _mm_add_pd(firstLineLoLo, secondLineLoLo); |
836 | 0 | const auto sumSquaresLoHi = _mm_add_pd(firstLineLoHi, secondLineLoHi); |
837 | 0 | const auto sumSquaresHiLo = _mm_add_pd(firstLineHiLo, secondLineHiLo); |
838 | 0 | const auto sumSquaresHiHi = _mm_add_pd(firstLineHiHi, secondLineHiHi); |
839 | | |
840 | | // Horizontal addition of squares |
841 | 0 | const auto sumSquaresLo = sse2_hadd_pd(sumSquaresLoLo, sumSquaresLoHi); |
842 | 0 | const auto sumSquaresHi = sse2_hadd_pd(sumSquaresHiLo, sumSquaresHiHi); |
843 | |
|
844 | 0 | const auto sumDivWeightLo = _mm_mul_pd(sumSquaresLo, zeroDot25); |
845 | 0 | const auto sumDivWeightHi = _mm_mul_pd(sumSquaresHi, zeroDot25); |
846 | | // Take square root and truncate/floor to int32 |
847 | 0 | const auto rmsLo = _mm_cvttpd_epi32(_mm_sqrt_pd(sumDivWeightLo)); |
848 | 0 | const auto rmsHi = _mm_cvttpd_epi32(_mm_sqrt_pd(sumDivWeightHi)); |
849 | | |
850 | | // Correctly round rms to minimize | rms^2 - sumSquares / 4 | |
851 | | // if( 0.5 < sumDivWeight - (rms * rms + rms) ) |
852 | | // rms += 1; |
853 | 0 | const auto rmsLoDouble = _mm_cvtepi32_pd(rmsLo); |
854 | 0 | const auto rmsHiDouble = _mm_cvtepi32_pd(rmsHi); |
855 | 0 | const auto rightLo = _mm_sub_pd( |
856 | 0 | sumDivWeightLo, _mm_add_pd(SQUARE_PD(rmsLoDouble), rmsLoDouble)); |
857 | 0 | const auto rightHi = _mm_sub_pd( |
858 | 0 | sumDivWeightHi, _mm_add_pd(SQUARE_PD(rmsHiDouble), rmsHiDouble)); |
859 | |
|
860 | 0 | const auto maskLo = _mm_castpd_ps(_mm_cmplt_pd(zeroDot5, rightLo)); |
861 | 0 | const auto maskHi = _mm_castpd_ps(_mm_cmplt_pd(zeroDot5, rightHi)); |
862 | | // The value of the mask will be -1 when the correction needs to be |
863 | | // applied |
864 | 0 | const auto mask = _mm_castps_si128(_mm_shuffle_ps( |
865 | 0 | maskLo, maskHi, (0 << 0) | (2 << 2) | (0 << 4) | (2 << 6))); |
866 | |
|
867 | 0 | auto rms = _mm_castps_si128( |
868 | 0 | _mm_movelh_ps(_mm_castsi128_ps(rmsLo), _mm_castsi128_ps(rmsHi))); |
869 | | // Apply the correction |
870 | 0 | rms = _mm_sub_epi32(rms, mask); |
871 | | |
872 | | // Pack each 32 bit RMS value to 16 bits |
873 | 0 | rms = sse2_packus_epi32(rms, rms /* could be anything */); |
874 | 0 | #endif |
875 | |
|
876 | 0 | _mm_storel_epi64(reinterpret_cast<__m128i *>(&pDstScanline[iDstPixel]), |
877 | 0 | rms); |
878 | 0 | pSrcScanlineShifted += 8; |
879 | 0 | } |
880 | |
|
881 | 0 | zeroupper(); |
882 | |
|
883 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
884 | 0 | return iDstPixel; |
885 | 0 | } |
886 | | |
887 | | /************************************************************************/ |
888 | | /* AverageUInt16SSE2() */ |
889 | | /************************************************************************/ |
890 | | |
891 | | template <class T> |
892 | | static int AverageUInt16SSE2(int nDstXWidth, int nChunkXSize, |
893 | | const T *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
894 | | T *CPL_RESTRICT pDstScanline) |
895 | 0 | { |
896 | | // Optimized implementation for average on UInt16 by |
897 | | // processing by group of 8 output pixels. |
898 | |
|
899 | 0 | const auto mask = _mm_set1_epi32(0xFFFF); |
900 | 0 | const auto two = _mm_set1_epi32(2); |
901 | 0 | const T *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
902 | |
|
903 | 0 | int iDstPixel = 0; |
904 | 0 | for (; iDstPixel < nDstXWidth - 7; iDstPixel += 8) |
905 | 0 | { |
906 | 0 | __m128i averageLow; |
907 | | // Load 8 UInt16 from each line |
908 | 0 | { |
909 | 0 | const auto firstLine = _mm_loadu_si128( |
910 | 0 | reinterpret_cast<__m128i const *>(pSrcScanlineShifted)); |
911 | 0 | const auto secondLine = |
912 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>( |
913 | 0 | pSrcScanlineShifted + nChunkXSize)); |
914 | | |
915 | | // Horizontal addition and extension to 32 bit |
916 | 0 | const auto horizAddFirstLine = _mm_add_epi32( |
917 | 0 | _mm_and_si128(firstLine, mask), _mm_srli_epi32(firstLine, 16)); |
918 | 0 | const auto horizAddSecondLine = |
919 | 0 | _mm_add_epi32(_mm_and_si128(secondLine, mask), |
920 | 0 | _mm_srli_epi32(secondLine, 16)); |
921 | | |
922 | | // Vertical addition and average computation |
923 | | // average = (sum + 2) >> 2 |
924 | 0 | const auto sum = _mm_add_epi32( |
925 | 0 | _mm_add_epi32(horizAddFirstLine, horizAddSecondLine), two); |
926 | 0 | averageLow = _mm_srli_epi32(sum, 2); |
927 | 0 | } |
928 | | // Load 8 UInt16 from each line |
929 | 0 | __m128i averageHigh; |
930 | 0 | { |
931 | 0 | const auto firstLine = _mm_loadu_si128( |
932 | 0 | reinterpret_cast<__m128i const *>(pSrcScanlineShifted + 8)); |
933 | 0 | const auto secondLine = |
934 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>( |
935 | 0 | pSrcScanlineShifted + 8 + nChunkXSize)); |
936 | | |
937 | | // Horizontal addition and extension to 32 bit |
938 | 0 | const auto horizAddFirstLine = _mm_add_epi32( |
939 | 0 | _mm_and_si128(firstLine, mask), _mm_srli_epi32(firstLine, 16)); |
940 | 0 | const auto horizAddSecondLine = |
941 | 0 | _mm_add_epi32(_mm_and_si128(secondLine, mask), |
942 | 0 | _mm_srli_epi32(secondLine, 16)); |
943 | | |
944 | | // Vertical addition and average computation |
945 | | // average = (sum + 2) >> 2 |
946 | 0 | const auto sum = _mm_add_epi32( |
947 | 0 | _mm_add_epi32(horizAddFirstLine, horizAddSecondLine), two); |
948 | 0 | averageHigh = _mm_srli_epi32(sum, 2); |
949 | 0 | } |
950 | | |
951 | | // Pack each 32 bit average value to 16 bits |
952 | 0 | auto average = sse2_packus_epi32(averageLow, averageHigh); |
953 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(&pDstScanline[iDstPixel]), |
954 | 0 | average); |
955 | 0 | pSrcScanlineShifted += 16; |
956 | 0 | } |
957 | |
|
958 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
959 | 0 | return iDstPixel; |
960 | 0 | } |
961 | | |
962 | | /************************************************************************/ |
963 | | /* QuadraticMeanFloatSSE2() */ |
964 | | /************************************************************************/ |
965 | | |
#ifndef __SSE3__
/** Horizontal addition of adjacent pairs of floats using only SSE/SSE2
 * instructions.
 *
 * @return (a0 + a1, a2 + a3, b0 + b1, b2 + b3)
 */
inline __m128 sse2_hadd_ps(__m128 a, __m128 b)
{
    // (a0, a2, b0, b2)
    const __m128 evenLanes = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 2, 0));
    // (a1, a3, b1, b3)
    const __m128 oddLanes = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 3, 1));
    return _mm_add_ps(evenLanes, oddLanes);
}
#else
// With SSE3 available, use the native instruction
#define sse2_hadd_ps _mm_hadd_ps
#endif
976 | | |
// Width-agnostic aliases of the single-precision intrinsics used by
// QuadraticMeanFloatSSE2(). RMS_FLOAT_ELTS is the number of floats held by
// one vector.
#ifdef __AVX2__

// 256 bit vectors
#define RMS_FLOAT_ELTS 8

// Load, store, broadcast
#define loadu_ps _mm256_loadu_ps
#define storeu_ps _mm256_storeu_ps
#define set1_ps _mm256_set1_ps

// Arithmetic
#define add_ps _mm256_add_ps
#define hadd_ps _mm256_hadd_ps
#define mul_ps _mm256_mul_ps
#define div_ps _mm256_div_ps
#define sqrt_ps _mm256_sqrt_ps
#define max_ps _mm256_max_ps

// Bitwise logic and comparison
#define and_ps _mm256_and_ps
#define andnot_ps _mm256_andnot_ps
#define or_ps _mm256_or_ps
#define cmpeq_ps(x, y) _mm256_cmp_ps(x, y, _CMP_EQ_OQ)

// Rearrangement of elements
#define shuffle_ps _mm256_shuffle_ps
#define unpacklo_ps _mm256_unpacklo_ps
#define unpackhi_ps _mm256_unpackhi_ps

/** Returns the element-wise square of x. */
inline __m256 SQUARE_PS(__m256 x)
{
    return mul_ps(x, x);
}

#else

// 128 bit vectors
#define RMS_FLOAT_ELTS 4

// Load, store, broadcast
#define loadu_ps _mm_loadu_ps
#define storeu_ps _mm_storeu_ps
#define set1_ps _mm_set1_ps

// Arithmetic
#define add_ps _mm_add_ps
#define hadd_ps sse2_hadd_ps
#define mul_ps _mm_mul_ps
#define div_ps _mm_div_ps
#define sqrt_ps _mm_sqrt_ps
#define max_ps _mm_max_ps

// Bitwise logic and comparison
#define and_ps _mm_and_ps
#define andnot_ps _mm_andnot_ps
#define or_ps _mm_or_ps
#define cmpeq_ps _mm_cmpeq_ps

// Rearrangement of elements
#define shuffle_ps _mm_shuffle_ps
#define unpacklo_ps _mm_unpacklo_ps
#define unpackhi_ps _mm_unpackhi_ps

/** Returns the element-wise square of x. */
inline __m128 SQUARE_PS(__m128 x)
{
    return mul_ps(x, x);
}

/** Identity for 128 bit vectors: x is returned unchanged. */
inline __m128 FIXUP_LANES(__m128 x)
{
    return x;
}

#endif
1032 | | |
1033 | | static int NOINLINE |
1034 | | QuadraticMeanFloatSSE2(int nDstXWidth, int nChunkXSize, |
1035 | | const float *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
1036 | | float *CPL_RESTRICT pDstScanline) |
1037 | 0 | { |
1038 | | // Optimized implementation for RMS on Float32 by |
1039 | | // processing by group of RMS_FLOAT_ELTS output pixels. |
1040 | 0 | const float *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
1041 | |
|
1042 | 0 | int iDstPixel = 0; |
1043 | 0 | const auto minus_zero = set1_ps(-0.0f); |
1044 | 0 | const auto zeroDot25 = set1_ps(0.25f); |
1045 | 0 | const auto one = set1_ps(1.0f); |
1046 | 0 | const auto infv = set1_ps(std::numeric_limits<float>::infinity()); |
1047 | |
|
1048 | 0 | for (; iDstPixel < nDstXWidth - (RMS_FLOAT_ELTS - 1); |
1049 | 0 | iDstPixel += RMS_FLOAT_ELTS) |
1050 | 0 | { |
1051 | | // Load 2*RMS_FLOAT_ELTS Float32 from each line |
1052 | 0 | auto firstLineLo = loadu_ps(pSrcScanlineShifted); |
1053 | 0 | auto firstLineHi = loadu_ps(pSrcScanlineShifted + RMS_FLOAT_ELTS); |
1054 | 0 | auto secondLineLo = loadu_ps(pSrcScanlineShifted + nChunkXSize); |
1055 | 0 | auto secondLineHi = |
1056 | 0 | loadu_ps(pSrcScanlineShifted + RMS_FLOAT_ELTS + nChunkXSize); |
1057 | | |
1058 | | // Take the absolute value |
1059 | 0 | firstLineLo = andnot_ps(minus_zero, firstLineLo); |
1060 | 0 | firstLineHi = andnot_ps(minus_zero, firstLineHi); |
1061 | 0 | secondLineLo = andnot_ps(minus_zero, secondLineLo); |
1062 | 0 | secondLineHi = andnot_ps(minus_zero, secondLineHi); |
1063 | |
|
1064 | 0 | auto firstLineEven = |
1065 | 0 | shuffle_ps(firstLineLo, firstLineHi, _MM_SHUFFLE(2, 0, 2, 0)); |
1066 | 0 | auto firstLineOdd = |
1067 | 0 | shuffle_ps(firstLineLo, firstLineHi, _MM_SHUFFLE(3, 1, 3, 1)); |
1068 | 0 | auto secondLineEven = |
1069 | 0 | shuffle_ps(secondLineLo, secondLineHi, _MM_SHUFFLE(2, 0, 2, 0)); |
1070 | 0 | auto secondLineOdd = |
1071 | 0 | shuffle_ps(secondLineLo, secondLineHi, _MM_SHUFFLE(3, 1, 3, 1)); |
1072 | | |
1073 | | // Compute the maximum of each RMS_FLOAT_ELTS value to RMS-average |
1074 | 0 | const auto maxV = max_ps(max_ps(firstLineEven, firstLineOdd), |
1075 | 0 | max_ps(secondLineEven, secondLineEven)); |
1076 | | |
1077 | | // Normalize each value by the maximum of the RMS_FLOAT_ELTS ones. |
1078 | | // This step is important to avoid that the square evaluates to infinity |
1079 | | // for sufficiently big input. |
1080 | 0 | auto invMax = div_ps(one, maxV); |
1081 | | // Deal with 0 being the maximum to correct division by zero |
1082 | | // note: comparing to -0 leads to identical results as to comparing with |
1083 | | // 0 |
1084 | 0 | invMax = andnot_ps(cmpeq_ps(maxV, minus_zero), invMax); |
1085 | |
|
1086 | 0 | firstLineEven = mul_ps(firstLineEven, invMax); |
1087 | 0 | firstLineOdd = mul_ps(firstLineOdd, invMax); |
1088 | 0 | secondLineEven = mul_ps(secondLineEven, invMax); |
1089 | 0 | secondLineOdd = mul_ps(secondLineOdd, invMax); |
1090 | | |
1091 | | // Compute squares |
1092 | 0 | firstLineEven = SQUARE_PS(firstLineEven); |
1093 | 0 | firstLineOdd = SQUARE_PS(firstLineOdd); |
1094 | 0 | secondLineEven = SQUARE_PS(secondLineEven); |
1095 | 0 | secondLineOdd = SQUARE_PS(secondLineOdd); |
1096 | |
|
1097 | 0 | const auto sumSquares = add_ps(add_ps(firstLineEven, firstLineOdd), |
1098 | 0 | add_ps(secondLineEven, secondLineOdd)); |
1099 | |
|
1100 | 0 | auto rms = mul_ps(maxV, sqrt_ps(mul_ps(sumSquares, zeroDot25))); |
1101 | | |
1102 | | // Deal with infinity being the maximum |
1103 | 0 | const auto maskIsInf = cmpeq_ps(maxV, infv); |
1104 | 0 | rms = or_ps(andnot_ps(maskIsInf, rms), and_ps(maskIsInf, infv)); |
1105 | |
|
1106 | 0 | rms = FIXUP_LANES(rms); |
1107 | |
|
1108 | 0 | storeu_ps(&pDstScanline[iDstPixel], rms); |
1109 | 0 | pSrcScanlineShifted += RMS_FLOAT_ELTS * 2; |
1110 | 0 | } |
1111 | |
|
1112 | 0 | zeroupper(); |
1113 | |
|
1114 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
1115 | 0 | return iDstPixel; |
1116 | 0 | } |
1117 | | |
1118 | | /************************************************************************/ |
1119 | | /* AverageFloatSSE2() */ |
1120 | | /************************************************************************/ |
1121 | | |
1122 | | static int AverageFloatSSE2(int nDstXWidth, int nChunkXSize, |
1123 | | const float *&CPL_RESTRICT pSrcScanlineShiftedInOut, |
1124 | | float *CPL_RESTRICT pDstScanline) |
1125 | 0 | { |
1126 | | // Optimized implementation for average on Float32 by |
1127 | | // processing by group of 4 output pixels. |
1128 | 0 | const float *CPL_RESTRICT pSrcScanlineShifted = pSrcScanlineShiftedInOut; |
1129 | |
|
1130 | 0 | int iDstPixel = 0; |
1131 | 0 | const auto zeroDot25 = _mm_set1_ps(0.25f); |
1132 | |
|
1133 | 0 | for (; iDstPixel < nDstXWidth - 3; iDstPixel += 4) |
1134 | 0 | { |
1135 | | // Load 8 Float32 from each line |
1136 | 0 | const auto firstLineLo = _mm_loadu_ps(pSrcScanlineShifted); |
1137 | 0 | const auto firstLineHi = _mm_loadu_ps(pSrcScanlineShifted + 4); |
1138 | 0 | const auto secondLineLo = |
1139 | 0 | _mm_loadu_ps(pSrcScanlineShifted + nChunkXSize); |
1140 | 0 | const auto secondLineHi = |
1141 | 0 | _mm_loadu_ps(pSrcScanlineShifted + 4 + nChunkXSize); |
1142 | | |
1143 | | // Vertical addition |
1144 | 0 | const auto sumLo = _mm_add_ps(firstLineLo, secondLineLo); |
1145 | 0 | const auto sumHi = _mm_add_ps(firstLineHi, secondLineHi); |
1146 | | |
1147 | | // Horizontal addition |
1148 | 0 | const auto sum = sse2_hadd_ps(sumLo, sumHi); |
1149 | |
|
1150 | 0 | const auto average = _mm_mul_ps(sum, zeroDot25); |
1151 | |
|
1152 | 0 | _mm_storeu_ps(&pDstScanline[iDstPixel], average); |
1153 | 0 | pSrcScanlineShifted += 8; |
1154 | 0 | } |
1155 | |
|
1156 | 0 | pSrcScanlineShiftedInOut = pSrcScanlineShifted; |
1157 | 0 | return iDstPixel; |
1158 | 0 | } |
1159 | | |
1160 | | #endif |
1161 | | |
1162 | | /************************************************************************/ |
1163 | | /* GDALResampleChunk_AverageOrRMS() */ |
1164 | | /************************************************************************/ |
1165 | | |
1166 | | template <class T, class Tsum, GDALDataType eWrkDataType> |
1167 | | static CPLErr |
1168 | | GDALResampleChunk_AverageOrRMS_T(const GDALOverviewResampleArgs &args, |
1169 | | const T *pChunk, void **ppDstBuffer) |
1170 | 0 | { |
1171 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
1172 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
1173 | 0 | const double dfSrcXDelta = args.dfSrcXDelta; |
1174 | 0 | const double dfSrcYDelta = args.dfSrcYDelta; |
1175 | 0 | const GByte *pabyChunkNodataMask = args.pabyChunkNodataMask; |
1176 | 0 | const int nChunkXOff = args.nChunkXOff; |
1177 | 0 | const int nChunkYOff = args.nChunkYOff; |
1178 | 0 | const int nChunkXSize = args.nChunkXSize; |
1179 | 0 | const int nChunkYSize = args.nChunkYSize; |
1180 | 0 | const int nDstXOff = args.nDstXOff; |
1181 | 0 | const int nDstXOff2 = args.nDstXOff2; |
1182 | 0 | const int nDstYOff = args.nDstYOff; |
1183 | 0 | const int nDstYOff2 = args.nDstYOff2; |
1184 | 0 | const char *pszResampling = args.pszResampling; |
1185 | 0 | bool bHasNoData = args.bHasNoData; |
1186 | 0 | const double dfNoDataValue = args.dfNoDataValue; |
1187 | 0 | const GDALColorTable *poColorTable = args.poColorTable; |
1188 | 0 | const bool bPropagateNoData = args.bPropagateNoData; |
1189 | | |
1190 | | // AVERAGE_BIT2GRAYSCALE |
1191 | 0 | const bool bBit2Grayscale = |
1192 | 0 | CPL_TO_BOOL(STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2G")); |
1193 | 0 | const bool bQuadraticMean = CPL_TO_BOOL(EQUAL(pszResampling, "RMS")); |
1194 | 0 | if (bBit2Grayscale) |
1195 | 0 | poColorTable = nullptr; |
1196 | |
|
1197 | 0 | T tNoDataValue; |
1198 | 0 | if (!bHasNoData) |
1199 | 0 | tNoDataValue = 0; |
1200 | 0 | else |
1201 | 0 | tNoDataValue = static_cast<T>(dfNoDataValue); |
1202 | 0 | const T tReplacementVal = |
1203 | 0 | bHasNoData ? static_cast<T>(GDALGetNoDataReplacementValue( |
1204 | 0 | args.eOvrDataType, dfNoDataValue)) |
1205 | 0 | : 0; |
1206 | |
|
1207 | 0 | int nChunkRightXOff = nChunkXOff + nChunkXSize; |
1208 | 0 | int nChunkBottomYOff = nChunkYOff + nChunkYSize; |
1209 | 0 | int nDstXWidth = nDstXOff2 - nDstXOff; |
1210 | | |
1211 | | /* -------------------------------------------------------------------- */ |
1212 | | /* Allocate buffers. */ |
1213 | | /* -------------------------------------------------------------------- */ |
1214 | 0 | *ppDstBuffer = static_cast<T *>( |
1215 | 0 | VSI_MALLOC3_VERBOSE(nDstXWidth, nDstYOff2 - nDstYOff, |
1216 | 0 | GDALGetDataTypeSizeBytes(eWrkDataType))); |
1217 | 0 | if (*ppDstBuffer == nullptr) |
1218 | 0 | { |
1219 | 0 | return CE_Failure; |
1220 | 0 | } |
1221 | 0 | T *const pDstBuffer = static_cast<T *>(*ppDstBuffer); |
1222 | |
|
1223 | 0 | struct PrecomputedXValue |
1224 | 0 | { |
1225 | 0 | int nLeftXOffShifted; |
1226 | 0 | int nRightXOffShifted; |
1227 | 0 | double dfLeftWeight; |
1228 | 0 | double dfRightWeight; |
1229 | 0 | double dfTotalWeightFullLine; |
1230 | 0 | }; |
1231 | |
|
1232 | 0 | PrecomputedXValue *pasSrcX = static_cast<PrecomputedXValue *>( |
1233 | 0 | VSI_MALLOC2_VERBOSE(nDstXWidth, sizeof(PrecomputedXValue))); |
1234 | |
|
1235 | 0 | if (pasSrcX == nullptr) |
1236 | 0 | { |
1237 | 0 | return CE_Failure; |
1238 | 0 | } |
1239 | | |
1240 | 0 | int nTransparentIdx = -1; |
1241 | 0 | std::vector<GDALColorEntry> colorEntries; |
1242 | 0 | if (poColorTable) |
1243 | 0 | colorEntries = ReadColorTable(*poColorTable, nTransparentIdx); |
1244 | | |
1245 | | // Force c4 of nodata entry to 0 so that GDALFindBestEntry() identifies |
1246 | | // it as nodata value |
1247 | 0 | if (bHasNoData && dfNoDataValue >= 0.0f && |
1248 | 0 | tNoDataValue < colorEntries.size()) |
1249 | 0 | colorEntries[static_cast<int>(tNoDataValue)].c4 = 0; |
1250 | | |
1251 | | // Or if we have no explicit nodata, but a color table entry that is |
1252 | | // transparent, consider it as the nodata value |
1253 | 0 | else if (!bHasNoData && nTransparentIdx >= 0) |
1254 | 0 | { |
1255 | 0 | bHasNoData = true; |
1256 | 0 | tNoDataValue = static_cast<T>(nTransparentIdx); |
1257 | 0 | } |
1258 | | |
1259 | | /* ==================================================================== */ |
1260 | | /* Precompute inner loop constants. */ |
1261 | | /* ==================================================================== */ |
1262 | 0 | bool bSrcXSpacingIsTwo = true; |
1263 | 0 | int nLastSrcXOff2 = -1; |
1264 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
1265 | 0 | { |
1266 | 0 | double dfSrcXOff = dfSrcXDelta + iDstPixel * dfXRatioDstToSrc; |
1267 | | // Apply some epsilon to avoid numerical precision issues |
1268 | 0 | int nSrcXOff = static_cast<int>(dfSrcXOff + 1e-8); |
1269 | 0 | double dfSrcXOff2 = dfSrcXDelta + (iDstPixel + 1) * dfXRatioDstToSrc; |
1270 | 0 | int nSrcXOff2 = static_cast<int>(ceil(dfSrcXOff2 - 1e-8)); |
1271 | |
|
1272 | 0 | if (nSrcXOff < nChunkXOff) |
1273 | 0 | nSrcXOff = nChunkXOff; |
1274 | 0 | if (nSrcXOff2 == nSrcXOff) |
1275 | 0 | nSrcXOff2++; |
1276 | 0 | if (nSrcXOff2 > nChunkRightXOff) |
1277 | 0 | nSrcXOff2 = nChunkRightXOff; |
1278 | |
|
1279 | 0 | pasSrcX[iDstPixel - nDstXOff].nLeftXOffShifted = nSrcXOff - nChunkXOff; |
1280 | 0 | pasSrcX[iDstPixel - nDstXOff].nRightXOffShifted = |
1281 | 0 | nSrcXOff2 - nChunkXOff; |
1282 | 0 | pasSrcX[iDstPixel - nDstXOff].dfLeftWeight = |
1283 | 0 | (nSrcXOff2 == nSrcXOff + 1) ? 1.0 : 1 - (dfSrcXOff - nSrcXOff); |
1284 | 0 | pasSrcX[iDstPixel - nDstXOff].dfRightWeight = |
1285 | 0 | 1 - (nSrcXOff2 - dfSrcXOff2); |
1286 | 0 | pasSrcX[iDstPixel - nDstXOff].dfTotalWeightFullLine = |
1287 | 0 | pasSrcX[iDstPixel - nDstXOff].dfLeftWeight; |
1288 | 0 | if (nSrcXOff + 1 < nSrcXOff2) |
1289 | 0 | { |
1290 | 0 | pasSrcX[iDstPixel - nDstXOff].dfTotalWeightFullLine += |
1291 | 0 | nSrcXOff2 - nSrcXOff - 2; |
1292 | 0 | pasSrcX[iDstPixel - nDstXOff].dfTotalWeightFullLine += |
1293 | 0 | pasSrcX[iDstPixel - nDstXOff].dfRightWeight; |
1294 | 0 | } |
1295 | |
|
1296 | 0 | if (nSrcXOff2 - nSrcXOff != 2 || |
1297 | 0 | (nLastSrcXOff2 >= 0 && nLastSrcXOff2 != nSrcXOff)) |
1298 | 0 | { |
1299 | 0 | bSrcXSpacingIsTwo = false; |
1300 | 0 | } |
1301 | 0 | nLastSrcXOff2 = nSrcXOff2; |
1302 | 0 | } |
1303 | | |
1304 | | /* ==================================================================== */ |
1305 | | /* Loop over destination scanlines. */ |
1306 | | /* ==================================================================== */ |
1307 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
1308 | 0 | { |
1309 | 0 | double dfSrcYOff = dfSrcYDelta + iDstLine * dfYRatioDstToSrc; |
1310 | 0 | int nSrcYOff = static_cast<int>(dfSrcYOff + 1e-8); |
1311 | 0 | if (nSrcYOff < nChunkYOff) |
1312 | 0 | nSrcYOff = nChunkYOff; |
1313 | |
|
1314 | 0 | double dfSrcYOff2 = dfSrcYDelta + (iDstLine + 1) * dfYRatioDstToSrc; |
1315 | 0 | int nSrcYOff2 = static_cast<int>(ceil(dfSrcYOff2 - 1e-8)); |
1316 | 0 | if (nSrcYOff2 == nSrcYOff) |
1317 | 0 | ++nSrcYOff2; |
1318 | 0 | if (nSrcYOff2 > nChunkBottomYOff) |
1319 | 0 | nSrcYOff2 = nChunkBottomYOff; |
1320 | |
|
1321 | 0 | T *const pDstScanline = pDstBuffer + (iDstLine - nDstYOff) * nDstXWidth; |
1322 | | |
1323 | | /* -------------------------------------------------------------------- |
1324 | | */ |
1325 | | /* Loop over destination pixels */ |
1326 | | /* -------------------------------------------------------------------- |
1327 | | */ |
1328 | 0 | if (poColorTable == nullptr) |
1329 | 0 | { |
1330 | 0 | if (bSrcXSpacingIsTwo && nSrcYOff2 == nSrcYOff + 2 && |
1331 | 0 | pabyChunkNodataMask == nullptr) |
1332 | 0 | { |
1333 | | if constexpr (eWrkDataType == GDT_Byte || |
1334 | | eWrkDataType == GDT_UInt16) |
1335 | 0 | { |
1336 | | // Optimized case : no nodata, overview by a factor of 2 and |
1337 | | // regular x and y src spacing. |
1338 | 0 | const T *pSrcScanlineShifted = |
1339 | 0 | pChunk + pasSrcX[0].nLeftXOffShifted + |
1340 | 0 | static_cast<GPtrDiff_t>(nSrcYOff - nChunkYOff) * |
1341 | 0 | nChunkXSize; |
1342 | 0 | int iDstPixel = 0; |
1343 | 0 | #ifdef USE_SSE2 |
1344 | | if constexpr (eWrkDataType == GDT_Byte) |
1345 | 0 | { |
1346 | 0 | if (bQuadraticMean) |
1347 | 0 | { |
1348 | 0 | iDstPixel = QuadraticMeanByteSSE2OrAVX2( |
1349 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1350 | 0 | pDstScanline); |
1351 | 0 | } |
1352 | 0 | else |
1353 | 0 | { |
1354 | 0 | iDstPixel = AverageByteSSE2OrAVX2( |
1355 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1356 | 0 | pDstScanline); |
1357 | 0 | } |
1358 | | } |
1359 | | else |
1360 | 0 | { |
1361 | 0 | static_assert(eWrkDataType == GDT_UInt16); |
1362 | 0 | if (bQuadraticMean) |
1363 | 0 | { |
1364 | 0 | iDstPixel = QuadraticMeanUInt16SSE2( |
1365 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1366 | 0 | pDstScanline); |
1367 | 0 | } |
1368 | 0 | else |
1369 | 0 | { |
1370 | 0 | iDstPixel = AverageUInt16SSE2( |
1371 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1372 | 0 | pDstScanline); |
1373 | 0 | } |
1374 | 0 | } |
1375 | 0 | #endif |
1376 | 0 | for (; iDstPixel < nDstXWidth; ++iDstPixel) |
1377 | 0 | { |
1378 | 0 | Tsum nTotal = 0; |
1379 | 0 | T nVal; |
1380 | 0 | if (bQuadraticMean) |
1381 | 0 | nTotal = |
1382 | 0 | SQUARE<Tsum>(pSrcScanlineShifted[0]) + |
1383 | 0 | SQUARE<Tsum>(pSrcScanlineShifted[1]) + |
1384 | 0 | SQUARE<Tsum>(pSrcScanlineShifted[nChunkXSize]) + |
1385 | 0 | SQUARE<Tsum>( |
1386 | 0 | pSrcScanlineShifted[1 + nChunkXSize]); |
1387 | 0 | else |
1388 | 0 | nTotal = pSrcScanlineShifted[0] + |
1389 | 0 | pSrcScanlineShifted[1] + |
1390 | 0 | pSrcScanlineShifted[nChunkXSize] + |
1391 | 0 | pSrcScanlineShifted[1 + nChunkXSize]; |
1392 | |
|
1393 | 0 | constexpr int nTotalWeight = 4; |
1394 | 0 | if (bQuadraticMean) |
1395 | 0 | nVal = ComputeIntegerRMS_4values<T>(nTotal); |
1396 | 0 | else |
1397 | 0 | nVal = static_cast<T>((nTotal + nTotalWeight / 2) / |
1398 | 0 | nTotalWeight); |
1399 | | |
1400 | | // No need to compare nVal against tNoDataValue as we |
1401 | | // are in a case where pabyChunkNodataMask == nullptr |
1402 | | // implies the absence of nodata value. |
1403 | 0 | pDstScanline[iDstPixel] = nVal; |
1404 | 0 | pSrcScanlineShifted += 2; |
1405 | 0 | } |
1406 | | } |
1407 | | else |
1408 | 0 | { |
1409 | 0 | static_assert(eWrkDataType == GDT_Float32 || |
1410 | 0 | eWrkDataType == GDT_Float64); |
1411 | 0 | const T *pSrcScanlineShifted = |
1412 | 0 | pChunk + pasSrcX[0].nLeftXOffShifted + |
1413 | 0 | static_cast<GPtrDiff_t>(nSrcYOff - nChunkYOff) * |
1414 | 0 | nChunkXSize; |
1415 | 0 | int iDstPixel = 0; |
1416 | 0 | #ifdef USE_SSE2 |
1417 | | if constexpr (eWrkDataType == GDT_Float32) |
1418 | 0 | { |
1419 | 0 | static_assert(std::is_same_v<T, float>); |
1420 | 0 | if (bQuadraticMean) |
1421 | 0 | { |
1422 | 0 | iDstPixel = QuadraticMeanFloatSSE2( |
1423 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1424 | 0 | pDstScanline); |
1425 | 0 | } |
1426 | 0 | else |
1427 | 0 | { |
1428 | 0 | iDstPixel = AverageFloatSSE2( |
1429 | 0 | nDstXWidth, nChunkXSize, pSrcScanlineShifted, |
1430 | 0 | pDstScanline); |
1431 | 0 | } |
1432 | 0 | } |
1433 | 0 | #endif |
1434 | |
|
1435 | 0 | for (; iDstPixel < nDstXWidth; ++iDstPixel) |
1436 | 0 | { |
1437 | 0 | T nVal; |
1438 | 0 | if (bQuadraticMean) |
1439 | 0 | { |
1440 | | // Cast to double to avoid overflows |
1441 | | // (using std::hypot() is much slower) |
1442 | 0 | nVal = static_cast<T>(std::sqrt( |
1443 | 0 | 0.25 * |
1444 | 0 | (SQUARE<double>(pSrcScanlineShifted[0]) + |
1445 | 0 | SQUARE<double>(pSrcScanlineShifted[1]) + |
1446 | 0 | SQUARE<double>( |
1447 | 0 | pSrcScanlineShifted[nChunkXSize]) + |
1448 | 0 | SQUARE<double>( |
1449 | 0 | pSrcScanlineShifted[1 + nChunkXSize])))); |
1450 | 0 | } |
1451 | 0 | else |
1452 | 0 | { |
1453 | 0 | nVal = static_cast<T>( |
1454 | 0 | 0.25f * (pSrcScanlineShifted[0] + |
1455 | 0 | pSrcScanlineShifted[1] + |
1456 | 0 | pSrcScanlineShifted[nChunkXSize] + |
1457 | 0 | pSrcScanlineShifted[1 + nChunkXSize])); |
1458 | 0 | } |
1459 | | |
1460 | | // No need to compare nVal against tNoDataValue as we |
1461 | | // are in a case where pabyChunkNodataMask == nullptr |
1462 | | // implies the absence of nodata value. |
1463 | 0 | pDstScanline[iDstPixel] = nVal; |
1464 | 0 | pSrcScanlineShifted += 2; |
1465 | 0 | } |
1466 | 0 | } |
1467 | 0 | } |
1468 | 0 | else |
1469 | 0 | { |
1470 | 0 | const double dfBottomWeight = |
1471 | 0 | (nSrcYOff + 1 == nSrcYOff2) ? 1.0 |
1472 | 0 | : 1.0 - (dfSrcYOff - nSrcYOff); |
1473 | 0 | const double dfTopWeight = 1.0 - (nSrcYOff2 - dfSrcYOff2); |
1474 | 0 | nSrcYOff -= nChunkYOff; |
1475 | 0 | nSrcYOff2 -= nChunkYOff; |
1476 | |
|
1477 | 0 | double dfTotalWeightFullColumn = dfBottomWeight; |
1478 | 0 | if (nSrcYOff + 1 < nSrcYOff2) |
1479 | 0 | { |
1480 | 0 | dfTotalWeightFullColumn += nSrcYOff2 - nSrcYOff - 2; |
1481 | 0 | dfTotalWeightFullColumn += dfTopWeight; |
1482 | 0 | } |
1483 | |
|
1484 | 0 | for (int iDstPixel = 0; iDstPixel < nDstXWidth; ++iDstPixel) |
1485 | 0 | { |
1486 | 0 | const int nSrcXOff = pasSrcX[iDstPixel].nLeftXOffShifted; |
1487 | 0 | const int nSrcXOff2 = pasSrcX[iDstPixel].nRightXOffShifted; |
1488 | |
|
1489 | 0 | double dfTotal = 0; |
1490 | 0 | double dfTotalWeight = 0; |
1491 | 0 | if (pabyChunkNodataMask == nullptr) |
1492 | 0 | { |
1493 | 0 | auto pChunkShifted = |
1494 | 0 | pChunk + |
1495 | 0 | static_cast<GPtrDiff_t>(nSrcYOff) * nChunkXSize; |
1496 | 0 | int nCounterY = nSrcYOff2 - nSrcYOff - 1; |
1497 | 0 | double dfWeightY = dfBottomWeight; |
1498 | 0 | while (true) |
1499 | 0 | { |
1500 | 0 | double dfTotalLine; |
1501 | 0 | if (bQuadraticMean) |
1502 | 0 | { |
1503 | | // Left pixel |
1504 | 0 | { |
1505 | 0 | const T val = pChunkShifted[nSrcXOff]; |
1506 | 0 | dfTotalLine = |
1507 | 0 | SQUARE<double>(val) * |
1508 | 0 | pasSrcX[iDstPixel].dfLeftWeight; |
1509 | 0 | } |
1510 | |
|
1511 | 0 | if (nSrcXOff + 1 < nSrcXOff2) |
1512 | 0 | { |
1513 | | // Middle pixels |
1514 | 0 | for (int iX = nSrcXOff + 1; |
1515 | 0 | iX + 1 < nSrcXOff2; ++iX) |
1516 | 0 | { |
1517 | 0 | const T val = pChunkShifted[iX]; |
1518 | 0 | dfTotalLine += SQUARE<double>(val); |
1519 | 0 | } |
1520 | | |
1521 | | // Right pixel |
1522 | 0 | { |
1523 | 0 | const T val = |
1524 | 0 | pChunkShifted[nSrcXOff2 - 1]; |
1525 | 0 | dfTotalLine += |
1526 | 0 | SQUARE<double>(val) * |
1527 | 0 | pasSrcX[iDstPixel].dfRightWeight; |
1528 | 0 | } |
1529 | 0 | } |
1530 | 0 | } |
1531 | 0 | else |
1532 | 0 | { |
1533 | | // Left pixel |
1534 | 0 | { |
1535 | 0 | const T val = pChunkShifted[nSrcXOff]; |
1536 | 0 | dfTotalLine = |
1537 | 0 | val * pasSrcX[iDstPixel].dfLeftWeight; |
1538 | 0 | } |
1539 | |
|
1540 | 0 | if (nSrcXOff + 1 < nSrcXOff2) |
1541 | 0 | { |
1542 | | // Middle pixels |
1543 | 0 | for (int iX = nSrcXOff + 1; |
1544 | 0 | iX + 1 < nSrcXOff2; ++iX) |
1545 | 0 | { |
1546 | 0 | const T val = pChunkShifted[iX]; |
1547 | 0 | dfTotalLine += val; |
1548 | 0 | } |
1549 | | |
1550 | | // Right pixel |
1551 | 0 | { |
1552 | 0 | const T val = |
1553 | 0 | pChunkShifted[nSrcXOff2 - 1]; |
1554 | 0 | dfTotalLine += |
1555 | 0 | val * |
1556 | 0 | pasSrcX[iDstPixel].dfRightWeight; |
1557 | 0 | } |
1558 | 0 | } |
1559 | 0 | } |
1560 | |
|
1561 | 0 | dfTotal += dfTotalLine * dfWeightY; |
1562 | 0 | --nCounterY; |
1563 | 0 | if (nCounterY < 0) |
1564 | 0 | break; |
1565 | 0 | pChunkShifted += nChunkXSize; |
1566 | 0 | dfWeightY = (nCounterY == 0) ? dfTopWeight : 1.0; |
1567 | 0 | } |
1568 | |
|
1569 | 0 | dfTotalWeight = |
1570 | 0 | pasSrcX[iDstPixel].dfTotalWeightFullLine * |
1571 | 0 | dfTotalWeightFullColumn; |
1572 | 0 | } |
1573 | 0 | else |
1574 | 0 | { |
1575 | 0 | GPtrDiff_t nCount = 0; |
1576 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
1577 | 0 | { |
1578 | 0 | const auto pChunkShifted = |
1579 | 0 | pChunk + |
1580 | 0 | static_cast<GPtrDiff_t>(iY) * nChunkXSize; |
1581 | |
|
1582 | 0 | double dfTotalLine = 0; |
1583 | 0 | double dfTotalWeightLine = 0; |
1584 | | // Left pixel |
1585 | 0 | { |
1586 | 0 | const int iX = nSrcXOff; |
1587 | 0 | const T val = pChunkShifted[iX]; |
1588 | 0 | if (pabyChunkNodataMask[iX + iY * nChunkXSize]) |
1589 | 0 | { |
1590 | 0 | nCount++; |
1591 | 0 | const double dfWeightX = |
1592 | 0 | pasSrcX[iDstPixel].dfLeftWeight; |
1593 | 0 | dfTotalWeightLine = dfWeightX; |
1594 | 0 | if (bQuadraticMean) |
1595 | 0 | dfTotalLine = |
1596 | 0 | SQUARE<double>(val) * dfWeightX; |
1597 | 0 | else |
1598 | 0 | dfTotalLine = val * dfWeightX; |
1599 | 0 | } |
1600 | 0 | } |
1601 | |
|
1602 | 0 | if (nSrcXOff + 1 < nSrcXOff2) |
1603 | 0 | { |
1604 | | // Middle pixels |
1605 | 0 | for (int iX = nSrcXOff + 1; iX + 1 < nSrcXOff2; |
1606 | 0 | ++iX) |
1607 | 0 | { |
1608 | 0 | const T val = pChunkShifted[iX]; |
1609 | 0 | if (pabyChunkNodataMask[iX + |
1610 | 0 | iY * nChunkXSize]) |
1611 | 0 | { |
1612 | 0 | nCount++; |
1613 | 0 | dfTotalWeightLine += 1; |
1614 | 0 | if (bQuadraticMean) |
1615 | 0 | dfTotalLine += SQUARE<double>(val); |
1616 | 0 | else |
1617 | 0 | dfTotalLine += val; |
1618 | 0 | } |
1619 | 0 | } |
1620 | | |
1621 | | // Right pixel |
1622 | 0 | { |
1623 | 0 | const int iX = nSrcXOff2 - 1; |
1624 | 0 | const T val = pChunkShifted[iX]; |
1625 | 0 | if (pabyChunkNodataMask[iX + |
1626 | 0 | iY * nChunkXSize]) |
1627 | 0 | { |
1628 | 0 | nCount++; |
1629 | 0 | const double dfWeightX = |
1630 | 0 | pasSrcX[iDstPixel].dfRightWeight; |
1631 | 0 | dfTotalWeightLine += dfWeightX; |
1632 | 0 | if (bQuadraticMean) |
1633 | 0 | dfTotalLine += |
1634 | 0 | SQUARE<double>(val) * dfWeightX; |
1635 | 0 | else |
1636 | 0 | dfTotalLine += val * dfWeightX; |
1637 | 0 | } |
1638 | 0 | } |
1639 | 0 | } |
1640 | |
|
1641 | 0 | const double dfWeightY = |
1642 | 0 | (iY == nSrcYOff) ? dfBottomWeight |
1643 | 0 | : (iY + 1 == nSrcYOff2) ? dfTopWeight |
1644 | 0 | : 1.0; |
1645 | 0 | dfTotal += dfTotalLine * dfWeightY; |
1646 | 0 | dfTotalWeight += dfTotalWeightLine * dfWeightY; |
1647 | 0 | } |
1648 | |
|
1649 | 0 | if (nCount == 0 || |
1650 | 0 | (bPropagateNoData && |
1651 | 0 | nCount < |
1652 | 0 | static_cast<GPtrDiff_t>(nSrcYOff2 - nSrcYOff) * |
1653 | 0 | (nSrcXOff2 - nSrcXOff))) |
1654 | 0 | { |
1655 | 0 | pDstScanline[iDstPixel] = tNoDataValue; |
1656 | 0 | continue; |
1657 | 0 | } |
1658 | 0 | } |
1659 | | if constexpr (eWrkDataType == GDT_Byte) |
1660 | 0 | { |
1661 | 0 | T nVal; |
1662 | 0 | if (bQuadraticMean) |
1663 | 0 | nVal = ComputeIntegerRMS<T, int>(dfTotal, |
1664 | 0 | dfTotalWeight); |
1665 | 0 | else |
1666 | 0 | nVal = |
1667 | 0 | static_cast<T>(dfTotal / dfTotalWeight + 0.5); |
1668 | 0 | if (bHasNoData && nVal == tNoDataValue) |
1669 | 0 | nVal = tReplacementVal; |
1670 | 0 | pDstScanline[iDstPixel] = nVal; |
1671 | | } |
1672 | | else if constexpr (eWrkDataType == GDT_UInt16) |
1673 | 0 | { |
1674 | 0 | T nVal; |
1675 | 0 | if (bQuadraticMean) |
1676 | 0 | nVal = ComputeIntegerRMS<T, uint64_t>( |
1677 | 0 | dfTotal, dfTotalWeight); |
1678 | 0 | else |
1679 | 0 | nVal = |
1680 | 0 | static_cast<T>(dfTotal / dfTotalWeight + 0.5); |
1681 | 0 | if (bHasNoData && nVal == tNoDataValue) |
1682 | 0 | nVal = tReplacementVal; |
1683 | 0 | pDstScanline[iDstPixel] = nVal; |
1684 | | } |
1685 | | else |
1686 | 0 | { |
1687 | 0 | T nVal; |
1688 | 0 | if (bQuadraticMean) |
1689 | 0 | nVal = |
1690 | 0 | static_cast<T>(sqrt(dfTotal / dfTotalWeight)); |
1691 | 0 | else |
1692 | 0 | nVal = static_cast<T>(dfTotal / dfTotalWeight); |
1693 | 0 | if (bHasNoData && nVal == tNoDataValue) |
1694 | 0 | nVal = tReplacementVal; |
1695 | 0 | pDstScanline[iDstPixel] = nVal; |
1696 | 0 | } |
1697 | 0 | } |
1698 | 0 | } |
1699 | 0 | } |
1700 | 0 | else |
1701 | 0 | { |
1702 | 0 | nSrcYOff -= nChunkYOff; |
1703 | 0 | nSrcYOff2 -= nChunkYOff; |
1704 | |
|
1705 | 0 | for (int iDstPixel = 0; iDstPixel < nDstXWidth; ++iDstPixel) |
1706 | 0 | { |
1707 | 0 | const int nSrcXOff = pasSrcX[iDstPixel].nLeftXOffShifted; |
1708 | 0 | const int nSrcXOff2 = pasSrcX[iDstPixel].nRightXOffShifted; |
1709 | |
|
1710 | 0 | GPtrDiff_t nTotalR = 0; |
1711 | 0 | GPtrDiff_t nTotalG = 0; |
1712 | 0 | GPtrDiff_t nTotalB = 0; |
1713 | 0 | GPtrDiff_t nCount = 0; |
1714 | |
|
1715 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
1716 | 0 | { |
1717 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
1718 | 0 | { |
1719 | 0 | const T val = pChunk[iX + static_cast<GPtrDiff_t>(iY) * |
1720 | 0 | nChunkXSize]; |
1721 | | // cppcheck-suppress unsignedLessThanZero |
1722 | 0 | if (val < 0 || val >= colorEntries.size()) |
1723 | 0 | continue; |
1724 | 0 | size_t idx = static_cast<size_t>(val); |
1725 | 0 | const auto &entry = colorEntries[idx]; |
1726 | 0 | if (entry.c4) |
1727 | 0 | { |
1728 | 0 | if (bQuadraticMean) |
1729 | 0 | { |
1730 | 0 | nTotalR += SQUARE<int>(entry.c1); |
1731 | 0 | nTotalG += SQUARE<int>(entry.c2); |
1732 | 0 | nTotalB += SQUARE<int>(entry.c3); |
1733 | 0 | ++nCount; |
1734 | 0 | } |
1735 | 0 | else |
1736 | 0 | { |
1737 | 0 | nTotalR += entry.c1; |
1738 | 0 | nTotalG += entry.c2; |
1739 | 0 | nTotalB += entry.c3; |
1740 | 0 | ++nCount; |
1741 | 0 | } |
1742 | 0 | } |
1743 | 0 | } |
1744 | 0 | } |
1745 | |
|
1746 | 0 | if (nCount == 0 || |
1747 | 0 | (bPropagateNoData && |
1748 | 0 | nCount < static_cast<GPtrDiff_t>(nSrcYOff2 - nSrcYOff) * |
1749 | 0 | (nSrcXOff2 - nSrcXOff))) |
1750 | 0 | { |
1751 | 0 | pDstScanline[iDstPixel] = tNoDataValue; |
1752 | 0 | } |
1753 | 0 | else |
1754 | 0 | { |
1755 | 0 | GDALColorEntry color; |
1756 | 0 | if (bQuadraticMean) |
1757 | 0 | { |
1758 | 0 | color.c1 = |
1759 | 0 | static_cast<short>(sqrt(nTotalR / nCount) + 0.5); |
1760 | 0 | color.c2 = |
1761 | 0 | static_cast<short>(sqrt(nTotalG / nCount) + 0.5); |
1762 | 0 | color.c3 = |
1763 | 0 | static_cast<short>(sqrt(nTotalB / nCount) + 0.5); |
1764 | 0 | } |
1765 | 0 | else |
1766 | 0 | { |
1767 | 0 | color.c1 = |
1768 | 0 | static_cast<short>((nTotalR + nCount / 2) / nCount); |
1769 | 0 | color.c2 = |
1770 | 0 | static_cast<short>((nTotalG + nCount / 2) / nCount); |
1771 | 0 | color.c3 = |
1772 | 0 | static_cast<short>((nTotalB + nCount / 2) / nCount); |
1773 | 0 | } |
1774 | 0 | pDstScanline[iDstPixel] = |
1775 | 0 | static_cast<T>(BestColorEntry(colorEntries, color)); |
1776 | 0 | } |
1777 | 0 | } |
1778 | 0 | } |
1779 | 0 | } |
1780 | |
|
1781 | 0 | CPLFree(pasSrcX); |
1782 | |
|
1783 | 0 | return CE_None; |
1784 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<unsigned char, int, (GDALDataType)1>(GDALOverviewResampleArgs const&, unsigned char const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<unsigned short, double, (GDALDataType)2>(GDALOverviewResampleArgs const&, unsigned short const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<unsigned short, unsigned int, (GDALDataType)2>(GDALOverviewResampleArgs const&, unsigned short const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<float, double, (GDALDataType)6>(GDALOverviewResampleArgs const&, float const*, void**) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_AverageOrRMS_T<double, double, (GDALDataType)7>(GDALOverviewResampleArgs const&, double const*, void**) |
1785 | | |
1786 | | static CPLErr |
1787 | | GDALResampleChunk_AverageOrRMS(const GDALOverviewResampleArgs &args, |
1788 | | const void *pChunk, void **ppDstBuffer, |
1789 | | GDALDataType *peDstBufferDataType) |
1790 | 0 | { |
1791 | 0 | *peDstBufferDataType = args.eWrkDataType; |
1792 | 0 | switch (args.eWrkDataType) |
1793 | 0 | { |
1794 | 0 | case GDT_Byte: |
1795 | 0 | { |
1796 | 0 | return GDALResampleChunk_AverageOrRMS_T<GByte, int, GDT_Byte>( |
1797 | 0 | args, static_cast<const GByte *>(pChunk), ppDstBuffer); |
1798 | 0 | } |
1799 | | |
1800 | 0 | case GDT_UInt16: |
1801 | 0 | { |
1802 | 0 | if (EQUAL(args.pszResampling, "RMS")) |
1803 | 0 | { |
1804 | | // Use double as accumulation type, because UInt32 could overflow |
1805 | 0 | return GDALResampleChunk_AverageOrRMS_T<GUInt16, double, |
1806 | 0 | GDT_UInt16>( |
1807 | 0 | args, static_cast<const GUInt16 *>(pChunk), ppDstBuffer); |
1808 | 0 | } |
1809 | 0 | else |
1810 | 0 | { |
1811 | 0 | return GDALResampleChunk_AverageOrRMS_T<GUInt16, GUInt32, |
1812 | 0 | GDT_UInt16>( |
1813 | 0 | args, static_cast<const GUInt16 *>(pChunk), ppDstBuffer); |
1814 | 0 | } |
1815 | 0 | } |
1816 | | |
1817 | 0 | case GDT_Float32: |
1818 | 0 | { |
1819 | 0 | return GDALResampleChunk_AverageOrRMS_T<float, double, GDT_Float32>( |
1820 | 0 | args, static_cast<const float *>(pChunk), ppDstBuffer); |
1821 | 0 | } |
1822 | | |
1823 | 0 | case GDT_Float64: |
1824 | 0 | { |
1825 | 0 | return GDALResampleChunk_AverageOrRMS_T<double, double, |
1826 | 0 | GDT_Float64>( |
1827 | 0 | args, static_cast<const double *>(pChunk), ppDstBuffer); |
1828 | 0 | } |
1829 | | |
1830 | 0 | default: |
1831 | 0 | break; |
1832 | 0 | } |
1833 | | |
1834 | 0 | CPLAssert(false); |
1835 | 0 | return CE_Failure; |
1836 | 0 | } |
1837 | | |
1838 | | /************************************************************************/ |
1839 | | /* GDALResampleChunk_Gauss() */ |
1840 | | /************************************************************************/ |
1841 | | |
1842 | | static CPLErr GDALResampleChunk_Gauss(const GDALOverviewResampleArgs &args, |
1843 | | const void *pChunk, void **ppDstBuffer, |
1844 | | GDALDataType *peDstBufferDataType) |
1845 | | |
1846 | 0 | { |
1847 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
1848 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
1849 | 0 | const GByte *pabyChunkNodataMask = args.pabyChunkNodataMask; |
1850 | 0 | const int nChunkXOff = args.nChunkXOff; |
1851 | 0 | const int nChunkXSize = args.nChunkXSize; |
1852 | 0 | const int nChunkYOff = args.nChunkYOff; |
1853 | 0 | const int nChunkYSize = args.nChunkYSize; |
1854 | 0 | const int nDstXOff = args.nDstXOff; |
1855 | 0 | const int nDstXOff2 = args.nDstXOff2; |
1856 | 0 | const int nDstYOff = args.nDstYOff; |
1857 | 0 | const int nDstYOff2 = args.nDstYOff2; |
1858 | 0 | const bool bHasNoData = args.bHasNoData; |
1859 | 0 | double dfNoDataValue = args.dfNoDataValue; |
1860 | 0 | const GDALColorTable *poColorTable = args.poColorTable; |
1861 | |
|
1862 | 0 | const double *const padfChunk = static_cast<const double *>(pChunk); |
1863 | |
|
1864 | 0 | *ppDstBuffer = |
1865 | 0 | VSI_MALLOC3_VERBOSE(nDstXOff2 - nDstXOff, nDstYOff2 - nDstYOff, |
1866 | 0 | GDALGetDataTypeSizeBytes(GDT_Float64)); |
1867 | 0 | if (*ppDstBuffer == nullptr) |
1868 | 0 | { |
1869 | 0 | return CE_Failure; |
1870 | 0 | } |
1871 | 0 | *peDstBufferDataType = GDT_Float64; |
1872 | 0 | double *const padfDstBuffer = static_cast<double *>(*ppDstBuffer); |
1873 | | |
1874 | | /* -------------------------------------------------------------------- */ |
1875 | | /* Create the filter kernel and allocate scanline buffer. */ |
1876 | | /* -------------------------------------------------------------------- */ |
1877 | 0 | int nGaussMatrixDim = 3; |
1878 | 0 | const int *panGaussMatrix; |
1879 | 0 | constexpr int anGaussMatrix3x3[] = {1, 2, 1, 2, 4, 2, 1, 2, 1}; |
1880 | 0 | constexpr int anGaussMatrix5x5[] = {1, 4, 6, 4, 1, 4, 16, 24, 16, |
1881 | 0 | 4, 6, 24, 36, 24, 6, 4, 16, 24, |
1882 | 0 | 16, 4, 1, 4, 6, 4, 1}; |
1883 | 0 | constexpr int anGaussMatrix7x7[] = { |
1884 | 0 | 1, 6, 15, 20, 15, 6, 1, 6, 36, 90, 120, 90, 36, |
1885 | 0 | 6, 15, 90, 225, 300, 225, 90, 15, 20, 120, 300, 400, 300, |
1886 | 0 | 120, 20, 15, 90, 225, 300, 225, 90, 15, 6, 36, 90, 120, |
1887 | 0 | 90, 36, 6, 1, 6, 15, 20, 15, 6, 1}; |
1888 | |
|
1889 | 0 | const int nOXSize = args.nOvrXSize; |
1890 | 0 | const int nOYSize = args.nOvrYSize; |
1891 | 0 | const int nResYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc); |
1892 | | |
1893 | | // matrix for gauss filter |
1894 | 0 | if (nResYFactor <= 2) |
1895 | 0 | { |
1896 | 0 | panGaussMatrix = anGaussMatrix3x3; |
1897 | 0 | nGaussMatrixDim = 3; |
1898 | 0 | } |
1899 | 0 | else if (nResYFactor <= 4) |
1900 | 0 | { |
1901 | 0 | panGaussMatrix = anGaussMatrix5x5; |
1902 | 0 | nGaussMatrixDim = 5; |
1903 | 0 | } |
1904 | 0 | else |
1905 | 0 | { |
1906 | 0 | panGaussMatrix = anGaussMatrix7x7; |
1907 | 0 | nGaussMatrixDim = 7; |
1908 | 0 | } |
1909 | |
|
1910 | | #ifdef DEBUG_OUT_OF_BOUND_ACCESS |
1911 | | int *panGaussMatrixDup = static_cast<int *>( |
1912 | | CPLMalloc(sizeof(int) * nGaussMatrixDim * nGaussMatrixDim)); |
1913 | | memcpy(panGaussMatrixDup, panGaussMatrix, |
1914 | | sizeof(int) * nGaussMatrixDim * nGaussMatrixDim); |
1915 | | panGaussMatrix = panGaussMatrixDup; |
1916 | | #endif |
1917 | |
|
1918 | 0 | if (!bHasNoData) |
1919 | 0 | dfNoDataValue = 0.0; |
1920 | |
|
1921 | 0 | std::vector<GDALColorEntry> colorEntries; |
1922 | 0 | int nTransparentIdx = -1; |
1923 | 0 | if (poColorTable) |
1924 | 0 | colorEntries = ReadColorTable(*poColorTable, nTransparentIdx); |
1925 | | |
1926 | | // Force c4 of nodata entry to 0 so that GDALFindBestEntry() identifies |
1927 | | // it as nodata value. |
1928 | 0 | if (bHasNoData && dfNoDataValue >= 0.0f && |
1929 | 0 | dfNoDataValue < colorEntries.size()) |
1930 | 0 | colorEntries[static_cast<int>(dfNoDataValue)].c4 = 0; |
1931 | | |
1932 | | // Or if we have no explicit nodata, but a color table entry that is |
1933 | | // transparent, consider it as the nodata value. |
1934 | 0 | else if (!bHasNoData && nTransparentIdx >= 0) |
1935 | 0 | { |
1936 | 0 | dfNoDataValue = nTransparentIdx; |
1937 | 0 | } |
1938 | |
|
1939 | 0 | const int nChunkRightXOff = nChunkXOff + nChunkXSize; |
1940 | 0 | const int nChunkBottomYOff = nChunkYOff + nChunkYSize; |
1941 | 0 | const int nDstXWidth = nDstXOff2 - nDstXOff; |
1942 | | |
1943 | | /* ==================================================================== */ |
1944 | | /* Loop over destination scanlines. */ |
1945 | | /* ==================================================================== */ |
1946 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
1947 | 0 | { |
1948 | 0 | int nSrcYOff = static_cast<int>(0.5 + iDstLine * dfYRatioDstToSrc); |
1949 | 0 | int nSrcYOff2 = |
1950 | 0 | static_cast<int>(0.5 + (iDstLine + 1) * dfYRatioDstToSrc) + 1; |
1951 | |
|
1952 | 0 | if (nSrcYOff < nChunkYOff) |
1953 | 0 | { |
1954 | 0 | nSrcYOff = nChunkYOff; |
1955 | 0 | nSrcYOff2++; |
1956 | 0 | } |
1957 | |
|
1958 | 0 | const int iSizeY = nSrcYOff2 - nSrcYOff; |
1959 | 0 | nSrcYOff = nSrcYOff + iSizeY / 2 - nGaussMatrixDim / 2; |
1960 | 0 | nSrcYOff2 = nSrcYOff + nGaussMatrixDim; |
1961 | |
|
1962 | 0 | if (nSrcYOff2 > nChunkBottomYOff || |
1963 | 0 | (dfYRatioDstToSrc > 1 && iDstLine == nOYSize - 1)) |
1964 | 0 | { |
1965 | 0 | nSrcYOff2 = std::min(nChunkBottomYOff, nSrcYOff + nGaussMatrixDim); |
1966 | 0 | } |
1967 | |
|
1968 | 0 | int nYShiftGaussMatrix = 0; |
1969 | 0 | if (nSrcYOff < nChunkYOff) |
1970 | 0 | { |
1971 | 0 | nYShiftGaussMatrix = -(nSrcYOff - nChunkYOff); |
1972 | 0 | nSrcYOff = nChunkYOff; |
1973 | 0 | } |
1974 | |
|
1975 | 0 | const double *const padfSrcScanline = |
1976 | 0 | padfChunk + ((nSrcYOff - nChunkYOff) * nChunkXSize); |
1977 | 0 | const GByte *pabySrcScanlineNodataMask = nullptr; |
1978 | 0 | if (pabyChunkNodataMask != nullptr) |
1979 | 0 | pabySrcScanlineNodataMask = |
1980 | 0 | pabyChunkNodataMask + ((nSrcYOff - nChunkYOff) * nChunkXSize); |
1981 | | |
1982 | | /* -------------------------------------------------------------------- |
1983 | | */ |
1984 | | /* Loop over destination pixels */ |
1985 | | /* -------------------------------------------------------------------- |
1986 | | */ |
1987 | 0 | double *const padfDstScanline = |
1988 | 0 | padfDstBuffer + (iDstLine - nDstYOff) * nDstXWidth; |
1989 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
1990 | 0 | { |
1991 | 0 | int nSrcXOff = static_cast<int>(0.5 + iDstPixel * dfXRatioDstToSrc); |
1992 | 0 | int nSrcXOff2 = |
1993 | 0 | static_cast<int>(0.5 + (iDstPixel + 1) * dfXRatioDstToSrc) + 1; |
1994 | |
|
1995 | 0 | if (nSrcXOff < nChunkXOff) |
1996 | 0 | { |
1997 | 0 | nSrcXOff = nChunkXOff; |
1998 | 0 | nSrcXOff2++; |
1999 | 0 | } |
2000 | |
|
2001 | 0 | const int iSizeX = nSrcXOff2 - nSrcXOff; |
2002 | 0 | nSrcXOff = nSrcXOff + iSizeX / 2 - nGaussMatrixDim / 2; |
2003 | 0 | nSrcXOff2 = nSrcXOff + nGaussMatrixDim; |
2004 | |
|
2005 | 0 | if (nSrcXOff2 > nChunkRightXOff || |
2006 | 0 | (dfXRatioDstToSrc > 1 && iDstPixel == nOXSize - 1)) |
2007 | 0 | { |
2008 | 0 | nSrcXOff2 = |
2009 | 0 | std::min(nChunkRightXOff, nSrcXOff + nGaussMatrixDim); |
2010 | 0 | } |
2011 | |
|
2012 | 0 | int nXShiftGaussMatrix = 0; |
2013 | 0 | if (nSrcXOff < nChunkXOff) |
2014 | 0 | { |
2015 | 0 | nXShiftGaussMatrix = -(nSrcXOff - nChunkXOff); |
2016 | 0 | nSrcXOff = nChunkXOff; |
2017 | 0 | } |
2018 | |
|
2019 | 0 | if (poColorTable == nullptr) |
2020 | 0 | { |
2021 | 0 | double dfTotal = 0.0; |
2022 | 0 | GInt64 nCount = 0; |
2023 | 0 | const int *panLineWeight = |
2024 | 0 | panGaussMatrix + nYShiftGaussMatrix * nGaussMatrixDim + |
2025 | 0 | nXShiftGaussMatrix; |
2026 | |
|
2027 | 0 | for (int j = 0, iY = nSrcYOff; iY < nSrcYOff2; |
2028 | 0 | ++iY, ++j, panLineWeight += nGaussMatrixDim) |
2029 | 0 | { |
2030 | 0 | for (int i = 0, iX = nSrcXOff; iX < nSrcXOff2; ++iX, ++i) |
2031 | 0 | { |
2032 | 0 | const double val = |
2033 | 0 | padfSrcScanline[iX - nChunkXOff + |
2034 | 0 | static_cast<GPtrDiff_t>(iY - |
2035 | 0 | nSrcYOff) * |
2036 | 0 | nChunkXSize]; |
2037 | 0 | if (pabySrcScanlineNodataMask == nullptr || |
2038 | 0 | pabySrcScanlineNodataMask[iX - nChunkXOff + |
2039 | 0 | static_cast<GPtrDiff_t>( |
2040 | 0 | iY - nSrcYOff) * |
2041 | 0 | nChunkXSize]) |
2042 | 0 | { |
2043 | 0 | const int nWeight = panLineWeight[i]; |
2044 | 0 | dfTotal += val * nWeight; |
2045 | 0 | nCount += nWeight; |
2046 | 0 | } |
2047 | 0 | } |
2048 | 0 | } |
2049 | |
|
2050 | 0 | if (nCount == 0) |
2051 | 0 | { |
2052 | 0 | padfDstScanline[iDstPixel - nDstXOff] = dfNoDataValue; |
2053 | 0 | } |
2054 | 0 | else |
2055 | 0 | { |
2056 | 0 | padfDstScanline[iDstPixel - nDstXOff] = dfTotal / nCount; |
2057 | 0 | } |
2058 | 0 | } |
2059 | 0 | else |
2060 | 0 | { |
2061 | 0 | GInt64 nTotalR = 0; |
2062 | 0 | GInt64 nTotalG = 0; |
2063 | 0 | GInt64 nTotalB = 0; |
2064 | 0 | GInt64 nTotalWeight = 0; |
2065 | 0 | const int *panLineWeight = |
2066 | 0 | panGaussMatrix + nYShiftGaussMatrix * nGaussMatrixDim + |
2067 | 0 | nXShiftGaussMatrix; |
2068 | |
|
2069 | 0 | for (int j = 0, iY = nSrcYOff; iY < nSrcYOff2; |
2070 | 0 | ++iY, ++j, panLineWeight += nGaussMatrixDim) |
2071 | 0 | { |
2072 | 0 | for (int i = 0, iX = nSrcXOff; iX < nSrcXOff2; ++iX, ++i) |
2073 | 0 | { |
2074 | 0 | const double val = |
2075 | 0 | padfSrcScanline[iX - nChunkXOff + |
2076 | 0 | static_cast<GPtrDiff_t>(iY - |
2077 | 0 | nSrcYOff) * |
2078 | 0 | nChunkXSize]; |
2079 | 0 | if (val < 0 || val >= colorEntries.size()) |
2080 | 0 | continue; |
2081 | | |
2082 | 0 | size_t idx = static_cast<size_t>(val); |
2083 | 0 | if (colorEntries[idx].c4) |
2084 | 0 | { |
2085 | 0 | const int nWeight = panLineWeight[i]; |
2086 | 0 | nTotalR += |
2087 | 0 | static_cast<GInt64>(colorEntries[idx].c1) * |
2088 | 0 | nWeight; |
2089 | 0 | nTotalG += |
2090 | 0 | static_cast<GInt64>(colorEntries[idx].c2) * |
2091 | 0 | nWeight; |
2092 | 0 | nTotalB += |
2093 | 0 | static_cast<GInt64>(colorEntries[idx].c3) * |
2094 | 0 | nWeight; |
2095 | 0 | nTotalWeight += nWeight; |
2096 | 0 | } |
2097 | 0 | } |
2098 | 0 | } |
2099 | |
|
2100 | 0 | if (nTotalWeight == 0) |
2101 | 0 | { |
2102 | 0 | padfDstScanline[iDstPixel - nDstXOff] = dfNoDataValue; |
2103 | 0 | } |
2104 | 0 | else |
2105 | 0 | { |
2106 | 0 | GDALColorEntry color; |
2107 | |
|
2108 | 0 | color.c1 = static_cast<short>((nTotalR + nTotalWeight / 2) / |
2109 | 0 | nTotalWeight); |
2110 | 0 | color.c2 = static_cast<short>((nTotalG + nTotalWeight / 2) / |
2111 | 0 | nTotalWeight); |
2112 | 0 | color.c3 = static_cast<short>((nTotalB + nTotalWeight / 2) / |
2113 | 0 | nTotalWeight); |
2114 | 0 | padfDstScanline[iDstPixel - nDstXOff] = |
2115 | 0 | BestColorEntry(colorEntries, color); |
2116 | 0 | } |
2117 | 0 | } |
2118 | 0 | } |
2119 | 0 | } |
2120 | |
|
2121 | | #ifdef DEBUG_OUT_OF_BOUND_ACCESS |
2122 | | CPLFree(panGaussMatrixDup); |
2123 | | #endif |
2124 | |
|
2125 | 0 | return CE_None; |
2126 | 0 | } |
2127 | | |
2128 | | /************************************************************************/ |
2129 | | /* GDALResampleChunk_Mode() */ |
2130 | | /************************************************************************/ |
2131 | | |
/** Equality test used when counting identical sample values.
 *
 * The generic version is plain operator==.
 */
template <class T> static inline bool IsSame(T a, T b)
{
    return a == b;
}

/** float: as operator==, except that two NaN values compare as the same. */
template <> bool IsSame<float>(float a, float b)
{
    if (std::isnan(a))
        return std::isnan(b);
    return a == b;
}

/** double: as operator==, except that two NaN values compare as the same. */
template <> bool IsSame<double>(double a, double b)
{
    if (std::isnan(a))
        return std::isnan(b);
    return a == b;
}

/** complex<float>: as operator==, except that two values whose real and
 * imaginary parts are all NaN compare as the same.
 */
template <>
bool IsSame<std::complex<float>>(std::complex<float> a, std::complex<float> b)
{
    if (a == b)
        return true;
    const auto isFullyNaN = [](const std::complex<float> &c)
    { return std::isnan(c.real()) && std::isnan(c.imag()); };
    return isFullyNaN(a) && isFullyNaN(b);
}

/** complex<double>: as operator==, except that two values whose real and
 * imaginary parts are all NaN compare as the same.
 */
template <>
bool IsSame<std::complex<double>>(std::complex<double> a,
                                  std::complex<double> b)
{
    if (a == b)
        return true;
    const auto isFullyNaN = [](const std::complex<double> &c)
    { return std::isnan(c.real()) && std::isnan(c.imag()); };
    return isFullyNaN(a) && isFullyNaN(b);
}
2161 | | |
2162 | | template <class T> |
2163 | | static CPLErr GDALResampleChunk_ModeT(const GDALOverviewResampleArgs &args, |
2164 | | const T *pChunk, T *const pDstBuffer) |
2165 | | |
2166 | 0 | { |
2167 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
2168 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
2169 | 0 | const double dfSrcXDelta = args.dfSrcXDelta; |
2170 | 0 | const double dfSrcYDelta = args.dfSrcYDelta; |
2171 | 0 | const GByte *pabyChunkNodataMask = args.pabyChunkNodataMask; |
2172 | 0 | const int nChunkXOff = args.nChunkXOff; |
2173 | 0 | const int nChunkXSize = args.nChunkXSize; |
2174 | 0 | const int nChunkYOff = args.nChunkYOff; |
2175 | 0 | const int nChunkYSize = args.nChunkYSize; |
2176 | 0 | const int nDstXOff = args.nDstXOff; |
2177 | 0 | const int nDstXOff2 = args.nDstXOff2; |
2178 | 0 | const int nDstYOff = args.nDstYOff; |
2179 | 0 | const int nDstYOff2 = args.nDstYOff2; |
2180 | 0 | const bool bHasNoData = args.bHasNoData; |
2181 | 0 | const GDALColorTable *poColorTable = args.poColorTable; |
2182 | 0 | const int nDstXSize = nDstXOff2 - nDstXOff; |
2183 | |
|
2184 | 0 | T tNoDataValue; |
2185 | | if constexpr (std::is_same<T, std::complex<float>>::value || |
2186 | | std::is_same<T, std::complex<double>>::value) |
2187 | 0 | { |
2188 | 0 | using BaseT = typename T::value_type; |
2189 | 0 | tNoDataValue = |
2190 | 0 | std::complex<BaseT>(std::numeric_limits<BaseT>::quiet_NaN(), |
2191 | 0 | std::numeric_limits<BaseT>::quiet_NaN()); |
2192 | | } |
2193 | 0 | else if (!bHasNoData || !GDALIsValueInRange<T>(args.dfNoDataValue)) |
2194 | 0 | tNoDataValue = 0; |
2195 | 0 | else |
2196 | 0 | tNoDataValue = static_cast<T>(args.dfNoDataValue); |
2197 | |
|
2198 | 0 | size_t nMaxNumPx = 0; |
2199 | 0 | T *paVals = nullptr; |
2200 | 0 | int *panSums = nullptr; |
2201 | |
|
2202 | 0 | const int nChunkRightXOff = nChunkXOff + nChunkXSize; |
2203 | 0 | const int nChunkBottomYOff = nChunkYOff + nChunkYSize; |
2204 | 0 | std::vector<int> anVals(256, 0); |
2205 | | |
2206 | | /* ==================================================================== */ |
2207 | | /* Loop over destination scanlines. */ |
2208 | | /* ==================================================================== */ |
2209 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
2210 | 0 | { |
2211 | 0 | double dfSrcYOff = dfSrcYDelta + iDstLine * dfYRatioDstToSrc; |
2212 | 0 | int nSrcYOff = static_cast<int>(dfSrcYOff + 1e-8); |
2213 | | #ifdef only_pixels_with_more_than_10_pct_participation |
2214 | | // When oversampling, don't take into account pixels that have a tiny |
2215 | | // participation in the resulting pixel |
2216 | | if (dfYRatioDstToSrc > 1 && dfSrcYOff - nSrcYOff > 0.9 && |
2217 | | nSrcYOff < nChunkBottomYOff) |
2218 | | nSrcYOff++; |
2219 | | #endif |
2220 | 0 | if (nSrcYOff < nChunkYOff) |
2221 | 0 | nSrcYOff = nChunkYOff; |
2222 | |
|
2223 | 0 | double dfSrcYOff2 = dfSrcYDelta + (iDstLine + 1) * dfYRatioDstToSrc; |
2224 | 0 | int nSrcYOff2 = static_cast<int>(ceil(dfSrcYOff2 - 1e-8)); |
2225 | | #ifdef only_pixels_with_more_than_10_pct_participation |
2226 | | // When oversampling, don't take into account pixels that have a tiny |
2227 | | // participation in the resulting pixel |
2228 | | if (dfYRatioDstToSrc > 1 && nSrcYOff2 - dfSrcYOff2 > 0.9 && |
2229 | | nSrcYOff2 > nChunkYOff) |
2230 | | nSrcYOff2--; |
2231 | | #endif |
2232 | 0 | if (nSrcYOff2 == nSrcYOff) |
2233 | 0 | ++nSrcYOff2; |
2234 | 0 | if (nSrcYOff2 > nChunkBottomYOff) |
2235 | 0 | nSrcYOff2 = nChunkBottomYOff; |
2236 | |
|
2237 | 0 | const T *const paSrcScanline = |
2238 | 0 | pChunk + |
2239 | 0 | (static_cast<GPtrDiff_t>(nSrcYOff - nChunkYOff) * nChunkXSize); |
2240 | 0 | const GByte *pabySrcScanlineNodataMask = nullptr; |
2241 | 0 | if (pabyChunkNodataMask != nullptr) |
2242 | 0 | pabySrcScanlineNodataMask = |
2243 | 0 | pabyChunkNodataMask + |
2244 | 0 | static_cast<GPtrDiff_t>(nSrcYOff - nChunkYOff) * nChunkXSize; |
2245 | |
|
2246 | 0 | T *const paDstScanline = pDstBuffer + (iDstLine - nDstYOff) * nDstXSize; |
2247 | | /* -------------------------------------------------------------------- |
2248 | | */ |
2249 | | /* Loop over destination pixels */ |
2250 | | /* -------------------------------------------------------------------- |
2251 | | */ |
2252 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
2253 | 0 | { |
2254 | 0 | double dfSrcXOff = dfSrcXDelta + iDstPixel * dfXRatioDstToSrc; |
2255 | | // Apply some epsilon to avoid numerical precision issues |
2256 | 0 | int nSrcXOff = static_cast<int>(dfSrcXOff + 1e-8); |
2257 | | #ifdef only_pixels_with_more_than_10_pct_participation |
2258 | | // When oversampling, don't take into account pixels that have a |
2259 | | // tiny participation in the resulting pixel |
2260 | | if (dfXRatioDstToSrc > 1 && dfSrcXOff - nSrcXOff > 0.9 && |
2261 | | nSrcXOff < nChunkRightXOff) |
2262 | | nSrcXOff++; |
2263 | | #endif |
2264 | 0 | if (nSrcXOff < nChunkXOff) |
2265 | 0 | nSrcXOff = nChunkXOff; |
2266 | |
|
2267 | 0 | double dfSrcXOff2 = |
2268 | 0 | dfSrcXDelta + (iDstPixel + 1) * dfXRatioDstToSrc; |
2269 | 0 | int nSrcXOff2 = static_cast<int>(ceil(dfSrcXOff2 - 1e-8)); |
2270 | | #ifdef only_pixels_with_more_than_10_pct_participation |
2271 | | // When oversampling, don't take into account pixels that have a |
2272 | | // tiny participation in the resulting pixel |
2273 | | if (dfXRatioDstToSrc > 1 && nSrcXOff2 - dfSrcXOff2 > 0.9 && |
2274 | | nSrcXOff2 > nChunkXOff) |
2275 | | nSrcXOff2--; |
2276 | | #endif |
2277 | 0 | if (nSrcXOff2 == nSrcXOff) |
2278 | 0 | nSrcXOff2++; |
2279 | 0 | if (nSrcXOff2 > nChunkRightXOff) |
2280 | 0 | nSrcXOff2 = nChunkRightXOff; |
2281 | |
|
2282 | 0 | bool bRegularProcessing = false; |
2283 | | if constexpr (!std::is_same<T, GByte>::value) |
2284 | 0 | bRegularProcessing = true; |
2285 | 0 | else if (poColorTable && poColorTable->GetColorEntryCount() > 256) |
2286 | 0 | bRegularProcessing = true; |
2287 | |
|
2288 | 0 | if (bRegularProcessing) |
2289 | 0 | { |
2290 | | // Not sure how much sense it makes to run a majority |
2291 | | // filter on floating point data, but here it is for the sake |
2292 | | // of compatibility. It won't look right on RGB images by the |
2293 | | // nature of the filter. |
2294 | |
|
2295 | 0 | if (nSrcYOff2 - nSrcYOff <= 0 || nSrcXOff2 - nSrcXOff <= 0 || |
2296 | 0 | nSrcYOff2 - nSrcYOff > INT_MAX / (nSrcXOff2 - nSrcXOff) || |
2297 | 0 | static_cast<size_t>(nSrcYOff2 - nSrcYOff) * |
2298 | 0 | static_cast<size_t>(nSrcXOff2 - nSrcXOff) > |
2299 | 0 | std::numeric_limits<size_t>::max() / sizeof(float)) |
2300 | 0 | { |
2301 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
2302 | 0 | "Too big downsampling factor"); |
2303 | 0 | CPLFree(paVals); |
2304 | 0 | CPLFree(panSums); |
2305 | 0 | return CE_Failure; |
2306 | 0 | } |
2307 | 0 | const size_t nNumPx = |
2308 | 0 | static_cast<size_t>(nSrcYOff2 - nSrcYOff) * |
2309 | 0 | static_cast<size_t>(nSrcXOff2 - nSrcXOff); |
2310 | 0 | size_t iMaxInd = 0; |
2311 | 0 | size_t iMaxVal = 0; |
2312 | 0 | bool biMaxValdValid = false; |
2313 | |
|
2314 | 0 | if (paVals == nullptr || nNumPx > nMaxNumPx) |
2315 | 0 | { |
2316 | 0 | T *paValsNew = static_cast<T *>( |
2317 | 0 | VSI_REALLOC_VERBOSE(paVals, nNumPx * sizeof(T))); |
2318 | 0 | int *panSumsNew = static_cast<int *>( |
2319 | 0 | VSI_REALLOC_VERBOSE(panSums, nNumPx * sizeof(int))); |
2320 | 0 | if (paValsNew != nullptr) |
2321 | 0 | paVals = paValsNew; |
2322 | 0 | if (panSumsNew != nullptr) |
2323 | 0 | panSums = panSumsNew; |
2324 | 0 | if (paValsNew == nullptr || panSumsNew == nullptr) |
2325 | 0 | { |
2326 | 0 | CPLFree(paVals); |
2327 | 0 | CPLFree(panSums); |
2328 | 0 | return CE_Failure; |
2329 | 0 | } |
2330 | 0 | nMaxNumPx = nNumPx; |
2331 | 0 | } |
2332 | | |
2333 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
2334 | 0 | { |
2335 | 0 | const GPtrDiff_t iTotYOff = |
2336 | 0 | static_cast<GPtrDiff_t>(iY - nSrcYOff) * nChunkXSize - |
2337 | 0 | nChunkXOff; |
2338 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
2339 | 0 | { |
2340 | 0 | if (pabySrcScanlineNodataMask == nullptr || |
2341 | 0 | pabySrcScanlineNodataMask[iX + iTotYOff]) |
2342 | 0 | { |
2343 | 0 | const T val = paSrcScanline[iX + iTotYOff]; |
2344 | 0 | size_t i = 0; // Used after for. |
2345 | | |
2346 | | // Check array for existing entry. |
2347 | 0 | for (; i < iMaxInd; ++i) |
2348 | 0 | if (IsSame(paVals[i], val) && |
2349 | 0 | ++panSums[i] > panSums[iMaxVal]) |
2350 | 0 | { |
2351 | 0 | iMaxVal = i; |
2352 | 0 | biMaxValdValid = true; |
2353 | 0 | break; |
2354 | 0 | } |
2355 | | |
2356 | | // Add to arr if entry not already there. |
2357 | 0 | if (i == iMaxInd) |
2358 | 0 | { |
2359 | 0 | paVals[iMaxInd] = val; |
2360 | 0 | panSums[iMaxInd] = 1; |
2361 | |
|
2362 | 0 | if (!biMaxValdValid) |
2363 | 0 | { |
2364 | 0 | iMaxVal = iMaxInd; |
2365 | 0 | biMaxValdValid = true; |
2366 | 0 | } |
2367 | |
|
2368 | 0 | ++iMaxInd; |
2369 | 0 | } |
2370 | 0 | } |
2371 | 0 | } |
2372 | 0 | } |
2373 | |
|
2374 | 0 | if (!biMaxValdValid) |
2375 | 0 | paDstScanline[iDstPixel - nDstXOff] = tNoDataValue; |
2376 | 0 | else |
2377 | 0 | paDstScanline[iDstPixel - nDstXOff] = paVals[iMaxVal]; |
2378 | 0 | } |
2379 | | else if constexpr (std::is_same<T, GByte>::value) |
2380 | | // ( eSrcDataType == GDT_Byte && nEntryCount < 256 ) |
2381 | 0 | { |
2382 | | // So we go here for a paletted or non-paletted byte band. |
2383 | | // The input values are then between 0 and 255. |
2384 | 0 | int nMaxVal = 0; |
2385 | 0 | int iMaxInd = -1; |
2386 | | |
2387 | | // The cost of this zeroing might be high. Perhaps we should |
2388 | | // just use the above generic case, and go to this one if the |
2389 | | // number of source pixels is large enough |
2390 | 0 | std::fill(anVals.begin(), anVals.end(), 0); |
2391 | |
|
2392 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
2393 | 0 | { |
2394 | 0 | const GPtrDiff_t iTotYOff = |
2395 | 0 | static_cast<GPtrDiff_t>(iY - nSrcYOff) * nChunkXSize - |
2396 | 0 | nChunkXOff; |
2397 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
2398 | 0 | { |
2399 | 0 | const T val = paSrcScanline[iX + iTotYOff]; |
2400 | 0 | if (!bHasNoData || val != tNoDataValue) |
2401 | 0 | { |
2402 | 0 | int nVal = static_cast<int>(val); |
2403 | 0 | if (++anVals[nVal] > nMaxVal) |
2404 | 0 | { |
2405 | | // Sum the density. |
2406 | | // Is it the most common value so far? |
2407 | 0 | iMaxInd = nVal; |
2408 | 0 | nMaxVal = anVals[nVal]; |
2409 | 0 | } |
2410 | 0 | } |
2411 | 0 | } |
2412 | 0 | } |
2413 | |
|
2414 | 0 | if (iMaxInd == -1) |
2415 | 0 | paDstScanline[iDstPixel - nDstXOff] = tNoDataValue; |
2416 | 0 | else |
2417 | 0 | paDstScanline[iDstPixel - nDstXOff] = |
2418 | 0 | static_cast<T>(iMaxInd); |
2419 | 0 | } |
2420 | 0 | } |
2421 | 0 | } |
2422 | | |
2423 | 0 | CPLFree(paVals); |
2424 | 0 | CPLFree(panSums); |
2425 | |
|
2426 | 0 | return CE_None; |
2427 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<unsigned char>(GDALOverviewResampleArgs const&, unsigned char const*, unsigned char*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<signed char>(GDALOverviewResampleArgs const&, signed char const*, signed char*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<unsigned short>(GDALOverviewResampleArgs const&, unsigned short const*, unsigned short*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<unsigned int>(GDALOverviewResampleArgs const&, unsigned int const*, unsigned int*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<float>(GDALOverviewResampleArgs const&, float const*, float*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<unsigned long>(GDALOverviewResampleArgs const&, unsigned long const*, unsigned long*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<double>(GDALOverviewResampleArgs const&, double const*, double*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<std::__1::complex<float> >(GDALOverviewResampleArgs const&, std::__1::complex<float> const*, std::__1::complex<float>*) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ModeT<std::__1::complex<double> >(GDALOverviewResampleArgs const&, std::__1::complex<double> const*, std::__1::complex<double>*) |
2428 | | |
2429 | | static CPLErr GDALResampleChunk_Mode(const GDALOverviewResampleArgs &args, |
2430 | | const void *pChunk, void **ppDstBuffer, |
2431 | | GDALDataType *peDstBufferDataType) |
2432 | 0 | { |
2433 | 0 | *ppDstBuffer = VSI_MALLOC3_VERBOSE( |
2434 | 0 | args.nDstXOff2 - args.nDstXOff, args.nDstYOff2 - args.nDstYOff, |
2435 | 0 | GDALGetDataTypeSizeBytes(args.eWrkDataType)); |
2436 | 0 | if (*ppDstBuffer == nullptr) |
2437 | 0 | { |
2438 | 0 | return CE_Failure; |
2439 | 0 | } |
2440 | | |
2441 | 0 | CPLAssert(args.eSrcDataType == args.eWrkDataType); |
2442 | | |
2443 | 0 | *peDstBufferDataType = args.eWrkDataType; |
2444 | 0 | switch (args.eWrkDataType) |
2445 | 0 | { |
2446 | | // For mode resampling, as no computation is done, only the |
2447 | | // size of the data type matters... except for Byte where we have |
2448 | | // special processing. And for floating point values |
2449 | 0 | case GDT_Byte: |
2450 | 0 | { |
2451 | 0 | return GDALResampleChunk_ModeT(args, |
2452 | 0 | static_cast<const GByte *>(pChunk), |
2453 | 0 | static_cast<GByte *>(*ppDstBuffer)); |
2454 | 0 | } |
2455 | | |
2456 | 0 | case GDT_Int8: |
2457 | 0 | { |
2458 | 0 | return GDALResampleChunk_ModeT(args, |
2459 | 0 | static_cast<const int8_t *>(pChunk), |
2460 | 0 | static_cast<int8_t *>(*ppDstBuffer)); |
2461 | 0 | } |
2462 | | |
2463 | 0 | case GDT_Int16: |
2464 | 0 | case GDT_UInt16: |
2465 | 0 | case GDT_Float16: |
2466 | 0 | { |
2467 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 2); |
2468 | 0 | return GDALResampleChunk_ModeT( |
2469 | 0 | args, static_cast<const uint16_t *>(pChunk), |
2470 | 0 | static_cast<uint16_t *>(*ppDstBuffer)); |
2471 | 0 | } |
2472 | | |
2473 | 0 | case GDT_CInt16: |
2474 | 0 | case GDT_CFloat16: |
2475 | 0 | case GDT_Int32: |
2476 | 0 | case GDT_UInt32: |
2477 | 0 | { |
2478 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 4); |
2479 | 0 | return GDALResampleChunk_ModeT( |
2480 | 0 | args, static_cast<const uint32_t *>(pChunk), |
2481 | 0 | static_cast<uint32_t *>(*ppDstBuffer)); |
2482 | 0 | } |
2483 | | |
2484 | 0 | case GDT_Float32: |
2485 | 0 | { |
2486 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 4); |
2487 | 0 | return GDALResampleChunk_ModeT(args, |
2488 | 0 | static_cast<const float *>(pChunk), |
2489 | 0 | static_cast<float *>(*ppDstBuffer)); |
2490 | 0 | } |
2491 | | |
2492 | 0 | case GDT_CInt32: |
2493 | 0 | case GDT_Int64: |
2494 | 0 | case GDT_UInt64: |
2495 | 0 | { |
2496 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 8); |
2497 | 0 | return GDALResampleChunk_ModeT( |
2498 | 0 | args, static_cast<const uint64_t *>(pChunk), |
2499 | 0 | static_cast<uint64_t *>(*ppDstBuffer)); |
2500 | 0 | } |
2501 | | |
2502 | 0 | case GDT_Float64: |
2503 | 0 | { |
2504 | 0 | CPLAssert(GDALGetDataTypeSizeBytes(args.eWrkDataType) == 8); |
2505 | 0 | return GDALResampleChunk_ModeT(args, |
2506 | 0 | static_cast<const double *>(pChunk), |
2507 | 0 | static_cast<double *>(*ppDstBuffer)); |
2508 | 0 | } |
2509 | | |
2510 | 0 | case GDT_CFloat32: |
2511 | 0 | { |
2512 | 0 | return GDALResampleChunk_ModeT( |
2513 | 0 | args, static_cast<const std::complex<float> *>(pChunk), |
2514 | 0 | static_cast<std::complex<float> *>(*ppDstBuffer)); |
2515 | 0 | } |
2516 | | |
2517 | 0 | case GDT_CFloat64: |
2518 | 0 | { |
2519 | 0 | return GDALResampleChunk_ModeT( |
2520 | 0 | args, static_cast<const std::complex<double> *>(pChunk), |
2521 | 0 | static_cast<std::complex<double> *>(*ppDstBuffer)); |
2522 | 0 | } |
2523 | | |
2524 | 0 | case GDT_Unknown: |
2525 | 0 | case GDT_TypeCount: |
2526 | 0 | break; |
2527 | 0 | } |
2528 | | |
2529 | 0 | CPLAssert(false); |
2530 | 0 | return CE_Failure; |
2531 | 0 | } |
2532 | | |
2533 | | /************************************************************************/ |
2534 | | /* GDALResampleConvolutionHorizontal() */ |
2535 | | /************************************************************************/ |
2536 | | |
// Returns the weighted sum of nSrcPixelCount consecutive samples:
//   sum over i of pChunk[i] * padfWeights[i]
// The main loop handles 4 samples per iteration and spreads them over two
// accumulators; leftover samples are added to the first accumulator.
template <class T>
static inline double
GDALResampleConvolutionHorizontal(const T *pChunk, const double *padfWeights,
                                  int nSrcPixelCount)
{
    double dfAccFirstPair = 0.0;
    double dfAccSecondPair = 0.0;
    int iPx = 0;  // Shared by the unrolled loop and the remainder loop.
    // Intel Compiler 2024.0.2.29 (maybe other versions?) crashes on this
    // manually (untypical) unrolled loop in -O2 and -O3:
    // https://github.com/OSGeo/gdal/issues/9508
#if !defined(__INTEL_CLANG_COMPILER)
    while (iPx + 3 < nSrcPixelCount)
    {
        const T *const pSrc = pChunk + iPx;
        const double *const pdfW = padfWeights + iPx;
        dfAccFirstPair += pSrc[0] * pdfW[0];
        dfAccFirstPair += pSrc[1] * pdfW[1];
        dfAccSecondPair += pSrc[2] * pdfW[2];
        dfAccSecondPair += pSrc[3] * pdfW[3];
        iPx += 4;
    }
#endif
    while (iPx < nSrcPixelCount)
    {
        dfAccFirstPair += pChunk[iPx] * padfWeights[iPx];
        ++iPx;
    }
    return dfAccFirstPair + dfAccSecondPair;
}
2563 | | |
2564 | | template <class T> |
2565 | | static inline void GDALResampleConvolutionHorizontalWithMask( |
2566 | | const T *pChunk, const GByte *pabyMask, const double *padfWeights, |
2567 | | int nSrcPixelCount, double &dfVal, double &dfWeightSum) |
2568 | 0 | { |
2569 | 0 | dfVal = 0; |
2570 | 0 | dfWeightSum = 0; |
2571 | 0 | int i = 0; |
2572 | 0 | for (; i + 3 < nSrcPixelCount; i += 4) |
2573 | 0 | { |
2574 | 0 | const double dfWeight0 = padfWeights[i] * pabyMask[i]; |
2575 | 0 | const double dfWeight1 = padfWeights[i + 1] * pabyMask[i + 1]; |
2576 | 0 | const double dfWeight2 = padfWeights[i + 2] * pabyMask[i + 2]; |
2577 | 0 | const double dfWeight3 = padfWeights[i + 3] * pabyMask[i + 3]; |
2578 | 0 | dfVal += pChunk[i] * dfWeight0; |
2579 | 0 | dfVal += pChunk[i + 1] * dfWeight1; |
2580 | 0 | dfVal += pChunk[i + 2] * dfWeight2; |
2581 | 0 | dfVal += pChunk[i + 3] * dfWeight3; |
2582 | 0 | dfWeightSum += dfWeight0 + dfWeight1 + dfWeight2 + dfWeight3; |
2583 | 0 | } |
2584 | 0 | for (; i < nSrcPixelCount; ++i) |
2585 | 0 | { |
2586 | 0 | const double dfWeight = padfWeights[i] * pabyMask[i]; |
2587 | 0 | dfVal += pChunk[i] * dfWeight; |
2588 | 0 | dfWeightSum += dfWeight; |
2589 | 0 | } |
2590 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalWithMask<float>(float const*, unsigned char const*, double const*, int, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalWithMask<double>(double const*, unsigned char const*, double const*, int, double&, double&) |
2591 | | |
// Computes the horizontal weighted sum of three rows at once, using the same
// weights for each row:
//   dfResK = sum over i of pChunkRowK[i] * padfWeights[i]   (K = 1, 2, 3)
// Each row uses two accumulators in the 4-samples-per-iteration loop;
// leftover samples go to the first accumulator of the row.
template <class T>
static inline void GDALResampleConvolutionHorizontal_3rows(
    const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3,
    const double *padfWeights, int nSrcPixelCount, double &dfRes1,
    double &dfRes2, double &dfRes3)
{
    double dfRow1Lo = 0.0;
    double dfRow1Hi = 0.0;
    double dfRow2Lo = 0.0;
    double dfRow2Hi = 0.0;
    double dfRow3Lo = 0.0;
    double dfRow3Hi = 0.0;

    // Adds samples [iFirst, iFirst + 3] of one row: the first two go to
    // dfAccLo, the last two to dfAccHi.
    const auto addFourSamples =
        [padfWeights](const T *pRow, int iFirst, double &dfAccLo,
                      double &dfAccHi)
    {
        dfAccLo += pRow[iFirst] * padfWeights[iFirst];
        dfAccLo += pRow[iFirst + 1] * padfWeights[iFirst + 1];
        dfAccHi += pRow[iFirst + 2] * padfWeights[iFirst + 2];
        dfAccHi += pRow[iFirst + 3] * padfWeights[iFirst + 3];
    };

    int iPx = 0;  // Shared by the unrolled loop and the remainder loop.
    while (iPx + 3 < nSrcPixelCount)
    {
        addFourSamples(pChunkRow1, iPx, dfRow1Lo, dfRow1Hi);
        addFourSamples(pChunkRow2, iPx, dfRow2Lo, dfRow2Hi);
        addFourSamples(pChunkRow3, iPx, dfRow3Lo, dfRow3Hi);
        iPx += 4;
    }
    while (iPx < nSrcPixelCount)
    {
        const double dfW = padfWeights[iPx];
        dfRow1Lo += pChunkRow1[iPx] * dfW;
        dfRow2Lo += pChunkRow2[iPx] * dfW;
        dfRow3Lo += pChunkRow3[iPx] * dfW;
        ++iPx;
    }

    dfRes1 = dfRow1Lo + dfRow1Hi;
    dfRes2 = dfRow2Lo + dfRow2Hi;
    dfRes3 = dfRow3Lo + dfRow3Hi;
}
2630 | | |
2631 | | template <class T> |
2632 | | static inline void GDALResampleConvolutionHorizontalPixelCountLess8_3rows( |
2633 | | const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3, |
2634 | | const double *padfWeights, int nSrcPixelCount, double &dfRes1, |
2635 | | double &dfRes2, double &dfRes3) |
2636 | 0 | { |
2637 | 0 | GDALResampleConvolutionHorizontal_3rows(pChunkRow1, pChunkRow2, pChunkRow3, |
2638 | 0 | padfWeights, nSrcPixelCount, dfRes1, |
2639 | 0 | dfRes2, dfRes3); |
2640 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCountLess8_3rows<float>(float const*, float const*, float const*, double const*, int, double&, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCountLess8_3rows<double>(double const*, double const*, double const*, double const*, int, double&, double&, double&) |
2641 | | |
2642 | | template <class T> |
2643 | | static inline void GDALResampleConvolutionHorizontalPixelCount4_3rows( |
2644 | | const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3, |
2645 | | const double *padfWeights, double &dfRes1, double &dfRes2, double &dfRes3) |
2646 | 0 | { |
2647 | 0 | GDALResampleConvolutionHorizontal_3rows(pChunkRow1, pChunkRow2, pChunkRow3, |
2648 | 0 | padfWeights, 4, dfRes1, dfRes2, |
2649 | 0 | dfRes3); |
2650 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCount4_3rows<float>(float const*, float const*, float const*, double const*, double&, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCount4_3rows<double>(double const*, double const*, double const*, double const*, double&, double&, double&) |
2651 | | |
2652 | | /************************************************************************/ |
2653 | | /* GDALResampleConvolutionVertical() */ |
2654 | | /************************************************************************/ |
2655 | | |
// Returns the weighted sum of nSrcLineCount samples of one column, where
// consecutive samples are nStride elements apart:
//   sum over i of pChunk[i * nStride] * padfWeights[i]
// The main loop handles 4 lines per iteration and spreads them over two
// accumulators; leftover lines are added to the first accumulator.
template <class T>
static inline double
GDALResampleConvolutionVertical(const T *pChunk, int nStride,
                                const double *padfWeights, int nSrcLineCount)
{
    double dfAccFirstPair = 0.0;
    double dfAccSecondPair = 0.0;
    int iLine = 0;    // Index into padfWeights.
    int iOffset = 0;  // Matching element offset into pChunk.
    while (iLine + 3 < nSrcLineCount)
    {
        dfAccFirstPair += pChunk[iOffset] * padfWeights[iLine];
        dfAccFirstPair += pChunk[iOffset + nStride] * padfWeights[iLine + 1];
        dfAccSecondPair +=
            pChunk[iOffset + 2 * nStride] * padfWeights[iLine + 2];
        dfAccSecondPair +=
            pChunk[iOffset + 3 * nStride] * padfWeights[iLine + 3];
        iLine += 4;
        iOffset += 4 * nStride;
    }
    while (iLine < nSrcLineCount)
    {
        dfAccFirstPair += pChunk[iOffset] * padfWeights[iLine];
        ++iLine;
        iOffset += nStride;
    }
    return dfAccFirstPair + dfAccSecondPair;
}
2678 | | |
// Vertical weighted sum for two adjacent columns at once:
//   dfRes1 = sum over i of pChunk[i * nStride]     * padfWeights[i]
//   dfRes2 = sum over i of pChunk[i * nStride + 1] * padfWeights[i]
// Each column uses two accumulators in the 4-lines-per-iteration loop;
// leftover lines go to the first accumulator of the column.
template <class T>
static inline void GDALResampleConvolutionVertical_2cols(
    const T *pChunk, int nStride, const double *padfWeights, int nSrcLineCount,
    double &dfRes1, double &dfRes2)
{
    double dfCol0Lo = 0.0;
    double dfCol0Hi = 0.0;
    double dfCol1Lo = 0.0;
    double dfCol1Hi = 0.0;
    int iLine = 0;    // Index into padfWeights.
    int iOffset = 0;  // Matching element offset of the first column.
    while (iLine + 3 < nSrcLineCount)
    {
        const double dfW0 = padfWeights[iLine];
        const double dfW1 = padfWeights[iLine + 1];
        const double dfW2 = padfWeights[iLine + 2];
        const double dfW3 = padfWeights[iLine + 3];

        dfCol0Lo += pChunk[iOffset] * dfW0;
        dfCol1Lo += pChunk[iOffset + 1] * dfW0;
        dfCol0Lo += pChunk[iOffset + nStride] * dfW1;
        dfCol1Lo += pChunk[iOffset + 1 + nStride] * dfW1;
        dfCol0Hi += pChunk[iOffset + 2 * nStride] * dfW2;
        dfCol1Hi += pChunk[iOffset + 1 + 2 * nStride] * dfW2;
        dfCol0Hi += pChunk[iOffset + 3 * nStride] * dfW3;
        dfCol1Hi += pChunk[iOffset + 1 + 3 * nStride] * dfW3;

        iLine += 4;
        iOffset += 4 * nStride;
    }
    while (iLine < nSrcLineCount)
    {
        const double dfW = padfWeights[iLine];
        dfCol0Lo += pChunk[iOffset] * dfW;
        dfCol1Lo += pChunk[iOffset + 1] * dfW;
        ++iLine;
        iOffset += nStride;
    }
    dfRes1 = dfCol0Lo + dfCol0Hi;
    dfRes2 = dfCol1Lo + dfCol1Hi;
}
2709 | | |
2710 | | #ifdef USE_SSE2 |
2711 | | |
2712 | | #ifdef __AVX__ |
2713 | | /************************************************************************/ |
2714 | | /* GDALResampleConvolutionVertical_16cols<T> */ |
2715 | | /************************************************************************/ |
2716 | | |
2717 | | template <class T> |
2718 | | static inline void |
2719 | | GDALResampleConvolutionVertical_16cols(const T *pChunk, int nStride, |
2720 | | const double *padfWeights, |
2721 | | int nSrcLineCount, float *afDest) |
2722 | | { |
2723 | | int i = 0; |
2724 | | int j = 0; |
2725 | | XMMReg4Double v_acc0 = XMMReg4Double::Zero(); |
2726 | | XMMReg4Double v_acc1 = XMMReg4Double::Zero(); |
2727 | | XMMReg4Double v_acc2 = XMMReg4Double::Zero(); |
2728 | | XMMReg4Double v_acc3 = XMMReg4Double::Zero(); |
2729 | | for (; i + 3 < nSrcLineCount; i += 4, j += 4 * nStride) |
2730 | | { |
2731 | | XMMReg4Double w0 = |
2732 | | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 0); |
2733 | | XMMReg4Double w1 = |
2734 | | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 1); |
2735 | | XMMReg4Double w2 = |
2736 | | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 2); |
2737 | | XMMReg4Double w3 = |
2738 | | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 3); |
2739 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 0 * nStride) * w0; |
2740 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 0 * nStride) * w0; |
2741 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8 + 0 * nStride) * w0; |
2742 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12 + 0 * nStride) * w0; |
2743 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 1 * nStride) * w1; |
2744 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 1 * nStride) * w1; |
2745 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8 + 1 * nStride) * w1; |
2746 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12 + 1 * nStride) * w1; |
2747 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 2 * nStride) * w2; |
2748 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 2 * nStride) * w2; |
2749 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8 + 2 * nStride) * w2; |
2750 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12 + 2 * nStride) * w2; |
2751 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 3 * nStride) * w3; |
2752 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 3 * nStride) * w3; |
2753 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8 + 3 * nStride) * w3; |
2754 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12 + 3 * nStride) * w3; |
2755 | | } |
2756 | | for (; i < nSrcLineCount; ++i, j += nStride) |
2757 | | { |
2758 | | XMMReg4Double w = XMMReg4Double::Load1ValHighAndLow(padfWeights + i); |
2759 | | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0) * w; |
2760 | | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4) * w; |
2761 | | v_acc2 += XMMReg4Double::Load4Val(pChunk + j + 8) * w; |
2762 | | v_acc3 += XMMReg4Double::Load4Val(pChunk + j + 12) * w; |
2763 | | } |
2764 | | v_acc0.Store4Val(afDest); |
2765 | | v_acc1.Store4Val(afDest + 4); |
2766 | | v_acc2.Store4Val(afDest + 8); |
2767 | | v_acc3.Store4Val(afDest + 12); |
2768 | | } |
2769 | | |
2770 | | template <class T> |
2771 | | static inline void GDALResampleConvolutionVertical_16cols(const T *, int, |
2772 | | const double *, int, |
2773 | | double *) |
2774 | | { |
2775 | | // Cannot be reached |
2776 | | CPLAssert(false); |
2777 | | } |
2778 | | |
2779 | | #else |
2780 | | |
2781 | | /************************************************************************/ |
2782 | | /* GDALResampleConvolutionVertical_8cols<T> */ |
2783 | | /************************************************************************/ |
2784 | | |
2785 | | template <class T> |
2786 | | static inline void |
2787 | | GDALResampleConvolutionVertical_8cols(const T *pChunk, int nStride, |
2788 | | const double *padfWeights, |
2789 | | int nSrcLineCount, float *afDest) |
2790 | 0 | { |
2791 | 0 | int i = 0; |
2792 | 0 | int j = 0; |
2793 | 0 | XMMReg4Double v_acc0 = XMMReg4Double::Zero(); |
2794 | 0 | XMMReg4Double v_acc1 = XMMReg4Double::Zero(); |
2795 | 0 | for (; i + 3 < nSrcLineCount; i += 4, j += 4 * nStride) |
2796 | 0 | { |
2797 | 0 | XMMReg4Double w0 = |
2798 | 0 | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 0); |
2799 | 0 | XMMReg4Double w1 = |
2800 | 0 | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 1); |
2801 | 0 | XMMReg4Double w2 = |
2802 | 0 | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 2); |
2803 | 0 | XMMReg4Double w3 = |
2804 | 0 | XMMReg4Double::Load1ValHighAndLow(padfWeights + i + 3); |
2805 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 0 * nStride) * w0; |
2806 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 0 * nStride) * w0; |
2807 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 1 * nStride) * w1; |
2808 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 1 * nStride) * w1; |
2809 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 2 * nStride) * w2; |
2810 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 2 * nStride) * w2; |
2811 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0 + 3 * nStride) * w3; |
2812 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4 + 3 * nStride) * w3; |
2813 | 0 | } |
2814 | 0 | for (; i < nSrcLineCount; ++i, j += nStride) |
2815 | 0 | { |
2816 | 0 | XMMReg4Double w = XMMReg4Double::Load1ValHighAndLow(padfWeights + i); |
2817 | 0 | v_acc0 += XMMReg4Double::Load4Val(pChunk + j + 0) * w; |
2818 | 0 | v_acc1 += XMMReg4Double::Load4Val(pChunk + j + 4) * w; |
2819 | 0 | } |
2820 | 0 | v_acc0.Store4Val(afDest); |
2821 | 0 | v_acc1.Store4Val(afDest + 4); |
2822 | 0 | } |
2823 | | |
2824 | | template <class T> |
2825 | | static inline void GDALResampleConvolutionVertical_8cols(const T *, int, |
2826 | | const double *, int, |
2827 | | double *) |
2828 | | { |
2829 | | // Cannot be reached |
2830 | | CPLAssert(false); |
2831 | | } |
2832 | | |
2833 | | #endif // __AVX__ |
2834 | | |
2835 | | /************************************************************************/ |
2836 | | /* GDALResampleConvolutionHorizontalSSE2<T> */ |
2837 | | /************************************************************************/ |
2838 | | |
2839 | | template <class T> |
2840 | | static inline double GDALResampleConvolutionHorizontalSSE2( |
2841 | | const T *pChunk, const double *padfWeightsAligned, int nSrcPixelCount) |
2842 | 0 | { |
2843 | 0 | XMMReg4Double v_acc1 = XMMReg4Double::Zero(); |
2844 | 0 | XMMReg4Double v_acc2 = XMMReg4Double::Zero(); |
2845 | 0 | int i = 0; // Used after for. |
2846 | 0 | for (; i + 7 < nSrcPixelCount; i += 8) |
2847 | 0 | { |
2848 | | // Retrieve the pixel & accumulate |
2849 | 0 | const XMMReg4Double v_pixels1 = XMMReg4Double::Load4Val(pChunk + i); |
2850 | 0 | const XMMReg4Double v_pixels2 = XMMReg4Double::Load4Val(pChunk + i + 4); |
2851 | 0 | const XMMReg4Double v_weight1 = |
2852 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i); |
2853 | 0 | const XMMReg4Double v_weight2 = |
2854 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i + 4); |
2855 | |
|
2856 | 0 | v_acc1 += v_pixels1 * v_weight1; |
2857 | 0 | v_acc2 += v_pixels2 * v_weight2; |
2858 | 0 | } |
2859 | |
|
2860 | 0 | v_acc1 += v_acc2; |
2861 | |
|
2862 | 0 | double dfVal = v_acc1.GetHorizSum(); |
2863 | 0 | for (; i < nSrcPixelCount; ++i) |
2864 | 0 | { |
2865 | 0 | dfVal += pChunk[i] * padfWeightsAligned[i]; |
2866 | 0 | } |
2867 | 0 | return dfVal; |
2868 | 0 | } Unexecuted instantiation: overview.cpp:double GDALResampleConvolutionHorizontalSSE2<unsigned char>(unsigned char const*, double const*, int) Unexecuted instantiation: overview.cpp:double GDALResampleConvolutionHorizontalSSE2<unsigned short>(unsigned short const*, double const*, int) |
2869 | | |
2870 | | /************************************************************************/ |
2871 | | /* GDALResampleConvolutionHorizontal<GByte> */ |
2872 | | /************************************************************************/ |
2873 | | |
2874 | | template <> |
2875 | | inline double GDALResampleConvolutionHorizontal<GByte>( |
2876 | | const GByte *pChunk, const double *padfWeightsAligned, int nSrcPixelCount) |
2877 | 0 | { |
2878 | 0 | return GDALResampleConvolutionHorizontalSSE2(pChunk, padfWeightsAligned, |
2879 | 0 | nSrcPixelCount); |
2880 | 0 | } |
2881 | | |
2882 | | template <> |
2883 | | inline double GDALResampleConvolutionHorizontal<GUInt16>( |
2884 | | const GUInt16 *pChunk, const double *padfWeightsAligned, int nSrcPixelCount) |
2885 | 0 | { |
2886 | 0 | return GDALResampleConvolutionHorizontalSSE2(pChunk, padfWeightsAligned, |
2887 | 0 | nSrcPixelCount); |
2888 | 0 | } |
2889 | | |
2890 | | /************************************************************************/ |
2891 | | /* GDALResampleConvolutionHorizontalWithMaskSSE2<T> */ |
2892 | | /************************************************************************/ |
2893 | | |
2894 | | template <class T> |
2895 | | static inline void GDALResampleConvolutionHorizontalWithMaskSSE2( |
2896 | | const T *pChunk, const GByte *pabyMask, const double *padfWeightsAligned, |
2897 | | int nSrcPixelCount, double &dfVal, double &dfWeightSum) |
2898 | 0 | { |
2899 | 0 | int i = 0; // Used after for. |
2900 | 0 | XMMReg4Double v_acc = XMMReg4Double::Zero(); |
2901 | 0 | XMMReg4Double v_acc_weight = XMMReg4Double::Zero(); |
2902 | 0 | for (; i + 3 < nSrcPixelCount; i += 4) |
2903 | 0 | { |
2904 | 0 | const XMMReg4Double v_pixels = XMMReg4Double::Load4Val(pChunk + i); |
2905 | 0 | const XMMReg4Double v_mask = XMMReg4Double::Load4Val(pabyMask + i); |
2906 | 0 | XMMReg4Double v_weight = |
2907 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i); |
2908 | 0 | v_weight *= v_mask; |
2909 | 0 | v_acc += v_pixels * v_weight; |
2910 | 0 | v_acc_weight += v_weight; |
2911 | 0 | } |
2912 | |
|
2913 | 0 | dfVal = v_acc.GetHorizSum(); |
2914 | 0 | dfWeightSum = v_acc_weight.GetHorizSum(); |
2915 | 0 | for (; i < nSrcPixelCount; ++i) |
2916 | 0 | { |
2917 | 0 | const double dfWeight = padfWeightsAligned[i] * pabyMask[i]; |
2918 | 0 | dfVal += pChunk[i] * dfWeight; |
2919 | 0 | dfWeightSum += dfWeight; |
2920 | 0 | } |
2921 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalWithMaskSSE2<unsigned char>(unsigned char const*, unsigned char const*, double const*, int, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalWithMaskSSE2<unsigned short>(unsigned short const*, unsigned char const*, double const*, int, double&, double&) |
2922 | | |
2923 | | /************************************************************************/ |
2924 | | /* GDALResampleConvolutionHorizontalWithMask<GByte> */ |
2925 | | /************************************************************************/ |
2926 | | |
2927 | | template <> |
2928 | | inline void GDALResampleConvolutionHorizontalWithMask<GByte>( |
2929 | | const GByte *pChunk, const GByte *pabyMask, |
2930 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfVal, |
2931 | | double &dfWeightSum) |
2932 | 0 | { |
2933 | 0 | GDALResampleConvolutionHorizontalWithMaskSSE2( |
2934 | 0 | pChunk, pabyMask, padfWeightsAligned, nSrcPixelCount, dfVal, |
2935 | 0 | dfWeightSum); |
2936 | 0 | } |
2937 | | |
2938 | | template <> |
2939 | | inline void GDALResampleConvolutionHorizontalWithMask<GUInt16>( |
2940 | | const GUInt16 *pChunk, const GByte *pabyMask, |
2941 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfVal, |
2942 | | double &dfWeightSum) |
2943 | 0 | { |
2944 | 0 | GDALResampleConvolutionHorizontalWithMaskSSE2( |
2945 | 0 | pChunk, pabyMask, padfWeightsAligned, nSrcPixelCount, dfVal, |
2946 | 0 | dfWeightSum); |
2947 | 0 | } |
2948 | | |
2949 | | /************************************************************************/ |
2950 | | /* GDALResampleConvolutionHorizontal_3rows_SSE2<T> */ |
2951 | | /************************************************************************/ |
2952 | | |
2953 | | template <class T> |
2954 | | static inline void GDALResampleConvolutionHorizontal_3rows_SSE2( |
2955 | | const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3, |
2956 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfRes1, |
2957 | | double &dfRes2, double &dfRes3) |
2958 | 0 | { |
2959 | 0 | XMMReg4Double v_acc1 = XMMReg4Double::Zero(), |
2960 | 0 | v_acc2 = XMMReg4Double::Zero(), |
2961 | 0 | v_acc3 = XMMReg4Double::Zero(); |
2962 | 0 | int i = 0; |
2963 | 0 | for (; i + 7 < nSrcPixelCount; i += 8) |
2964 | 0 | { |
2965 | | // Retrieve the pixel & accumulate. |
2966 | 0 | XMMReg4Double v_pixels1 = XMMReg4Double::Load4Val(pChunkRow1 + i); |
2967 | 0 | XMMReg4Double v_pixels2 = XMMReg4Double::Load4Val(pChunkRow1 + i + 4); |
2968 | 0 | const XMMReg4Double v_weight1 = |
2969 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i); |
2970 | 0 | const XMMReg4Double v_weight2 = |
2971 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i + 4); |
2972 | |
|
2973 | 0 | v_acc1 += v_pixels1 * v_weight1; |
2974 | 0 | v_acc1 += v_pixels2 * v_weight2; |
2975 | |
|
2976 | 0 | v_pixels1 = XMMReg4Double::Load4Val(pChunkRow2 + i); |
2977 | 0 | v_pixels2 = XMMReg4Double::Load4Val(pChunkRow2 + i + 4); |
2978 | 0 | v_acc2 += v_pixels1 * v_weight1; |
2979 | 0 | v_acc2 += v_pixels2 * v_weight2; |
2980 | |
|
2981 | 0 | v_pixels1 = XMMReg4Double::Load4Val(pChunkRow3 + i); |
2982 | 0 | v_pixels2 = XMMReg4Double::Load4Val(pChunkRow3 + i + 4); |
2983 | 0 | v_acc3 += v_pixels1 * v_weight1; |
2984 | 0 | v_acc3 += v_pixels2 * v_weight2; |
2985 | 0 | } |
2986 | |
|
2987 | 0 | dfRes1 = v_acc1.GetHorizSum(); |
2988 | 0 | dfRes2 = v_acc2.GetHorizSum(); |
2989 | 0 | dfRes3 = v_acc3.GetHorizSum(); |
2990 | 0 | for (; i < nSrcPixelCount; ++i) |
2991 | 0 | { |
2992 | 0 | dfRes1 += pChunkRow1[i] * padfWeightsAligned[i]; |
2993 | 0 | dfRes2 += pChunkRow2[i] * padfWeightsAligned[i]; |
2994 | 0 | dfRes3 += pChunkRow3[i] * padfWeightsAligned[i]; |
2995 | 0 | } |
2996 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontal_3rows_SSE2<unsigned char>(unsigned char const*, unsigned char const*, unsigned char const*, double const*, int, double&, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontal_3rows_SSE2<unsigned short>(unsigned short const*, unsigned short const*, unsigned short const*, double const*, int, double&, double&, double&) |
2997 | | |
2998 | | /************************************************************************/ |
2999 | | /* GDALResampleConvolutionHorizontal_3rows<GByte> */ |
3000 | | /************************************************************************/ |
3001 | | |
3002 | | template <> |
3003 | | inline void GDALResampleConvolutionHorizontal_3rows<GByte>( |
3004 | | const GByte *pChunkRow1, const GByte *pChunkRow2, const GByte *pChunkRow3, |
3005 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfRes1, |
3006 | | double &dfRes2, double &dfRes3) |
3007 | 0 | { |
3008 | 0 | GDALResampleConvolutionHorizontal_3rows_SSE2( |
3009 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, nSrcPixelCount, |
3010 | 0 | dfRes1, dfRes2, dfRes3); |
3011 | 0 | } |
3012 | | |
3013 | | template <> |
3014 | | inline void GDALResampleConvolutionHorizontal_3rows<GUInt16>( |
3015 | | const GUInt16 *pChunkRow1, const GUInt16 *pChunkRow2, |
3016 | | const GUInt16 *pChunkRow3, const double *padfWeightsAligned, |
3017 | | int nSrcPixelCount, double &dfRes1, double &dfRes2, double &dfRes3) |
3018 | 0 | { |
3019 | 0 | GDALResampleConvolutionHorizontal_3rows_SSE2( |
3020 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, nSrcPixelCount, |
3021 | 0 | dfRes1, dfRes2, dfRes3); |
3022 | 0 | } |
3023 | | |
3024 | | /************************************************************************/ |
3025 | | /* GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2<T> */ |
3026 | | /************************************************************************/ |
3027 | | |
3028 | | template <class T> |
3029 | | static inline void GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2( |
3030 | | const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3, |
3031 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfRes1, |
3032 | | double &dfRes2, double &dfRes3) |
3033 | 0 | { |
3034 | 0 | XMMReg4Double v_acc1 = XMMReg4Double::Zero(); |
3035 | 0 | XMMReg4Double v_acc2 = XMMReg4Double::Zero(); |
3036 | 0 | XMMReg4Double v_acc3 = XMMReg4Double::Zero(); |
3037 | 0 | int i = 0; // Use after for. |
3038 | 0 | for (; i + 3 < nSrcPixelCount; i += 4) |
3039 | 0 | { |
3040 | | // Retrieve the pixel & accumulate. |
3041 | 0 | const XMMReg4Double v_pixels1 = XMMReg4Double::Load4Val(pChunkRow1 + i); |
3042 | 0 | const XMMReg4Double v_pixels2 = XMMReg4Double::Load4Val(pChunkRow2 + i); |
3043 | 0 | const XMMReg4Double v_pixels3 = XMMReg4Double::Load4Val(pChunkRow3 + i); |
3044 | 0 | const XMMReg4Double v_weight = |
3045 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned + i); |
3046 | |
|
3047 | 0 | v_acc1 += v_pixels1 * v_weight; |
3048 | 0 | v_acc2 += v_pixels2 * v_weight; |
3049 | 0 | v_acc3 += v_pixels3 * v_weight; |
3050 | 0 | } |
3051 | |
|
3052 | 0 | dfRes1 = v_acc1.GetHorizSum(); |
3053 | 0 | dfRes2 = v_acc2.GetHorizSum(); |
3054 | 0 | dfRes3 = v_acc3.GetHorizSum(); |
3055 | |
|
3056 | 0 | for (; i < nSrcPixelCount; ++i) |
3057 | 0 | { |
3058 | 0 | dfRes1 += pChunkRow1[i] * padfWeightsAligned[i]; |
3059 | 0 | dfRes2 += pChunkRow2[i] * padfWeightsAligned[i]; |
3060 | 0 | dfRes3 += pChunkRow3[i] * padfWeightsAligned[i]; |
3061 | 0 | } |
3062 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2<unsigned char>(unsigned char const*, unsigned char const*, unsigned char const*, double const*, int, double&, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2<unsigned short>(unsigned short const*, unsigned short const*, unsigned short const*, double const*, int, double&, double&, double&) |
3063 | | |
3064 | | /************************************************************************/ |
3065 | | /* GDALResampleConvolutionHorizontalPixelCountLess8_3rows<GByte> */ |
3066 | | /************************************************************************/ |
3067 | | |
3068 | | template <> |
3069 | | inline void GDALResampleConvolutionHorizontalPixelCountLess8_3rows<GByte>( |
3070 | | const GByte *pChunkRow1, const GByte *pChunkRow2, const GByte *pChunkRow3, |
3071 | | const double *padfWeightsAligned, int nSrcPixelCount, double &dfRes1, |
3072 | | double &dfRes2, double &dfRes3) |
3073 | 0 | { |
3074 | 0 | GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2( |
3075 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, nSrcPixelCount, |
3076 | 0 | dfRes1, dfRes2, dfRes3); |
3077 | 0 | } |
3078 | | |
3079 | | template <> |
3080 | | inline void GDALResampleConvolutionHorizontalPixelCountLess8_3rows<GUInt16>( |
3081 | | const GUInt16 *pChunkRow1, const GUInt16 *pChunkRow2, |
3082 | | const GUInt16 *pChunkRow3, const double *padfWeightsAligned, |
3083 | | int nSrcPixelCount, double &dfRes1, double &dfRes2, double &dfRes3) |
3084 | 0 | { |
3085 | 0 | GDALResampleConvolutionHorizontalPixelCountLess8_3rows_SSE2( |
3086 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, nSrcPixelCount, |
3087 | 0 | dfRes1, dfRes2, dfRes3); |
3088 | 0 | } |
3089 | | |
3090 | | /************************************************************************/ |
3091 | | /* GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2<T> */ |
3092 | | /************************************************************************/ |
3093 | | |
3094 | | template <class T> |
3095 | | static inline void GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2( |
3096 | | const T *pChunkRow1, const T *pChunkRow2, const T *pChunkRow3, |
3097 | | const double *padfWeightsAligned, double &dfRes1, double &dfRes2, |
3098 | | double &dfRes3) |
3099 | 0 | { |
3100 | 0 | const XMMReg4Double v_weight = |
3101 | 0 | XMMReg4Double::Load4ValAligned(padfWeightsAligned); |
3102 | | |
3103 | | // Retrieve the pixel & accumulate. |
3104 | 0 | const XMMReg4Double v_pixels1 = XMMReg4Double::Load4Val(pChunkRow1); |
3105 | 0 | const XMMReg4Double v_pixels2 = XMMReg4Double::Load4Val(pChunkRow2); |
3106 | 0 | const XMMReg4Double v_pixels3 = XMMReg4Double::Load4Val(pChunkRow3); |
3107 | |
|
3108 | 0 | XMMReg4Double v_acc1 = v_pixels1 * v_weight; |
3109 | 0 | XMMReg4Double v_acc2 = v_pixels2 * v_weight; |
3110 | 0 | XMMReg4Double v_acc3 = v_pixels3 * v_weight; |
3111 | |
|
3112 | 0 | dfRes1 = v_acc1.GetHorizSum(); |
3113 | 0 | dfRes2 = v_acc2.GetHorizSum(); |
3114 | 0 | dfRes3 = v_acc3.GetHorizSum(); |
3115 | 0 | } Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2<unsigned char>(unsigned char const*, unsigned char const*, unsigned char const*, double const*, double&, double&, double&) Unexecuted instantiation: overview.cpp:void GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2<unsigned short>(unsigned short const*, unsigned short const*, unsigned short const*, double const*, double&, double&, double&) |
3116 | | |
3117 | | /************************************************************************/ |
3118 | | /* GDALResampleConvolutionHorizontalPixelCount4_3rows<GByte> */ |
3119 | | /************************************************************************/ |
3120 | | |
3121 | | template <> |
3122 | | inline void GDALResampleConvolutionHorizontalPixelCount4_3rows<GByte>( |
3123 | | const GByte *pChunkRow1, const GByte *pChunkRow2, const GByte *pChunkRow3, |
3124 | | const double *padfWeightsAligned, double &dfRes1, double &dfRes2, |
3125 | | double &dfRes3) |
3126 | 0 | { |
3127 | 0 | GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2( |
3128 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, dfRes1, dfRes2, |
3129 | 0 | dfRes3); |
3130 | 0 | } |
3131 | | |
3132 | | template <> |
3133 | | inline void GDALResampleConvolutionHorizontalPixelCount4_3rows<GUInt16>( |
3134 | | const GUInt16 *pChunkRow1, const GUInt16 *pChunkRow2, |
3135 | | const GUInt16 *pChunkRow3, const double *padfWeightsAligned, double &dfRes1, |
3136 | | double &dfRes2, double &dfRes3) |
3137 | 0 | { |
3138 | 0 | GDALResampleConvolutionHorizontalPixelCount4_3rows_SSE2( |
3139 | 0 | pChunkRow1, pChunkRow2, pChunkRow3, padfWeightsAligned, dfRes1, dfRes2, |
3140 | 0 | dfRes3); |
3141 | 0 | } |
3142 | | |
3143 | | #endif // USE_SSE2 |
3144 | | |
3145 | | /************************************************************************/ |
3146 | | /* GDALResampleChunk_Convolution() */ |
3147 | | /************************************************************************/ |
3148 | | |
3149 | | template <class T, class Twork, GDALDataType eWrkDataType> |
3150 | | static CPLErr GDALResampleChunk_ConvolutionT( |
3151 | | const GDALOverviewResampleArgs &args, const T *pChunk, void *pDstBuffer, |
3152 | | FilterFuncType pfnFilterFunc, FilterFunc4ValuesType pfnFilterFunc4Values, |
3153 | | int nKernelRadius, bool bKernelWithNegativeWeights, float fMaxVal) |
3154 | | |
3155 | 0 | { |
3156 | 0 | const double dfXRatioDstToSrc = args.dfXRatioDstToSrc; |
3157 | 0 | const double dfYRatioDstToSrc = args.dfYRatioDstToSrc; |
3158 | 0 | const double dfSrcXDelta = args.dfSrcXDelta; |
3159 | 0 | const double dfSrcYDelta = args.dfSrcYDelta; |
3160 | 0 | constexpr int nBands = 1; |
3161 | 0 | const GByte *pabyChunkNodataMask = args.pabyChunkNodataMask; |
3162 | 0 | const int nChunkXOff = args.nChunkXOff; |
3163 | 0 | const int nChunkXSize = args.nChunkXSize; |
3164 | 0 | const int nChunkYOff = args.nChunkYOff; |
3165 | 0 | const int nChunkYSize = args.nChunkYSize; |
3166 | 0 | const int nDstXOff = args.nDstXOff; |
3167 | 0 | const int nDstXOff2 = args.nDstXOff2; |
3168 | 0 | const int nDstYOff = args.nDstYOff; |
3169 | 0 | const int nDstYOff2 = args.nDstYOff2; |
3170 | 0 | const bool bHasNoData = args.bHasNoData; |
3171 | 0 | double dfNoDataValue = args.dfNoDataValue; |
3172 | |
|
3173 | 0 | if (!bHasNoData) |
3174 | 0 | dfNoDataValue = 0.0; |
3175 | 0 | const auto dstDataType = args.eOvrDataType; |
3176 | 0 | const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(dstDataType); |
3177 | 0 | const double dfReplacementVal = |
3178 | 0 | bHasNoData ? GDALGetNoDataReplacementValue(dstDataType, dfNoDataValue) |
3179 | 0 | : dfNoDataValue; |
3180 | | // cppcheck-suppress unreadVariable |
3181 | 0 | const int isIntegerDT = GDALDataTypeIsInteger(dstDataType); |
3182 | 0 | const bool bNoDataValueInt64Valid = |
3183 | 0 | isIntegerDT && GDALIsValueExactAs<GInt64>(dfNoDataValue); |
3184 | 0 | const auto nNodataValueInt64 = |
3185 | 0 | bNoDataValueInt64Valid ? static_cast<GInt64>(dfNoDataValue) : 0; |
3186 | 0 | constexpr int nWrkDataTypeSize = static_cast<int>(sizeof(Twork)); |
3187 | | |
3188 | | // TODO: we should have some generic function to do this. |
3189 | 0 | Twork fDstMin = cpl::NumericLimits<Twork>::lowest(); |
3190 | 0 | Twork fDstMax = cpl::NumericLimits<Twork>::max(); |
3191 | 0 | if (dstDataType == GDT_Byte) |
3192 | 0 | { |
3193 | 0 | fDstMin = std::numeric_limits<GByte>::min(); |
3194 | 0 | fDstMax = std::numeric_limits<GByte>::max(); |
3195 | 0 | } |
3196 | 0 | else if (dstDataType == GDT_Int8) |
3197 | 0 | { |
3198 | 0 | fDstMin = std::numeric_limits<GInt8>::min(); |
3199 | 0 | fDstMax = std::numeric_limits<GInt8>::max(); |
3200 | 0 | } |
3201 | 0 | else if (dstDataType == GDT_UInt16) |
3202 | 0 | { |
3203 | 0 | fDstMin = std::numeric_limits<GUInt16>::min(); |
3204 | 0 | fDstMax = std::numeric_limits<GUInt16>::max(); |
3205 | 0 | } |
3206 | 0 | else if (dstDataType == GDT_Int16) |
3207 | 0 | { |
3208 | 0 | fDstMin = std::numeric_limits<GInt16>::min(); |
3209 | 0 | fDstMax = std::numeric_limits<GInt16>::max(); |
3210 | 0 | } |
3211 | 0 | else if (dstDataType == GDT_UInt32) |
3212 | 0 | { |
3213 | 0 | fDstMin = static_cast<Twork>(std::numeric_limits<GUInt32>::min()); |
3214 | 0 | fDstMax = static_cast<Twork>(std::numeric_limits<GUInt32>::max()); |
3215 | 0 | } |
3216 | 0 | else if (dstDataType == GDT_Int32) |
3217 | 0 | { |
3218 | | // cppcheck-suppress unreadVariable |
3219 | 0 | fDstMin = static_cast<Twork>(std::numeric_limits<GInt32>::min()); |
3220 | | // cppcheck-suppress unreadVariable |
3221 | 0 | fDstMax = static_cast<Twork>(std::numeric_limits<GInt32>::max()); |
3222 | 0 | } |
3223 | 0 | else if (dstDataType == GDT_UInt64) |
3224 | 0 | { |
3225 | | // cppcheck-suppress unreadVariable |
3226 | 0 | fDstMin = static_cast<Twork>(std::numeric_limits<uint64_t>::min()); |
3227 | | // cppcheck-suppress unreadVariable |
3228 | | // (1 << 64) - 2048: largest uint64 value a double can hold |
3229 | 0 | fDstMax = static_cast<Twork>(18446744073709549568ULL); |
3230 | 0 | } |
3231 | 0 | else if (dstDataType == GDT_Int64) |
3232 | 0 | { |
3233 | | // cppcheck-suppress unreadVariable |
3234 | 0 | fDstMin = static_cast<Twork>(std::numeric_limits<int64_t>::min()); |
3235 | | // cppcheck-suppress unreadVariable |
3236 | | // (1 << 63) - 1024: largest int64 that a double can hold |
3237 | 0 | fDstMax = static_cast<Twork>(9223372036854774784LL); |
3238 | 0 | } |
3239 | |
|
3240 | 0 | auto replaceValIfNodata = [bHasNoData, isIntegerDT, fDstMin, fDstMax, |
3241 | 0 | bNoDataValueInt64Valid, nNodataValueInt64, |
3242 | 0 | dfNoDataValue, dfReplacementVal](Twork fVal) |
3243 | 0 | { |
3244 | 0 | if (!bHasNoData) |
3245 | 0 | return fVal; |
3246 | | |
3247 | | // Clamp value before comparing to nodata: this is only needed for |
3248 | | // kernels with negative weights (Lanczos) |
3249 | 0 | Twork fClamped = fVal; |
3250 | 0 | if (fClamped < fDstMin) |
3251 | 0 | fClamped = fDstMin; |
3252 | 0 | else if (fClamped > fDstMax) |
3253 | 0 | fClamped = fDstMax; |
3254 | 0 | if (isIntegerDT) |
3255 | 0 | { |
3256 | 0 | if (bNoDataValueInt64Valid) |
3257 | 0 | { |
3258 | 0 | const double fClampedRounded = std::round(fClamped); |
3259 | 0 | if (fClampedRounded >= fDstMin && fClampedRounded <= fDstMax && |
3260 | 0 | nNodataValueInt64 == |
3261 | 0 | static_cast<GInt64>(std::round(fClamped))) |
3262 | 0 | { |
3263 | | // Do not use the nodata value |
3264 | 0 | return static_cast<Twork>(dfReplacementVal); |
3265 | 0 | } |
3266 | 0 | } |
3267 | 0 | } |
3268 | 0 | else if (dfNoDataValue == fClamped) |
3269 | 0 | { |
3270 | | // Do not use the nodata value |
3271 | 0 | return static_cast<Twork>(dfReplacementVal); |
3272 | 0 | } |
3273 | 0 | return fClamped; |
3274 | 0 | }; Unexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<unsigned char, float, (GDALDataType)6>(GDALOverviewResampleArgs const&, unsigned char const*, void*, double (*)(double), double (*)(double*), int, bool, float)::{lambda(float)#1}::operator()(float) const Unexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<unsigned short, float, (GDALDataType)6>(GDALOverviewResampleArgs const&, unsigned short const*, void*, double (*)(double), double (*)(double*), int, bool, float)::{lambda(float)#1}::operator()(float) const Unexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, bool, float)::{lambda(float)#1}::operator()(float) const Unexecuted instantiation: overview.cpp:GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, bool, float)::{lambda(double)#1}::operator()(double) const |
3275 | | |
3276 | | /* -------------------------------------------------------------------- */ |
3277 | | /* Allocate work buffers. */ |
3278 | | /* -------------------------------------------------------------------- */ |
3279 | 0 | const int nDstXSize = nDstXOff2 - nDstXOff; |
3280 | 0 | Twork *pafWrkScanline = nullptr; |
3281 | 0 | if (dstDataType != eWrkDataType) |
3282 | 0 | { |
3283 | 0 | pafWrkScanline = |
3284 | 0 | static_cast<Twork *>(VSI_MALLOC2_VERBOSE(nDstXSize, sizeof(Twork))); |
3285 | 0 | if (pafWrkScanline == nullptr) |
3286 | 0 | return CE_Failure; |
3287 | 0 | } |
3288 | | |
3289 | 0 | const double dfXScale = 1.0 / dfXRatioDstToSrc; |
3290 | 0 | const double dfXScaleWeight = (dfXScale >= 1.0) ? 1.0 : dfXScale; |
3291 | 0 | const double dfXScaledRadius = nKernelRadius / dfXScaleWeight; |
3292 | 0 | const double dfYScale = 1.0 / dfYRatioDstToSrc; |
3293 | 0 | const double dfYScaleWeight = (dfYScale >= 1.0) ? 1.0 : dfYScale; |
3294 | 0 | const double dfYScaledRadius = nKernelRadius / dfYScaleWeight; |
3295 | | |
3296 | | // Temporary array to store result of horizontal filter. |
3297 | 0 | double *padfHorizontalFiltered = static_cast<double *>( |
3298 | 0 | VSI_MALLOC3_VERBOSE(nChunkYSize, nDstXSize, sizeof(double) * nBands)); |
3299 | | |
3300 | | // To store convolution coefficients. |
3301 | 0 | double *padfWeights = static_cast<double *>(VSI_MALLOC_ALIGNED_AUTO_VERBOSE( |
3302 | 0 | static_cast<int>(2 + 2 * std::max(dfXScaledRadius, dfYScaledRadius) + |
3303 | 0 | 0.5) * |
3304 | 0 | sizeof(double))); |
3305 | |
|
3306 | 0 | GByte *pabyChunkNodataMaskHorizontalFiltered = nullptr; |
3307 | 0 | if (pabyChunkNodataMask) |
3308 | 0 | pabyChunkNodataMaskHorizontalFiltered = |
3309 | 0 | static_cast<GByte *>(VSI_MALLOC2_VERBOSE(nChunkYSize, nDstXSize)); |
3310 | 0 | if (padfHorizontalFiltered == nullptr || padfWeights == nullptr || |
3311 | 0 | (pabyChunkNodataMask != nullptr && |
3312 | 0 | pabyChunkNodataMaskHorizontalFiltered == nullptr)) |
3313 | 0 | { |
3314 | 0 | VSIFree(pafWrkScanline); |
3315 | 0 | VSIFree(padfHorizontalFiltered); |
3316 | 0 | VSIFreeAligned(padfWeights); |
3317 | 0 | VSIFree(pabyChunkNodataMaskHorizontalFiltered); |
3318 | 0 | return CE_Failure; |
3319 | 0 | } |
3320 | | |
3321 | | /* ==================================================================== */ |
3322 | | /* First pass: horizontal filter */ |
3323 | | /* ==================================================================== */ |
3324 | 0 | const int nChunkRightXOff = nChunkXOff + nChunkXSize; |
3325 | 0 | #ifdef USE_SSE2 |
3326 | 0 | bool bSrcPixelCountLess8 = dfXScaledRadius < 4; |
3327 | 0 | #endif |
3328 | 0 | for (int iDstPixel = nDstXOff; iDstPixel < nDstXOff2; ++iDstPixel) |
3329 | 0 | { |
3330 | 0 | const double dfSrcPixel = |
3331 | 0 | (iDstPixel + 0.5) * dfXRatioDstToSrc + dfSrcXDelta; |
3332 | 0 | int nSrcPixelStart = |
3333 | 0 | static_cast<int>(floor(dfSrcPixel - dfXScaledRadius + 0.5)); |
3334 | 0 | if (nSrcPixelStart < nChunkXOff) |
3335 | 0 | nSrcPixelStart = nChunkXOff; |
3336 | 0 | int nSrcPixelStop = |
3337 | 0 | static_cast<int>(dfSrcPixel + dfXScaledRadius + 0.5); |
3338 | 0 | if (nSrcPixelStop > nChunkRightXOff) |
3339 | 0 | nSrcPixelStop = nChunkRightXOff; |
3340 | | #if 0 |
3341 | | if( nSrcPixelStart < nChunkXOff && nChunkXOff > 0 ) |
3342 | | { |
3343 | | printf( "truncated iDstPixel = %d\n", iDstPixel );/*ok*/ |
3344 | | } |
3345 | | if( nSrcPixelStop > nChunkRightXOff && nChunkRightXOff < nSrcWidth ) |
3346 | | { |
3347 | | printf( "truncated iDstPixel = %d\n", iDstPixel );/*ok*/ |
3348 | | } |
3349 | | #endif |
3350 | 0 | const int nSrcPixelCount = nSrcPixelStop - nSrcPixelStart; |
3351 | 0 | double dfWeightSum = 0.0; |
3352 | | |
3353 | | // Compute convolution coefficients. |
3354 | 0 | int nSrcPixel = nSrcPixelStart; |
3355 | 0 | double dfX = dfXScaleWeight * (nSrcPixel - dfSrcPixel + 0.5); |
3356 | 0 | for (; nSrcPixel + 3 < nSrcPixelStop; nSrcPixel += 4) |
3357 | 0 | { |
3358 | 0 | padfWeights[nSrcPixel - nSrcPixelStart] = dfX; |
3359 | 0 | dfX += dfXScaleWeight; |
3360 | 0 | padfWeights[nSrcPixel + 1 - nSrcPixelStart] = dfX; |
3361 | 0 | dfX += dfXScaleWeight; |
3362 | 0 | padfWeights[nSrcPixel + 2 - nSrcPixelStart] = dfX; |
3363 | 0 | dfX += dfXScaleWeight; |
3364 | 0 | padfWeights[nSrcPixel + 3 - nSrcPixelStart] = dfX; |
3365 | 0 | dfX += dfXScaleWeight; |
3366 | 0 | dfWeightSum += |
3367 | 0 | pfnFilterFunc4Values(padfWeights + nSrcPixel - nSrcPixelStart); |
3368 | 0 | } |
3369 | 0 | for (; nSrcPixel < nSrcPixelStop; ++nSrcPixel, dfX += dfXScaleWeight) |
3370 | 0 | { |
3371 | 0 | const double dfWeight = pfnFilterFunc(dfX); |
3372 | 0 | padfWeights[nSrcPixel - nSrcPixelStart] = dfWeight; |
3373 | 0 | dfWeightSum += dfWeight; |
3374 | 0 | } |
3375 | |
|
3376 | 0 | const int nHeight = nChunkYSize * nBands; |
3377 | 0 | if (pabyChunkNodataMask == nullptr) |
3378 | 0 | { |
3379 | 0 | if (dfWeightSum != 0) |
3380 | 0 | { |
3381 | 0 | const double dfInvWeightSum = 1.0 / dfWeightSum; |
3382 | 0 | for (int i = 0; i < nSrcPixelCount; ++i) |
3383 | 0 | padfWeights[i] *= dfInvWeightSum; |
3384 | 0 | } |
3385 | 0 | int iSrcLineOff = 0; |
3386 | 0 | #ifdef USE_SSE2 |
3387 | 0 | if (nSrcPixelCount == 4) |
3388 | 0 | { |
3389 | 0 | for (; iSrcLineOff + 2 < nHeight; iSrcLineOff += 3) |
3390 | 0 | { |
3391 | 0 | const GPtrDiff_t j = |
3392 | 0 | static_cast<GPtrDiff_t>(iSrcLineOff) * nChunkXSize + |
3393 | 0 | (nSrcPixelStart - nChunkXOff); |
3394 | 0 | double dfVal1 = 0.0; |
3395 | 0 | double dfVal2 = 0.0; |
3396 | 0 | double dfVal3 = 0.0; |
3397 | 0 | GDALResampleConvolutionHorizontalPixelCount4_3rows( |
3398 | 0 | pChunk + j, pChunk + j + nChunkXSize, |
3399 | 0 | pChunk + j + 2 * nChunkXSize, padfWeights, dfVal1, |
3400 | 0 | dfVal2, dfVal3); |
3401 | 0 | padfHorizontalFiltered[static_cast<size_t>(iSrcLineOff) * |
3402 | 0 | nDstXSize + |
3403 | 0 | iDstPixel - nDstXOff] = dfVal1; |
3404 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3405 | 0 | 1) * |
3406 | 0 | nDstXSize + |
3407 | 0 | iDstPixel - nDstXOff] = dfVal2; |
3408 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3409 | 0 | 2) * |
3410 | 0 | nDstXSize + |
3411 | 0 | iDstPixel - nDstXOff] = dfVal3; |
3412 | 0 | } |
3413 | 0 | } |
3414 | 0 | else if (bSrcPixelCountLess8) |
3415 | 0 | { |
3416 | 0 | for (; iSrcLineOff + 2 < nHeight; iSrcLineOff += 3) |
3417 | 0 | { |
3418 | 0 | const GPtrDiff_t j = |
3419 | 0 | static_cast<GPtrDiff_t>(iSrcLineOff) * nChunkXSize + |
3420 | 0 | (nSrcPixelStart - nChunkXOff); |
3421 | 0 | double dfVal1 = 0.0; |
3422 | 0 | double dfVal2 = 0.0; |
3423 | 0 | double dfVal3 = 0.0; |
3424 | 0 | GDALResampleConvolutionHorizontalPixelCountLess8_3rows( |
3425 | 0 | pChunk + j, pChunk + j + nChunkXSize, |
3426 | 0 | pChunk + j + 2 * nChunkXSize, padfWeights, |
3427 | 0 | nSrcPixelCount, dfVal1, dfVal2, dfVal3); |
3428 | 0 | padfHorizontalFiltered[static_cast<size_t>(iSrcLineOff) * |
3429 | 0 | nDstXSize + |
3430 | 0 | iDstPixel - nDstXOff] = dfVal1; |
3431 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3432 | 0 | 1) * |
3433 | 0 | nDstXSize + |
3434 | 0 | iDstPixel - nDstXOff] = dfVal2; |
3435 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3436 | 0 | 2) * |
3437 | 0 | nDstXSize + |
3438 | 0 | iDstPixel - nDstXOff] = dfVal3; |
3439 | 0 | } |
3440 | 0 | } |
3441 | 0 | else |
3442 | 0 | #endif |
3443 | 0 | { |
3444 | 0 | for (; iSrcLineOff + 2 < nHeight; iSrcLineOff += 3) |
3445 | 0 | { |
3446 | 0 | const GPtrDiff_t j = |
3447 | 0 | static_cast<GPtrDiff_t>(iSrcLineOff) * nChunkXSize + |
3448 | 0 | (nSrcPixelStart - nChunkXOff); |
3449 | 0 | double dfVal1 = 0.0; |
3450 | 0 | double dfVal2 = 0.0; |
3451 | 0 | double dfVal3 = 0.0; |
3452 | 0 | GDALResampleConvolutionHorizontal_3rows( |
3453 | 0 | pChunk + j, pChunk + j + nChunkXSize, |
3454 | 0 | pChunk + j + 2 * nChunkXSize, padfWeights, |
3455 | 0 | nSrcPixelCount, dfVal1, dfVal2, dfVal3); |
3456 | 0 | padfHorizontalFiltered[static_cast<size_t>(iSrcLineOff) * |
3457 | 0 | nDstXSize + |
3458 | 0 | iDstPixel - nDstXOff] = dfVal1; |
3459 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3460 | 0 | 1) * |
3461 | 0 | nDstXSize + |
3462 | 0 | iDstPixel - nDstXOff] = dfVal2; |
3463 | 0 | padfHorizontalFiltered[(static_cast<size_t>(iSrcLineOff) + |
3464 | 0 | 2) * |
3465 | 0 | nDstXSize + |
3466 | 0 | iDstPixel - nDstXOff] = dfVal3; |
3467 | 0 | } |
3468 | 0 | } |
3469 | 0 | for (; iSrcLineOff < nHeight; ++iSrcLineOff) |
3470 | 0 | { |
3471 | 0 | const GPtrDiff_t j = |
3472 | 0 | static_cast<GPtrDiff_t>(iSrcLineOff) * nChunkXSize + |
3473 | 0 | (nSrcPixelStart - nChunkXOff); |
3474 | 0 | const double dfVal = GDALResampleConvolutionHorizontal( |
3475 | 0 | pChunk + j, padfWeights, nSrcPixelCount); |
3476 | 0 | padfHorizontalFiltered[static_cast<size_t>(iSrcLineOff) * |
3477 | 0 | nDstXSize + |
3478 | 0 | iDstPixel - nDstXOff] = dfVal; |
3479 | 0 | } |
3480 | 0 | } |
3481 | 0 | else |
3482 | 0 | { |
3483 | 0 | for (int iSrcLineOff = 0; iSrcLineOff < nHeight; ++iSrcLineOff) |
3484 | 0 | { |
3485 | 0 | const GPtrDiff_t j = |
3486 | 0 | static_cast<GPtrDiff_t>(iSrcLineOff) * nChunkXSize + |
3487 | 0 | (nSrcPixelStart - nChunkXOff); |
3488 | |
|
3489 | 0 | if (bKernelWithNegativeWeights) |
3490 | 0 | { |
3491 | 0 | int nConsecutiveValid = 0; |
3492 | 0 | int nMaxConsecutiveValid = 0; |
3493 | 0 | for (int k = 0; k < nSrcPixelCount; k++) |
3494 | 0 | { |
3495 | 0 | if (pabyChunkNodataMask[j + k]) |
3496 | 0 | nConsecutiveValid++; |
3497 | 0 | else if (nConsecutiveValid) |
3498 | 0 | { |
3499 | 0 | nMaxConsecutiveValid = std::max( |
3500 | 0 | nMaxConsecutiveValid, nConsecutiveValid); |
3501 | 0 | nConsecutiveValid = 0; |
3502 | 0 | } |
3503 | 0 | } |
3504 | 0 | nMaxConsecutiveValid = |
3505 | 0 | std::max(nMaxConsecutiveValid, nConsecutiveValid); |
3506 | 0 | if (nMaxConsecutiveValid < nSrcPixelCount / 2) |
3507 | 0 | { |
3508 | 0 | const size_t nTempOffset = |
3509 | 0 | static_cast<size_t>(iSrcLineOff) * nDstXSize + |
3510 | 0 | iDstPixel - nDstXOff; |
3511 | 0 | padfHorizontalFiltered[nTempOffset] = 0.0; |
3512 | 0 | pabyChunkNodataMaskHorizontalFiltered[nTempOffset] = 0; |
3513 | 0 | continue; |
3514 | 0 | } |
3515 | 0 | } |
3516 | | |
3517 | 0 | double dfVal = 0.0; |
3518 | 0 | GDALResampleConvolutionHorizontalWithMask( |
3519 | 0 | pChunk + j, pabyChunkNodataMask + j, padfWeights, |
3520 | 0 | nSrcPixelCount, dfVal, dfWeightSum); |
3521 | 0 | const size_t nTempOffset = |
3522 | 0 | static_cast<size_t>(iSrcLineOff) * nDstXSize + iDstPixel - |
3523 | 0 | nDstXOff; |
3524 | 0 | if (dfWeightSum > 0.0) |
3525 | 0 | { |
3526 | 0 | padfHorizontalFiltered[nTempOffset] = dfVal / dfWeightSum; |
3527 | 0 | pabyChunkNodataMaskHorizontalFiltered[nTempOffset] = 1; |
3528 | 0 | } |
3529 | 0 | else |
3530 | 0 | { |
3531 | 0 | padfHorizontalFiltered[nTempOffset] = 0.0; |
3532 | 0 | pabyChunkNodataMaskHorizontalFiltered[nTempOffset] = 0; |
3533 | 0 | } |
3534 | 0 | } |
3535 | 0 | } |
3536 | 0 | } |
3537 | | |
3538 | | /* ==================================================================== */ |
3539 | | /* Second pass: vertical filter */ |
3540 | | /* ==================================================================== */ |
3541 | 0 | const int nChunkBottomYOff = nChunkYOff + nChunkYSize; |
3542 | |
|
3543 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
3544 | 0 | { |
3545 | 0 | Twork *const pafDstScanline = |
3546 | 0 | pafWrkScanline ? pafWrkScanline |
3547 | 0 | : static_cast<Twork *>(pDstBuffer) + |
3548 | 0 | (iDstLine - nDstYOff) * nDstXSize; |
3549 | |
|
3550 | 0 | const double dfSrcLine = |
3551 | 0 | (iDstLine + 0.5) * dfYRatioDstToSrc + dfSrcYDelta; |
3552 | 0 | int nSrcLineStart = |
3553 | 0 | static_cast<int>(floor(dfSrcLine - dfYScaledRadius + 0.5)); |
3554 | 0 | int nSrcLineStop = static_cast<int>(dfSrcLine + dfYScaledRadius + 0.5); |
3555 | 0 | if (nSrcLineStart < nChunkYOff) |
3556 | 0 | nSrcLineStart = nChunkYOff; |
3557 | 0 | if (nSrcLineStop > nChunkBottomYOff) |
3558 | 0 | nSrcLineStop = nChunkBottomYOff; |
3559 | | #if 0 |
3560 | | if( nSrcLineStart < nChunkYOff && |
3561 | | nChunkYOff > 0 ) |
3562 | | { |
3563 | | printf( "truncated iDstLine = %d\n", iDstLine );/*ok*/ |
3564 | | } |
3565 | | if( nSrcLineStop > nChunkBottomYOff && nChunkBottomYOff < nSrcHeight ) |
3566 | | { |
3567 | | printf( "truncated iDstLine = %d\n", iDstLine );/*ok*/ |
3568 | | } |
3569 | | #endif |
3570 | 0 | const int nSrcLineCount = nSrcLineStop - nSrcLineStart; |
3571 | 0 | double dfWeightSum = 0.0; |
3572 | | |
3573 | | // Compute convolution coefficients. |
3574 | 0 | int nSrcLine = nSrcLineStart; // Used after for. |
3575 | 0 | double dfY = dfYScaleWeight * (nSrcLine - dfSrcLine + 0.5); |
3576 | 0 | for (; nSrcLine + 3 < nSrcLineStop; |
3577 | 0 | nSrcLine += 4, dfY += 4 * dfYScaleWeight) |
3578 | 0 | { |
3579 | 0 | padfWeights[nSrcLine - nSrcLineStart] = dfY; |
3580 | 0 | padfWeights[nSrcLine + 1 - nSrcLineStart] = dfY + dfYScaleWeight; |
3581 | 0 | padfWeights[nSrcLine + 2 - nSrcLineStart] = |
3582 | 0 | dfY + 2 * dfYScaleWeight; |
3583 | 0 | padfWeights[nSrcLine + 3 - nSrcLineStart] = |
3584 | 0 | dfY + 3 * dfYScaleWeight; |
3585 | 0 | dfWeightSum += |
3586 | 0 | pfnFilterFunc4Values(padfWeights + nSrcLine - nSrcLineStart); |
3587 | 0 | } |
3588 | 0 | for (; nSrcLine < nSrcLineStop; ++nSrcLine, dfY += dfYScaleWeight) |
3589 | 0 | { |
3590 | 0 | const double dfWeight = pfnFilterFunc(dfY); |
3591 | 0 | padfWeights[nSrcLine - nSrcLineStart] = dfWeight; |
3592 | 0 | dfWeightSum += dfWeight; |
3593 | 0 | } |
3594 | |
|
3595 | 0 | if (pabyChunkNodataMask == nullptr) |
3596 | 0 | { |
3597 | 0 | if (dfWeightSum != 0) |
3598 | 0 | { |
3599 | 0 | const double dfInvWeightSum = 1.0 / dfWeightSum; |
3600 | 0 | for (int i = 0; i < nSrcLineCount; ++i) |
3601 | 0 | padfWeights[i] *= dfInvWeightSum; |
3602 | 0 | } |
3603 | 0 | } |
3604 | |
|
3605 | 0 | if (pabyChunkNodataMask == nullptr) |
3606 | 0 | { |
3607 | 0 | int iFilteredPixelOff = 0; // Used after for. |
3608 | | // j used after for. |
3609 | 0 | size_t j = |
3610 | 0 | (nSrcLineStart - nChunkYOff) * static_cast<size_t>(nDstXSize); |
3611 | 0 | #ifdef USE_SSE2 |
3612 | | if constexpr (eWrkDataType == GDT_Float32) |
3613 | 0 | { |
3614 | | #ifdef __AVX__ |
3615 | | for (; iFilteredPixelOff + 15 < nDstXSize; |
3616 | | iFilteredPixelOff += 16, j += 16) |
3617 | | { |
3618 | | GDALResampleConvolutionVertical_16cols( |
3619 | | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
3620 | | nSrcLineCount, pafDstScanline + iFilteredPixelOff); |
3621 | | if (bHasNoData) |
3622 | | { |
3623 | | for (int k = 0; k < 16; k++) |
3624 | | { |
3625 | | pafDstScanline[iFilteredPixelOff + k] = |
3626 | | replaceValIfNodata( |
3627 | | pafDstScanline[iFilteredPixelOff + k]); |
3628 | | } |
3629 | | } |
3630 | | } |
3631 | | #else |
3632 | 0 | for (; iFilteredPixelOff + 7 < nDstXSize; |
3633 | 0 | iFilteredPixelOff += 8, j += 8) |
3634 | 0 | { |
3635 | 0 | GDALResampleConvolutionVertical_8cols( |
3636 | 0 | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
3637 | 0 | nSrcLineCount, pafDstScanline + iFilteredPixelOff); |
3638 | 0 | if (bHasNoData) |
3639 | 0 | { |
3640 | 0 | for (int k = 0; k < 8; k++) |
3641 | 0 | { |
3642 | 0 | pafDstScanline[iFilteredPixelOff + k] = |
3643 | 0 | replaceValIfNodata( |
3644 | 0 | pafDstScanline[iFilteredPixelOff + k]); |
3645 | 0 | } |
3646 | 0 | } |
3647 | 0 | } |
3648 | 0 | #endif |
3649 | |
|
3650 | 0 | for (; iFilteredPixelOff < nDstXSize; iFilteredPixelOff++, j++) |
3651 | 0 | { |
3652 | 0 | const Twork fVal = |
3653 | 0 | static_cast<Twork>(GDALResampleConvolutionVertical( |
3654 | 0 | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
3655 | 0 | nSrcLineCount)); |
3656 | 0 | pafDstScanline[iFilteredPixelOff] = |
3657 | 0 | replaceValIfNodata(fVal); |
3658 | 0 | } |
3659 | | } |
3660 | | else |
3661 | | #endif |
3662 | 0 | { |
3663 | 0 | for (; iFilteredPixelOff + 1 < nDstXSize; |
3664 | 0 | iFilteredPixelOff += 2, j += 2) |
3665 | 0 | { |
3666 | 0 | double dfVal1 = 0.0; |
3667 | 0 | double dfVal2 = 0.0; |
3668 | 0 | GDALResampleConvolutionVertical_2cols( |
3669 | 0 | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
3670 | 0 | nSrcLineCount, dfVal1, dfVal2); |
3671 | 0 | pafDstScanline[iFilteredPixelOff] = |
3672 | 0 | replaceValIfNodata(static_cast<Twork>(dfVal1)); |
3673 | 0 | pafDstScanline[iFilteredPixelOff + 1] = |
3674 | 0 | replaceValIfNodata(static_cast<Twork>(dfVal2)); |
3675 | 0 | } |
3676 | 0 | if (iFilteredPixelOff < nDstXSize) |
3677 | 0 | { |
3678 | 0 | const double dfVal = GDALResampleConvolutionVertical( |
3679 | 0 | padfHorizontalFiltered + j, nDstXSize, padfWeights, |
3680 | 0 | nSrcLineCount); |
3681 | 0 | pafDstScanline[iFilteredPixelOff] = |
3682 | 0 | replaceValIfNodata(static_cast<Twork>(dfVal)); |
3683 | 0 | } |
3684 | 0 | } |
3685 | 0 | } |
3686 | 0 | else |
3687 | 0 | { |
3688 | 0 | for (int iFilteredPixelOff = 0; iFilteredPixelOff < nDstXSize; |
3689 | 0 | ++iFilteredPixelOff) |
3690 | 0 | { |
3691 | 0 | double dfVal = 0.0; |
3692 | 0 | dfWeightSum = 0.0; |
3693 | 0 | size_t j = (nSrcLineStart - nChunkYOff) * |
3694 | 0 | static_cast<size_t>(nDstXSize) + |
3695 | 0 | iFilteredPixelOff; |
3696 | 0 | if (bKernelWithNegativeWeights) |
3697 | 0 | { |
3698 | 0 | int nConsecutiveValid = 0; |
3699 | 0 | int nMaxConsecutiveValid = 0; |
3700 | 0 | for (int i = 0; i < nSrcLineCount; ++i, j += nDstXSize) |
3701 | 0 | { |
3702 | 0 | const double dfWeight = |
3703 | 0 | padfWeights[i] * |
3704 | 0 | pabyChunkNodataMaskHorizontalFiltered[j]; |
3705 | 0 | if (pabyChunkNodataMaskHorizontalFiltered[j]) |
3706 | 0 | { |
3707 | 0 | nConsecutiveValid++; |
3708 | 0 | } |
3709 | 0 | else if (nConsecutiveValid) |
3710 | 0 | { |
3711 | 0 | nMaxConsecutiveValid = std::max( |
3712 | 0 | nMaxConsecutiveValid, nConsecutiveValid); |
3713 | 0 | nConsecutiveValid = 0; |
3714 | 0 | } |
3715 | 0 | dfVal += padfHorizontalFiltered[j] * dfWeight; |
3716 | 0 | dfWeightSum += dfWeight; |
3717 | 0 | } |
3718 | 0 | nMaxConsecutiveValid = |
3719 | 0 | std::max(nMaxConsecutiveValid, nConsecutiveValid); |
3720 | 0 | if (nMaxConsecutiveValid < nSrcLineCount / 2) |
3721 | 0 | { |
3722 | 0 | pafDstScanline[iFilteredPixelOff] = |
3723 | 0 | static_cast<Twork>(dfNoDataValue); |
3724 | 0 | continue; |
3725 | 0 | } |
3726 | 0 | } |
3727 | 0 | else |
3728 | 0 | { |
3729 | 0 | for (int i = 0; i < nSrcLineCount; ++i, j += nDstXSize) |
3730 | 0 | { |
3731 | 0 | const double dfWeight = |
3732 | 0 | padfWeights[i] * |
3733 | 0 | pabyChunkNodataMaskHorizontalFiltered[j]; |
3734 | 0 | dfVal += padfHorizontalFiltered[j] * dfWeight; |
3735 | 0 | dfWeightSum += dfWeight; |
3736 | 0 | } |
3737 | 0 | } |
3738 | 0 | if (dfWeightSum > 0.0) |
3739 | 0 | { |
3740 | 0 | pafDstScanline[iFilteredPixelOff] = replaceValIfNodata( |
3741 | 0 | static_cast<Twork>(dfVal / dfWeightSum)); |
3742 | 0 | } |
3743 | 0 | else |
3744 | 0 | { |
3745 | 0 | pafDstScanline[iFilteredPixelOff] = |
3746 | 0 | static_cast<Twork>(dfNoDataValue); |
3747 | 0 | } |
3748 | 0 | } |
3749 | 0 | } |
3750 | |
|
3751 | 0 | if (fMaxVal != 0.0f) |
3752 | 0 | { |
3753 | 0 | for (int i = 0; i < nDstXSize; ++i) |
3754 | 0 | { |
3755 | 0 | if (pafDstScanline[i] > fMaxVal) |
3756 | 0 | pafDstScanline[i] = fMaxVal; |
3757 | 0 | } |
3758 | 0 | } |
3759 | |
|
3760 | 0 | if (pafWrkScanline) |
3761 | 0 | { |
3762 | 0 | GDALCopyWords64(pafWrkScanline, eWrkDataType, nWrkDataTypeSize, |
3763 | 0 | static_cast<GByte *>(pDstBuffer) + |
3764 | 0 | static_cast<size_t>(iDstLine - nDstYOff) * |
3765 | 0 | nDstXSize * nDstDataTypeSize, |
3766 | 0 | dstDataType, nDstDataTypeSize, nDstXSize); |
3767 | 0 | } |
3768 | 0 | } |
3769 | |
|
3770 | 0 | VSIFree(pafWrkScanline); |
3771 | 0 | VSIFreeAligned(padfWeights); |
3772 | 0 | VSIFree(padfHorizontalFiltered); |
3773 | 0 | VSIFree(pabyChunkNodataMaskHorizontalFiltered); |
3774 | |
|
3775 | 0 | return CE_None; |
3776 | 0 | } Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<unsigned char, float, (GDALDataType)6>(GDALOverviewResampleArgs const&, unsigned char const*, void*, double (*)(double), double (*)(double*), int, bool, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<unsigned short, float, (GDALDataType)6>(GDALOverviewResampleArgs const&, unsigned short const*, void*, double (*)(double), double (*)(double*), int, bool, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<float, float, (GDALDataType)6>(GDALOverviewResampleArgs const&, float const*, void*, double (*)(double), double (*)(double*), int, bool, float) Unexecuted instantiation: overview.cpp:CPLErr GDALResampleChunk_ConvolutionT<double, double, (GDALDataType)7>(GDALOverviewResampleArgs const&, double const*, void*, double (*)(double), double (*)(double*), int, bool, float) |
3777 | | |
3778 | | static CPLErr |
3779 | | GDALResampleChunk_Convolution(const GDALOverviewResampleArgs &args, |
3780 | | const void *pChunk, void **ppDstBuffer, |
3781 | | GDALDataType *peDstBufferDataType) |
3782 | 0 | { |
3783 | 0 | GDALResampleAlg eResample; |
3784 | 0 | bool bKernelWithNegativeWeights = false; |
3785 | 0 | if (EQUAL(args.pszResampling, "BILINEAR")) |
3786 | 0 | eResample = GRA_Bilinear; |
3787 | 0 | else if (EQUAL(args.pszResampling, "CUBIC")) |
3788 | 0 | { |
3789 | 0 | eResample = GRA_Cubic; |
3790 | 0 | bKernelWithNegativeWeights = true; |
3791 | 0 | } |
3792 | 0 | else if (EQUAL(args.pszResampling, "CUBICSPLINE")) |
3793 | 0 | eResample = GRA_CubicSpline; |
3794 | 0 | else if (EQUAL(args.pszResampling, "LANCZOS")) |
3795 | 0 | { |
3796 | 0 | eResample = GRA_Lanczos; |
3797 | 0 | bKernelWithNegativeWeights = true; |
3798 | 0 | } |
3799 | 0 | else |
3800 | 0 | { |
3801 | 0 | CPLAssert(false); |
3802 | 0 | return CE_Failure; |
3803 | 0 | } |
3804 | 0 | const int nKernelRadius = GWKGetFilterRadius(eResample); |
3805 | 0 | FilterFuncType pfnFilterFunc = GWKGetFilterFunc(eResample); |
3806 | 0 | const FilterFunc4ValuesType pfnFilterFunc4Values = |
3807 | 0 | GWKGetFilterFunc4Values(eResample); |
3808 | |
|
3809 | 0 | float fMaxVal = 0.f; |
3810 | | // Cubic, etc... can have overshoots, so make sure we clamp values to the |
3811 | | // maximum value if NBITS is set. |
3812 | 0 | if (eResample != GRA_Bilinear && args.nOvrNBITS > 0 && |
3813 | 0 | (args.eOvrDataType == GDT_Byte || args.eOvrDataType == GDT_UInt16 || |
3814 | 0 | args.eOvrDataType == GDT_UInt32)) |
3815 | 0 | { |
3816 | 0 | int nBits = args.nOvrNBITS; |
3817 | 0 | if (nBits == GDALGetDataTypeSize(args.eOvrDataType)) |
3818 | 0 | nBits = 0; |
3819 | 0 | if (nBits > 0 && nBits < 32) |
3820 | 0 | fMaxVal = static_cast<float>((1U << nBits) - 1); |
3821 | 0 | } |
3822 | |
|
3823 | 0 | *ppDstBuffer = VSI_MALLOC3_VERBOSE( |
3824 | 0 | args.nDstXOff2 - args.nDstXOff, args.nDstYOff2 - args.nDstYOff, |
3825 | 0 | GDALGetDataTypeSizeBytes(args.eOvrDataType)); |
3826 | 0 | if (*ppDstBuffer == nullptr) |
3827 | 0 | { |
3828 | 0 | return CE_Failure; |
3829 | 0 | } |
3830 | 0 | *peDstBufferDataType = args.eOvrDataType; |
3831 | |
|
3832 | 0 | switch (args.eWrkDataType) |
3833 | 0 | { |
3834 | 0 | case GDT_Byte: |
3835 | 0 | { |
3836 | 0 | return GDALResampleChunk_ConvolutionT<GByte, float, GDT_Float32>( |
3837 | 0 | args, static_cast<const GByte *>(pChunk), *ppDstBuffer, |
3838 | 0 | pfnFilterFunc, pfnFilterFunc4Values, nKernelRadius, |
3839 | 0 | bKernelWithNegativeWeights, fMaxVal); |
3840 | 0 | } |
3841 | | |
3842 | 0 | case GDT_UInt16: |
3843 | 0 | { |
3844 | 0 | return GDALResampleChunk_ConvolutionT<GUInt16, float, GDT_Float32>( |
3845 | 0 | args, static_cast<const GUInt16 *>(pChunk), *ppDstBuffer, |
3846 | 0 | pfnFilterFunc, pfnFilterFunc4Values, nKernelRadius, |
3847 | 0 | bKernelWithNegativeWeights, fMaxVal); |
3848 | 0 | } |
3849 | | |
3850 | 0 | case GDT_Float32: |
3851 | 0 | { |
3852 | 0 | return GDALResampleChunk_ConvolutionT<float, float, GDT_Float32>( |
3853 | 0 | args, static_cast<const float *>(pChunk), *ppDstBuffer, |
3854 | 0 | pfnFilterFunc, pfnFilterFunc4Values, nKernelRadius, |
3855 | 0 | bKernelWithNegativeWeights, fMaxVal); |
3856 | 0 | } |
3857 | | |
3858 | 0 | case GDT_Float64: |
3859 | 0 | { |
3860 | 0 | return GDALResampleChunk_ConvolutionT<double, double, GDT_Float64>( |
3861 | 0 | args, static_cast<const double *>(pChunk), *ppDstBuffer, |
3862 | 0 | pfnFilterFunc, pfnFilterFunc4Values, nKernelRadius, |
3863 | 0 | bKernelWithNegativeWeights, fMaxVal); |
3864 | 0 | } |
3865 | | |
3866 | 0 | default: |
3867 | 0 | break; |
3868 | 0 | } |
3869 | | |
3870 | 0 | CPLAssert(false); |
3871 | 0 | return CE_Failure; |
3872 | 0 | } |
3873 | | |
3874 | | /************************************************************************/ |
3875 | | /* GDALResampleChunkC32R() */ |
3876 | | /************************************************************************/ |
3877 | | |
3878 | | static CPLErr GDALResampleChunkC32R(const int nSrcWidth, const int nSrcHeight, |
3879 | | const float *pafChunk, const int nChunkYOff, |
3880 | | const int nChunkYSize, const int nDstYOff, |
3881 | | const int nDstYOff2, const int nOvrXSize, |
3882 | | const int nOvrYSize, void **ppDstBuffer, |
3883 | | GDALDataType *peDstBufferDataType, |
3884 | | const char *pszResampling) |
3885 | | |
3886 | 0 | { |
3887 | 0 | enum Method |
3888 | 0 | { |
3889 | 0 | NEAR, |
3890 | 0 | AVERAGE, |
3891 | 0 | AVERAGE_MAGPHASE, |
3892 | 0 | RMS, |
3893 | 0 | }; |
3894 | |
|
3895 | 0 | Method eMethod = NEAR; |
3896 | 0 | if (STARTS_WITH_CI(pszResampling, "NEAR")) |
3897 | 0 | { |
3898 | 0 | eMethod = NEAR; |
3899 | 0 | } |
3900 | 0 | else if (EQUAL(pszResampling, "AVERAGE_MAGPHASE")) |
3901 | 0 | { |
3902 | 0 | eMethod = AVERAGE_MAGPHASE; |
3903 | 0 | } |
3904 | 0 | else if (EQUAL(pszResampling, "RMS")) |
3905 | 0 | { |
3906 | 0 | eMethod = RMS; |
3907 | 0 | } |
3908 | 0 | else if (STARTS_WITH_CI(pszResampling, "AVER")) |
3909 | 0 | { |
3910 | 0 | eMethod = AVERAGE; |
3911 | 0 | } |
3912 | 0 | else |
3913 | 0 | { |
3914 | 0 | CPLError( |
3915 | 0 | CE_Failure, CPLE_NotSupported, |
3916 | 0 | "Resampling method %s is not supported for complex data types. " |
3917 | 0 | "Only NEAREST, AVERAGE, AVERAGE_MAGPHASE and RMS are supported", |
3918 | 0 | pszResampling); |
3919 | 0 | return CE_Failure; |
3920 | 0 | } |
3921 | | |
3922 | 0 | const int nOXSize = nOvrXSize; |
3923 | 0 | *ppDstBuffer = VSI_MALLOC3_VERBOSE(nOXSize, nDstYOff2 - nDstYOff, |
3924 | 0 | GDALGetDataTypeSizeBytes(GDT_CFloat32)); |
3925 | 0 | if (*ppDstBuffer == nullptr) |
3926 | 0 | { |
3927 | 0 | return CE_Failure; |
3928 | 0 | } |
3929 | 0 | float *const pafDstBuffer = static_cast<float *>(*ppDstBuffer); |
3930 | 0 | *peDstBufferDataType = GDT_CFloat32; |
3931 | |
|
3932 | 0 | const int nOYSize = nOvrYSize; |
3933 | 0 | const double dfXRatioDstToSrc = static_cast<double>(nSrcWidth) / nOXSize; |
3934 | 0 | const double dfYRatioDstToSrc = static_cast<double>(nSrcHeight) / nOYSize; |
3935 | | |
3936 | | /* ==================================================================== */ |
3937 | | /* Loop over destination scanlines. */ |
3938 | | /* ==================================================================== */ |
3939 | 0 | for (int iDstLine = nDstYOff; iDstLine < nDstYOff2; ++iDstLine) |
3940 | 0 | { |
3941 | 0 | int nSrcYOff = static_cast<int>(0.5 + iDstLine * dfYRatioDstToSrc); |
3942 | 0 | if (nSrcYOff < nChunkYOff) |
3943 | 0 | nSrcYOff = nChunkYOff; |
3944 | |
|
3945 | 0 | int nSrcYOff2 = |
3946 | 0 | static_cast<int>(0.5 + (iDstLine + 1) * dfYRatioDstToSrc); |
3947 | 0 | if (nSrcYOff2 == nSrcYOff) |
3948 | 0 | nSrcYOff2++; |
3949 | |
|
3950 | 0 | if (nSrcYOff2 > nSrcHeight || iDstLine == nOYSize - 1) |
3951 | 0 | { |
3952 | 0 | if (nSrcYOff == nSrcHeight && nSrcHeight - 1 >= nChunkYOff) |
3953 | 0 | nSrcYOff = nSrcHeight - 1; |
3954 | 0 | nSrcYOff2 = nSrcHeight; |
3955 | 0 | } |
3956 | 0 | if (nSrcYOff2 > nChunkYOff + nChunkYSize) |
3957 | 0 | nSrcYOff2 = nChunkYOff + nChunkYSize; |
3958 | |
|
3959 | 0 | const float *const pafSrcScanline = |
3960 | 0 | pafChunk + ((nSrcYOff - nChunkYOff) * nSrcWidth) * 2; |
3961 | 0 | float *const pafDstScanline = |
3962 | 0 | pafDstBuffer + (iDstLine - nDstYOff) * 2 * nOXSize; |
3963 | | |
3964 | | /* -------------------------------------------------------------------- |
3965 | | */ |
3966 | | /* Loop over destination pixels */ |
3967 | | /* -------------------------------------------------------------------- |
3968 | | */ |
3969 | 0 | for (int iDstPixel = 0; iDstPixel < nOXSize; ++iDstPixel) |
3970 | 0 | { |
3971 | 0 | int nSrcXOff = static_cast<int>(0.5 + iDstPixel * dfXRatioDstToSrc); |
3972 | 0 | int nSrcXOff2 = |
3973 | 0 | static_cast<int>(0.5 + (iDstPixel + 1) * dfXRatioDstToSrc); |
3974 | 0 | if (nSrcXOff2 == nSrcXOff) |
3975 | 0 | nSrcXOff2++; |
3976 | 0 | if (nSrcXOff2 > nSrcWidth || iDstPixel == nOXSize - 1) |
3977 | 0 | { |
3978 | 0 | if (nSrcXOff == nSrcWidth && nSrcWidth - 1 >= 0) |
3979 | 0 | nSrcXOff = nSrcWidth - 1; |
3980 | 0 | nSrcXOff2 = nSrcWidth; |
3981 | 0 | } |
3982 | |
|
3983 | 0 | if (eMethod == NEAR) |
3984 | 0 | { |
3985 | 0 | pafDstScanline[iDstPixel * 2] = pafSrcScanline[nSrcXOff * 2]; |
3986 | 0 | pafDstScanline[iDstPixel * 2 + 1] = |
3987 | 0 | pafSrcScanline[nSrcXOff * 2 + 1]; |
3988 | 0 | } |
3989 | 0 | else if (eMethod == AVERAGE_MAGPHASE) |
3990 | 0 | { |
3991 | 0 | double dfTotalR = 0.0; |
3992 | 0 | double dfTotalI = 0.0; |
3993 | 0 | double dfTotalM = 0.0; |
3994 | 0 | int nCount = 0; |
3995 | |
|
3996 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
3997 | 0 | { |
3998 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
3999 | 0 | { |
4000 | 0 | const double dfR = |
4001 | 0 | pafSrcScanline[iX * 2 + static_cast<GPtrDiff_t>( |
4002 | 0 | iY - nSrcYOff) * |
4003 | 0 | nSrcWidth * 2]; |
4004 | 0 | const double dfI = |
4005 | 0 | pafSrcScanline[iX * 2 + |
4006 | 0 | static_cast<GPtrDiff_t>(iY - |
4007 | 0 | nSrcYOff) * |
4008 | 0 | nSrcWidth * 2 + |
4009 | 0 | 1]; |
4010 | 0 | dfTotalR += dfR; |
4011 | 0 | dfTotalI += dfI; |
4012 | 0 | dfTotalM += std::hypot(dfR, dfI); |
4013 | 0 | ++nCount; |
4014 | 0 | } |
4015 | 0 | } |
4016 | |
|
4017 | 0 | CPLAssert(nCount > 0); |
4018 | 0 | if (nCount == 0) |
4019 | 0 | { |
4020 | 0 | pafDstScanline[iDstPixel * 2] = 0.0; |
4021 | 0 | pafDstScanline[iDstPixel * 2 + 1] = 0.0; |
4022 | 0 | } |
4023 | 0 | else |
4024 | 0 | { |
4025 | 0 | pafDstScanline[iDstPixel * 2] = |
4026 | 0 | static_cast<float>(dfTotalR / nCount); |
4027 | 0 | pafDstScanline[iDstPixel * 2 + 1] = |
4028 | 0 | static_cast<float>(dfTotalI / nCount); |
4029 | 0 | const double dfM = |
4030 | 0 | std::hypot(pafDstScanline[iDstPixel * 2], |
4031 | 0 | pafDstScanline[iDstPixel * 2 + 1]); |
4032 | 0 | const double dfDesiredM = dfTotalM / nCount; |
4033 | 0 | double dfRatio = 1.0; |
4034 | 0 | if (dfM != 0.0) |
4035 | 0 | dfRatio = dfDesiredM / dfM; |
4036 | |
|
4037 | 0 | pafDstScanline[iDstPixel * 2] *= |
4038 | 0 | static_cast<float>(dfRatio); |
4039 | 0 | pafDstScanline[iDstPixel * 2 + 1] *= |
4040 | 0 | static_cast<float>(dfRatio); |
4041 | 0 | } |
4042 | 0 | } |
4043 | 0 | else if (eMethod == RMS) |
4044 | 0 | { |
4045 | 0 | double dfTotalR = 0.0; |
4046 | 0 | double dfTotalI = 0.0; |
4047 | 0 | int nCount = 0; |
4048 | |
|
4049 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
4050 | 0 | { |
4051 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
4052 | 0 | { |
4053 | 0 | const double dfR = |
4054 | 0 | pafSrcScanline[iX * 2 + static_cast<GPtrDiff_t>( |
4055 | 0 | iY - nSrcYOff) * |
4056 | 0 | nSrcWidth * 2]; |
4057 | 0 | const double dfI = |
4058 | 0 | pafSrcScanline[iX * 2 + |
4059 | 0 | static_cast<GPtrDiff_t>(iY - |
4060 | 0 | nSrcYOff) * |
4061 | 0 | nSrcWidth * 2 + |
4062 | 0 | 1]; |
4063 | |
|
4064 | 0 | dfTotalR += SQUARE(dfR); |
4065 | 0 | dfTotalI += SQUARE(dfI); |
4066 | |
|
4067 | 0 | ++nCount; |
4068 | 0 | } |
4069 | 0 | } |
4070 | |
|
4071 | 0 | CPLAssert(nCount > 0); |
4072 | 0 | if (nCount == 0) |
4073 | 0 | { |
4074 | 0 | pafDstScanline[iDstPixel * 2] = 0.0; |
4075 | 0 | pafDstScanline[iDstPixel * 2 + 1] = 0.0; |
4076 | 0 | } |
4077 | 0 | else |
4078 | 0 | { |
4079 | | /* compute RMS */ |
4080 | 0 | pafDstScanline[iDstPixel * 2] = |
4081 | 0 | static_cast<float>(sqrt(dfTotalR / nCount)); |
4082 | 0 | pafDstScanline[iDstPixel * 2 + 1] = |
4083 | 0 | static_cast<float>(sqrt(dfTotalI / nCount)); |
4084 | 0 | } |
4085 | 0 | } |
4086 | 0 | else if (eMethod == AVERAGE) |
4087 | 0 | { |
4088 | 0 | double dfTotalR = 0.0; |
4089 | 0 | double dfTotalI = 0.0; |
4090 | 0 | int nCount = 0; |
4091 | |
|
4092 | 0 | for (int iY = nSrcYOff; iY < nSrcYOff2; ++iY) |
4093 | 0 | { |
4094 | 0 | for (int iX = nSrcXOff; iX < nSrcXOff2; ++iX) |
4095 | 0 | { |
4096 | | // TODO(schwehr): Maybe use std::complex? |
4097 | 0 | dfTotalR += |
4098 | 0 | pafSrcScanline[iX * 2 + static_cast<GPtrDiff_t>( |
4099 | 0 | iY - nSrcYOff) * |
4100 | 0 | nSrcWidth * 2]; |
4101 | 0 | dfTotalI += pafSrcScanline[iX * 2 + |
4102 | 0 | static_cast<GPtrDiff_t>( |
4103 | 0 | iY - nSrcYOff) * |
4104 | 0 | nSrcWidth * 2 + |
4105 | 0 | 1]; |
4106 | 0 | ++nCount; |
4107 | 0 | } |
4108 | 0 | } |
4109 | |
|
4110 | 0 | CPLAssert(nCount > 0); |
4111 | 0 | if (nCount == 0) |
4112 | 0 | { |
4113 | 0 | pafDstScanline[iDstPixel * 2] = 0.0; |
4114 | 0 | pafDstScanline[iDstPixel * 2 + 1] = 0.0; |
4115 | 0 | } |
4116 | 0 | else |
4117 | 0 | { |
4118 | 0 | pafDstScanline[iDstPixel * 2] = |
4119 | 0 | static_cast<float>(dfTotalR / nCount); |
4120 | 0 | pafDstScanline[iDstPixel * 2 + 1] = |
4121 | 0 | static_cast<float>(dfTotalI / nCount); |
4122 | 0 | } |
4123 | 0 | } |
4124 | 0 | } |
4125 | 0 | } |
4126 | | |
4127 | 0 | return CE_None; |
4128 | 0 | } |
4129 | | |
4130 | | /************************************************************************/ |
4131 | | /* GDALRegenerateCascadingOverviews() */ |
4132 | | /* */ |
4133 | | /* Generate a list of overviews in order from largest to */ |
4134 | | /* smallest, computing each from the next larger. */ |
4135 | | /************************************************************************/ |
4136 | | |
4137 | | static CPLErr GDALRegenerateCascadingOverviews( |
4138 | | GDALRasterBand *poSrcBand, int nOverviews, GDALRasterBand **papoOvrBands, |
4139 | | const char *pszResampling, GDALProgressFunc pfnProgress, |
4140 | | void *pProgressData, CSLConstList papszOptions) |
4141 | | |
4142 | 0 | { |
4143 | | /* -------------------------------------------------------------------- */ |
4144 | | /* First, we must put the overviews in order from largest to */ |
4145 | | /* smallest. */ |
4146 | | /* -------------------------------------------------------------------- */ |
4147 | 0 | for (int i = 0; i < nOverviews - 1; ++i) |
4148 | 0 | { |
4149 | 0 | for (int j = 0; j < nOverviews - i - 1; ++j) |
4150 | 0 | { |
4151 | 0 | if (papoOvrBands[j]->GetXSize() * |
4152 | 0 | static_cast<float>(papoOvrBands[j]->GetYSize()) < |
4153 | 0 | papoOvrBands[j + 1]->GetXSize() * |
4154 | 0 | static_cast<float>(papoOvrBands[j + 1]->GetYSize())) |
4155 | 0 | { |
4156 | 0 | GDALRasterBand *poTempBand = papoOvrBands[j]; |
4157 | 0 | papoOvrBands[j] = papoOvrBands[j + 1]; |
4158 | 0 | papoOvrBands[j + 1] = poTempBand; |
4159 | 0 | } |
4160 | 0 | } |
4161 | 0 | } |
4162 | | |
4163 | | /* -------------------------------------------------------------------- */ |
4164 | | /* Count total pixels so we can prepare appropriate scaled */ |
4165 | | /* progress functions. */ |
4166 | | /* -------------------------------------------------------------------- */ |
4167 | 0 | double dfTotalPixels = 0.0; |
4168 | |
|
4169 | 0 | for (int i = 0; i < nOverviews; ++i) |
4170 | 0 | { |
4171 | 0 | dfTotalPixels += papoOvrBands[i]->GetXSize() * |
4172 | 0 | static_cast<double>(papoOvrBands[i]->GetYSize()); |
4173 | 0 | } |
4174 | | |
4175 | | /* -------------------------------------------------------------------- */ |
4176 | | /* Generate all the bands. */ |
4177 | | /* -------------------------------------------------------------------- */ |
4178 | 0 | double dfPixelsProcessed = 0.0; |
4179 | |
|
4180 | 0 | for (int i = 0; i < nOverviews; ++i) |
4181 | 0 | { |
4182 | 0 | GDALRasterBand *poBaseBand = poSrcBand; |
4183 | 0 | if (i != 0) |
4184 | 0 | poBaseBand = papoOvrBands[i - 1]; |
4185 | |
|
4186 | 0 | double dfPixels = papoOvrBands[i]->GetXSize() * |
4187 | 0 | static_cast<double>(papoOvrBands[i]->GetYSize()); |
4188 | |
|
4189 | 0 | void *pScaledProgressData = GDALCreateScaledProgress( |
4190 | 0 | dfPixelsProcessed / dfTotalPixels, |
4191 | 0 | (dfPixelsProcessed + dfPixels) / dfTotalPixels, pfnProgress, |
4192 | 0 | pProgressData); |
4193 | |
|
4194 | 0 | const CPLErr eErr = GDALRegenerateOverviewsEx( |
4195 | 0 | poBaseBand, 1, |
4196 | 0 | reinterpret_cast<GDALRasterBandH *>(papoOvrBands) + i, |
4197 | 0 | pszResampling, GDALScaledProgress, pScaledProgressData, |
4198 | 0 | papszOptions); |
4199 | 0 | GDALDestroyScaledProgress(pScaledProgressData); |
4200 | |
|
4201 | 0 | if (eErr != CE_None) |
4202 | 0 | return eErr; |
4203 | | |
4204 | 0 | dfPixelsProcessed += dfPixels; |
4205 | | |
4206 | | // Only do the bit2grayscale promotion on the base band. |
4207 | 0 | if (STARTS_WITH_CI(pszResampling, |
4208 | 0 | "AVERAGE_BIT2G" /* AVERAGE_BIT2GRAYSCALE */)) |
4209 | 0 | pszResampling = "AVERAGE"; |
4210 | 0 | } |
4211 | | |
4212 | 0 | return CE_None; |
4213 | 0 | } |
4214 | | |
4215 | | /************************************************************************/ |
4216 | | /* GDALGetResampleFunction() */ |
4217 | | /************************************************************************/ |
4218 | | |
4219 | | GDALResampleFunction GDALGetResampleFunction(const char *pszResampling, |
4220 | | int *pnRadius) |
4221 | 0 | { |
4222 | 0 | if (pnRadius) |
4223 | 0 | *pnRadius = 0; |
4224 | 0 | if (STARTS_WITH_CI(pszResampling, "NEAR")) |
4225 | 0 | return GDALResampleChunk_Near; |
4226 | 0 | else if (STARTS_WITH_CI(pszResampling, "AVER") || |
4227 | 0 | EQUAL(pszResampling, "RMS")) |
4228 | 0 | return GDALResampleChunk_AverageOrRMS; |
4229 | 0 | else if (EQUAL(pszResampling, "GAUSS")) |
4230 | 0 | { |
4231 | 0 | if (pnRadius) |
4232 | 0 | *pnRadius = 1; |
4233 | 0 | return GDALResampleChunk_Gauss; |
4234 | 0 | } |
4235 | 0 | else if (EQUAL(pszResampling, "MODE")) |
4236 | 0 | return GDALResampleChunk_Mode; |
4237 | 0 | else if (EQUAL(pszResampling, "CUBIC")) |
4238 | 0 | { |
4239 | 0 | if (pnRadius) |
4240 | 0 | *pnRadius = GWKGetFilterRadius(GRA_Cubic); |
4241 | 0 | return GDALResampleChunk_Convolution; |
4242 | 0 | } |
4243 | 0 | else if (EQUAL(pszResampling, "CUBICSPLINE")) |
4244 | 0 | { |
4245 | 0 | if (pnRadius) |
4246 | 0 | *pnRadius = GWKGetFilterRadius(GRA_CubicSpline); |
4247 | 0 | return GDALResampleChunk_Convolution; |
4248 | 0 | } |
4249 | 0 | else if (EQUAL(pszResampling, "LANCZOS")) |
4250 | 0 | { |
4251 | 0 | if (pnRadius) |
4252 | 0 | *pnRadius = GWKGetFilterRadius(GRA_Lanczos); |
4253 | 0 | return GDALResampleChunk_Convolution; |
4254 | 0 | } |
4255 | 0 | else if (EQUAL(pszResampling, "BILINEAR")) |
4256 | 0 | { |
4257 | 0 | if (pnRadius) |
4258 | 0 | *pnRadius = GWKGetFilterRadius(GRA_Bilinear); |
4259 | 0 | return GDALResampleChunk_Convolution; |
4260 | 0 | } |
4261 | 0 | else |
4262 | 0 | { |
4263 | 0 | CPLError( |
4264 | 0 | CE_Failure, CPLE_AppDefined, |
4265 | 0 | "GDALGetResampleFunction: Unsupported resampling method \"%s\".", |
4266 | 0 | pszResampling); |
4267 | 0 | return nullptr; |
4268 | 0 | } |
4269 | 0 | } |
4270 | | |
4271 | | /************************************************************************/ |
4272 | | /* GDALGetOvrWorkDataType() */ |
4273 | | /************************************************************************/ |
4274 | | |
4275 | | GDALDataType GDALGetOvrWorkDataType(const char *pszResampling, |
4276 | | GDALDataType eSrcDataType) |
4277 | 0 | { |
4278 | 0 | if (STARTS_WITH_CI(pszResampling, "NEAR") || EQUAL(pszResampling, "MODE")) |
4279 | 0 | { |
4280 | 0 | return eSrcDataType; |
4281 | 0 | } |
4282 | 0 | else if (eSrcDataType == GDT_Byte && |
4283 | 0 | (STARTS_WITH_CI(pszResampling, "AVER") || |
4284 | 0 | EQUAL(pszResampling, "RMS") || EQUAL(pszResampling, "CUBIC") || |
4285 | 0 | EQUAL(pszResampling, "CUBICSPLINE") || |
4286 | 0 | EQUAL(pszResampling, "LANCZOS") || |
4287 | 0 | EQUAL(pszResampling, "BILINEAR") || EQUAL(pszResampling, "MODE"))) |
4288 | 0 | { |
4289 | 0 | return GDT_Byte; |
4290 | 0 | } |
4291 | 0 | else if (eSrcDataType == GDT_UInt16 && |
4292 | 0 | (STARTS_WITH_CI(pszResampling, "AVER") || |
4293 | 0 | EQUAL(pszResampling, "RMS") || EQUAL(pszResampling, "CUBIC") || |
4294 | 0 | EQUAL(pszResampling, "CUBICSPLINE") || |
4295 | 0 | EQUAL(pszResampling, "LANCZOS") || |
4296 | 0 | EQUAL(pszResampling, "BILINEAR") || EQUAL(pszResampling, "MODE"))) |
4297 | 0 | { |
4298 | 0 | return GDT_UInt16; |
4299 | 0 | } |
4300 | 0 | else if (EQUAL(pszResampling, "GAUSS")) |
4301 | 0 | return GDT_Float64; |
4302 | | |
4303 | 0 | if (eSrcDataType == GDT_Byte || eSrcDataType == GDT_Int8 || |
4304 | 0 | eSrcDataType == GDT_UInt16 || eSrcDataType == GDT_Int16 || |
4305 | 0 | eSrcDataType == GDT_Float32) |
4306 | 0 | { |
4307 | 0 | return GDT_Float32; |
4308 | 0 | } |
4309 | 0 | return GDT_Float64; |
4310 | 0 | } |
4311 | | |
4312 | | namespace |
4313 | | { |
4314 | | // Structure to hold a pointer to free with CPLFree() |
4315 | | struct PointerHolder |
4316 | | { |
4317 | | void *ptr = nullptr; |
4318 | | |
4319 | 0 | explicit PointerHolder(void *ptrIn) : ptr(ptrIn) |
4320 | 0 | { |
4321 | 0 | } |
4322 | | |
4323 | | ~PointerHolder() |
4324 | 0 | { |
4325 | 0 | CPLFree(ptr); |
4326 | 0 | } |
4327 | | |
4328 | | PointerHolder(const PointerHolder &) = delete; |
4329 | | PointerHolder &operator=(const PointerHolder &) = delete; |
4330 | | }; |
4331 | | } // namespace |
4332 | | |
4333 | | /************************************************************************/ |
4334 | | /* GDALRegenerateOverviews() */ |
4335 | | /************************************************************************/ |
4336 | | |
4337 | | /** |
4338 | | * \brief Generate downsampled overviews. |
4339 | | * |
4340 | | * This function will generate one or more overview images from a base image |
4341 | | * using the requested downsampling algorithm. Its primary use is for |
4342 | | * generating overviews via GDALDataset::BuildOverviews(), but it can also be |
4343 | | * used to generate downsampled images in one file from another outside the |
4344 | | * overview architecture. |
4345 | | * |
4346 | | * The output bands need to exist in advance. |
4347 | | * |
4348 | | * The full set of resampling algorithms is documented in |
4349 | | * GDALDataset::BuildOverviews(). |
4350 | | * |
4351 | | * This function will honour properly NODATA_VALUES tuples (special dataset |
4352 | | * metadata) so that only a given RGB triplet (in case of a RGB image) will be |
4353 | | * considered as the nodata value and not each value of the triplet |
4354 | | * independently per band. |
4355 | | * |
4356 | | * Starting with GDAL 3.2, the GDAL_NUM_THREADS configuration option can be set |
4357 | | * to "ALL_CPUS" or a integer value to specify the number of threads to use for |
4358 | | * overview computation. |
4359 | | * |
4360 | | * @param hSrcBand the source (base level) band. |
4361 | | * @param nOverviewCount the number of downsampled bands being generated. |
4362 | | * @param pahOvrBands the list of downsampled bands to be generated. |
4363 | | * @param pszResampling Resampling algorithm (e.g. "AVERAGE"). |
4364 | | * @param pfnProgress progress report function. |
4365 | | * @param pProgressData progress function callback data. |
4366 | | * @return CE_None on success or CE_Failure on failure. |
4367 | | */ |
4368 | | CPLErr GDALRegenerateOverviews(GDALRasterBandH hSrcBand, int nOverviewCount, |
4369 | | GDALRasterBandH *pahOvrBands, |
4370 | | const char *pszResampling, |
4371 | | GDALProgressFunc pfnProgress, |
4372 | | void *pProgressData) |
4373 | | |
4374 | 0 | { |
4375 | 0 | return GDALRegenerateOverviewsEx(hSrcBand, nOverviewCount, pahOvrBands, |
4376 | 0 | pszResampling, pfnProgress, pProgressData, |
4377 | 0 | nullptr); |
4378 | 0 | } |
4379 | | |
4380 | | /************************************************************************/ |
4381 | | /* GDALRegenerateOverviewsEx() */ |
4382 | | /************************************************************************/ |
4383 | | |
// Multiplier presumably used to turn a kernel radius into the matching
// diameter (diameter = 2 * radius) -- confirm against its users.
constexpr int RADIUS_TO_DIAMETER{2};
4385 | | |
4386 | | /** |
4387 | | * \brief Generate downsampled overviews. |
4388 | | * |
4389 | | * This function will generate one or more overview images from a base image |
4390 | | * using the requested downsampling algorithm. Its primary use is for |
4391 | | * generating overviews via GDALDataset::BuildOverviews(), but it can also be |
4392 | | * used to generate downsampled images in one file from another outside the |
4393 | | * overview architecture. |
4394 | | * |
4395 | | * The output bands need to exist in advance. |
4396 | | * |
4397 | | * The full set of resampling algorithms is documented in |
4398 | | * GDALDataset::BuildOverviews(). |
4399 | | * |
4400 | | * This function will honour properly NODATA_VALUES tuples (special dataset |
4401 | | * metadata) so that only a given RGB triplet (in case of a RGB image) will be |
4402 | | * considered as the nodata value and not each value of the triplet |
4403 | | * independently per band. |
4404 | | * |
4405 | | * Starting with GDAL 3.2, the GDAL_NUM_THREADS configuration option can be set |
4406 | | * to "ALL_CPUS" or an integer value to specify the number of threads to use for |
4407 | | * overview computation. |
4408 | | * |
4409 | | * @param hSrcBand the source (base level) band. |
4410 | | * @param nOverviewCount the number of downsampled bands being generated. |
4411 | | * @param pahOvrBands the list of downsampled bands to be generated. |
4412 | | * @param pszResampling Resampling algorithm (e.g. "AVERAGE"). |
4413 | | * @param pfnProgress progress report function. |
4414 | | * @param pProgressData progress function callback data. |
4415 | | * @param papszOptions NULL terminated list of options as key=value pairs, or |
4416 | | * NULL |
4417 | | * @return CE_None on success or CE_Failure on failure. |
4418 | | * @since GDAL 3.6 |
4419 | | */ |
4420 | | CPLErr GDALRegenerateOverviewsEx(GDALRasterBandH hSrcBand, int nOverviewCount,
4421 | | GDALRasterBandH *pahOvrBands,
4422 | | const char *pszResampling,
4423 | | GDALProgressFunc pfnProgress,
4424 | | void *pProgressData, CSLConstList papszOptions)
4425 | | 
4426 | 0 | {
4427 | 0 | GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
4428 | 0 | GDALRasterBand **papoOvrBands =
4429 | 0 | reinterpret_cast<GDALRasterBand **>(pahOvrBands);
4430 | |

4431 | 0 | if (pfnProgress == nullptr)
4432 | 0 | pfnProgress = GDALDummyProgress;
4433 | |

4434 | 0 | if (EQUAL(pszResampling, "NONE"))
4435 | 0 | return CE_None;  // "NONE": nothing to generate
4436 | | 
4437 | 0 | int nKernelRadius = 0;
4438 | 0 | GDALResampleFunction pfnResampleFn =
4439 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius);
4440 | |

4441 | 0 | if (pfnResampleFn == nullptr)
4442 | 0 | return CE_Failure;  // GDALGetResampleFunction() returned no function
4443 | | 
4444 | | /* -------------------------------------------------------------------- */
4445 | | /* Check color tables... */
4446 | | /* -------------------------------------------------------------------- */
4447 | 0 | GDALColorTable *poColorTable = nullptr;
4448 | |

4449 | 0 | if ((STARTS_WITH_CI(pszResampling, "AVER") || EQUAL(pszResampling, "RMS") ||
4450 | 0 | EQUAL(pszResampling, "MODE") || EQUAL(pszResampling, "GAUSS")) &&
4451 | 0 | poSrcBand->GetColorInterpretation() == GCI_PaletteIndex)
4452 | 0 | {
4453 | 0 | poColorTable = poSrcBand->GetColorTable();
4454 | 0 | if (poColorTable != nullptr)
4455 | 0 | {
4456 | 0 | if (poColorTable->GetPaletteInterpretation() != GPI_RGB)
4457 | 0 | {
4458 | 0 | CPLError(CE_Warning, CPLE_AppDefined,
4459 | 0 | "Computing overviews on palette index raster bands "
4460 | 0 | "with a palette whose color interpretation is not RGB "
4461 | 0 | "will probably lead to unexpected results.");
4462 | 0 | poColorTable = nullptr;
4463 | 0 | }
4464 | 0 | else if (poColorTable->IsIdentity())
4465 | 0 | {
4466 | 0 | poColorTable = nullptr;
4467 | 0 | }
4468 | 0 | }
4469 | 0 | else
4470 | 0 | {
4471 | 0 | CPLError(CE_Warning, CPLE_AppDefined,
4472 | 0 | "Computing overviews on palette index raster bands "
4473 | 0 | "without a palette will probably lead to unexpected "
4474 | 0 | "results.");
4475 | 0 | }
4476 | 0 | }
4477 | | // Not ready yet: these kernels only get a warning, poColorTable stays null
4478 | 0 | else if ((EQUAL(pszResampling, "CUBIC") ||
4479 | 0 | EQUAL(pszResampling, "CUBICSPLINE") ||
4480 | 0 | EQUAL(pszResampling, "LANCZOS") ||
4481 | 0 | EQUAL(pszResampling, "BILINEAR")) &&
4482 | 0 | poSrcBand->GetColorInterpretation() == GCI_PaletteIndex)
4483 | 0 | {
4484 | 0 | CPLError(CE_Warning, CPLE_AppDefined,
4485 | 0 | "Computing %s overviews on palette index raster bands "
4486 | 0 | "will probably lead to unexpected results.",
4487 | 0 | pszResampling);
4488 | 0 | }
4489 | | 
4490 | | // If we have a nodata mask and we are doing something more complicated
4491 | | // than nearest neighbour resampling, we have to fetch the nodata mask.
4492 | |

4493 | 0 | GDALRasterBand *poMaskBand = nullptr;
4494 | 0 | bool bUseNoDataMask = false;
4495 | 0 | bool bCanUseCascaded = true;
4496 | |

4497 | 0 | if (!STARTS_WITH_CI(pszResampling, "NEAR"))
4498 | 0 | {
4499 | | // Special case if we are an alpha/mask band. We want it to be
4500 | | // considered as the mask band to avoid alpha=0 to be taken into account
4501 | | // in average computation.
4502 | 0 | if (poSrcBand->IsMaskBand())
4503 | 0 | {
4504 | 0 | poMaskBand = poSrcBand;
4505 | 0 | bUseNoDataMask = true;
4506 | 0 | }
4507 | 0 | else
4508 | 0 | {
4509 | 0 | poMaskBand = poSrcBand->GetMaskBand();
4510 | 0 | const int nMaskFlags = poSrcBand->GetMaskFlags();
4511 | 0 | bCanUseCascaded =
4512 | 0 | (nMaskFlags == GMF_NODATA || nMaskFlags == GMF_ALL_VALID);
4513 | 0 | bUseNoDataMask = (nMaskFlags & GMF_ALL_VALID) == 0;  // no mask read when all pixels are flagged valid
4514 | 0 | }
4515 | 0 | }
4516 | | 
4517 | | /* -------------------------------------------------------------------- */
4518 | | /* If we are operating on multiple overviews, and using */
4519 | | /* averaging, lets do them in cascading order to reduce the */
4520 | | /* amount of computation. */
4521 | | /* -------------------------------------------------------------------- */
4522 | | 
4523 | | // In case the mask may be computed from another band of the dataset,
4524 | | // we can't use cascaded generation, as the computation of the overviews
4525 | | // of the band used for the mask band may not have yet occurred (#3033).
4526 | 0 | if ((STARTS_WITH_CI(pszResampling, "AVER") ||
4527 | 0 | EQUAL(pszResampling, "GAUSS") || EQUAL(pszResampling, "RMS") ||
4528 | 0 | EQUAL(pszResampling, "CUBIC") || EQUAL(pszResampling, "CUBICSPLINE") ||
4529 | 0 | EQUAL(pszResampling, "LANCZOS") || EQUAL(pszResampling, "BILINEAR") ||
4530 | 0 | EQUAL(pszResampling, "MODE")) &&
4531 | 0 | nOverviewCount > 1 && bCanUseCascaded)
4532 | 0 | return GDALRegenerateCascadingOverviews(
4533 | 0 | poSrcBand, nOverviewCount, papoOvrBands, pszResampling, pfnProgress,
4534 | 0 | pProgressData, papszOptions);
4535 | | 
4536 | | /* -------------------------------------------------------------------- */
4537 | | /* Setup one horizontal swath to read from the raw buffer. */
4538 | | /* -------------------------------------------------------------------- */
4539 | 0 | int nFRXBlockSize = 0;
4540 | 0 | int nFRYBlockSize = 0;
4541 | 0 | poSrcBand->GetBlockSize(&nFRXBlockSize, &nFRYBlockSize);
4542 | |

4543 | 0 | const GDALDataType eSrcDataType = poSrcBand->GetRasterDataType();
4544 | 0 | const bool bUseGenericResampleFn = STARTS_WITH_CI(pszResampling, "NEAR") ||
4545 | 0 | EQUAL(pszResampling, "MODE") ||
4546 | 0 | !GDALDataTypeIsComplex(eSrcDataType);
4547 | 0 | const GDALDataType eWrkDataType =
4548 | 0 | bUseGenericResampleFn
4549 | 0 | ? GDALGetOvrWorkDataType(pszResampling, eSrcDataType)
4550 | 0 | : GDT_CFloat32;  // complex path: chunks are handed to GDALResampleChunkC32R()
4551 | |

4552 | 0 | const int nWidth = poSrcBand->GetXSize();
4553 | 0 | const int nHeight = poSrcBand->GetYSize();
4554 | |

4555 | 0 | int nMaxOvrFactor = 1;
4556 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; ++iOverview)
4557 | 0 | {
4558 | 0 | const int nDstWidth = papoOvrBands[iOverview]->GetXSize();
4559 | 0 | const int nDstHeight = papoOvrBands[iOverview]->GetYSize();
4560 | 0 | nMaxOvrFactor = std::max(
4561 | 0 | nMaxOvrFactor,
4562 | 0 | static_cast<int>(static_cast<double>(nWidth) / nDstWidth + 0.5));
4563 | 0 | nMaxOvrFactor = std::max(
4564 | 0 | nMaxOvrFactor,
4565 | 0 | static_cast<int>(static_cast<double>(nHeight) / nDstHeight + 0.5));
4566 | 0 | }
4567 | |

4568 | 0 | int nFullResYChunk = nFRYBlockSize;
4569 | 0 | int nMaxChunkYSizeQueried = 0;
4570 | |

4571 | 0 | const auto UpdateChunkHeightAndGetChunkSize =
4572 | 0 | [&nFullResYChunk, &nMaxChunkYSizeQueried, nKernelRadius, nMaxOvrFactor,
4573 | 0 | eWrkDataType, nWidth]()
4574 | 0 | {
4575 | | // Make sure that round(nChunkYOff / nMaxOvrFactor) < round((nChunkYOff
4576 | | // + nFullResYChunk) / nMaxOvrFactor)
4577 | 0 | if (nMaxOvrFactor > INT_MAX / RADIUS_TO_DIAMETER)
4578 | 0 | {
4579 | 0 | return GINTBIG_MAX;  // would overflow int: report an "infinite" chunk size
4580 | 0 | }
4581 | 0 | nFullResYChunk =
4582 | 0 | std::max(nFullResYChunk, RADIUS_TO_DIAMETER * nMaxOvrFactor);
4583 | 0 | if ((nKernelRadius > 0 &&
4584 | 0 | nMaxOvrFactor > INT_MAX / (RADIUS_TO_DIAMETER * nKernelRadius)) ||
4585 | 0 | nFullResYChunk >
4586 | 0 | INT_MAX - RADIUS_TO_DIAMETER * nKernelRadius * nMaxOvrFactor)
4587 | 0 | {
4588 | 0 | return GINTBIG_MAX;
4589 | 0 | }
4590 | 0 | nMaxChunkYSizeQueried =
4591 | 0 | nFullResYChunk + RADIUS_TO_DIAMETER * nKernelRadius * nMaxOvrFactor;
4592 | 0 | if (GDALGetDataTypeSizeBytes(eWrkDataType) >
4593 | 0 | std::numeric_limits<int64_t>::max() /
4594 | 0 | (static_cast<int64_t>(nMaxChunkYSizeQueried) * nWidth))
4595 | 0 | {
4596 | 0 | return GINTBIG_MAX;
4597 | 0 | }
4598 | 0 | return static_cast<GIntBig>(GDALGetDataTypeSizeBytes(eWrkDataType)) *
4599 | 0 | nMaxChunkYSizeQueried * nWidth;
4600 | 0 | };
4601 | |

4602 | 0 | const char *pszChunkYSize =
4603 | 0 | CPLGetConfigOption("GDAL_OVR_CHUNKYSIZE", nullptr);
4604 | 0 | #ifndef __COVERITY__
4605 | | // Only configurable for debug / testing
4606 | 0 | if (pszChunkYSize)
4607 | 0 | {
4608 | 0 | nFullResYChunk = atoi(pszChunkYSize);
4609 | 0 | }
4610 | 0 | #endif
4611 | | 
4612 | | // Only configurable for debug / testing
4613 | 0 | const int nChunkMaxSize =
4614 | 0 | atoi(CPLGetConfigOption("GDAL_OVR_CHUNK_MAX_SIZE", "10485760"));
4615 | |

4616 | 0 | auto nChunkSize = UpdateChunkHeightAndGetChunkSize();
4617 | 0 | if (nChunkSize > nChunkMaxSize)
4618 | 0 | {
4619 | 0 | if (poColorTable == nullptr && nFRXBlockSize < nWidth &&
4620 | 0 | !GDALDataTypeIsComplex(eSrcDataType) &&
4621 | 0 | (!STARTS_WITH_CI(pszResampling, "AVER") ||
4622 | 0 | EQUAL(pszResampling, "AVERAGE")))
4623 | 0 | {
4624 | | // If this is tiled, then use GDALRegenerateOverviewsMultiBand()
4625 | | // which uses a block based strategy, which is much less memory
4626 | | // hungry.
4627 | 0 | return GDALRegenerateOverviewsMultiBand(
4628 | 0 | 1, &poSrcBand, nOverviewCount, &papoOvrBands, pszResampling,
4629 | 0 | pfnProgress, pProgressData, papszOptions);
4630 | 0 | }
4631 | 0 | else if (nOverviewCount > 1 && STARTS_WITH_CI(pszResampling, "NEAR"))
4632 | 0 | {
4633 | 0 | return GDALRegenerateCascadingOverviews(
4634 | 0 | poSrcBand, nOverviewCount, papoOvrBands, pszResampling,
4635 | 0 | pfnProgress, pProgressData, papszOptions);
4636 | 0 | }
4637 | 0 | }
4638 | 0 | else if (pszChunkYSize == nullptr)
4639 | 0 | {
4640 | | // Try to get as close as possible to nChunkMaxSize
4641 | 0 | while (nChunkSize < nChunkMaxSize / 2)
4642 | 0 | {
4643 | 0 | nFullResYChunk *= 2;
4644 | 0 | nChunkSize = UpdateChunkHeightAndGetChunkSize();
4645 | 0 | }
4646 | 0 | }
4647 | | 
4648 | 0 | int nHasNoData = 0;
4649 | 0 | const double dfNoDataValue = poSrcBand->GetNoDataValue(&nHasNoData);
4650 | 0 | const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
4651 | 0 | const bool bPropagateNoData =
4652 | 0 | CPLTestBool(CPLGetConfigOption("GDAL_OVR_PROPAGATE_NODATA", "NO"));
4653 | | 
4654 | | // Structure describing a resampling job
4655 | 0 | struct OvrJob
4656 | 0 | {
4657 | | // Buffers to free when job is finished
4658 | 0 | std::shared_ptr<PointerHolder> oSrcMaskBufferHolder{};
4659 | 0 | std::shared_ptr<PointerHolder> oSrcBufferHolder{};
4660 | 0 | std::unique_ptr<PointerHolder> oDstBufferHolder{};
4661 | |

4662 | 0 | GDALRasterBand *poDstBand = nullptr;
4663 | | 
4664 | | // Input parameters of pfnResampleFn
4665 | 0 | GDALResampleFunction pfnResampleFn = nullptr;
4666 | 0 | int nSrcWidth = 0;
4667 | 0 | int nSrcHeight = 0;
4668 | 0 | int nDstWidth = 0;
4669 | 0 | GDALOverviewResampleArgs args{};
4670 | 0 | const void *pChunk = nullptr;
4671 | 0 | bool bUseGenericResampleFn = false;
4672 | | 
4673 | | // Output values of resampling function
4674 | 0 | CPLErr eErr = CE_Failure;
4675 | 0 | void *pDstBuffer = nullptr;
4676 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown;
4677 | |

4678 | 0 | void SetSrcMaskBufferHolder(
4679 | 0 | const std::shared_ptr<PointerHolder> &oSrcMaskBufferHolderIn)
4680 | 0 | {
4681 | 0 | oSrcMaskBufferHolder = oSrcMaskBufferHolderIn;
4682 | 0 | }
4683 | |

4684 | 0 | void SetSrcBufferHolder(
4685 | 0 | const std::shared_ptr<PointerHolder> &oSrcBufferHolderIn)
4686 | 0 | {
4687 | 0 | oSrcBufferHolder = oSrcBufferHolderIn;
4688 | 0 | }
4689 | |

4690 | 0 | void NotifyFinished()
4691 | 0 | {
4692 | 0 | std::lock_guard guard(mutex);
4693 | 0 | bFinished = true;
4694 | 0 | cv.notify_one();
4695 | 0 | }
4696 | |

4697 | 0 | bool IsFinished()
4698 | 0 | {
4699 | 0 | std::lock_guard guard(mutex);
4700 | 0 | return bFinished;
4701 | 0 | }
4702 | |

4703 | 0 | void WaitFinished()
4704 | 0 | {
4705 | 0 | std::unique_lock oGuard(mutex);
4706 | 0 | while (!bFinished)
4707 | 0 | {
4708 | 0 | cv.wait(oGuard);
4709 | 0 | }
4710 | 0 | }
4711 | |

4712 | 0 | private:
4713 | | // Synchronization: bFinished is only read/written with mutex held
4714 | 0 | bool bFinished = false;
4715 | 0 | std::mutex mutex{};
4716 | 0 | std::condition_variable cv{};
4717 | 0 | };
4718 | | 
4719 | | // Thread function to resample (also called directly when there is no job queue)
4720 | 0 | const auto JobResampleFunc = [](void *pData)
4721 | 0 | {
4722 | 0 | OvrJob *poJob = static_cast<OvrJob *>(pData);
4723 | |

4724 | 0 | if (poJob->bUseGenericResampleFn)
4725 | 0 | {
4726 | 0 | poJob->eErr = poJob->pfnResampleFn(poJob->args, poJob->pChunk,
4727 | 0 | &(poJob->pDstBuffer),
4728 | 0 | &(poJob->eDstBufferDataType));
4729 | 0 | }
4730 | 0 | else
4731 | 0 | {
4732 | 0 | poJob->eErr = GDALResampleChunkC32R(
4733 | 0 | poJob->nSrcWidth, poJob->nSrcHeight,
4734 | 0 | static_cast<const float *>(poJob->pChunk),
4735 | 0 | poJob->args.nChunkYOff, poJob->args.nChunkYSize,
4736 | 0 | poJob->args.nDstYOff, poJob->args.nDstYOff2,
4737 | 0 | poJob->args.nOvrXSize, poJob->args.nOvrYSize,
4738 | 0 | &(poJob->pDstBuffer), &(poJob->eDstBufferDataType),
4739 | 0 | poJob->args.pszResampling);
4740 | 0 | }
4741 | |

4742 | 0 | poJob->oDstBufferHolder =
4743 | 0 | std::make_unique<PointerHolder>(poJob->pDstBuffer);
4744 | |

4745 | 0 | poJob->NotifyFinished();
4746 | 0 | };
4747 | | 
4748 | | // Function to write resampled data to target band
4749 | 0 | const auto WriteJobData = [](const OvrJob *poJob)
4750 | 0 | {
4751 | 0 | return poJob->poDstBand->RasterIO(
4752 | 0 | GF_Write, 0, poJob->args.nDstYOff, poJob->nDstWidth,
4753 | 0 | poJob->args.nDstYOff2 - poJob->args.nDstYOff, poJob->pDstBuffer,
4754 | 0 | poJob->nDstWidth, poJob->args.nDstYOff2 - poJob->args.nDstYOff,
4755 | 0 | poJob->eDstBufferDataType, 0, 0, nullptr);
4756 | 0 | };
4757 | | 
4758 | | // Wait for completion of oldest job and serialize it
4759 | 0 | const auto WaitAndFinalizeOldestJob =
4760 | 0 | [WriteJobData](std::list<std::unique_ptr<OvrJob>> &jobList)
4761 | 0 | {
4762 | 0 | auto poOldestJob = jobList.front().get();
4763 | 0 | poOldestJob->WaitFinished();
4764 | 0 | CPLErr l_eErr = poOldestJob->eErr;
4765 | 0 | if (l_eErr == CE_None)
4766 | 0 | {
4767 | 0 | l_eErr = WriteJobData(poOldestJob);
4768 | 0 | }
4769 | |

4770 | 0 | jobList.pop_front();
4771 | 0 | return l_eErr;
4772 | 0 | };
4773 | | 
4774 | | // Queue of jobs
4775 | 0 | std::list<std::unique_ptr<OvrJob>> jobList;
4776 | |

4777 | 0 | GByte *pabyChunkNodataMask = nullptr;
4778 | 0 | void *pChunk = nullptr;
4779 | |

4780 | 0 | const char *pszThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
4781 | 0 | const int nThreads = std::max(1, std::min(128, EQUAL(pszThreads, "ALL_CPUS")
4782 | 0 | ? CPLGetNumCPUs()
4783 | 0 | : atoi(pszThreads)));
4784 | 0 | auto poThreadPool =
4785 | 0 | nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
4786 | 0 | auto poJobQueue = poThreadPool ? poThreadPool->CreateJobQueue()
4787 | 0 | : std::unique_ptr<CPLJobQueue>(nullptr);
4788 | | 
4789 | | /* -------------------------------------------------------------------- */
4790 | | /* Loop over image operating on chunks. */
4791 | | /* -------------------------------------------------------------------- */
4792 | 0 | int nChunkYOff = 0;
4793 | 0 | CPLErr eErr = CE_None;
4794 | |

4795 | 0 | for (nChunkYOff = 0; nChunkYOff < nHeight && eErr == CE_None;
4796 | 0 | nChunkYOff += nFullResYChunk)
4797 | 0 | {
4798 | 0 | if (!pfnProgress(nChunkYOff / static_cast<double>(nHeight), nullptr,
4799 | 0 | pProgressData))
4800 | 0 | {
4801 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
4802 | 0 | eErr = CE_Failure;  // RasterIO reads and per-overview jobs below are gated on eErr
4803 | 0 | }
4804 | |

4805 | 0 | if (nFullResYChunk + nChunkYOff > nHeight)
4806 | 0 | nFullResYChunk = nHeight - nChunkYOff;
4807 | |

4808 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nMaxOvrFactor;
4809 | 0 | int nChunkYSizeQueried =
4810 | 0 | nFullResYChunk + 2 * nKernelRadius * nMaxOvrFactor;
4811 | 0 | if (nChunkYOffQueried < 0)
4812 | 0 | {
4813 | 0 | nChunkYSizeQueried += nChunkYOffQueried;
4814 | 0 | nChunkYOffQueried = 0;
4815 | 0 | }
4816 | 0 | if (nChunkYOffQueried + nChunkYSizeQueried > nHeight)
4817 | 0 | nChunkYSizeQueried = nHeight - nChunkYOffQueried;
4818 | | 
4819 | | // Avoid accumulating too many tasks and exhausting RAM
4820 | | // Try to complete already finished jobs
4821 | 0 | while (eErr == CE_None && !jobList.empty())
4822 | 0 | {
4823 | 0 | auto poOldestJob = jobList.front().get();
4824 | 0 | if (!poOldestJob->IsFinished())
4825 | 0 | break;
4826 | 0 | eErr = poOldestJob->eErr;
4827 | 0 | if (eErr == CE_None)
4828 | 0 | {
4829 | 0 | eErr = WriteJobData(poOldestJob);
4830 | 0 | }
4831 | |

4832 | 0 | jobList.pop_front();
4833 | 0 | }
4834 | | 
4835 | | // And in case we have saturated the number of threads,
4836 | | // wait for completion of tasks to go below the threshold.
4837 | 0 | while (eErr == CE_None &&
4838 | 0 | jobList.size() >= static_cast<size_t>(nThreads))
4839 | 0 | {
4840 | 0 | eErr = WaitAndFinalizeOldestJob(jobList);
4841 | 0 | }
4842 | | 
4843 | | // (Re)allocate buffers if needed
4844 | 0 | if (pChunk == nullptr)
4845 | 0 | {
4846 | 0 | pChunk = VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
4847 | 0 | nMaxChunkYSizeQueried, nWidth);
4848 | 0 | }
4849 | 0 | if (bUseNoDataMask && pabyChunkNodataMask == nullptr)
4850 | 0 | {
4851 | 0 | pabyChunkNodataMask = static_cast<GByte *>(
4852 | 0 | VSI_MALLOC2_VERBOSE(nMaxChunkYSizeQueried, nWidth));
4853 | 0 | }
4854 | |

4855 | 0 | if (pChunk == nullptr ||
4856 | 0 | (bUseNoDataMask && pabyChunkNodataMask == nullptr))
4857 | 0 | {
4858 | 0 | CPLFree(pChunk);
4859 | 0 | CPLFree(pabyChunkNodataMask);
4860 | 0 | return CE_Failure;
4861 | 0 | }
4862 | | 
4863 | | // Read chunk.
4864 | 0 | if (eErr == CE_None)
4865 | 0 | eErr = poSrcBand->RasterIO(GF_Read, 0, nChunkYOffQueried, nWidth,
4866 | 0 | nChunkYSizeQueried, pChunk, nWidth,
4867 | 0 | nChunkYSizeQueried, eWrkDataType, 0, 0,
4868 | 0 | nullptr);
4869 | 0 | if (eErr == CE_None && bUseNoDataMask)
4870 | 0 | eErr = poMaskBand->RasterIO(GF_Read, 0, nChunkYOffQueried, nWidth,
4871 | 0 | nChunkYSizeQueried, pabyChunkNodataMask,
4872 | 0 | nWidth, nChunkYSizeQueried, GDT_Byte, 0,
4873 | 0 | 0, nullptr);
4874 | | 
4875 | | // Special case to promote 1bit data to 8bit 0/255 values.
4876 | 0 | if (EQUAL(pszResampling, "AVERAGE_BIT2GRAYSCALE"))
4877 | 0 | {
4878 | 0 | if (eWrkDataType == GDT_Float32)
4879 | 0 | {
4880 | 0 | float *pafChunk = static_cast<float *>(pChunk);
4881 | 0 | for (GPtrDiff_t i = 0;
4882 | 0 | i < static_cast<GPtrDiff_t>(nChunkYSizeQueried) * nWidth;
4883 | 0 | i++)
4884 | 0 | {
4885 | 0 | if (pafChunk[i] == 1.0)
4886 | 0 | pafChunk[i] = 255.0;
4887 | 0 | }
4888 | 0 | }
4889 | 0 | else if (eWrkDataType == GDT_Byte)
4890 | 0 | {
4891 | 0 | GByte *pabyChunk = static_cast<GByte *>(pChunk);
4892 | 0 | for (GPtrDiff_t i = 0;
4893 | 0 | i < static_cast<GPtrDiff_t>(nChunkYSizeQueried) * nWidth;
4894 | 0 | i++)
4895 | 0 | {
4896 | 0 | if (pabyChunk[i] == 1)
4897 | 0 | pabyChunk[i] = 255;
4898 | 0 | }
4899 | 0 | }
4900 | 0 | else if (eWrkDataType == GDT_UInt16)
4901 | 0 | {
4902 | 0 | GUInt16 *pasChunk = static_cast<GUInt16 *>(pChunk);
4903 | 0 | for (GPtrDiff_t i = 0;
4904 | 0 | i < static_cast<GPtrDiff_t>(nChunkYSizeQueried) * nWidth;
4905 | 0 | i++)
4906 | 0 | {
4907 | 0 | if (pasChunk[i] == 1)
4908 | 0 | pasChunk[i] = 255;
4909 | 0 | }
4910 | 0 | }
4911 | 0 | else if (eWrkDataType == GDT_Float64)
4912 | 0 | {
4913 | 0 | double *padfChunk = static_cast<double *>(pChunk);
4914 | 0 | for (GPtrDiff_t i = 0;
4915 | 0 | i < static_cast<GPtrDiff_t>(nChunkYSizeQueried) * nWidth;
4916 | 0 | i++)
4917 | 0 | {
4918 | 0 | if (padfChunk[i] == 1.0)
4919 | 0 | padfChunk[i] = 255.0;
4920 | 0 | }
4921 | 0 | }
4922 | 0 | else
4923 | 0 | {
4924 | 0 | CPLAssert(false);
4925 | 0 | }
4926 | 0 | }
4927 | 0 | else if (EQUAL(pszResampling, "AVERAGE_BIT2GRAYSCALE_MINISWHITE"))
4928 | 0 | {
4929 | 0 | if (eWrkDataType == GDT_Float32)
4930 | 0 | {
4931 | 0 | float *pafChunk = static_cast<float *>(pChunk);
4932 | 0 | for (GPtrDiff_t i = 0;
4933 | 0 | i < static_cast<GPtrDiff_t>(nChunkYSizeQueried) * nWidth;
4934 | 0 | i++)
4935 | 0 | {
4936 | 0 | if (pafChunk[i] == 1.0)
4937 | 0 | pafChunk[i] = 0.0;
4938 | 0 | else if (pafChunk[i] == 0.0)
4939 | 0 | pafChunk[i] = 255.0;
4940 | 0 | }
4941 | 0 | }
4942 | 0 | else if (eWrkDataType == GDT_Byte)
4943 | 0 | {
4944 | 0 | GByte *pabyChunk = static_cast<GByte *>(pChunk);
4945 | 0 | for (GPtrDiff_t i = 0;
4946 | 0 | i < static_cast<GPtrDiff_t>(nChunkYSizeQueried) * nWidth;
4947 | 0 | i++)
4948 | 0 | {
4949 | 0 | if (pabyChunk[i] == 1)
4950 | 0 | pabyChunk[i] = 0;
4951 | 0 | else if (pabyChunk[i] == 0)
4952 | 0 | pabyChunk[i] = 255;
4953 | 0 | }
4954 | 0 | }
4955 | 0 | else if (eWrkDataType == GDT_UInt16)
4956 | 0 | {
4957 | 0 | GUInt16 *pasChunk = static_cast<GUInt16 *>(pChunk);
4958 | 0 | for (GPtrDiff_t i = 0;
4959 | 0 | i < static_cast<GPtrDiff_t>(nChunkYSizeQueried) * nWidth;
4960 | 0 | i++)
4961 | 0 | {
4962 | 0 | if (pasChunk[i] == 1)
4963 | 0 | pasChunk[i] = 0;
4964 | 0 | else if (pasChunk[i] == 0)
4965 | 0 | pasChunk[i] = 255;
4966 | 0 | }
4967 | 0 | }
4968 | 0 | else if (eWrkDataType == GDT_Float64)
4969 | 0 | {
4970 | 0 | double *padfChunk = static_cast<double *>(pChunk);
4971 | 0 | for (GPtrDiff_t i = 0;
4972 | 0 | i < static_cast<GPtrDiff_t>(nChunkYSizeQueried) * nWidth;
4973 | 0 | i++)
4974 | 0 | {
4975 | 0 | if (padfChunk[i] == 1.0)
4976 | 0 | padfChunk[i] = 0.0;
4977 | 0 | else if (padfChunk[i] == 0.0)
4978 | 0 | padfChunk[i] = 255.0;
4979 | 0 | }
4980 | 0 | }
4981 | 0 | else
4982 | 0 | {
4983 | 0 | CPLAssert(false);
4984 | 0 | }
4985 | 0 | }
4986 | | // With a job queue the holders receive the chunk buffers (presumably freeing them once the last job drops its reference - confirm against PointerHolder); otherwise they wrap nullptr.
4987 | 0 | auto oSrcBufferHolder =
4988 | 0 | std::make_shared<PointerHolder>(poJobQueue ? pChunk : nullptr);
4989 | 0 | auto oSrcMaskBufferHolder = std::make_shared<PointerHolder>(
4990 | 0 | poJobQueue ? pabyChunkNodataMask : nullptr);
4991 | |

4992 | 0 | for (int iOverview = 0; iOverview < nOverviewCount && eErr == CE_None;
4993 | 0 | ++iOverview)
4994 | 0 | {
4995 | 0 | GDALRasterBand *poDstBand = papoOvrBands[iOverview];
4996 | 0 | const int nDstWidth = poDstBand->GetXSize();
4997 | 0 | const int nDstHeight = poDstBand->GetYSize();
4998 | |

4999 | 0 | const double dfXRatioDstToSrc =
5000 | 0 | static_cast<double>(nWidth) / nDstWidth;
5001 | 0 | const double dfYRatioDstToSrc =
5002 | 0 | static_cast<double>(nHeight) / nDstHeight;
5003 | | 
5004 | | /* --------------------------------------------------------------------
5005 | | */
5006 | | /* Figure out the line to start writing to, and the first line
5007 | | */
5008 | | /* to not write to. In theory this approach should ensure that
5009 | | */
5010 | | /* every output line will be written if all input chunks are */
5011 | | /* processed. */
5012 | | /* --------------------------------------------------------------------
5013 | | */
5014 | 0 | int nDstYOff =
5015 | 0 | static_cast<int>(0.5 + nChunkYOff / dfYRatioDstToSrc);
5016 | 0 | if (nDstYOff == nDstHeight)
5017 | 0 | continue;
5018 | 0 | int nDstYOff2 = static_cast<int>(
5019 | 0 | 0.5 + (nChunkYOff + nFullResYChunk) / dfYRatioDstToSrc);
5020 | |

5021 | 0 | if (nChunkYOff + nFullResYChunk == nHeight)
5022 | 0 | nDstYOff2 = nDstHeight;
5023 | | #if DEBUG_VERBOSE
5024 | | CPLDebug("GDAL",
5025 | | "Reading (%dx%d -> %dx%d) for output (%dx%d -> %dx%d)", 0,
5026 | | nChunkYOffQueried, nWidth, nChunkYSizeQueried, 0, nDstYOff,
5027 | | nDstWidth, nDstYOff2 - nDstYOff);
5028 | | #endif
5029 | |

5030 | 0 | auto poJob = std::make_unique<OvrJob>();
5031 | 0 | poJob->pfnResampleFn = pfnResampleFn;
5032 | 0 | poJob->bUseGenericResampleFn = bUseGenericResampleFn;
5033 | 0 | poJob->args.eOvrDataType = poDstBand->GetRasterDataType();
5034 | 0 | poJob->args.nOvrXSize = poDstBand->GetXSize();
5035 | 0 | poJob->args.nOvrYSize = poDstBand->GetYSize();
5036 | 0 | const char *pszNBITS =
5037 | 0 | poDstBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
5038 | 0 | poJob->args.nOvrNBITS = pszNBITS ? atoi(pszNBITS) : 0;
5039 | 0 | poJob->args.dfXRatioDstToSrc = dfXRatioDstToSrc;
5040 | 0 | poJob->args.dfYRatioDstToSrc = dfYRatioDstToSrc;
5041 | 0 | poJob->args.eWrkDataType = eWrkDataType;
5042 | 0 | poJob->pChunk = pChunk;
5043 | 0 | poJob->args.pabyChunkNodataMask = pabyChunkNodataMask;
5044 | 0 | poJob->nSrcWidth = nWidth;
5045 | 0 | poJob->nSrcHeight = nHeight;
5046 | 0 | poJob->args.nChunkXOff = 0;
5047 | 0 | poJob->args.nChunkXSize = nWidth;
5048 | 0 | poJob->args.nChunkYOff = nChunkYOffQueried;
5049 | 0 | poJob->args.nChunkYSize = nChunkYSizeQueried;
5050 | 0 | poJob->nDstWidth = nDstWidth;
5051 | 0 | poJob->args.nDstXOff = 0;
5052 | 0 | poJob->args.nDstXOff2 = nDstWidth;
5053 | 0 | poJob->args.nDstYOff = nDstYOff;
5054 | 0 | poJob->args.nDstYOff2 = nDstYOff2;
5055 | 0 | poJob->poDstBand = poDstBand;
5056 | 0 | poJob->args.pszResampling = pszResampling;
5057 | 0 | poJob->args.bHasNoData = bHasNoData;
5058 | 0 | poJob->args.dfNoDataValue = dfNoDataValue;
5059 | 0 | poJob->args.poColorTable = poColorTable;
5060 | 0 | poJob->args.eSrcDataType = eSrcDataType;
5061 | 0 | poJob->args.bPropagateNoData = bPropagateNoData;
5062 | |

5063 | 0 | if (poJobQueue)
5064 | 0 | {
5065 | 0 | poJob->SetSrcMaskBufferHolder(oSrcMaskBufferHolder);
5066 | 0 | poJob->SetSrcBufferHolder(oSrcBufferHolder);
5067 | 0 | poJobQueue->SubmitJob(JobResampleFunc, poJob.get());
5068 | 0 | jobList.emplace_back(std::move(poJob));
5069 | 0 | }
5070 | 0 | else
5071 | 0 | {
5072 | 0 | JobResampleFunc(poJob.get());  // no job queue: resample and write synchronously
5073 | 0 | eErr = poJob->eErr;
5074 | 0 | if (eErr == CE_None)
5075 | 0 | {
5076 | 0 | eErr = WriteJobData(poJob.get());
5077 | 0 | }
5078 | 0 | }
5079 | 0 | }
5080 | |

5081 | 0 | if (poJobQueue)
5082 | 0 | {
5083 | 0 | pChunk = nullptr;  // buffers were handed to the holders; allocate fresh ones next iteration
5084 | 0 | pabyChunkNodataMask = nullptr;
5085 | 0 | }
5086 | 0 | }
5087 | | 
5088 | 0 | VSIFree(pChunk);
5089 | 0 | VSIFree(pabyChunkNodataMask);
5090 | | 
5091 | | // Wait for all pending jobs to complete
5092 | 0 | while (!jobList.empty())
5093 | 0 | {
5094 | 0 | const auto l_eErr = WaitAndFinalizeOldestJob(jobList);
5095 | 0 | if (l_eErr != CE_None && eErr == CE_None)
5096 | 0 | eErr = l_eErr;
5097 | 0 | }
5098 | | 
5099 | | /* -------------------------------------------------------------------- */
5100 | | /* Renormalize overview mean / stddev if needed. */
5101 | | /* -------------------------------------------------------------------- */
5102 | 0 | if (eErr == CE_None && EQUAL(pszResampling, "AVERAGE_MP"))
5103 | 0 | {
5104 | 0 | GDALOverviewMagnitudeCorrection(
5105 | 0 | poSrcBand, nOverviewCount,
5106 | 0 | reinterpret_cast<GDALRasterBandH *>(papoOvrBands),
5107 | 0 | GDALDummyProgress, nullptr);
5108 | 0 | }
5109 | | 
5110 | | /* -------------------------------------------------------------------- */
5111 | | /* It can be important to flush out data to overviews. */
5112 | | /* -------------------------------------------------------------------- */
5113 | 0 | for (int iOverview = 0; eErr == CE_None && iOverview < nOverviewCount;
5114 | 0 | ++iOverview)
5115 | 0 | {
5116 | 0 | eErr = papoOvrBands[iOverview]->FlushCache(false);
5117 | 0 | }
5118 | |

5119 | 0 | if (eErr == CE_None)
5120 | 0 | pfnProgress(1.0, nullptr, pProgressData);
5121 | |

5122 | 0 | return eErr;
5123 | 0 | }
5124 | | |
5125 | | /************************************************************************/ |
5126 | | /* GDALRegenerateOverviewsMultiBand() */ |
5127 | | /************************************************************************/ |
5128 | | |
5129 | | /** |
5130 | | * \brief Variant of GDALRegenerateOverviews, specially dedicated for generating |
5131 | | * compressed pixel-interleaved overviews (JPEG-IN-TIFF for example) |
5132 | | * |
5133 | | * This function will generate one or more overview images from a base |
5134 | | * image using the requested downsampling algorithm. Its primary use |
5135 | | * is for generating overviews via GDALDataset::BuildOverviews(), but it |
5136 | | * can also be used to generate downsampled images in one file from another |
5137 | | * outside the overview architecture. |
5138 | | * |
5139 | | * The output bands need to exist in advance and share the same characteristics |
5140 | | * (type, dimensions) |
5141 | | * |
5142 | | * The resampling algorithms supported for the moment are "NEAREST", "AVERAGE", |
5143 | | * "RMS", "GAUSS", "CUBIC", "CUBICSPLINE", "LANCZOS", "BILINEAR" and "MODE"
5144 | | * |
5145 | | * It does not support color tables or complex data types. |
5146 | | * |
5147 | | * The pseudo-algorithm used by the function is : |
5148 | | * for each overview |
5149 | | * iterate on lines of the source by a step of deltay |
5150 | | * iterate on columns of the source by a step of deltax |
5151 | | * read the source data of size deltax * deltay for all the bands |
5152 | | * generate the corresponding overview block for all the bands |
5153 | | * |
5154 | | * This function will honour properly NODATA_VALUES tuples (special dataset |
5155 | | * metadata) so that only a given RGB triplet (in case of a RGB image) will be |
5156 | | * considered as the nodata value and not each value of the triplet |
5157 | | * independently per band. |
5158 | | * |
5159 | | * Starting with GDAL 3.2, the GDAL_NUM_THREADS configuration option can be set |
5160 | | * to "ALL_CPUS" or an integer value to specify the number of threads to use for
5161 | | * overview computation. |
5162 | | * |
5163 | | * @param nBands the number of bands, size of papoSrcBands and size of |
5164 | | * first dimension of papapoOverviewBands |
5165 | | * @param papoSrcBands the list of source bands to downsample |
5166 | | * @param nOverviews the number of downsampled overview levels being generated. |
5167 | | * @param papapoOverviewBands bidimensional array of bands. First dimension is
5168 | | * indexed by nBands. Second dimension is indexed by |
5169 | | * nOverviews. |
5170 | | * @param pszResampling Resampling algorithm ("NEAREST", "AVERAGE", "RMS", |
5171 | | * "GAUSS", "CUBIC", "CUBICSPLINE", "LANCZOS", "BILINEAR" or "MODE").
5172 | | * @param pfnProgress progress report function. |
5173 | | * @param pProgressData progress function callback data. |
5174 | | * @param papszOptions (GDAL >= 3.6) NULL terminated list of options as |
5175 | | * key=value pairs, or NULL |
5176 | | * Starting with GDAL 3.8, the XOFF, YOFF, XSIZE and YSIZE |
5177 | | * options can be specified to express that overviews should |
5178 | | * be regenerated only in the specified subset of the source |
5179 | | * dataset. |
5180 | | * @return CE_None on success or CE_Failure on failure. |
5181 | | */ |
5182 | | |
5183 | | CPLErr GDALRegenerateOverviewsMultiBand( |
5184 | | int nBands, GDALRasterBand *const *papoSrcBands, int nOverviews, |
5185 | | GDALRasterBand *const *const *papapoOverviewBands, |
5186 | | const char *pszResampling, GDALProgressFunc pfnProgress, |
5187 | | void *pProgressData, CSLConstList papszOptions) |
5188 | 0 | { |
5189 | 0 | CPL_IGNORE_RET_VAL(papszOptions); |
5190 | |
|
5191 | 0 | if (pfnProgress == nullptr) |
5192 | 0 | pfnProgress = GDALDummyProgress; |
5193 | |
|
5194 | 0 | if (EQUAL(pszResampling, "NONE") || nBands == 0 || nOverviews == 0) |
5195 | 0 | return CE_None; |
5196 | | |
5197 | | // Sanity checks. |
5198 | 0 | if (!STARTS_WITH_CI(pszResampling, "NEAR") && |
5199 | 0 | !EQUAL(pszResampling, "RMS") && !EQUAL(pszResampling, "AVERAGE") && |
5200 | 0 | !EQUAL(pszResampling, "GAUSS") && !EQUAL(pszResampling, "CUBIC") && |
5201 | 0 | !EQUAL(pszResampling, "CUBICSPLINE") && |
5202 | 0 | !EQUAL(pszResampling, "LANCZOS") && !EQUAL(pszResampling, "BILINEAR") && |
5203 | 0 | !EQUAL(pszResampling, "MODE")) |
5204 | 0 | { |
5205 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
5206 | 0 | "GDALRegenerateOverviewsMultiBand: pszResampling='%s' " |
5207 | 0 | "not supported", |
5208 | 0 | pszResampling); |
5209 | 0 | return CE_Failure; |
5210 | 0 | } |
5211 | | |
5212 | 0 | int nKernelRadius = 0; |
5213 | 0 | GDALResampleFunction pfnResampleFn = |
5214 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius); |
5215 | 0 | if (pfnResampleFn == nullptr) |
5216 | 0 | return CE_Failure; |
5217 | | |
5218 | 0 | const int nToplevelSrcWidth = papoSrcBands[0]->GetXSize(); |
5219 | 0 | const int nToplevelSrcHeight = papoSrcBands[0]->GetYSize(); |
5220 | 0 | if (nToplevelSrcWidth <= 0 || nToplevelSrcHeight <= 0) |
5221 | 0 | return CE_None; |
5222 | 0 | GDALDataType eDataType = papoSrcBands[0]->GetRasterDataType(); |
5223 | 0 | for (int iBand = 1; iBand < nBands; ++iBand) |
5224 | 0 | { |
5225 | 0 | if (papoSrcBands[iBand]->GetXSize() != nToplevelSrcWidth || |
5226 | 0 | papoSrcBands[iBand]->GetYSize() != nToplevelSrcHeight) |
5227 | 0 | { |
5228 | 0 | CPLError( |
5229 | 0 | CE_Failure, CPLE_NotSupported, |
5230 | 0 | "GDALRegenerateOverviewsMultiBand: all the source bands must " |
5231 | 0 | "have the same dimensions"); |
5232 | 0 | return CE_Failure; |
5233 | 0 | } |
5234 | 0 | if (papoSrcBands[iBand]->GetRasterDataType() != eDataType) |
5235 | 0 | { |
5236 | 0 | CPLError( |
5237 | 0 | CE_Failure, CPLE_NotSupported, |
5238 | 0 | "GDALRegenerateOverviewsMultiBand: all the source bands must " |
5239 | 0 | "have the same data type"); |
5240 | 0 | return CE_Failure; |
5241 | 0 | } |
5242 | 0 | } |
5243 | | |
5244 | 0 | for (int iOverview = 0; iOverview < nOverviews; ++iOverview) |
5245 | 0 | { |
5246 | 0 | const auto poOvrFirstBand = papapoOverviewBands[0][iOverview]; |
5247 | 0 | const int nDstWidth = poOvrFirstBand->GetXSize(); |
5248 | 0 | const int nDstHeight = poOvrFirstBand->GetYSize(); |
5249 | 0 | for (int iBand = 1; iBand < nBands; ++iBand) |
5250 | 0 | { |
5251 | 0 | const auto poOvrBand = papapoOverviewBands[iBand][iOverview]; |
5252 | 0 | if (poOvrBand->GetXSize() != nDstWidth || |
5253 | 0 | poOvrBand->GetYSize() != nDstHeight) |
5254 | 0 | { |
5255 | 0 | CPLError( |
5256 | 0 | CE_Failure, CPLE_NotSupported, |
5257 | 0 | "GDALRegenerateOverviewsMultiBand: all the overviews bands " |
5258 | 0 | "of the same level must have the same dimensions"); |
5259 | 0 | return CE_Failure; |
5260 | 0 | } |
5261 | 0 | if (poOvrBand->GetRasterDataType() != eDataType) |
5262 | 0 | { |
5263 | 0 | CPLError( |
5264 | 0 | CE_Failure, CPLE_NotSupported, |
5265 | 0 | "GDALRegenerateOverviewsMultiBand: all the overviews bands " |
5266 | 0 | "must have the same data type as the source bands"); |
5267 | 0 | return CE_Failure; |
5268 | 0 | } |
5269 | 0 | } |
5270 | 0 | } |
5271 | | |
5272 | | // First pass to compute the total number of pixels to write. |
5273 | 0 | double dfTotalPixelCount = 0; |
5274 | 0 | const int nSrcXOff = atoi(CSLFetchNameValueDef(papszOptions, "XOFF", "0")); |
5275 | 0 | const int nSrcYOff = atoi(CSLFetchNameValueDef(papszOptions, "YOFF", "0")); |
5276 | 0 | const int nSrcXSize = atoi(CSLFetchNameValueDef( |
5277 | 0 | papszOptions, "XSIZE", CPLSPrintf("%d", nToplevelSrcWidth))); |
5278 | 0 | const int nSrcYSize = atoi(CSLFetchNameValueDef( |
5279 | 0 | papszOptions, "YSIZE", CPLSPrintf("%d", nToplevelSrcHeight))); |
5280 | 0 | for (int iOverview = 0; iOverview < nOverviews; ++iOverview) |
5281 | 0 | { |
5282 | 0 | dfTotalPixelCount += |
5283 | 0 | static_cast<double>(nSrcXSize) / nToplevelSrcWidth * |
5284 | 0 | papapoOverviewBands[0][iOverview]->GetXSize() * |
5285 | 0 | static_cast<double>(nSrcYSize) / nToplevelSrcHeight * |
5286 | 0 | papapoOverviewBands[0][iOverview]->GetYSize(); |
5287 | 0 | } |
5288 | |
|
5289 | 0 | const GDALDataType eWrkDataType = |
5290 | 0 | GDALGetOvrWorkDataType(pszResampling, eDataType); |
5291 | 0 | const int nWrkDataTypeSize = |
5292 | 0 | std::max(1, GDALGetDataTypeSizeBytes(eWrkDataType)); |
5293 | |
|
5294 | 0 | const bool bIsMask = papoSrcBands[0]->IsMaskBand(); |
5295 | | |
5296 | | // If we have a nodata mask and we are doing something more complicated |
5297 | | // than nearest neighbouring, we have to fetch the nodata mask. |
5298 | 0 | const bool bUseNoDataMask = |
5299 | 0 | !STARTS_WITH_CI(pszResampling, "NEAR") && |
5300 | 0 | (bIsMask || (papoSrcBands[0]->GetMaskFlags() & GMF_ALL_VALID) == 0); |
5301 | |
|
5302 | 0 | std::vector<bool> abHasNoData(nBands); |
5303 | 0 | std::vector<double> adfNoDataValue(nBands); |
5304 | |
|
5305 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
5306 | 0 | { |
5307 | 0 | int nHasNoData = 0; |
5308 | 0 | adfNoDataValue[iBand] = |
5309 | 0 | papoSrcBands[iBand]->GetNoDataValue(&nHasNoData); |
5310 | 0 | abHasNoData[iBand] = CPL_TO_BOOL(nHasNoData); |
5311 | 0 | } |
5312 | 0 | const bool bPropagateNoData = |
5313 | 0 | CPLTestBool(CPLGetConfigOption("GDAL_OVR_PROPAGATE_NODATA", "NO")); |
5314 | |
|
5315 | 0 | const char *pszThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1"); |
5316 | 0 | const int nThreads = std::max(1, std::min(128, EQUAL(pszThreads, "ALL_CPUS") |
5317 | 0 | ? CPLGetNumCPUs() |
5318 | 0 | : atoi(pszThreads))); |
5319 | 0 | auto poThreadPool = |
5320 | 0 | nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr; |
5321 | 0 | auto poJobQueue = poThreadPool ? poThreadPool->CreateJobQueue() |
5322 | 0 | : std::unique_ptr<CPLJobQueue>(nullptr); |
5323 | | |
5324 | | // Cap, in bytes, on the memory needed to acquire source pixels when widening destination chunks: 10 MB unless GDAL_OVR_CHUNK_MAX_SIZE is set (result clamped to at least 100). Only configurable for debug / testing |
5325 | 0 | const GIntBig nChunkMaxSize = []() -> GIntBig |
5326 | 0 | { |
5327 | 0 | const char *pszVal = |
5328 | 0 | CPLGetConfigOption("GDAL_OVR_CHUNK_MAX_SIZE", nullptr); |
5329 | 0 | if (pszVal) |
5330 | 0 | { |
5331 | 0 | GIntBig nRet = 0; |
5332 | 0 | CPLParseMemorySize(pszVal, &nRet, nullptr); |
5333 | 0 | return std::max<GIntBig>(100, nRet); |
5334 | 0 | } |
5335 | 0 | return 10 * 1024 * 1024; |
5336 | 0 | }(); |
5337 | | |
5338 | | // Source-acquisition memory threshold above which the temporary-dataset path is taken: GDAL_OVR_CHUNK_MAX_SIZE_FOR_TEMP_FILE if set (result clamped to at least 100), else a tenth of what CPLGetUsablePhysicalRAM() reports, else 100 MB. The option is only configurable for debug / testing |
5339 | 0 | const GIntBig nChunkMaxSizeForTempFile = []() -> GIntBig |
5340 | 0 | { |
5341 | 0 | const char *pszVal = CPLGetConfigOption( |
5342 | 0 | "GDAL_OVR_CHUNK_MAX_SIZE_FOR_TEMP_FILE", nullptr); |
5343 | 0 | if (pszVal) |
5344 | 0 | { |
5345 | 0 | GIntBig nRet = 0; |
5346 | 0 | CPLParseMemorySize(pszVal, &nRet, nullptr); |
5347 | 0 | return std::max<GIntBig>(100, nRet); |
5348 | 0 | } |
5349 | 0 | const auto nUsableRAM = CPLGetUsablePhysicalRAM(); |
5350 | 0 | if (nUsableRAM > 0) |
5351 | 0 | return nUsableRAM / 10; |
5352 | | // Fallback when no positive usable RAM figure is available: select a value to be able to at least downsample by 2 for an RGB |
5353 | | // 1024x1024 tiled output: (2 * 1024 + 2) * (2 * 1024 + 2) * 3 = 12 MB |
5354 | 0 | return 100 * 1024 * 1024; |
5355 | 0 | }(); |
5356 | | |
5357 | | // Second pass to do the real job. |
5358 | 0 | double dfCurPixelCount = 0; |
5359 | 0 | CPLErr eErr = CE_None; |
5360 | 0 | for (int iOverview = 0; iOverview < nOverviews && eErr == CE_None; |
5361 | 0 | ++iOverview) |
5362 | 0 | { |
5363 | 0 | int iSrcOverview = -1; // -1 means the source bands. |
5364 | |
|
5365 | 0 | const int nDstTotalWidth = |
5366 | 0 | papapoOverviewBands[0][iOverview]->GetXSize(); |
5367 | 0 | const int nDstTotalHeight = |
5368 | 0 | papapoOverviewBands[0][iOverview]->GetYSize(); |
5369 | | |
5370 | | // Compute the coordinates of the target region to refresh |
5371 | 0 | constexpr double EPS = 1e-8; |
5372 | 0 | const int nDstXOffStart = static_cast<int>( |
5373 | 0 | static_cast<double>(nSrcXOff) / nToplevelSrcWidth * nDstTotalWidth + |
5374 | 0 | EPS); |
5375 | 0 | const int nDstXOffEnd = |
5376 | 0 | std::min(static_cast<int>( |
5377 | 0 | std::ceil(static_cast<double>(nSrcXOff + nSrcXSize) / |
5378 | 0 | nToplevelSrcWidth * nDstTotalWidth - |
5379 | 0 | EPS)), |
5380 | 0 | nDstTotalWidth); |
5381 | 0 | const int nDstWidth = nDstXOffEnd - nDstXOffStart; |
5382 | 0 | const int nDstYOffStart = |
5383 | 0 | static_cast<int>(static_cast<double>(nSrcYOff) / |
5384 | 0 | nToplevelSrcHeight * nDstTotalHeight + |
5385 | 0 | EPS); |
5386 | 0 | const int nDstYOffEnd = |
5387 | 0 | std::min(static_cast<int>( |
5388 | 0 | std::ceil(static_cast<double>(nSrcYOff + nSrcYSize) / |
5389 | 0 | nToplevelSrcHeight * nDstTotalHeight - |
5390 | 0 | EPS)), |
5391 | 0 | nDstTotalHeight); |
5392 | 0 | const int nDstHeight = nDstYOffEnd - nDstYOffStart; |
5393 | | |
5394 | | // Try to use previous level of overview as the source to compute |
5395 | | // the next level. |
5396 | 0 | int nSrcWidth = nToplevelSrcWidth; |
5397 | 0 | int nSrcHeight = nToplevelSrcHeight; |
5398 | 0 | if (iOverview > 0 && |
5399 | 0 | papapoOverviewBands[0][iOverview - 1]->GetXSize() > nDstTotalWidth) |
5400 | 0 | { |
5401 | 0 | nSrcWidth = papapoOverviewBands[0][iOverview - 1]->GetXSize(); |
5402 | 0 | nSrcHeight = papapoOverviewBands[0][iOverview - 1]->GetYSize(); |
5403 | 0 | iSrcOverview = iOverview - 1; |
5404 | 0 | } |
5405 | |
|
5406 | 0 | const double dfXRatioDstToSrc = |
5407 | 0 | static_cast<double>(nSrcWidth) / nDstTotalWidth; |
5408 | 0 | const double dfYRatioDstToSrc = |
5409 | 0 | static_cast<double>(nSrcHeight) / nDstTotalHeight; |
5410 | |
|
5411 | 0 | const int nOvrFactor = |
5412 | 0 | std::max(1, std::max(static_cast<int>(0.5 + dfXRatioDstToSrc), |
5413 | 0 | static_cast<int>(0.5 + dfYRatioDstToSrc))); |
5414 | |
|
5415 | 0 | int nDstChunkXSize = 0; |
5416 | 0 | int nDstChunkYSize = 0; |
5417 | 0 | papapoOverviewBands[0][iOverview]->GetBlockSize(&nDstChunkXSize, |
5418 | 0 | &nDstChunkYSize); |
5419 | |
|
5420 | 0 | constexpr int PIXEL_MARGIN = 2; |
5421 | | // Try to extend the chunk size so that the memory needed to acquire |
5422 | | // source pixels goes up to nChunkMaxSize (10 MB by default). |
5423 | | // This can help for drivers that support multi-threaded reading |
5424 | 0 | const int nFullResYChunk = static_cast<int>(std::min<double>( |
5425 | 0 | nSrcHeight, PIXEL_MARGIN + nDstChunkYSize * dfYRatioDstToSrc)); |
5426 | 0 | const int nFullResYChunkQueried = static_cast<int>(std::min<int64_t>( |
5427 | 0 | nSrcHeight, |
5428 | 0 | nFullResYChunk + static_cast<int64_t>(RADIUS_TO_DIAMETER) * |
5429 | 0 | nKernelRadius * nOvrFactor)); |
5430 | 0 | while (nDstChunkXSize < nDstWidth) |
5431 | 0 | { |
5432 | 0 | constexpr int INCREASE_FACTOR = 2; |
5433 | |
|
5434 | 0 | const int nFullResXChunk = static_cast<int>(std::min<double>( |
5435 | 0 | nSrcWidth, PIXEL_MARGIN + INCREASE_FACTOR * nDstChunkXSize * |
5436 | 0 | dfXRatioDstToSrc)); |
5437 | |
|
5438 | 0 | const int nFullResXChunkQueried = |
5439 | 0 | static_cast<int>(std::min<int64_t>( |
5440 | 0 | nSrcWidth, |
5441 | 0 | nFullResXChunk + static_cast<int64_t>(RADIUS_TO_DIAMETER) * |
5442 | 0 | nKernelRadius * nOvrFactor)); |
5443 | |
|
5444 | 0 | if (nBands > nChunkMaxSize / nFullResXChunkQueried / |
5445 | 0 | nFullResYChunkQueried / nWrkDataTypeSize) |
5446 | 0 | { |
5447 | 0 | break; |
5448 | 0 | } |
5449 | | |
5450 | 0 | nDstChunkXSize *= INCREASE_FACTOR; |
5451 | 0 | } |
5452 | 0 | nDstChunkXSize = std::min(nDstChunkXSize, nDstWidth); |
5453 | |
|
5454 | 0 | const int nFullResXChunk = static_cast<int>(std::min<double>( |
5455 | 0 | nSrcWidth, PIXEL_MARGIN + nDstChunkXSize * dfXRatioDstToSrc)); |
5456 | 0 | const int nFullResXChunkQueried = static_cast<int>(std::min<int64_t>( |
5457 | 0 | nSrcWidth, |
5458 | 0 | nFullResXChunk + static_cast<int64_t>(RADIUS_TO_DIAMETER) * |
5459 | 0 | nKernelRadius * nOvrFactor)); |
5460 | | |
5461 | | // Make sure that the RAM requirements to acquire the source data do |
5462 | | // not exceed nChunkMaxSizeForTempFile. |
5463 | | // If so, reduce the destination chunk size, generate overviews in a |
5464 | | // temporary dataset, and copy that temporary dataset over the target |
5465 | | // overview bands (to avoid issues with lossy compression) |
5466 | 0 | const bool bOverflowFullResXChunkYChunkQueried = |
5467 | 0 | nBands > std::numeric_limits<int64_t>::max() / |
5468 | 0 | nFullResXChunkQueried / nFullResYChunkQueried / |
5469 | 0 | nWrkDataTypeSize; |
5470 | |
|
5471 | 0 | const auto nMemRequirement = |
5472 | 0 | bOverflowFullResXChunkYChunkQueried |
5473 | 0 | ? 0 |
5474 | 0 | : static_cast<GIntBig>(nFullResXChunkQueried) * |
5475 | 0 | nFullResYChunkQueried * nBands * nWrkDataTypeSize; |
5476 | | // Use a temporary dataset with a smaller destination chunk size |
5477 | 0 | const auto nOverShootFactor = |
5478 | 0 | nMemRequirement / nChunkMaxSizeForTempFile; |
5479 | |
|
5480 | 0 | constexpr int MIN_OVERSHOOT_FACTOR = 4; |
5481 | 0 | const auto nSqrtOverShootFactor = std::max<GIntBig>( |
5482 | 0 | MIN_OVERSHOOT_FACTOR, static_cast<GIntBig>(std::ceil(std::sqrt( |
5483 | 0 | static_cast<double>(nOverShootFactor))))); |
5484 | 0 | constexpr int DEFAULT_CHUNK_SIZE = 256; |
5485 | 0 | constexpr int GTIFF_BLOCK_SIZE_MULTIPLE = 16; |
5486 | 0 | const int nReducedDstChunkXSize = |
5487 | 0 | bOverflowFullResXChunkYChunkQueried |
5488 | 0 | ? DEFAULT_CHUNK_SIZE |
5489 | 0 | : std::max(1, static_cast<int>(nDstChunkXSize / |
5490 | 0 | nSqrtOverShootFactor) & |
5491 | 0 | ~(GTIFF_BLOCK_SIZE_MULTIPLE - 1)); |
5492 | 0 | const int nReducedDstChunkYSize = |
5493 | 0 | bOverflowFullResXChunkYChunkQueried |
5494 | 0 | ? DEFAULT_CHUNK_SIZE |
5495 | 0 | : std::max(1, static_cast<int>(nDstChunkYSize / |
5496 | 0 | nSqrtOverShootFactor) & |
5497 | 0 | ~(GTIFF_BLOCK_SIZE_MULTIPLE - 1)); |
5498 | |
|
5499 | 0 | if (bOverflowFullResXChunkYChunkQueried || |
5500 | 0 | nMemRequirement > nChunkMaxSizeForTempFile) |
5501 | 0 | { |
5502 | 0 | const auto nDTSize = |
5503 | 0 | std::max(1, GDALGetDataTypeSizeBytes(eDataType)); |
5504 | 0 | const bool bTmpDSMemRequirementOverflow = |
5505 | 0 | nBands > std::numeric_limits<int64_t>::max() / nDstWidth / |
5506 | 0 | nDstHeight / nDTSize; |
5507 | 0 | const auto nTmpDSMemRequirement = |
5508 | 0 | bTmpDSMemRequirementOverflow |
5509 | 0 | ? 0 |
5510 | 0 | : static_cast<GIntBig>(nDstWidth) * nDstHeight * nBands * |
5511 | 0 | nDTSize; |
5512 | | |
5513 | | // make sure that one band buffer doesn't overflow size_t |
5514 | 0 | const bool bChunkSizeOverflow = |
5515 | 0 | static_cast<size_t>(nDTSize) > |
5516 | 0 | std::numeric_limits<size_t>::max() / nDstWidth / nDstHeight; |
5517 | 0 | const size_t nChunkSize = |
5518 | 0 | bChunkSizeOverflow |
5519 | 0 | ? 0 |
5520 | 0 | : static_cast<size_t>(nDstWidth) * nDstHeight * nDTSize; |
5521 | |
|
5522 | 0 | const auto CreateVRT = |
5523 | 0 | [nBands, nSrcWidth, nSrcHeight, nDstTotalWidth, nDstTotalHeight, |
5524 | 0 | pszResampling, eWrkDataType, papoSrcBands, papapoOverviewBands, |
5525 | 0 | iSrcOverview, &abHasNoData, |
5526 | 0 | &adfNoDataValue](int nVRTBlockXSize, int nVRTBlockYSize) |
5527 | 0 | { |
5528 | 0 | auto poVRTDS = std::make_unique<VRTDataset>( |
5529 | 0 | nDstTotalWidth, nDstTotalHeight, nVRTBlockXSize, |
5530 | 0 | nVRTBlockYSize); |
5531 | |

5532 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
5533 | 0 | { |
5534 | 0 | auto poVRTSrc = std::make_unique<VRTSimpleSource>(); |
5535 | 0 | poVRTSrc->SetResampling(pszResampling); |
5536 | 0 | poVRTDS->AddBand(eWrkDataType); |
5537 | 0 | auto poVRTBand = static_cast<VRTSourcedRasterBand *>( |
5538 | 0 | poVRTDS->GetRasterBand(iBand + 1)); |
5539 | |

5540 | 0 | auto poSrcBand = papoSrcBands[iBand]; |
5541 | 0 | if (iSrcOverview != -1) |
5542 | 0 | poSrcBand = papapoOverviewBands[iBand][iSrcOverview]; |
5543 | 0 | poVRTBand->ConfigureSource( |
5544 | 0 | poVRTSrc.get(), poSrcBand, false, 0, 0, nSrcWidth, |
5545 | 0 | nSrcHeight, 0, 0, nDstTotalWidth, nDstTotalHeight); |
5546 | | // Add the source to the band (release() hands the raw pointer over, ending the unique_ptr's ownership of the source) |
5547 | 0 | poVRTBand->AddSource(poVRTSrc.release()); |
5548 | 0 | if (abHasNoData[iBand]) |
5549 | 0 | poVRTBand->SetNoDataValue(adfNoDataValue[iBand]); |
5550 | 0 | } |
5551 | |

5552 | 0 | if (papoSrcBands[0]->GetMaskFlags() == GMF_PER_DATASET && |
5553 | 0 | poVRTDS->CreateMaskBand(GMF_PER_DATASET) == CE_None) |
5554 | 0 | { |
5555 | 0 | VRTSourcedRasterBand *poMaskVRTBand = |
5556 | 0 | cpl::down_cast<VRTSourcedRasterBand *>( |
5557 | 0 | poVRTDS->GetRasterBand(1)->GetMaskBand()); |
5558 | 0 | auto poSrcBand = papoSrcBands[0]; |
5559 | 0 | if (iSrcOverview != -1) |
5560 | 0 | poSrcBand = papapoOverviewBands[0][iSrcOverview]; |
5561 | 0 | poMaskVRTBand->AddMaskBandSource( |
5562 | 0 | poSrcBand->GetMaskBand(), 0, 0, nSrcWidth, nSrcHeight, |
5563 | 0 | 0, 0, nDstTotalWidth, nDstTotalHeight); |
5564 | 0 | } |
5565 | |

5566 | 0 | return poVRTDS; |
5567 | 0 | }; |
5568 | | |
5569 | | // If the overview accommodates chunking, do so and recurse |
5570 | | // to avoid generating full size temporary files |
5571 | 0 | if (!bOverflowFullResXChunkYChunkQueried && |
5572 | 0 | !bTmpDSMemRequirementOverflow && !bChunkSizeOverflow && |
5573 | 0 | (nDstChunkXSize < nDstWidth || nDstChunkYSize < nDstHeight)) |
5574 | 0 | { |
5575 | | // Create a VRT with the smaller chunk to do the scaling |
5576 | 0 | auto poVRTDS = |
5577 | 0 | CreateVRT(nReducedDstChunkXSize, nReducedDstChunkYSize); |
5578 | |
|
5579 | 0 | std::vector<GDALRasterBand *> apoVRTBand(nBands); |
5580 | 0 | std::vector<GDALRasterBand *> apoDstBand(nBands); |
5581 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
5582 | 0 | { |
5583 | 0 | apoDstBand[iBand] = papapoOverviewBands[iBand][iOverview]; |
5584 | 0 | apoVRTBand[iBand] = poVRTDS->GetRasterBand(iBand + 1); |
5585 | 0 | } |
5586 | | |
5587 | | // Use a flag to avoid reading from the overview being built |
5588 | 0 | GDALRasterIOExtraArg sExtraArg; |
5589 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
5590 | 0 | if (iSrcOverview == -1) |
5591 | 0 | sExtraArg.bUseOnlyThisScale = true; |
5592 | | |
5593 | | // A single band buffer for data transfer to the overview |
5594 | 0 | std::vector<GByte> abyChunk; |
5595 | 0 | try |
5596 | 0 | { |
5597 | 0 | abyChunk.resize(nChunkSize); |
5598 | 0 | } |
5599 | 0 | catch (const std::exception &) |
5600 | 0 | { |
5601 | 0 | CPLError(CE_Failure, CPLE_OutOfMemory, |
5602 | 0 | "Out of memory allocating temporary buffer"); |
5603 | 0 | return CE_Failure; |
5604 | 0 | } |
5605 | | |
5606 | | // Loop over output height, in chunks |
5607 | 0 | for (int nDstYOff = nDstYOffStart; |
5608 | 0 | nDstYOff < nDstYOffEnd && eErr == CE_None; |
5609 | 0 | /* */) |
5610 | 0 | { |
5611 | 0 | const int nDstYCount = |
5612 | 0 | std::min(nDstChunkYSize, nDstYOffEnd - nDstYOff); |
5613 | | // Loop over output width, in output chunks |
5614 | 0 | for (int nDstXOff = nDstXOffStart; |
5615 | 0 | nDstXOff < nDstXOffEnd && eErr == CE_None; |
5616 | 0 | /* */) |
5617 | 0 | { |
5618 | 0 | const int nDstXCount = |
5619 | 0 | std::min(nDstChunkXSize, nDstXOffEnd - nDstXOff); |
5620 | | // Read and transfer the chunk to the overview |
5621 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; |
5622 | 0 | ++iBand) |
5623 | 0 | { |
5624 | 0 | eErr = apoVRTBand[iBand]->RasterIO( |
5625 | 0 | GF_Read, nDstXOff, nDstYOff, nDstXCount, |
5626 | 0 | nDstYCount, abyChunk.data(), nDstXCount, |
5627 | 0 | nDstYCount, eDataType, 0, 0, &sExtraArg); |
5628 | 0 | if (eErr == CE_None) |
5629 | 0 | { |
5630 | 0 | eErr = apoDstBand[iBand]->RasterIO( |
5631 | 0 | GF_Write, nDstXOff, nDstYOff, nDstXCount, |
5632 | 0 | nDstYCount, abyChunk.data(), nDstXCount, |
5633 | 0 | nDstYCount, eDataType, 0, 0, nullptr); |
5634 | 0 | } |
5635 | 0 | } |
5636 | |
|
5637 | 0 | dfCurPixelCount += |
5638 | 0 | static_cast<double>(nDstXCount) * nDstYCount; |
5639 | |
|
5640 | 0 | nDstXOff += nDstXCount; |
5641 | 0 | } // width |
5642 | |
|
5643 | 0 | if (!pfnProgress(dfCurPixelCount / dfTotalPixelCount, |
5644 | 0 | nullptr, pProgressData)) |
5645 | 0 | { |
5646 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5647 | 0 | "User terminated"); |
5648 | 0 | eErr = CE_Failure; |
5649 | 0 | } |
5650 | |
|
5651 | 0 | nDstYOff += nDstYCount; |
5652 | 0 | } // height |
5653 | |
|
5654 | 0 | if (CE_None != eErr) |
5655 | 0 | { |
5656 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
5657 | 0 | "Error while writing overview"); |
5658 | 0 | return CE_Failure; |
5659 | 0 | } |
5660 | | |
5661 | 0 | pfnProgress(1.0, nullptr, pProgressData); |
5662 | | // Flush the overviews we just generated |
5663 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
5664 | 0 | apoDstBand[iBand]->FlushCache(false); |
5665 | |
|
5666 | 0 | continue; // Next overview |
5667 | 0 | } // chunking via temporary dataset |
5668 | | |
5669 | 0 | std::unique_ptr<GDALDataset> poTmpDS; |
5670 | | // Config option mostly/only for autotest purposes |
5671 | 0 | const char *pszGDAL_OVR_TEMP_DRIVER = |
5672 | 0 | CPLGetConfigOption("GDAL_OVR_TEMP_DRIVER", ""); |
5673 | 0 | if ((!bTmpDSMemRequirementOverflow && |
5674 | 0 | nTmpDSMemRequirement <= nChunkMaxSizeForTempFile && |
5675 | 0 | !EQUAL(pszGDAL_OVR_TEMP_DRIVER, "GTIFF")) || |
5676 | 0 | EQUAL(pszGDAL_OVR_TEMP_DRIVER, "MEM")) |
5677 | 0 | { |
5678 | 0 | auto poTmpDrv = GetGDALDriverManager()->GetDriverByName("MEM"); |
5679 | 0 | if (!poTmpDrv) |
5680 | 0 | { |
5681 | 0 | eErr = CE_Failure; |
5682 | 0 | break; |
5683 | 0 | } |
5684 | 0 | poTmpDS.reset(poTmpDrv->Create("", nDstTotalWidth, |
5685 | 0 | nDstTotalHeight, nBands, |
5686 | 0 | eDataType, nullptr)); |
5687 | 0 | } |
5688 | 0 | else |
5689 | 0 | { |
5690 | | // Create a temporary file for the overview |
5691 | 0 | auto poTmpDrv = |
5692 | 0 | GetGDALDriverManager()->GetDriverByName("GTiff"); |
5693 | 0 | if (!poTmpDrv) |
5694 | 0 | { |
5695 | 0 | eErr = CE_Failure; |
5696 | 0 | break; |
5697 | 0 | } |
5698 | 0 | std::string osTmpFilename; |
5699 | 0 | auto poDstDS = papapoOverviewBands[0][0]->GetDataset(); |
5700 | 0 | if (poDstDS) |
5701 | 0 | { |
5702 | 0 | osTmpFilename = poDstDS->GetDescription(); |
5703 | 0 | VSIStatBufL sStatBuf; |
5704 | 0 | if (!osTmpFilename.empty() && |
5705 | 0 | VSIStatL(osTmpFilename.c_str(), &sStatBuf) == 0) |
5706 | 0 | osTmpFilename += "_tmp_ovr.tif"; |
5707 | 0 | } |
5708 | 0 | if (osTmpFilename.empty()) |
5709 | 0 | { |
5710 | 0 | osTmpFilename = CPLGenerateTempFilenameSafe(nullptr); |
5711 | 0 | osTmpFilename += ".tif"; |
5712 | 0 | } |
5713 | 0 | CPLDebug("GDAL", "Creating temporary file %s of %d x %d x %d", |
5714 | 0 | osTmpFilename.c_str(), nDstWidth, nDstHeight, nBands); |
5715 | 0 | CPLStringList aosCO; |
5716 | 0 | if (0 == ((nReducedDstChunkXSize % GTIFF_BLOCK_SIZE_MULTIPLE) | |
5717 | 0 | (nReducedDstChunkYSize % GTIFF_BLOCK_SIZE_MULTIPLE))) |
5718 | 0 | { |
5719 | 0 | aosCO.SetNameValue("TILED", "YES"); |
5720 | 0 | aosCO.SetNameValue("BLOCKXSIZE", |
5721 | 0 | CPLSPrintf("%d", nReducedDstChunkXSize)); |
5722 | 0 | aosCO.SetNameValue("BLOCKYSIZE", |
5723 | 0 | CPLSPrintf("%d", nReducedDstChunkYSize)); |
5724 | 0 | } |
5725 | 0 | if (const char *pszCOList = |
5726 | 0 | poTmpDrv->GetMetadataItem(GDAL_DMD_CREATIONOPTIONLIST)) |
5727 | 0 | { |
5728 | 0 | aosCO.SetNameValue( |
5729 | 0 | "COMPRESS", strstr(pszCOList, "ZSTD") ? "ZSTD" : "LZW"); |
5730 | 0 | } |
5731 | 0 | poTmpDS.reset(poTmpDrv->Create(osTmpFilename.c_str(), nDstWidth, |
5732 | 0 | nDstHeight, nBands, eDataType, |
5733 | 0 | aosCO.List())); |
5734 | 0 | if (poTmpDS) |
5735 | 0 | { |
5736 | 0 | poTmpDS->MarkSuppressOnClose(); |
5737 | 0 | VSIUnlink(osTmpFilename.c_str()); |
5738 | 0 | } |
5739 | 0 | } |
5740 | 0 | if (!poTmpDS) |
5741 | 0 | { |
5742 | 0 | eErr = CE_Failure; |
5743 | 0 | break; |
5744 | 0 | } |
5745 | | |
5746 | | // Create a full size VRT to do the resampling without edge effects |
5747 | 0 | auto poVRTDS = |
5748 | 0 | CreateVRT(nReducedDstChunkXSize, nReducedDstChunkYSize); |
5749 | | |
5750 | | // Allocate a band buffer with the overview chunk size |
5751 | 0 | std::unique_ptr<void, VSIFreeReleaser> pDstBuffer( |
5752 | 0 | VSI_MALLOC3_VERBOSE(size_t(nWrkDataTypeSize), nDstChunkXSize, |
5753 | 0 | nDstChunkYSize)); |
5754 | 0 | if (pDstBuffer == nullptr) |
5755 | 0 | { |
5756 | 0 | eErr = CE_Failure; |
5757 | 0 | break; |
5758 | 0 | } |
5759 | | |
5760 | | // Use a flag to avoid reading the overview being built |
5761 | 0 | GDALRasterIOExtraArg sExtraArg; |
5762 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
5763 | 0 | if (iSrcOverview == -1) |
5764 | 0 | sExtraArg.bUseOnlyThisScale = true; |
5765 | | |
5766 | | // Scale and copy data from the VRT to the temp file |
5767 | 0 | for (int nDstYOff = nDstYOffStart; |
5768 | 0 | nDstYOff < nDstYOffEnd && eErr == CE_None; |
5769 | 0 | /* */) |
5770 | 0 | { |
5771 | 0 | const int nDstYCount = |
5772 | 0 | std::min(nReducedDstChunkYSize, nDstYOffEnd - nDstYOff); |
5773 | 0 | for (int nDstXOff = nDstXOffStart; |
5774 | 0 | nDstXOff < nDstXOffEnd && eErr == CE_None; |
5775 | 0 | /* */) |
5776 | 0 | { |
5777 | 0 | const int nDstXCount = |
5778 | 0 | std::min(nReducedDstChunkXSize, nDstXOffEnd - nDstXOff); |
5779 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; |
5780 | 0 | ++iBand) |
5781 | 0 | { |
5782 | 0 | auto poSrcBand = poVRTDS->GetRasterBand(iBand + 1); |
5783 | 0 | eErr = poSrcBand->RasterIO( |
5784 | 0 | GF_Read, nDstXOff, nDstYOff, nDstXCount, nDstYCount, |
5785 | 0 | pDstBuffer.get(), nDstXCount, nDstYCount, |
5786 | 0 | eWrkDataType, 0, 0, &sExtraArg); |
5787 | 0 | if (eErr == CE_None) |
5788 | 0 | { |
5789 | | // Write to the temporary dataset, shifted |
5790 | 0 | auto poOvrBand = poTmpDS->GetRasterBand(iBand + 1); |
5791 | 0 | eErr = poOvrBand->RasterIO( |
5792 | 0 | GF_Write, nDstXOff - nDstXOffStart, |
5793 | 0 | nDstYOff - nDstYOffStart, nDstXCount, |
5794 | 0 | nDstYCount, pDstBuffer.get(), nDstXCount, |
5795 | 0 | nDstYCount, eWrkDataType, 0, 0, nullptr); |
5796 | 0 | } |
5797 | 0 | } |
5798 | 0 | nDstXOff += nDstXCount; |
5799 | 0 | } |
5800 | 0 | nDstYOff += nDstYCount; |
5801 | 0 | } |
5802 | | |
5803 | | // Copy from the temporary to the overview |
5804 | 0 | for (int nDstYOff = nDstYOffStart; |
5805 | 0 | nDstYOff < nDstYOffEnd && eErr == CE_None; |
5806 | 0 | /* */) |
5807 | 0 | { |
5808 | 0 | const int nDstYCount = |
5809 | 0 | std::min(nDstChunkYSize, nDstYOffEnd - nDstYOff); |
5810 | 0 | for (int nDstXOff = nDstXOffStart; |
5811 | 0 | nDstXOff < nDstXOffEnd && eErr == CE_None; |
5812 | 0 | /* */) |
5813 | 0 | { |
5814 | 0 | const int nDstXCount = |
5815 | 0 | std::min(nDstChunkXSize, nDstXOffEnd - nDstXOff); |
5816 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; |
5817 | 0 | ++iBand) |
5818 | 0 | { |
5819 | 0 | auto poSrcBand = poTmpDS->GetRasterBand(iBand + 1); |
5820 | 0 | eErr = poSrcBand->RasterIO( |
5821 | 0 | GF_Read, nDstXOff - nDstXOffStart, |
5822 | 0 | nDstYOff - nDstYOffStart, nDstXCount, nDstYCount, |
5823 | 0 | pDstBuffer.get(), nDstXCount, nDstYCount, |
5824 | 0 | eWrkDataType, 0, 0, nullptr); |
5825 | 0 | if (eErr == CE_None) |
5826 | 0 | { |
5827 | | // Write to the destination overview bands |
5828 | 0 | auto poOvrBand = |
5829 | 0 | papapoOverviewBands[iBand][iOverview]; |
5830 | 0 | eErr = poOvrBand->RasterIO( |
5831 | 0 | GF_Write, nDstXOff, nDstYOff, nDstXCount, |
5832 | 0 | nDstYCount, pDstBuffer.get(), nDstXCount, |
5833 | 0 | nDstYCount, eWrkDataType, 0, 0, nullptr); |
5834 | 0 | } |
5835 | 0 | } |
5836 | 0 | nDstXOff += nDstXCount; |
5837 | 0 | } |
5838 | 0 | nDstYOff += nDstYCount; |
5839 | 0 | } |
5840 | |
|
5841 | 0 | if (eErr != CE_None) |
5842 | 0 | { |
5843 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
5844 | 0 | "Failed to write overview %d", iOverview); |
5845 | 0 | return eErr; |
5846 | 0 | } |
5847 | | |
5848 | | // Flush the data to overviews. |
5849 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
5850 | 0 | papapoOverviewBands[iBand][iOverview]->FlushCache(false); |
5851 | |
|
5852 | 0 | continue; |
5853 | 0 | } |
5854 | | |
5855 | | // Structure describing one resampling job: the arguments handed to pfnResampleFn, the buffers tied to the job, the result the function produced, and the state used to wait for its completion |
5856 | 0 | struct OvrJob |
5857 | 0 | { |
5858 | | // Buffers to free when job is finished; each holder is owned by the job through a unique_ptr |
5859 | 0 | std::unique_ptr<PointerHolder> oSrcMaskBufferHolder{}; |
5860 | 0 | std::unique_ptr<PointerHolder> oSrcBufferHolder{}; |
5861 | 0 | std::unique_ptr<PointerHolder> oDstBufferHolder{}; |
5862 | |

5863 | 0 | GDALRasterBand *poDstBand = nullptr; |
5864 | | |
5865 | | // Input parameters of pfnResampleFn, which is invoked as pfnResampleFn(args, pChunk, &pDstBuffer, &eDstBufferDataType) |
5866 | 0 | GDALResampleFunction pfnResampleFn = nullptr; |
5867 | 0 | GDALOverviewResampleArgs args{}; |
5868 | 0 | const void *pChunk = nullptr; |
5869 | | |
5870 | | // Output values of resampling function; eErr keeps its CE_Failure initial value until the function has run |
5871 | 0 | CPLErr eErr = CE_Failure; |
5872 | 0 | void *pDstBuffer = nullptr; |
5873 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown; |
5874 | |

5875 | 0 | void NotifyFinished() |
5876 | 0 | { |
5877 | 0 | std::lock_guard guard(mutex); |
5878 | 0 | bFinished = true; |
5879 | 0 | cv.notify_one(); |
5880 | 0 | } |
5881 | |

5882 | 0 | bool IsFinished() |
5883 | 0 | { |
5884 | 0 | std::lock_guard guard(mutex); |
5885 | 0 | return bFinished; |
5886 | 0 | } |
5887 | |

5888 | 0 | void WaitFinished() |
5889 | 0 | { |
5890 | 0 | std::unique_lock oGuard(mutex); |
5891 | 0 | while (!bFinished) |
5892 | 0 | { |
5893 | 0 | cv.wait(oGuard); |
5894 | 0 | } |
5895 | 0 | } |
5896 | |

5897 | 0 | private: |
5898 | | // Synchronization: bFinished is only read and written with mutex held; cv is signalled by NotifyFinished() and waited on by WaitFinished() |
5899 | 0 | bool bFinished = false; |
5900 | 0 | std::mutex mutex{}; |
5901 | 0 | std::condition_variable cv{}; |
5902 | 0 | }; |
5903 | | |
5904 | | // Thread function to resample: pData must point to an OvrJob. Runs the job's resampling function, stores its status, output buffer and output data type in the job, wraps that buffer in oDstBufferHolder, then marks the job as finished |
5905 | 0 | const auto JobResampleFunc = [](void *pData) |
5906 | 0 | { |
5907 | 0 | OvrJob *poJob = static_cast<OvrJob *>(pData); |
5908 | |

5909 | 0 | poJob->eErr = poJob->pfnResampleFn(poJob->args, poJob->pChunk, |
5910 | 0 | &(poJob->pDstBuffer), |
5911 | 0 | &(poJob->eDstBufferDataType)); |
5912 | |

5913 | 0 | poJob->oDstBufferHolder.reset(new PointerHolder(poJob->pDstBuffer)); |
5914 | |

5915 | 0 | poJob->NotifyFinished(); |
5916 | 0 | }; |
5917 | | |
5918 | | // Function to write resample data to target band: writes pDstBuffer, typed eDstBufferDataType, over the window [nDstXOff, nDstXOff2) x [nDstYOff, nDstYOff2) of poDstBand and returns the RasterIO() status |
5919 | 0 | const auto WriteJobData = [](const OvrJob *poJob) |
5920 | 0 | { |
5921 | 0 | return poJob->poDstBand->RasterIO( |
5922 | 0 | GF_Write, poJob->args.nDstXOff, poJob->args.nDstYOff, |
5923 | 0 | poJob->args.nDstXOff2 - poJob->args.nDstXOff, |
5924 | 0 | poJob->args.nDstYOff2 - poJob->args.nDstYOff, poJob->pDstBuffer, |
5925 | 0 | poJob->args.nDstXOff2 - poJob->args.nDstXOff, |
5926 | 0 | poJob->args.nDstYOff2 - poJob->args.nDstYOff, |
5927 | 0 | poJob->eDstBufferDataType, 0, 0, nullptr); |
5928 | 0 | }; |
5929 | | |
5930 | | // Wait for completion of oldest job and serialize it: blocks until the front job of jobList is finished, writes its output only if resampling returned CE_None, removes the job from the list and returns the resulting status. jobList must not be empty |
5931 | 0 | const auto WaitAndFinalizeOldestJob = |
5932 | 0 | [WriteJobData](std::list<std::unique_ptr<OvrJob>> &jobList) |
5933 | 0 | { |
5934 | 0 | auto poOldestJob = jobList.front().get(); |
5935 | 0 | poOldestJob->WaitFinished(); |
5936 | 0 | CPLErr l_eErr = poOldestJob->eErr; |
5937 | 0 | if (l_eErr == CE_None) |
5938 | 0 | { |
5939 | 0 | l_eErr = WriteJobData(poOldestJob); |
5940 | 0 | } |
5941 | |

5942 | 0 | jobList.pop_front(); |
5943 | 0 | return l_eErr; |
5944 | 0 | }; |
5945 | | |
5946 | | // Queue of jobs |
5947 | 0 | std::list<std::unique_ptr<OvrJob>> jobList; |
5948 | |
|
5949 | 0 | std::vector<std::unique_ptr<void, VSIFreeReleaser>> apaChunk(nBands); |
5950 | 0 | std::vector<std::unique_ptr<GByte, VSIFreeReleaser>> |
5951 | 0 | apabyChunkNoDataMask(nBands); |
5952 | | |
5953 | | // Iterate on destination overview, block by block. |
5954 | 0 | for (int nDstYOff = nDstYOffStart; |
5955 | 0 | nDstYOff < nDstYOffEnd && eErr == CE_None; |
5956 | 0 | nDstYOff += nDstChunkYSize) |
5957 | 0 | { |
5958 | 0 | int nDstYCount; |
5959 | 0 | if (nDstYOff + nDstChunkYSize <= nDstYOffEnd) |
5960 | 0 | nDstYCount = nDstChunkYSize; |
5961 | 0 | else |
5962 | 0 | nDstYCount = nDstYOffEnd - nDstYOff; |
5963 | |
|
5964 | 0 | int nChunkYOff = static_cast<int>(nDstYOff * dfYRatioDstToSrc); |
5965 | 0 | int nChunkYOff2 = static_cast<int>( |
5966 | 0 | ceil((nDstYOff + nDstYCount) * dfYRatioDstToSrc)); |
5967 | 0 | if (nChunkYOff2 > nSrcHeight || |
5968 | 0 | nDstYOff + nDstYCount == nDstTotalHeight) |
5969 | 0 | nChunkYOff2 = nSrcHeight; |
5970 | 0 | int nYCount = nChunkYOff2 - nChunkYOff; |
5971 | 0 | CPLAssert(nYCount <= nFullResYChunk); |
5972 | | |
5973 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor; |
5974 | 0 | int nChunkYSizeQueried = |
5975 | 0 | nYCount + RADIUS_TO_DIAMETER * nKernelRadius * nOvrFactor; |
5976 | 0 | if (nChunkYOffQueried < 0) |
5977 | 0 | { |
5978 | 0 | nChunkYSizeQueried += nChunkYOffQueried; |
5979 | 0 | nChunkYOffQueried = 0; |
5980 | 0 | } |
5981 | 0 | if (nChunkYSizeQueried + nChunkYOffQueried > nSrcHeight) |
5982 | 0 | nChunkYSizeQueried = nSrcHeight - nChunkYOffQueried; |
5983 | 0 | CPLAssert(nChunkYSizeQueried <= nFullResYChunkQueried); |
5984 | | |
5985 | 0 | if (!pfnProgress(std::min(1.0, dfCurPixelCount / dfTotalPixelCount), |
5986 | 0 | nullptr, pProgressData)) |
5987 | 0 | { |
5988 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
5989 | 0 | eErr = CE_Failure; |
5990 | 0 | } |
5991 | | |
5992 | | // Iterate on destination overview, block by block. |
5993 | 0 | for (int nDstXOff = nDstXOffStart; |
5994 | 0 | nDstXOff < nDstXOffEnd && eErr == CE_None; |
5995 | 0 | nDstXOff += nDstChunkXSize) |
5996 | 0 | { |
5997 | 0 | int nDstXCount = 0; |
5998 | 0 | if (nDstXOff + nDstChunkXSize <= nDstXOffEnd) |
5999 | 0 | nDstXCount = nDstChunkXSize; |
6000 | 0 | else |
6001 | 0 | nDstXCount = nDstXOffEnd - nDstXOff; |
6002 | |
|
6003 | 0 | dfCurPixelCount += static_cast<double>(nDstXCount) * nDstYCount; |
6004 | |
|
6005 | 0 | int nChunkXOff = static_cast<int>(nDstXOff * dfXRatioDstToSrc); |
6006 | 0 | int nChunkXOff2 = static_cast<int>( |
6007 | 0 | ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc)); |
6008 | 0 | if (nChunkXOff2 > nSrcWidth || |
6009 | 0 | nDstXOff + nDstXCount == nDstTotalWidth) |
6010 | 0 | nChunkXOff2 = nSrcWidth; |
6011 | 0 | const int nXCount = nChunkXOff2 - nChunkXOff; |
6012 | 0 | CPLAssert(nXCount <= nFullResXChunk); |
6013 | | |
6014 | 0 | int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor; |
6015 | 0 | int nChunkXSizeQueried = |
6016 | 0 | nXCount + RADIUS_TO_DIAMETER * nKernelRadius * nOvrFactor; |
6017 | 0 | if (nChunkXOffQueried < 0) |
6018 | 0 | { |
6019 | 0 | nChunkXSizeQueried += nChunkXOffQueried; |
6020 | 0 | nChunkXOffQueried = 0; |
6021 | 0 | } |
6022 | 0 | if (nChunkXSizeQueried + nChunkXOffQueried > nSrcWidth) |
6023 | 0 | nChunkXSizeQueried = nSrcWidth - nChunkXOffQueried; |
6024 | 0 | CPLAssert(nChunkXSizeQueried <= nFullResXChunkQueried); |
6025 | | #if DEBUG_VERBOSE |
6026 | | CPLDebug("GDAL", |
6027 | | "Reading (%dx%d -> %dx%d) for output (%dx%d -> %dx%d)", |
6028 | | nChunkXOffQueried, nChunkYOffQueried, |
6029 | | nChunkXSizeQueried, nChunkYSizeQueried, nDstXOff, |
6030 | | nDstYOff, nDstXCount, nDstYCount); |
6031 | | #endif |
6032 | | |
6033 | | // Avoid accumulating too many tasks and exhaust RAM |
6034 | | |
6035 | | // Try to complete already finished jobs |
6036 | 0 | while (eErr == CE_None && !jobList.empty()) |
6037 | 0 | { |
6038 | 0 | auto poOldestJob = jobList.front().get(); |
6039 | 0 | if (!poOldestJob->IsFinished()) |
6040 | 0 | break; |
6041 | 0 | eErr = poOldestJob->eErr; |
6042 | 0 | if (eErr == CE_None) |
6043 | 0 | { |
6044 | 0 | eErr = WriteJobData(poOldestJob); |
6045 | 0 | } |
6046 | |
|
6047 | 0 | jobList.pop_front(); |
6048 | 0 | } |
6049 | | |
6050 | | // And in case we have saturated the number of threads, |
6051 | | // wait for completion of tasks to go below the threshold. |
6052 | 0 | while (eErr == CE_None && |
6053 | 0 | jobList.size() >= static_cast<size_t>(nThreads)) |
6054 | 0 | { |
6055 | 0 | eErr = WaitAndFinalizeOldestJob(jobList); |
6056 | 0 | } |
6057 | | |
6058 | | // Read the source buffers for all the bands. |
6059 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; ++iBand) |
6060 | 0 | { |
6061 | | // (Re)allocate buffers if needed |
6062 | 0 | if (apaChunk[iBand] == nullptr) |
6063 | 0 | { |
6064 | 0 | apaChunk[iBand].reset(VSI_MALLOC3_VERBOSE( |
6065 | 0 | nFullResXChunkQueried, nFullResYChunkQueried, |
6066 | 0 | nWrkDataTypeSize)); |
6067 | 0 | if (apaChunk[iBand] == nullptr) |
6068 | 0 | { |
6069 | 0 | eErr = CE_Failure; |
6070 | 0 | } |
6071 | 0 | } |
6072 | 0 | if (bUseNoDataMask && |
6073 | 0 | apabyChunkNoDataMask[iBand] == nullptr) |
6074 | 0 | { |
6075 | 0 | apabyChunkNoDataMask[iBand].reset( |
6076 | 0 | static_cast<GByte *>(VSI_MALLOC2_VERBOSE( |
6077 | 0 | nFullResXChunkQueried, nFullResYChunkQueried))); |
6078 | 0 | if (apabyChunkNoDataMask[iBand] == nullptr) |
6079 | 0 | { |
6080 | 0 | eErr = CE_Failure; |
6081 | 0 | } |
6082 | 0 | } |
6083 | |
|
6084 | 0 | if (eErr == CE_None) |
6085 | 0 | { |
6086 | 0 | GDALRasterBand *poSrcBand = nullptr; |
6087 | 0 | if (iSrcOverview == -1) |
6088 | 0 | poSrcBand = papoSrcBands[iBand]; |
6089 | 0 | else |
6090 | 0 | poSrcBand = |
6091 | 0 | papapoOverviewBands[iBand][iSrcOverview]; |
6092 | 0 | eErr = poSrcBand->RasterIO( |
6093 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
6094 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
6095 | 0 | apaChunk[iBand].get(), nChunkXSizeQueried, |
6096 | 0 | nChunkYSizeQueried, eWrkDataType, 0, 0, nullptr); |
6097 | |
|
6098 | 0 | if (bUseNoDataMask && eErr == CE_None) |
6099 | 0 | { |
6100 | 0 | auto poMaskBand = poSrcBand->IsMaskBand() |
6101 | 0 | ? poSrcBand |
6102 | 0 | : poSrcBand->GetMaskBand(); |
6103 | 0 | eErr = poMaskBand->RasterIO( |
6104 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
6105 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
6106 | 0 | apabyChunkNoDataMask[iBand].get(), |
6107 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
6108 | 0 | GDT_Byte, 0, 0, nullptr); |
6109 | 0 | } |
6110 | 0 | } |
6111 | 0 | } |
6112 | | |
6113 | | // Compute the resulting overview block. |
6114 | 0 | for (int iBand = 0; iBand < nBands && eErr == CE_None; ++iBand) |
6115 | 0 | { |
6116 | 0 | auto poJob = std::make_unique<OvrJob>(); |
6117 | 0 | poJob->pfnResampleFn = pfnResampleFn; |
6118 | 0 | poJob->poDstBand = papapoOverviewBands[iBand][iOverview]; |
6119 | 0 | poJob->args.eOvrDataType = |
6120 | 0 | poJob->poDstBand->GetRasterDataType(); |
6121 | 0 | poJob->args.nOvrXSize = poJob->poDstBand->GetXSize(); |
6122 | 0 | poJob->args.nOvrYSize = poJob->poDstBand->GetYSize(); |
6123 | 0 | const char *pszNBITS = poJob->poDstBand->GetMetadataItem( |
6124 | 0 | "NBITS", "IMAGE_STRUCTURE"); |
6125 | 0 | poJob->args.nOvrNBITS = pszNBITS ? atoi(pszNBITS) : 0; |
6126 | 0 | poJob->args.dfXRatioDstToSrc = dfXRatioDstToSrc; |
6127 | 0 | poJob->args.dfYRatioDstToSrc = dfYRatioDstToSrc; |
6128 | 0 | poJob->args.eWrkDataType = eWrkDataType; |
6129 | 0 | poJob->pChunk = apaChunk[iBand].get(); |
6130 | 0 | poJob->args.pabyChunkNodataMask = |
6131 | 0 | apabyChunkNoDataMask[iBand].get(); |
6132 | 0 | poJob->args.nChunkXOff = nChunkXOffQueried; |
6133 | 0 | poJob->args.nChunkXSize = nChunkXSizeQueried; |
6134 | 0 | poJob->args.nChunkYOff = nChunkYOffQueried; |
6135 | 0 | poJob->args.nChunkYSize = nChunkYSizeQueried; |
6136 | 0 | poJob->args.nDstXOff = nDstXOff; |
6137 | 0 | poJob->args.nDstXOff2 = nDstXOff + nDstXCount; |
6138 | 0 | poJob->args.nDstYOff = nDstYOff; |
6139 | 0 | poJob->args.nDstYOff2 = nDstYOff + nDstYCount; |
6140 | 0 | poJob->args.pszResampling = pszResampling; |
6141 | 0 | poJob->args.bHasNoData = abHasNoData[iBand]; |
6142 | 0 | poJob->args.dfNoDataValue = adfNoDataValue[iBand]; |
6143 | 0 | poJob->args.eSrcDataType = eDataType; |
6144 | 0 | poJob->args.bPropagateNoData = bPropagateNoData; |
6145 | |
|
6146 | 0 | if (poJobQueue) |
6147 | 0 | { |
6148 | 0 | poJob->oSrcMaskBufferHolder.reset(new PointerHolder( |
6149 | 0 | apabyChunkNoDataMask[iBand].release())); |
6150 | |
|
6151 | 0 | poJob->oSrcBufferHolder.reset( |
6152 | 0 | new PointerHolder(apaChunk[iBand].release())); |
6153 | |
|
6154 | 0 | poJobQueue->SubmitJob(JobResampleFunc, poJob.get()); |
6155 | 0 | jobList.emplace_back(std::move(poJob)); |
6156 | 0 | } |
6157 | 0 | else |
6158 | 0 | { |
6159 | 0 | JobResampleFunc(poJob.get()); |
6160 | 0 | eErr = poJob->eErr; |
6161 | 0 | if (eErr == CE_None) |
6162 | 0 | { |
6163 | 0 | eErr = WriteJobData(poJob.get()); |
6164 | 0 | } |
6165 | 0 | } |
6166 | 0 | } |
6167 | 0 | } |
6168 | 0 | } |
6169 | | |
6170 | | // Wait for all pending jobs to complete |
6171 | 0 | while (!jobList.empty()) |
6172 | 0 | { |
6173 | 0 | const auto l_eErr = WaitAndFinalizeOldestJob(jobList); |
6174 | 0 | if (l_eErr != CE_None && eErr == CE_None) |
6175 | 0 | eErr = l_eErr; |
6176 | 0 | } |
6177 | | |
6178 | | // Flush the data to overviews. |
6179 | 0 | for (int iBand = 0; iBand < nBands; ++iBand) |
6180 | 0 | { |
6181 | 0 | if (papapoOverviewBands[iBand][iOverview]->FlushCache(false) != |
6182 | 0 | CE_None) |
6183 | 0 | eErr = CE_Failure; |
6184 | 0 | } |
6185 | 0 | } |
6186 | | |
6187 | 0 | if (eErr == CE_None) |
6188 | 0 | pfnProgress(1.0, nullptr, pProgressData); |
6189 | |
|
6190 | 0 | return eErr; |
6191 | 0 | } |
6192 | | |
6193 | | /************************************************************************/ |
6194 | | /* GDALRegenerateOverviewsMultiBand() */ |
6195 | | /************************************************************************/ |
6196 | | |
6197 | | /** |
6198 | | * \brief Variant of GDALRegenerateOverviews, specially dedicated for generating |
6199 | | * compressed pixel-interleaved overviews (JPEG-IN-TIFF for example) |
6200 | | * |
6201 | | * This function will generate one or more overview images from a base |
6202 | | * image using the requested downsampling algorithm. Its primary use |
6203 | | * is for generating overviews via GDALDataset::BuildOverviews(), but it |
6204 | | * can also be used to generate downsampled images in one file from another |
6205 | | * outside the overview architecture. |
6206 | | * |
6207 | | * The output bands need to exist in advance and share the same characteristics |
6208 | | * (type, dimensions) |
6209 | | * |
6210 | | * The resampling algorithms supported for the moment are "NEAREST", "AVERAGE", |
6211 | | * "RMS", "GAUSS", "CUBIC", "CUBICSPLINE", "LANCZOS" and "BILINEAR" |
6212 | | * |
6213 | | * It does not support color tables or complex data types. |
6214 | | * |
6215 | | * The pseudo-algorithm used by the function is : |
6216 | | * for each overview |
6217 | | * iterate on lines of the source by a step of deltay |
6218 | | * iterate on columns of the source by a step of deltax |
6219 | | * read the source data of size deltax * deltay for all the bands |
6220 | | * generate the corresponding overview block for all the bands |
6221 | | * |
6222 | | * This function will honour properly NODATA_VALUES tuples (special dataset |
6223 | | * metadata) so that only a given RGB triplet (in case of a RGB image) will be |
6224 | | * considered as the nodata value and not each value of the triplet |
6225 | | * independently per band. |
6226 | | * |
6227 | | * The GDAL_NUM_THREADS configuration option can be set |
6228 | | * to "ALL_CPUS" or a integer value to specify the number of threads to use for |
6229 | | * overview computation. |
6230 | | * |
6231 | | * @param apoSrcBands the list of source bands to downsample |
6232 | | * @param aapoOverviewBands bidimension array of bands. First dimension is |
6233 | | * indexed by bands. Second dimension is indexed by |
6234 | | * overview levels. All aapoOverviewBands[i] arrays |
6235 | | * must have the same size (i.e. same number of |
6236 | | * overviews) |
6237 | | * @param pszResampling Resampling algorithm ("NEAREST", "AVERAGE", "RMS", |
6238 | | * "GAUSS", "CUBIC", "CUBICSPLINE", "LANCZOS" or "BILINEAR"). |
6239 | | * @param pfnProgress progress report function. |
6240 | | * @param pProgressData progress function callback data. |
6241 | | * @param papszOptions NULL terminated list of options as |
6242 | | * key=value pairs, or NULL |
6243 | | * The XOFF, YOFF, XSIZE and YSIZE |
6244 | | * options can be specified to express that overviews should |
6245 | | * be regenerated only in the specified subset of the source |
6246 | | * dataset. |
6247 | | * @return CE_None on success or CE_Failure on failure. |
6248 | | * @since 3.10 |
6249 | | */ |
6250 | | |
6251 | | CPLErr GDALRegenerateOverviewsMultiBand( |
6252 | | const std::vector<GDALRasterBand *> &apoSrcBands, |
6253 | | const std::vector<std::vector<GDALRasterBand *>> &aapoOverviewBands, |
6254 | | const char *pszResampling, GDALProgressFunc pfnProgress, |
6255 | | void *pProgressData, CSLConstList papszOptions) |
6256 | 0 | { |
6257 | 0 | CPLAssert(apoSrcBands.size() == aapoOverviewBands.size()); |
6258 | 0 | for (size_t i = 1; i < aapoOverviewBands.size(); ++i) |
6259 | 0 | { |
6260 | 0 | CPLAssert(aapoOverviewBands[i].size() == aapoOverviewBands[0].size()); |
6261 | 0 | } |
6262 | | |
6263 | 0 | if (aapoOverviewBands.empty()) |
6264 | 0 | return CE_None; |
6265 | | |
6266 | 0 | std::vector<GDALRasterBand **> apapoOverviewBands; |
6267 | 0 | for (auto &apoOverviewBands : aapoOverviewBands) |
6268 | 0 | { |
6269 | 0 | auto papoOverviewBands = static_cast<GDALRasterBand **>( |
6270 | 0 | CPLMalloc(apoOverviewBands.size() * sizeof(GDALRasterBand *))); |
6271 | 0 | for (size_t i = 0; i < apoOverviewBands.size(); ++i) |
6272 | 0 | { |
6273 | 0 | papoOverviewBands[i] = apoOverviewBands[i]; |
6274 | 0 | } |
6275 | 0 | apapoOverviewBands.push_back(papoOverviewBands); |
6276 | 0 | } |
6277 | 0 | const CPLErr eErr = GDALRegenerateOverviewsMultiBand( |
6278 | 0 | static_cast<int>(apoSrcBands.size()), apoSrcBands.data(), |
6279 | 0 | static_cast<int>(aapoOverviewBands[0].size()), |
6280 | 0 | apapoOverviewBands.data(), pszResampling, pfnProgress, pProgressData, |
6281 | 0 | papszOptions); |
6282 | 0 | for (GDALRasterBand **papoOverviewBands : apapoOverviewBands) |
6283 | 0 | CPLFree(papoOverviewBands); |
6284 | 0 | return eErr; |
6285 | 0 | } |
6286 | | |
6287 | | /************************************************************************/ |
6288 | | /* GDALComputeBandStats() */ |
6289 | | /************************************************************************/ |
6290 | | |
6291 | | /** Undocumented |
6292 | | * @param hSrcBand undocumented. |
6293 | | * @param nSampleStep Step between scanlines used to compute statistics. |
6294 | | * When nSampleStep is equal to 1, all scanlines will |
6295 | | * be processed. |
6296 | | * @param pdfMean undocumented. |
6297 | | * @param pdfStdDev undocumented. |
6298 | | * @param pfnProgress undocumented. |
6299 | | * @param pProgressData undocumented. |
6300 | | * @return undocumented |
6301 | | */ |
6302 | | CPLErr CPL_STDCALL GDALComputeBandStats(GDALRasterBandH hSrcBand, |
6303 | | int nSampleStep, double *pdfMean, |
6304 | | double *pdfStdDev, |
6305 | | GDALProgressFunc pfnProgress, |
6306 | | void *pProgressData) |
6307 | | |
6308 | 0 | { |
6309 | 0 | VALIDATE_POINTER1(hSrcBand, "GDALComputeBandStats", CE_Failure); |
6310 | | |
6311 | 0 | GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand); |
6312 | |
|
6313 | 0 | if (pfnProgress == nullptr) |
6314 | 0 | pfnProgress = GDALDummyProgress; |
6315 | |
|
6316 | 0 | const int nWidth = poSrcBand->GetXSize(); |
6317 | 0 | const int nHeight = poSrcBand->GetYSize(); |
6318 | |
|
6319 | 0 | if (nSampleStep >= nHeight || nSampleStep < 1) |
6320 | 0 | nSampleStep = 1; |
6321 | |
|
6322 | 0 | GDALDataType eWrkType = GDT_Unknown; |
6323 | 0 | float *pafData = nullptr; |
6324 | 0 | GDALDataType eType = poSrcBand->GetRasterDataType(); |
6325 | 0 | const bool bComplex = CPL_TO_BOOL(GDALDataTypeIsComplex(eType)); |
6326 | 0 | if (bComplex) |
6327 | 0 | { |
6328 | 0 | pafData = static_cast<float *>( |
6329 | 0 | VSI_MALLOC2_VERBOSE(nWidth, 2 * sizeof(float))); |
6330 | 0 | eWrkType = GDT_CFloat32; |
6331 | 0 | } |
6332 | 0 | else |
6333 | 0 | { |
6334 | 0 | pafData = |
6335 | 0 | static_cast<float *>(VSI_MALLOC2_VERBOSE(nWidth, sizeof(float))); |
6336 | 0 | eWrkType = GDT_Float32; |
6337 | 0 | } |
6338 | |
|
6339 | 0 | if (nWidth == 0 || pafData == nullptr) |
6340 | 0 | { |
6341 | 0 | VSIFree(pafData); |
6342 | 0 | return CE_Failure; |
6343 | 0 | } |
6344 | | |
6345 | | /* -------------------------------------------------------------------- */ |
6346 | | /* Loop over all sample lines. */ |
6347 | | /* -------------------------------------------------------------------- */ |
6348 | 0 | double dfSum = 0.0; |
6349 | 0 | double dfSum2 = 0.0; |
6350 | 0 | int iLine = 0; |
6351 | 0 | GIntBig nSamples = 0; |
6352 | |
|
6353 | 0 | do |
6354 | 0 | { |
6355 | 0 | if (!pfnProgress(iLine / static_cast<double>(nHeight), nullptr, |
6356 | 0 | pProgressData)) |
6357 | 0 | { |
6358 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
6359 | 0 | CPLFree(pafData); |
6360 | 0 | return CE_Failure; |
6361 | 0 | } |
6362 | | |
6363 | 0 | const CPLErr eErr = |
6364 | 0 | poSrcBand->RasterIO(GF_Read, 0, iLine, nWidth, 1, pafData, nWidth, |
6365 | 0 | 1, eWrkType, 0, 0, nullptr); |
6366 | 0 | if (eErr != CE_None) |
6367 | 0 | { |
6368 | 0 | CPLFree(pafData); |
6369 | 0 | return eErr; |
6370 | 0 | } |
6371 | | |
6372 | 0 | for (int iPixel = 0; iPixel < nWidth; ++iPixel) |
6373 | 0 | { |
6374 | 0 | float fValue = 0.0f; |
6375 | |
|
6376 | 0 | if (bComplex) |
6377 | 0 | { |
6378 | | // Compute the magnitude of the complex value. |
6379 | 0 | fValue = |
6380 | 0 | std::hypot(pafData[iPixel * 2], pafData[iPixel * 2 + 1]); |
6381 | 0 | } |
6382 | 0 | else |
6383 | 0 | { |
6384 | 0 | fValue = pafData[iPixel]; |
6385 | 0 | } |
6386 | |
|
6387 | 0 | dfSum += fValue; |
6388 | 0 | dfSum2 += static_cast<double>(fValue) * fValue; |
6389 | 0 | } |
6390 | |
|
6391 | 0 | nSamples += nWidth; |
6392 | 0 | iLine += nSampleStep; |
6393 | 0 | } while (iLine < nHeight); |
6394 | | |
6395 | 0 | if (!pfnProgress(1.0, nullptr, pProgressData)) |
6396 | 0 | { |
6397 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
6398 | 0 | CPLFree(pafData); |
6399 | 0 | return CE_Failure; |
6400 | 0 | } |
6401 | | |
6402 | | /* -------------------------------------------------------------------- */ |
6403 | | /* Produce the result values. */ |
6404 | | /* -------------------------------------------------------------------- */ |
6405 | 0 | if (pdfMean != nullptr) |
6406 | 0 | *pdfMean = dfSum / nSamples; |
6407 | |
|
6408 | 0 | if (pdfStdDev != nullptr) |
6409 | 0 | { |
6410 | 0 | const double dfMean = dfSum / nSamples; |
6411 | |
|
6412 | 0 | *pdfStdDev = sqrt((dfSum2 / nSamples) - (dfMean * dfMean)); |
6413 | 0 | } |
6414 | |
|
6415 | 0 | CPLFree(pafData); |
6416 | |
|
6417 | 0 | return CE_None; |
6418 | 0 | } |
6419 | | |
6420 | | /************************************************************************/ |
6421 | | /* GDALOverviewMagnitudeCorrection() */ |
6422 | | /* */ |
6423 | | /* Correct the mean and standard deviation of the overviews of */ |
6424 | | /* the given band to match the base layer approximately. */ |
6425 | | /************************************************************************/ |
6426 | | |
6427 | | /** Undocumented |
6428 | | * @param hBaseBand undocumented. |
6429 | | * @param nOverviewCount undocumented. |
6430 | | * @param pahOverviews undocumented. |
6431 | | * @param pfnProgress undocumented. |
6432 | | * @param pProgressData undocumented. |
6433 | | * @return undocumented |
6434 | | */ |
6435 | | CPLErr GDALOverviewMagnitudeCorrection(GDALRasterBandH hBaseBand, |
6436 | | int nOverviewCount, |
6437 | | GDALRasterBandH *pahOverviews, |
6438 | | GDALProgressFunc pfnProgress, |
6439 | | void *pProgressData) |
6440 | | |
6441 | 0 | { |
6442 | 0 | VALIDATE_POINTER1(hBaseBand, "GDALOverviewMagnitudeCorrection", CE_Failure); |
6443 | | |
6444 | | /* -------------------------------------------------------------------- */ |
6445 | | /* Compute mean/stddev for source raster. */ |
6446 | | /* -------------------------------------------------------------------- */ |
6447 | 0 | double dfOrigMean = 0.0; |
6448 | 0 | double dfOrigStdDev = 0.0; |
6449 | 0 | { |
6450 | 0 | const CPLErr eErr = |
6451 | 0 | GDALComputeBandStats(hBaseBand, 2, &dfOrigMean, &dfOrigStdDev, |
6452 | 0 | pfnProgress, pProgressData); |
6453 | |
|
6454 | 0 | if (eErr != CE_None) |
6455 | 0 | return eErr; |
6456 | 0 | } |
6457 | | |
6458 | | /* -------------------------------------------------------------------- */ |
6459 | | /* Loop on overview bands. */ |
6460 | | /* -------------------------------------------------------------------- */ |
6461 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; ++iOverview) |
6462 | 0 | { |
6463 | 0 | GDALRasterBand *poOverview = |
6464 | 0 | GDALRasterBand::FromHandle(pahOverviews[iOverview]); |
6465 | 0 | double dfOverviewMean, dfOverviewStdDev; |
6466 | |
|
6467 | 0 | const CPLErr eErr = |
6468 | 0 | GDALComputeBandStats(pahOverviews[iOverview], 1, &dfOverviewMean, |
6469 | 0 | &dfOverviewStdDev, pfnProgress, pProgressData); |
6470 | |
|
6471 | 0 | if (eErr != CE_None) |
6472 | 0 | return eErr; |
6473 | | |
6474 | 0 | double dfGain = 1.0; |
6475 | 0 | if (dfOrigStdDev >= 0.0001) |
6476 | 0 | dfGain = dfOrigStdDev / dfOverviewStdDev; |
6477 | | |
6478 | | /* -------------------------------------------------------------------- |
6479 | | */ |
6480 | | /* Apply gain and offset. */ |
6481 | | /* -------------------------------------------------------------------- |
6482 | | */ |
6483 | 0 | const int nWidth = poOverview->GetXSize(); |
6484 | 0 | const int nHeight = poOverview->GetYSize(); |
6485 | |
|
6486 | 0 | GDALDataType eWrkType = GDT_Unknown; |
6487 | 0 | float *pafData = nullptr; |
6488 | 0 | const GDALDataType eType = poOverview->GetRasterDataType(); |
6489 | 0 | const bool bComplex = CPL_TO_BOOL(GDALDataTypeIsComplex(eType)); |
6490 | 0 | if (bComplex) |
6491 | 0 | { |
6492 | 0 | pafData = static_cast<float *>( |
6493 | 0 | VSI_MALLOC2_VERBOSE(nWidth, 2 * sizeof(float))); |
6494 | 0 | eWrkType = GDT_CFloat32; |
6495 | 0 | } |
6496 | 0 | else |
6497 | 0 | { |
6498 | 0 | pafData = static_cast<float *>( |
6499 | 0 | VSI_MALLOC2_VERBOSE(nWidth, sizeof(float))); |
6500 | 0 | eWrkType = GDT_Float32; |
6501 | 0 | } |
6502 | |
|
6503 | 0 | if (pafData == nullptr) |
6504 | 0 | { |
6505 | 0 | return CE_Failure; |
6506 | 0 | } |
6507 | | |
6508 | 0 | for (int iLine = 0; iLine < nHeight; ++iLine) |
6509 | 0 | { |
6510 | 0 | if (!pfnProgress(iLine / static_cast<double>(nHeight), nullptr, |
6511 | 0 | pProgressData)) |
6512 | 0 | { |
6513 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
6514 | 0 | CPLFree(pafData); |
6515 | 0 | return CE_Failure; |
6516 | 0 | } |
6517 | | |
6518 | 0 | if (poOverview->RasterIO(GF_Read, 0, iLine, nWidth, 1, pafData, |
6519 | 0 | nWidth, 1, eWrkType, 0, 0, |
6520 | 0 | nullptr) != CE_None) |
6521 | 0 | { |
6522 | 0 | CPLFree(pafData); |
6523 | 0 | return CE_Failure; |
6524 | 0 | } |
6525 | | |
6526 | 0 | for (int iPixel = 0; iPixel < nWidth; ++iPixel) |
6527 | 0 | { |
6528 | 0 | if (bComplex) |
6529 | 0 | { |
6530 | 0 | pafData[iPixel * 2] *= static_cast<float>(dfGain); |
6531 | 0 | pafData[iPixel * 2 + 1] *= static_cast<float>(dfGain); |
6532 | 0 | } |
6533 | 0 | else |
6534 | 0 | { |
6535 | 0 | pafData[iPixel] = static_cast<float>( |
6536 | 0 | (pafData[iPixel] - dfOverviewMean) * dfGain + |
6537 | 0 | dfOrigMean); |
6538 | 0 | } |
6539 | 0 | } |
6540 | |
|
6541 | 0 | if (poOverview->RasterIO(GF_Write, 0, iLine, nWidth, 1, pafData, |
6542 | 0 | nWidth, 1, eWrkType, 0, 0, |
6543 | 0 | nullptr) != CE_None) |
6544 | 0 | { |
6545 | 0 | CPLFree(pafData); |
6546 | 0 | return CE_Failure; |
6547 | 0 | } |
6548 | 0 | } |
6549 | | |
6550 | 0 | if (!pfnProgress(1.0, nullptr, pProgressData)) |
6551 | 0 | { |
6552 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated"); |
6553 | 0 | CPLFree(pafData); |
6554 | 0 | return CE_Failure; |
6555 | 0 | } |
6556 | | |
6557 | 0 | CPLFree(pafData); |
6558 | 0 | } |
6559 | | |
6560 | 0 | return CE_None; |
6561 | 0 | } |