/src/gdal/gcore/rasterio.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: GDAL Core |
4 | | * Purpose: Contains default implementation of GDALRasterBand::IRasterIO() |
5 | | * and supporting functions of broader utility. |
6 | | * Author: Frank Warmerdam, warmerdam@pobox.com |
7 | | * |
8 | | ****************************************************************************** |
9 | | * Copyright (c) 1998, Frank Warmerdam |
10 | | * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com> |
11 | | * |
12 | | * SPDX-License-Identifier: MIT |
13 | | ****************************************************************************/ |
14 | | |
15 | | #include "cpl_port.h" |
16 | | #include "gdal.h" |
17 | | #include "gdal_priv.h" |
18 | | |
19 | | #include <cassert> |
20 | | #include <climits> |
21 | | #include <cmath> |
22 | | #include <cstddef> |
23 | | #include <cstdio> |
24 | | #include <cstdlib> |
25 | | #include <cstring> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <limits> |
29 | | #include <stdexcept> |
30 | | #include <type_traits> |
31 | | |
32 | | #include "cpl_conv.h" |
33 | | #include "cpl_cpu_features.h" |
34 | | #include "cpl_error.h" |
35 | | #include "cpl_float.h" |
36 | | #include "cpl_progress.h" |
37 | | #include "cpl_string.h" |
38 | | #include "cpl_vsi.h" |
39 | | #include "gdal_priv_templates.hpp" |
40 | | #include "gdal_vrt.h" |
41 | | #include "gdalwarper.h" |
42 | | #include "memdataset.h" |
43 | | #include "vrtdataset.h" |
44 | | |
45 | | #if defined(__x86_64) || defined(_M_X64) |
46 | | #include <emmintrin.h> |
47 | | #define HAVE_SSE2 |
48 | | #elif defined(USE_NEON_OPTIMIZATIONS) |
49 | | #include "include_sse2neon.h" |
50 | | #define HAVE_SSE2 |
51 | | #endif |
52 | | |
53 | | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
54 | | #include "rasterio_ssse3.h" |
55 | | #ifdef __SSSE3__ |
56 | | #include <tmmintrin.h> |
57 | | #endif |
58 | | #endif |
59 | | |
60 | | #ifdef __SSE4_1__ |
61 | | #include <smmintrin.h> |
62 | | #endif |
63 | | |
64 | | #ifdef __GNUC__ |
65 | | #define CPL_NOINLINE __attribute__((noinline)) |
66 | | #else |
67 | | #define CPL_NOINLINE |
68 | | #endif |
69 | | |
70 | | static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData, |
71 | | int nSrcPixelStride, GByte *CPL_RESTRICT pDstData, |
72 | | int nDstPixelStride, GPtrDiff_t nWordCount); |
73 | | |
74 | | /************************************************************************/ |
75 | | /* DownsamplingIntegerXFactor()
 | | *
 | | * Copies nBufXSize pixels of one raster row from the blocks of poBand
 | | * into pabyDstData, taking one source pixel every nSrcXInc columns,
 | | * starting at source column iSrcX. Blocks are obtained with
 | | * GetLockedBlockRef(nLBlockX, nLBlockY, FALSE) each time the source
 | | * column moves past the end of the current block; the lock on the block
 | | * held so far is dropped before the next one is requested.
 | | *
 | | * Template parameters:
 | | * bSameDataType - when true, pixels are copied verbatim with memcpy()
 | | * (or GDALFastCopyByte() when DATA_TYPE_SIZE == 1);
 | | * when false, each run goes through GDALCopyWords64()
 | | * to convert from eDataType to eBufType.
 | | * DATA_TYPE_SIZE - pixel size in bytes; only used when bSameDataType
 | | * is true.
 | | *
 | | * Parameters:
 | | * poBand - band whose blocks are read.
 | | * iSrcX - source column of the first pixel to copy.
 | | * nSrcXInc - step, in source pixels, between two copied pixels.
 | | * iSrcOffsetCst - pixel offset added to the in-block column to address
 | | * the wanted row inside a block.
 | | * pabyDstData - where the first output pixel is written.
 | | * nPixelSpace - distance in bytes between two output pixels.
 | | * nBufXSize - number of output pixels to produce.
 | | * eDataType - band data type (only read when bSameDataType is false).
 | | * eBufType - output data type (only read when bSameDataType is false).
 | | * nStartBlockX - in/out: first column of the block held in poBlock;
 | | * updated on every reload.
 | | * nBlockXSize - block width in pixels.
 | | * poBlock - in/out: block currently locked. Must be non-null on
 | | * entry when iSrcX < nStartBlockX + nBlockXSize. Still
 | | * locked when the function returns true (releasing it is
 | | * left to the caller); nullptr when it returns false.
 | | * nLBlockY - block row index passed to GetLockedBlockRef().
 | | *
 | | * Returns true on success, false if GetLockedBlockRef() returned nullptr.
 | | *
 | | * Control flow: the first and the last pixel are processed by jumping
 | | * with goto into the body of the while loop (labels reload_block and
 | | * no_reload_block), so statement order in that body matters.
 | | */ |
76 | | /************************************************************************/ |
77 | |
78 | | template <bool bSameDataType, int DATA_TYPE_SIZE> |
79 | | static bool DownsamplingIntegerXFactor( |
80 | | GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst, |
81 | | GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize, |
82 | | GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX, |
83 | | int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY) |
84 | 0 | { |
85 | 0 | const int nBandDataSize = |
86 | 0 | bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType); |
87 | 0 | int nOuterLoopIters = nBufXSize - 1; /* output pixels left after the current one */ |
88 | 0 | const int nIncSrcOffset = nSrcXInc * nBandDataSize; /* source step in bytes */ |
89 | 0 | const GByte *CPL_RESTRICT pabySrcData; /* assigned at no_reload_block before first use */ |
90 | 0 | int nEndBlockX = nBlockXSize + nStartBlockX; /* first column past the current block */ |
91 | | /* The first pixel is handled by jumping straight into the loop body:
 | | * reuse the block already held if it covers iSrcX, otherwise load the
 | | * block that does. */

92 | 0 | if (iSrcX < nEndBlockX) |
93 | 0 | { |
94 | 0 | CPLAssert(poBlock); |
95 | 0 | goto no_reload_block; |
96 | 0 | } |
97 | 0 | goto reload_block; |
98 | |
99 | | // Don't do the last iteration in the loop, as iSrcX might go beyond |
100 | | // nRasterXSize - 1 |
101 | 0 | while (--nOuterLoopIters >= 1) |
102 | 0 | { |
103 | 0 | iSrcX += nSrcXInc; |
104 | 0 | pabySrcData += nIncSrcOffset; |
105 | 0 | pabyDstData += nPixelSpace; |
106 | |
107 | | /* --------------------------------------------------------------------
108 | | */ |
109 | | /* Ensure we have the appropriate block loaded. reload_block and
 | | * no_reload_block are also jumped to from outside the loop, for the
 | | * first and the last pixel. */ |
110 | | /* --------------------------------------------------------------------
111 | | */ |
112 | 0 | if (iSrcX >= nEndBlockX) |
113 | 0 | { |
114 | 0 | reload_block: |
115 | 0 | { |
116 | 0 | const int nLBlockX = iSrcX / nBlockXSize; |
117 | 0 | nStartBlockX = nLBlockX * nBlockXSize; |
118 | 0 | nEndBlockX = nStartBlockX + nBlockXSize; |
119 | |

120 | 0 | if (poBlock != nullptr) |
121 | 0 | poBlock->DropLock(); |
122 | |

123 | 0 | poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE); |
124 | 0 | if (poBlock == nullptr) |
125 | 0 | { |
126 | 0 | return false; |
127 | 0 | } |
128 | 0 | } |
129 | |
130 | 0 | no_reload_block: /* recompute the source pointer from the start of the block data */ |
131 | 0 | const GByte *pabySrcBlock = |
132 | 0 | static_cast<const GByte *>(poBlock->GetDataRef()); |
133 | 0 | GPtrDiff_t iSrcOffset = |
134 | 0 | (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize; |
135 | 0 | pabySrcData = pabySrcBlock + iSrcOffset; |
136 | 0 | } |
137 | |
138 | | /* --------------------------------------------------------------------
139 | | */ |
140 | | /* Copy the maximum run of pixels: nIters is the number of sampled
 | | * pixels that still fall inside the current block, capped by
 | | * nOuterLoopIters. */ |
141 | | /* --------------------------------------------------------------------
142 | | */ |
143 | |
144 | 0 | const int nIters = std::min( |
145 | 0 | (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters); |
146 | 0 | if (bSameDataType) |
147 | 0 | { |
148 | 0 | memcpy(pabyDstData, pabySrcData, nBandDataSize); /* current pixel is always copied */ |
149 | 0 | if (nIters > 1) |
150 | 0 | { |
151 | 0 | if (DATA_TYPE_SIZE == 1) |
152 | 0 | { |
153 | 0 | pabySrcData += nIncSrcOffset; |
154 | 0 | pabyDstData += nPixelSpace; |
155 | 0 | GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData, |
156 | 0 | nPixelSpace, nIters - 1); /* then leave both pointers on the last pixel of the run */ |
157 | 0 | pabySrcData += |
158 | 0 | static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2); |
159 | 0 | pabyDstData += |
160 | 0 | static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2); |
161 | 0 | } |
162 | 0 | else |
163 | 0 | { |
164 | 0 | for (int i = 0; i < nIters - 1; i++) |
165 | 0 | { |
166 | 0 | pabySrcData += nIncSrcOffset; |
167 | 0 | pabyDstData += nPixelSpace; |
168 | 0 | memcpy(pabyDstData, pabySrcData, nBandDataSize); |
169 | 0 | } |
170 | 0 | } |
171 | 0 | iSrcX += nSrcXInc * (nIters - 1); |
172 | 0 | nOuterLoopIters -= nIters - 1; |
173 | 0 | } |
174 | 0 | } |
175 | 0 | else |
176 | 0 | { |
177 | | // Type to type conversion ... |
178 | 0 | GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData, |
179 | 0 | eBufType, nPixelSpace, std::max(1, nIters)); /* nIters is 0 when nOuterLoopIters is 0 (final pixel): still copy one */ |
180 | 0 | if (nIters > 1) |
181 | 0 | { |
182 | 0 | pabySrcData += |
183 | 0 | static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1); |
184 | 0 | pabyDstData += |
185 | 0 | static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1); |
186 | 0 | iSrcX += nSrcXInc * (nIters - 1); |
187 | 0 | nOuterLoopIters -= nIters - 1; |
188 | 0 | } |
189 | 0 | } |
190 | 0 | } |
191 | |
192 | | // Deal with the last iteration separately to keep iSrcX from going beyond nRasterXSize - 1 |
193 | 0 | if (nOuterLoopIters == 0) /* taken at most once: the loop test then decrements the counter below 0 */ |
194 | 0 | { |
195 | 0 | const int nRasterXSize = poBand->GetXSize(); |
196 | 0 | iSrcX = |
197 | 0 | static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc, |
198 | 0 | static_cast<GInt64>(nRasterXSize - 1))); |
199 | 0 | pabyDstData += nPixelSpace; |
200 | 0 | if (iSrcX < nEndBlockX) |
201 | 0 | { |
202 | 0 | goto no_reload_block; |
203 | 0 | } |
204 | 0 | goto reload_block; |
205 | 0 | } |
206 | 0 | return true; |
207 | 0 | } Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 1>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 2>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 4>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 8>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 16>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<false, 0>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) |
208 | | /* CPLUnsanitizedMul(): returns a * b; the result type is deduced from the
 | | * operand types after the usual arithmetic conversions.
 | | * The function carries CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW, a macro that
 | | * is not defined in the visible part of this file. NOTE(review):
 | | * presumably it disables unsigned-integer-overflow sanitizer checks for
 | | * this function, so that products allowed to wrap are not reported;
 | | * confirm against the macro definition. */
209 | | template <class A, class B> |
210 | | CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW inline auto CPLUnsanitizedMul(A a, B b) |
211 | 0 | { |
212 | 0 | return a * b; |
213 | 0 | } |
214 | | |
215 | | /************************************************************************/ |
216 | | /* IRasterIO() */ |
217 | | /* */ |
218 | | /* Default internal implementation of RasterIO() ... utilizes */ |
219 | | /* the Block access methods to satisfy the request. This would */ |
220 | | /* normally only be overridden by formats with overviews. */ |
221 | | /************************************************************************/ |
222 | | |
223 | | CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, |
224 | | int nXSize, int nYSize, void *pData, |
225 | | int nBufXSize, int nBufYSize, |
226 | | GDALDataType eBufType, GSpacing nPixelSpace, |
227 | | GSpacing nLineSpace, |
228 | | GDALRasterIOExtraArg *psExtraArg) |
229 | | |
230 | 0 | { |
231 | 0 | if (eRWFlag == GF_Write && eFlushBlockErr != CE_None) |
232 | 0 | { |
233 | 0 | CPLError(eFlushBlockErr, CPLE_AppDefined, |
234 | 0 | "An error occurred while writing a dirty block " |
235 | 0 | "from GDALRasterBand::IRasterIO"); |
236 | 0 | CPLErr eErr = eFlushBlockErr; |
237 | 0 | eFlushBlockErr = CE_None; |
238 | 0 | return eErr; |
239 | 0 | } |
240 | 0 | if (nBlockXSize <= 0 || nBlockYSize <= 0) |
241 | 0 | { |
242 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size"); |
243 | 0 | return CE_Failure; |
244 | 0 | } |
245 | | |
246 | 0 | const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType); |
247 | 0 | const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType); |
248 | 0 | GByte dummyBlock[2] = {0, 0}; |
249 | 0 | GByte *pabySrcBlock = |
250 | 0 | dummyBlock; /* to avoid Coverity warning about nullptr dereference */ |
251 | 0 | GDALRasterBlock *poBlock = nullptr; |
252 | 0 | const bool bUseIntegerRequestCoords = |
253 | 0 | (!psExtraArg->bFloatingPointWindowValidity || |
254 | 0 | (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff && |
255 | 0 | nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize)); |
256 | | |
257 | | /* ==================================================================== */ |
258 | | /* A common case is the data requested with the destination */ |
259 | | /* is packed, and the block width is the raster width. */ |
260 | | /* ==================================================================== */ |
261 | 0 | if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize && |
262 | 0 | nBlockXSize == GetXSize() && nBufXSize == nXSize && |
263 | 0 | nBufYSize == nYSize && bUseIntegerRequestCoords) |
264 | 0 | { |
265 | 0 | CPLErr eErr = CE_None; |
266 | 0 | int nLBlockY = -1; |
267 | |
|
268 | 0 | for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++) |
269 | 0 | { |
270 | 0 | const int iSrcY = iBufYOff + nYOff; |
271 | |
|
272 | 0 | if (iSrcY < nLBlockY * nBlockYSize || |
273 | 0 | iSrcY - nBlockYSize >= nLBlockY * nBlockYSize) |
274 | 0 | { |
275 | 0 | nLBlockY = iSrcY / nBlockYSize; |
276 | 0 | bool bJustInitialize = |
277 | 0 | eRWFlag == GF_Write && nXOff == 0 && |
278 | 0 | nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize && |
279 | 0 | nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize; |
280 | | |
281 | | // Is this a partial tile at right and/or bottom edges of |
282 | | // the raster, and that is going to be completely written? |
283 | | // If so, do not load it from storage, but zero it so that |
284 | | // the content outside of the validity area is initialized. |
285 | 0 | bool bMemZeroBuffer = false; |
286 | 0 | if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 && |
287 | 0 | nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize && |
288 | 0 | nYOff + nYSize == GetYSize() && |
289 | 0 | nLBlockY * nBlockYSize > GetYSize() - nBlockYSize) |
290 | 0 | { |
291 | 0 | bJustInitialize = true; |
292 | 0 | bMemZeroBuffer = true; |
293 | 0 | } |
294 | |
|
295 | 0 | if (poBlock) |
296 | 0 | poBlock->DropLock(); |
297 | |
|
298 | 0 | const GUInt32 nErrorCounter = CPLGetErrorCounter(); |
299 | 0 | poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize); |
300 | 0 | if (poBlock == nullptr) |
301 | 0 | { |
302 | 0 | if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == |
303 | 0 | nullptr) |
304 | 0 | { |
305 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
306 | 0 | "GetBlockRef failed at X block offset %d, " |
307 | 0 | "Y block offset %d%s", |
308 | 0 | 0, nLBlockY, |
309 | 0 | (nErrorCounter != CPLGetErrorCounter()) |
310 | 0 | ? CPLSPrintf(": %s", CPLGetLastErrorMsg()) |
311 | 0 | : ""); |
312 | 0 | } |
313 | 0 | eErr = CE_Failure; |
314 | 0 | break; |
315 | 0 | } |
316 | | |
317 | 0 | if (eRWFlag == GF_Write) |
318 | 0 | poBlock->MarkDirty(); |
319 | |
|
320 | 0 | pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef()); |
321 | 0 | if (bMemZeroBuffer) |
322 | 0 | { |
323 | 0 | memset(pabySrcBlock, 0, |
324 | 0 | static_cast<GPtrDiff_t>(nBandDataSize) * |
325 | 0 | nBlockXSize * nBlockYSize); |
326 | 0 | } |
327 | 0 | } |
328 | | |
329 | 0 | const auto nSrcByteOffset = |
330 | 0 | (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) * |
331 | 0 | nBlockXSize + |
332 | 0 | nXOff) * |
333 | 0 | nBandDataSize; |
334 | |
|
335 | 0 | if (eDataType == eBufType) |
336 | 0 | { |
337 | 0 | if (eRWFlag == GF_Read) |
338 | 0 | memcpy(static_cast<GByte *>(pData) + |
339 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace, |
340 | 0 | pabySrcBlock + nSrcByteOffset, |
341 | 0 | static_cast<size_t>(nLineSpace)); |
342 | 0 | else |
343 | 0 | memcpy(pabySrcBlock + nSrcByteOffset, |
344 | 0 | static_cast<GByte *>(pData) + |
345 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace, |
346 | 0 | static_cast<size_t>(nLineSpace)); |
347 | 0 | } |
348 | 0 | else |
349 | 0 | { |
350 | | // Type to type conversion. |
351 | 0 | if (eRWFlag == GF_Read) |
352 | 0 | GDALCopyWords64( |
353 | 0 | pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize, |
354 | 0 | static_cast<GByte *>(pData) + |
355 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace, |
356 | 0 | eBufType, static_cast<int>(nPixelSpace), nBufXSize); |
357 | 0 | else |
358 | 0 | GDALCopyWords64(static_cast<GByte *>(pData) + |
359 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * |
360 | 0 | nLineSpace, |
361 | 0 | eBufType, static_cast<int>(nPixelSpace), |
362 | 0 | pabySrcBlock + nSrcByteOffset, eDataType, |
363 | 0 | nBandDataSize, nBufXSize); |
364 | 0 | } |
365 | |
|
366 | 0 | if (psExtraArg->pfnProgress != nullptr && |
367 | 0 | !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "", |
368 | 0 | psExtraArg->pProgressData)) |
369 | 0 | { |
370 | 0 | eErr = CE_Failure; |
371 | 0 | break; |
372 | 0 | } |
373 | 0 | } |
374 | |
|
375 | 0 | if (poBlock) |
376 | 0 | poBlock->DropLock(); |
377 | |
|
378 | 0 | return eErr; |
379 | 0 | } |
380 | | |
381 | | /* ==================================================================== */ |
382 | | /* Do we have overviews that would be appropriate to satisfy */ |
383 | | /* this request? */ |
384 | | /* ==================================================================== */ |
385 | 0 | if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 && |
386 | 0 | eRWFlag == GF_Read) |
387 | 0 | { |
388 | 0 | GDALRasterIOExtraArg sExtraArg; |
389 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
390 | |
|
391 | 0 | const int nOverview = |
392 | 0 | GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize, |
393 | 0 | nBufXSize, nBufYSize, &sExtraArg); |
394 | 0 | if (nOverview >= 0) |
395 | 0 | { |
396 | 0 | GDALRasterBand *poOverviewBand = GetOverview(nOverview); |
397 | 0 | if (poOverviewBand == nullptr) |
398 | 0 | return CE_Failure; |
399 | | |
400 | 0 | return poOverviewBand->RasterIO( |
401 | 0 | eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, |
402 | 0 | nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg); |
403 | 0 | } |
404 | 0 | } |
405 | | |
406 | 0 | if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 && |
407 | 0 | nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize && |
408 | 0 | nLineSpace == nPixelSpace * nBufXSize && |
409 | 0 | CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO"))) |
410 | 0 | { |
411 | 0 | memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize)); |
412 | 0 | return CE_None; |
413 | 0 | } |
414 | | |
415 | | /* ==================================================================== */ |
416 | | /* The second case when we don't need subsample data but likely */ |
417 | | /* need data type conversion. */ |
418 | | /* ==================================================================== */ |
419 | 0 | if ( // nPixelSpace == nBufDataSize && |
420 | 0 | nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords) |
421 | 0 | { |
422 | | #if DEBUG_VERBOSE |
423 | | printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/ |
424 | | nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag)); |
425 | | #endif |
426 | | |
427 | | /* -------------------------------------------------------------------- |
428 | | */ |
429 | | /* Loop over buffer computing source locations. */ |
430 | | /* -------------------------------------------------------------------- |
431 | | */ |
432 | | // Calculate starting values out of loop |
433 | 0 | const int nLBlockXStart = nXOff / nBlockXSize; |
434 | 0 | const int nXSpanEnd = nBufXSize + nXOff; |
435 | |
|
436 | 0 | int nYInc = 0; |
437 | 0 | for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize; |
438 | 0 | iBufYOff += nYInc, iSrcY += nYInc) |
439 | 0 | { |
440 | 0 | GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * |
441 | 0 | static_cast<GPtrDiff_t>(nLineSpace); |
442 | 0 | int nLBlockY = iSrcY / nBlockYSize; |
443 | 0 | int nLBlockX = nLBlockXStart; |
444 | 0 | int iSrcX = nXOff; |
445 | 0 | while (iSrcX < nXSpanEnd) |
446 | 0 | { |
447 | 0 | int nXSpan = nLBlockX * nBlockXSize; |
448 | 0 | if (nXSpan < INT_MAX - nBlockXSize) |
449 | 0 | nXSpan += nBlockXSize; |
450 | 0 | else |
451 | 0 | nXSpan = INT_MAX; |
452 | 0 | const int nXRight = nXSpan; |
453 | 0 | nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX; |
454 | |
|
455 | 0 | const size_t nXSpanSize = |
456 | 0 | CPLUnsanitizedMul(nXSpan, static_cast<size_t>(nPixelSpace)); |
457 | |
|
458 | 0 | bool bJustInitialize = |
459 | 0 | eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize && |
460 | 0 | nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize && |
461 | 0 | nXOff <= nLBlockX * nBlockXSize && |
462 | 0 | nXOff + nXSize >= nXRight; |
463 | | |
464 | | // Is this a partial tile at right and/or bottom edges of |
465 | | // the raster, and that is going to be completely written? |
466 | | // If so, do not load it from storage, but zero it so that |
467 | | // the content outside of the validity area is initialized. |
468 | 0 | bool bMemZeroBuffer = false; |
469 | 0 | if (eRWFlag == GF_Write && !bJustInitialize && |
470 | 0 | nXOff <= nLBlockX * nBlockXSize && |
471 | 0 | nYOff <= nLBlockY * nBlockYSize && |
472 | 0 | (nXOff + nXSize >= nXRight || |
473 | | // cppcheck-suppress knownConditionTrueFalse |
474 | 0 | (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) && |
475 | 0 | (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize || |
476 | 0 | (nYOff + nYSize == GetYSize() && |
477 | 0 | nLBlockY * nBlockYSize > GetYSize() - nBlockYSize))) |
478 | 0 | { |
479 | 0 | bJustInitialize = true; |
480 | 0 | bMemZeroBuffer = true; |
481 | 0 | } |
482 | | |
483 | | /* -------------------------------------------------------------------- |
484 | | */ |
485 | | /* Ensure we have the appropriate block loaded. */ |
486 | | /* -------------------------------------------------------------------- |
487 | | */ |
488 | 0 | const GUInt32 nErrorCounter = CPLGetErrorCounter(); |
489 | 0 | poBlock = |
490 | 0 | GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize); |
491 | 0 | if (!poBlock) |
492 | 0 | { |
493 | 0 | if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == |
494 | 0 | nullptr) |
495 | 0 | { |
496 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
497 | 0 | "GetBlockRef failed at X block offset %d, " |
498 | 0 | "Y block offset %d%s", |
499 | 0 | nLBlockX, nLBlockY, |
500 | 0 | (nErrorCounter != CPLGetErrorCounter()) |
501 | 0 | ? CPLSPrintf(": %s", CPLGetLastErrorMsg()) |
502 | 0 | : ""); |
503 | 0 | } |
504 | 0 | return (CE_Failure); |
505 | 0 | } |
506 | | |
507 | 0 | if (eRWFlag == GF_Write) |
508 | 0 | poBlock->MarkDirty(); |
509 | |
|
510 | 0 | pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef()); |
511 | 0 | if (bMemZeroBuffer) |
512 | 0 | { |
513 | 0 | memset(pabySrcBlock, 0, |
514 | 0 | static_cast<GPtrDiff_t>(nBandDataSize) * |
515 | 0 | nBlockXSize * nBlockYSize); |
516 | 0 | } |
517 | | /* -------------------------------------------------------------------- |
518 | | */ |
519 | | /* Copy over this chunk of data. */ |
520 | | /* -------------------------------------------------------------------- |
521 | | */ |
522 | 0 | GPtrDiff_t iSrcOffset = |
523 | 0 | (static_cast<GPtrDiff_t>(iSrcX) - |
524 | 0 | static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) + |
525 | 0 | (static_cast<GPtrDiff_t>(iSrcY) - |
526 | 0 | static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) * |
527 | 0 | nBlockXSize) * |
528 | 0 | nBandDataSize; |
529 | | // Fill up as many rows as possible for the loaded block. |
530 | 0 | const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize), |
531 | 0 | nBufYSize - iBufYOff); |
532 | 0 | for (int k = 0; k < kmax; k++) |
533 | 0 | { |
534 | 0 | if (eDataType == eBufType && nPixelSpace == nBufDataSize) |
535 | 0 | { |
536 | 0 | if (eRWFlag == GF_Read) |
537 | 0 | memcpy(static_cast<GByte *>(pData) + iBufOffset + |
538 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
539 | 0 | pabySrcBlock + iSrcOffset, nXSpanSize); |
540 | 0 | else |
541 | 0 | memcpy(pabySrcBlock + iSrcOffset, |
542 | 0 | static_cast<GByte *>(pData) + iBufOffset + |
543 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
544 | 0 | nXSpanSize); |
545 | 0 | } |
546 | 0 | else |
547 | 0 | { |
548 | | /* type to type conversion */ |
549 | 0 | if (eRWFlag == GF_Read) |
550 | 0 | GDALCopyWords64( |
551 | 0 | pabySrcBlock + iSrcOffset, eDataType, |
552 | 0 | nBandDataSize, |
553 | 0 | static_cast<GByte *>(pData) + iBufOffset + |
554 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
555 | 0 | eBufType, static_cast<int>(nPixelSpace), |
556 | 0 | nXSpan); |
557 | 0 | else |
558 | 0 | GDALCopyWords64( |
559 | 0 | static_cast<GByte *>(pData) + iBufOffset + |
560 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
561 | 0 | eBufType, static_cast<int>(nPixelSpace), |
562 | 0 | pabySrcBlock + iSrcOffset, eDataType, |
563 | 0 | nBandDataSize, nXSpan); |
564 | 0 | } |
565 | |
|
566 | 0 | iSrcOffset += |
567 | 0 | static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize; |
568 | 0 | } |
569 | |
|
570 | 0 | iBufOffset = |
571 | 0 | CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize); |
572 | 0 | nLBlockX++; |
573 | 0 | iSrcX += nXSpan; |
574 | |
|
575 | 0 | poBlock->DropLock(); |
576 | 0 | poBlock = nullptr; |
577 | 0 | } |
578 | | |
579 | | /* Compute the increment to go on a block boundary */ |
580 | 0 | nYInc = nBlockYSize - (iSrcY % nBlockYSize); |
581 | |
|
582 | 0 | if (psExtraArg->pfnProgress != nullptr && |
583 | 0 | !psExtraArg->pfnProgress( |
584 | 0 | 1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "", |
585 | 0 | psExtraArg->pProgressData)) |
586 | 0 | { |
587 | 0 | return CE_Failure; |
588 | 0 | } |
589 | 0 | } |
590 | | |
591 | 0 | return CE_None; |
592 | 0 | } |
593 | | |
594 | | /* ==================================================================== */ |
595 | | /* Loop reading required source blocks to satisfy output */ |
596 | | /* request. This is the most general implementation. */ |
597 | | /* ==================================================================== */ |
598 | | |
599 | 0 | double dfXOff = nXOff; |
600 | 0 | double dfYOff = nYOff; |
601 | 0 | double dfXSize = nXSize; |
602 | 0 | double dfYSize = nYSize; |
603 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
604 | 0 | { |
605 | 0 | dfXOff = psExtraArg->dfXOff; |
606 | 0 | dfYOff = psExtraArg->dfYOff; |
607 | 0 | dfXSize = psExtraArg->dfXSize; |
608 | 0 | dfYSize = psExtraArg->dfYSize; |
609 | 0 | } |
610 | | |
611 | | /* -------------------------------------------------------------------- */ |
612 | | /* Compute stepping increment. */ |
613 | | /* -------------------------------------------------------------------- */ |
614 | 0 | const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize); |
615 | 0 | const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize); |
616 | 0 | CPLErr eErr = CE_None; |
617 | |
|
618 | 0 | if (eRWFlag == GF_Write) |
619 | 0 | { |
620 | | /* -------------------------------------------------------------------- |
621 | | */ |
622 | | /* Write case */ |
623 | | /* Loop over raster window computing source locations in the buffer. |
624 | | */ |
625 | | /* -------------------------------------------------------------------- |
626 | | */ |
627 | 0 | GByte *pabyDstBlock = nullptr; |
628 | 0 | int nLBlockX = -1; |
629 | 0 | int nLBlockY = -1; |
630 | |
|
631 | 0 | for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++) |
632 | 0 | { |
633 | 0 | const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc); |
634 | |
|
635 | 0 | for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++) |
636 | 0 | { |
637 | 0 | const int iBufXOff = |
638 | 0 | static_cast<int>((iDstX - nXOff) / dfSrcXInc); |
639 | 0 | GPtrDiff_t iBufOffset = |
640 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * |
641 | 0 | static_cast<GPtrDiff_t>(nLineSpace) + |
642 | 0 | iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace); |
643 | | |
644 | | // FIXME: this code likely doesn't work if the dirty block gets |
645 | | // flushed to disk before being completely written. |
646 | | // In the meantime, bJustInitialize should probably be set to |
647 | | // FALSE even if it is not ideal performance wise, and for |
648 | | // lossy compression. |
649 | | |
650 | | /* -------------------------------------------------------------------- |
651 | | */ |
652 | | /* Ensure we have the appropriate block loaded. */ |
653 | | /* -------------------------------------------------------------------- |
654 | | */ |
655 | 0 | if (iDstX < nLBlockX * nBlockXSize || |
656 | 0 | iDstX - nBlockXSize >= nLBlockX * nBlockXSize || |
657 | 0 | iDstY < nLBlockY * nBlockYSize || |
658 | 0 | iDstY - nBlockYSize >= nLBlockY * nBlockYSize) |
659 | 0 | { |
660 | 0 | nLBlockX = iDstX / nBlockXSize; |
661 | 0 | nLBlockY = iDstY / nBlockYSize; |
662 | |
|
663 | 0 | const bool bJustInitialize = |
664 | 0 | nYOff <= nLBlockY * nBlockYSize && |
665 | 0 | nYOff + nYSize - nBlockYSize >= |
666 | 0 | nLBlockY * nBlockYSize && |
667 | 0 | nXOff <= nLBlockX * nBlockXSize && |
668 | 0 | nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize; |
669 | | /*bool bMemZeroBuffer = FALSE; |
670 | | if( !bJustInitialize && |
671 | | nXOff <= nLBlockX * nBlockXSize && |
672 | | nYOff <= nLBlockY * nBlockYSize && |
673 | | (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize || |
674 | | (nXOff + nXSize == GetXSize() && |
675 | | (nLBlockX+1) * nBlockXSize > GetXSize())) && |
676 | | (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize || |
677 | | (nYOff + nYSize == GetYSize() && |
678 | | (nLBlockY+1) * nBlockYSize > GetYSize())) ) |
679 | | { |
680 | | bJustInitialize = TRUE; |
681 | | bMemZeroBuffer = TRUE; |
682 | | }*/ |
683 | 0 | if (poBlock != nullptr) |
684 | 0 | poBlock->DropLock(); |
685 | |
|
686 | 0 | poBlock = |
687 | 0 | GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize); |
688 | 0 | if (poBlock == nullptr) |
689 | 0 | { |
690 | 0 | return (CE_Failure); |
691 | 0 | } |
692 | | |
693 | 0 | poBlock->MarkDirty(); |
694 | |
|
695 | 0 | pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef()); |
696 | | /*if( bMemZeroBuffer ) |
697 | | { |
698 | | memset(pabyDstBlock, 0, |
699 | | static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize |
700 | | * nBlockYSize); |
701 | | }*/ |
702 | 0 | } |
703 | | |
704 | | // To make Coverity happy. Should not happen by design. |
705 | 0 | if (pabyDstBlock == nullptr) |
706 | 0 | { |
707 | 0 | CPLAssert(false); |
708 | 0 | eErr = CE_Failure; |
709 | 0 | break; |
710 | 0 | } |
711 | | |
712 | | /* -------------------------------------------------------------------- |
713 | | */ |
714 | | /* Copy over this pixel of data. */ |
715 | | /* -------------------------------------------------------------------- |
716 | | */ |
717 | 0 | GPtrDiff_t iDstOffset = |
718 | 0 | (static_cast<GPtrDiff_t>(iDstX) - |
719 | 0 | static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize + |
720 | 0 | (static_cast<GPtrDiff_t>(iDstY) - |
721 | 0 | static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) * |
722 | 0 | nBlockXSize) * |
723 | 0 | nBandDataSize; |
724 | |
|
725 | 0 | if (eDataType == eBufType) |
726 | 0 | { |
727 | 0 | memcpy(pabyDstBlock + iDstOffset, |
728 | 0 | static_cast<GByte *>(pData) + iBufOffset, |
729 | 0 | nBandDataSize); |
730 | 0 | } |
731 | 0 | else |
732 | 0 | { |
733 | | /* type to type conversion ... ouch, this is expensive way |
734 | | of handling single words */ |
735 | 0 | GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset, |
736 | 0 | eBufType, 0, pabyDstBlock + iDstOffset, |
737 | 0 | eDataType, 0, 1); |
738 | 0 | } |
739 | 0 | } |
740 | | |
741 | 0 | if (psExtraArg->pfnProgress != nullptr && |
742 | 0 | !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "", |
743 | 0 | psExtraArg->pProgressData)) |
744 | 0 | { |
745 | 0 | eErr = CE_Failure; |
746 | 0 | break; |
747 | 0 | } |
748 | 0 | } |
749 | 0 | } |
750 | 0 | else |
751 | 0 | { |
752 | 0 | if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour) |
753 | 0 | { |
754 | 0 | if ((psExtraArg->eResampleAlg == GRIORA_Cubic || |
755 | 0 | psExtraArg->eResampleAlg == GRIORA_CubicSpline || |
756 | 0 | psExtraArg->eResampleAlg == GRIORA_Bilinear || |
757 | 0 | psExtraArg->eResampleAlg == GRIORA_Lanczos) && |
758 | 0 | GetColorTable() != nullptr) |
759 | 0 | { |
760 | 0 | CPLError(CE_Warning, CPLE_NotSupported, |
761 | 0 | "Resampling method not supported on paletted band. " |
762 | 0 | "Falling back to nearest neighbour"); |
763 | 0 | } |
764 | 0 | else if (psExtraArg->eResampleAlg == GRIORA_Gauss && |
765 | 0 | GDALDataTypeIsComplex(eDataType)) |
766 | 0 | { |
767 | 0 | CPLError(CE_Warning, CPLE_NotSupported, |
768 | 0 | "Resampling method not supported on complex data type " |
769 | 0 | "band. Falling back to nearest neighbour"); |
770 | 0 | } |
771 | 0 | else |
772 | 0 | { |
773 | 0 | return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
774 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
775 | 0 | nPixelSpace, nLineSpace, psExtraArg); |
776 | 0 | } |
777 | 0 | } |
778 | | |
779 | 0 | int nLimitBlockY = 0; |
780 | 0 | const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1; |
781 | 0 | int nStartBlockX = -nBlockXSize; |
782 | 0 | constexpr double EPS = 1e-10; |
783 | 0 | int nLBlockY = -1; |
784 | 0 | const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS; |
785 | 0 | const bool bIntegerXFactor = |
786 | 0 | bUseIntegerRequestCoords && |
787 | 0 | static_cast<int>(dfSrcXInc) == dfSrcXInc && |
788 | 0 | static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize; |
789 | | |
790 | | /* -------------------------------------------------------------------- |
791 | | */ |
792 | | /* Read case */ |
793 | | /* Loop over buffer computing source locations. */ |
794 | | /* -------------------------------------------------------------------- |
795 | | */ |
796 | 0 | for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++) |
797 | 0 | { |
798 | | // Add small epsilon to avoid some numeric precision issues. |
799 | 0 | const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS; |
800 | 0 | const int iSrcY = static_cast<int>(std::min( |
801 | 0 | std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1))); |
802 | |
|
803 | 0 | GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * |
804 | 0 | static_cast<GPtrDiff_t>(nLineSpace); |
805 | |
|
806 | 0 | if (iSrcY >= nLimitBlockY) |
807 | 0 | { |
808 | 0 | nLBlockY = iSrcY / nBlockYSize; |
809 | 0 | nLimitBlockY = nLBlockY * nBlockYSize; |
810 | 0 | if (nLimitBlockY < INT_MAX - nBlockYSize) |
811 | 0 | nLimitBlockY += nBlockYSize; |
812 | 0 | else |
813 | 0 | nLimitBlockY = INT_MAX; |
814 | | // Make sure a new block is loaded. |
815 | 0 | nStartBlockX = -nBlockXSize; |
816 | 0 | } |
817 | 0 | else if (static_cast<int>(dfSrcXStart) < nStartBlockX) |
818 | 0 | { |
819 | | // Make sure a new block is loaded. |
820 | 0 | nStartBlockX = -nBlockXSize; |
821 | 0 | } |
822 | |
|
823 | 0 | GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) * |
824 | 0 | static_cast<GPtrDiff_t>(nBlockXSize); |
825 | |
|
826 | 0 | if (bIntegerXFactor) |
827 | 0 | { |
828 | 0 | int iSrcX = static_cast<int>(dfSrcXStart); |
829 | 0 | const int nSrcXInc = static_cast<int>(dfSrcXInc); |
830 | 0 | GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset; |
831 | 0 | bool bRet = false; |
832 | 0 | if (bByteCopy) |
833 | 0 | { |
834 | 0 | bRet = DownsamplingIntegerXFactor<true, 1>( |
835 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData, |
836 | 0 | static_cast<int>(nPixelSpace), nBufXSize, GDT_UInt8, |
837 | 0 | GDT_UInt8, nStartBlockX, nBlockXSize, poBlock, |
838 | 0 | nLBlockY); |
839 | 0 | } |
840 | 0 | else if (eDataType == eBufType) |
841 | 0 | { |
842 | 0 | switch (nBandDataSize) |
843 | 0 | { |
844 | 0 | case 2: |
845 | 0 | bRet = DownsamplingIntegerXFactor<true, 2>( |
846 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
847 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
848 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
849 | 0 | nBlockXSize, poBlock, nLBlockY); |
850 | 0 | break; |
851 | 0 | case 4: |
852 | 0 | bRet = DownsamplingIntegerXFactor<true, 4>( |
853 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
854 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
855 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
856 | 0 | nBlockXSize, poBlock, nLBlockY); |
857 | 0 | break; |
858 | 0 | case 8: |
859 | 0 | bRet = DownsamplingIntegerXFactor<true, 8>( |
860 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
861 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
862 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
863 | 0 | nBlockXSize, poBlock, nLBlockY); |
864 | 0 | break; |
865 | 0 | case 16: |
866 | 0 | bRet = DownsamplingIntegerXFactor<true, 16>( |
867 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
868 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
869 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
870 | 0 | nBlockXSize, poBlock, nLBlockY); |
871 | 0 | break; |
872 | 0 | default: |
873 | 0 | CPLAssert(false); |
874 | 0 | break; |
875 | 0 | } |
876 | 0 | } |
877 | 0 | else |
878 | 0 | { |
879 | 0 | bRet = DownsamplingIntegerXFactor<false, 0>( |
880 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData, |
881 | 0 | static_cast<int>(nPixelSpace), nBufXSize, eDataType, |
882 | 0 | eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY); |
883 | 0 | } |
884 | 0 | if (!bRet) |
885 | 0 | eErr = CE_Failure; |
886 | 0 | } |
887 | 0 | else |
888 | 0 | { |
889 | 0 | double dfSrcX = dfSrcXStart; |
890 | 0 | for (int iBufXOff = 0; iBufXOff < nBufXSize; |
891 | 0 | iBufXOff++, dfSrcX += dfSrcXInc) |
892 | 0 | { |
893 | | // TODO?: try to avoid the clamping for most iterations |
894 | 0 | const int iSrcX = static_cast<int>( |
895 | 0 | std::min(std::max(0.0, dfSrcX), |
896 | 0 | static_cast<double>(nRasterXSize - 1))); |
897 | | |
898 | | /* -------------------------------------------------------------------- |
899 | | */ |
900 | | /* Ensure we have the appropriate block loaded. */ |
901 | | /* -------------------------------------------------------------------- |
902 | | */ |
903 | 0 | if (iSrcX >= nBlockXSize + nStartBlockX) |
904 | 0 | { |
905 | 0 | const int nLBlockX = iSrcX / nBlockXSize; |
906 | 0 | nStartBlockX = nLBlockX * nBlockXSize; |
907 | |
|
908 | 0 | if (poBlock != nullptr) |
909 | 0 | poBlock->DropLock(); |
910 | |
|
911 | 0 | poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE); |
912 | 0 | if (poBlock == nullptr) |
913 | 0 | { |
914 | 0 | eErr = CE_Failure; |
915 | 0 | break; |
916 | 0 | } |
917 | | |
918 | 0 | pabySrcBlock = |
919 | 0 | static_cast<GByte *>(poBlock->GetDataRef()); |
920 | 0 | } |
921 | 0 | const GPtrDiff_t nDiffX = |
922 | 0 | static_cast<GPtrDiff_t>(iSrcX - nStartBlockX); |
923 | | |
924 | | /* -------------------------------------------------------------------- |
925 | | */ |
926 | | /* Copy over this pixel of data. */ |
927 | | /* -------------------------------------------------------------------- |
928 | | */ |
929 | |
|
930 | 0 | if (bByteCopy) |
931 | 0 | { |
932 | 0 | GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst; |
933 | 0 | static_cast<GByte *>(pData)[iBufOffset] = |
934 | 0 | pabySrcBlock[iSrcOffset]; |
935 | 0 | } |
936 | 0 | else if (eDataType == eBufType) |
937 | 0 | { |
938 | 0 | GPtrDiff_t iSrcOffset = |
939 | 0 | (nDiffX + iSrcOffsetCst) * nBandDataSize; |
940 | 0 | memcpy(static_cast<GByte *>(pData) + iBufOffset, |
941 | 0 | pabySrcBlock + iSrcOffset, nBandDataSize); |
942 | 0 | } |
943 | 0 | else |
944 | 0 | { |
945 | | // Type to type conversion ... |
946 | 0 | GPtrDiff_t iSrcOffset = |
947 | 0 | (nDiffX + iSrcOffsetCst) * nBandDataSize; |
948 | 0 | GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0, |
949 | 0 | static_cast<GByte *>(pData) + |
950 | 0 | iBufOffset, |
951 | 0 | eBufType, 0, 1); |
952 | 0 | } |
953 | |
|
954 | 0 | iBufOffset += static_cast<int>(nPixelSpace); |
955 | 0 | } |
956 | 0 | } |
957 | 0 | if (eErr == CE_Failure) |
958 | 0 | break; |
959 | | |
960 | 0 | if (psExtraArg->pfnProgress != nullptr && |
961 | 0 | !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "", |
962 | 0 | psExtraArg->pProgressData)) |
963 | 0 | { |
964 | 0 | eErr = CE_Failure; |
965 | 0 | break; |
966 | 0 | } |
967 | 0 | } |
968 | 0 | } |
969 | | |
970 | 0 | if (poBlock != nullptr) |
971 | 0 | poBlock->DropLock(); |
972 | |
|
973 | 0 | return eErr; |
974 | 0 | } |
975 | | |
976 | | /************************************************************************/ |
977 | | /* GDALRasterIOTransformer() */ |
978 | | /************************************************************************/ |
979 | | |
/**
 * Parameters of the affine (scale + offset, no rotation) mapping used by
 * GDALRasterIOTransformer() to convert between destination (buffer) pixel
 * coordinates and source (band) pixel coordinates:
 *
 *   src = dst * ratio + offset
 *
 * Members are default-initialized to the identity mapping so that an instance
 * never carries indeterminate values, and so that the inverse mapping (which
 * divides by the ratios) is well defined even before the fields are assigned.
 */
struct GDALRasterIOTransformerStruct
{
    /** Offset added to the scaled destination X coordinate. */
    double dfXOff = 0.0;
    /** Offset added to the scaled destination Y coordinate. */
    double dfYOff = 0.0;
    /** Number of source pixels per destination pixel along X. */
    double dfXRatioDstToSrc = 1.0;
    /** Number of source pixels per destination pixel along Y. */
    double dfYRatioDstToSrc = 1.0;
};
987 | | |
988 | | static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc, |
989 | | int nPointCount, double *x, double *y, |
990 | | double * /* z */, int *panSuccess) |
991 | 0 | { |
992 | 0 | GDALRasterIOTransformerStruct *psParams = |
993 | 0 | static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg); |
994 | 0 | if (bDstToSrc) |
995 | 0 | { |
996 | 0 | for (int i = 0; i < nPointCount; i++) |
997 | 0 | { |
998 | 0 | x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff; |
999 | 0 | y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff; |
1000 | 0 | panSuccess[i] = TRUE; |
1001 | 0 | } |
1002 | 0 | } |
1003 | 0 | else |
1004 | 0 | { |
1005 | 0 | for (int i = 0; i < nPointCount; i++) |
1006 | 0 | { |
1007 | 0 | x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc; |
1008 | 0 | y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc; |
1009 | 0 | panSuccess[i] = TRUE; |
1010 | 0 | } |
1011 | 0 | } |
1012 | 0 | return TRUE; |
1013 | 0 | } |
1014 | | |
1015 | | /************************************************************************/ |
1016 | | /* RasterIOResampled() */ |
1017 | | /************************************************************************/ |
1018 | | |
1019 | | //! @cond Doxygen_Suppress |
1020 | | CPLErr GDALRasterBand::RasterIOResampled( |
1021 | | GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize, |
1022 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
1023 | | GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg) |
1024 | 0 | { |
1025 | | // Determine if we use warping resampling or overview resampling |
1026 | 0 | const bool bUseWarp = |
1027 | 0 | (GDALDataTypeIsComplex(eDataType) && |
1028 | 0 | psExtraArg->eResampleAlg != GRIORA_NearestNeighbour && |
1029 | 0 | psExtraArg->eResampleAlg != GRIORA_Mode); |
1030 | |
|
1031 | 0 | double dfXOff = nXOff; |
1032 | 0 | double dfYOff = nYOff; |
1033 | 0 | double dfXSize = nXSize; |
1034 | 0 | double dfYSize = nYSize; |
1035 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
1036 | 0 | { |
1037 | 0 | dfXOff = psExtraArg->dfXOff; |
1038 | 0 | dfYOff = psExtraArg->dfYOff; |
1039 | 0 | dfXSize = psExtraArg->dfXSize; |
1040 | 0 | dfYSize = psExtraArg->dfYSize; |
1041 | 0 | } |
1042 | |
|
1043 | 0 | const double dfXRatioDstToSrc = dfXSize / nBufXSize; |
1044 | 0 | const double dfYRatioDstToSrc = dfYSize / nBufYSize; |
1045 | | |
1046 | | // Determine the coordinates in the "virtual" output raster to see |
1047 | | // if there are not integers, in which case we will use them as a shift |
1048 | | // so that subwindow extracts give the exact same results as entire raster |
1049 | | // scaling. |
1050 | 0 | double dfDestXOff = dfXOff / dfXRatioDstToSrc; |
1051 | 0 | bool bHasXOffVirtual = false; |
1052 | 0 | int nDestXOffVirtual = 0; |
1053 | 0 | if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8) |
1054 | 0 | { |
1055 | 0 | bHasXOffVirtual = true; |
1056 | 0 | dfXOff = nXOff; |
1057 | 0 | nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5); |
1058 | 0 | } |
1059 | |
|
1060 | 0 | double dfDestYOff = dfYOff / dfYRatioDstToSrc; |
1061 | 0 | bool bHasYOffVirtual = false; |
1062 | 0 | int nDestYOffVirtual = 0; |
1063 | 0 | if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8) |
1064 | 0 | { |
1065 | 0 | bHasYOffVirtual = true; |
1066 | 0 | dfYOff = nYOff; |
1067 | 0 | nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5); |
1068 | 0 | } |
1069 | | |
1070 | | // Create a MEM dataset that wraps the output buffer. |
1071 | 0 | GDALDataset *poMEMDS; |
1072 | 0 | void *pTempBuffer = nullptr; |
1073 | 0 | GSpacing nPSMem = nPixelSpace; |
1074 | 0 | GSpacing nLSMem = nLineSpace; |
1075 | 0 | void *pDataMem = pData; |
1076 | 0 | GDALDataType eDTMem = eBufType; |
1077 | 0 | if (eBufType != eDataType) |
1078 | 0 | { |
1079 | 0 | nPSMem = GDALGetDataTypeSizeBytes(eDataType); |
1080 | 0 | nLSMem = nPSMem * nBufXSize; |
1081 | 0 | pTempBuffer = |
1082 | 0 | VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem)); |
1083 | 0 | if (pTempBuffer == nullptr) |
1084 | 0 | return CE_Failure; |
1085 | 0 | pDataMem = pTempBuffer; |
1086 | 0 | eDTMem = eDataType; |
1087 | 0 | } |
1088 | | |
1089 | 0 | poMEMDS = |
1090 | 0 | MEMDataset::Create("", nDestXOffVirtual + nBufXSize, |
1091 | 0 | nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr); |
1092 | 0 | GByte *pabyData = static_cast<GByte *>(pDataMem) - |
1093 | 0 | nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual; |
1094 | 0 | GDALRasterBandH hMEMBand = MEMCreateRasterBandEx( |
1095 | 0 | poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false); |
1096 | 0 | poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand)); |
1097 | |
|
1098 | 0 | const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE"); |
1099 | 0 | const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0; |
1100 | 0 | if (pszNBITS) |
1101 | 0 | GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem( |
1102 | 0 | "NBITS", pszNBITS, "IMAGE_STRUCTURE"); |
1103 | |
|
1104 | 0 | CPLErr eErr = CE_None; |
1105 | | |
1106 | | // Do the resampling. |
1107 | 0 | if (bUseWarp) |
1108 | 0 | { |
1109 | 0 | int bHasNoData = FALSE; |
1110 | 0 | double dfNoDataValue = GetNoDataValue(&bHasNoData); |
1111 | |
|
1112 | 0 | VRTDatasetH hVRTDS = nullptr; |
1113 | 0 | GDALRasterBandH hVRTBand = nullptr; |
1114 | 0 | if (GetDataset() == nullptr) |
1115 | 0 | { |
1116 | | /* Create VRT dataset that wraps the whole dataset */ |
1117 | 0 | hVRTDS = VRTCreate(nRasterXSize, nRasterYSize); |
1118 | 0 | VRTAddBand(hVRTDS, eDataType, nullptr); |
1119 | 0 | hVRTBand = GDALGetRasterBand(hVRTDS, 1); |
1120 | 0 | VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize, |
1121 | 0 | 0, 0, nRasterXSize, nRasterYSize, nullptr, |
1122 | 0 | VRT_NODATA_UNSET); |
1123 | | |
1124 | | /* Add a mask band if needed */ |
1125 | 0 | if (GetMaskFlags() != GMF_ALL_VALID) |
1126 | 0 | { |
1127 | 0 | GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0); |
1128 | 0 | VRTSourcedRasterBand *poVRTMaskBand = |
1129 | 0 | reinterpret_cast<VRTSourcedRasterBand *>( |
1130 | 0 | reinterpret_cast<GDALRasterBand *>(hVRTBand) |
1131 | 0 | ->GetMaskBand()); |
1132 | 0 | poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize, |
1133 | 0 | nRasterYSize, 0, 0, |
1134 | 0 | nRasterXSize, nRasterYSize); |
1135 | 0 | } |
1136 | 0 | } |
1137 | |
|
1138 | 0 | GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions(); |
1139 | 0 | switch (psExtraArg->eResampleAlg) |
1140 | 0 | { |
1141 | 0 | case GRIORA_NearestNeighbour: |
1142 | 0 | psWarpOptions->eResampleAlg = GRA_NearestNeighbour; |
1143 | 0 | break; |
1144 | 0 | case GRIORA_Bilinear: |
1145 | 0 | psWarpOptions->eResampleAlg = GRA_Bilinear; |
1146 | 0 | break; |
1147 | 0 | case GRIORA_Cubic: |
1148 | 0 | psWarpOptions->eResampleAlg = GRA_Cubic; |
1149 | 0 | break; |
1150 | 0 | case GRIORA_CubicSpline: |
1151 | 0 | psWarpOptions->eResampleAlg = GRA_CubicSpline; |
1152 | 0 | break; |
1153 | 0 | case GRIORA_Lanczos: |
1154 | 0 | psWarpOptions->eResampleAlg = GRA_Lanczos; |
1155 | 0 | break; |
1156 | 0 | case GRIORA_Average: |
1157 | 0 | psWarpOptions->eResampleAlg = GRA_Average; |
1158 | 0 | break; |
1159 | 0 | case GRIORA_RMS: |
1160 | 0 | psWarpOptions->eResampleAlg = GRA_RMS; |
1161 | 0 | break; |
1162 | 0 | case GRIORA_Mode: |
1163 | 0 | psWarpOptions->eResampleAlg = GRA_Mode; |
1164 | 0 | break; |
1165 | 0 | default: |
1166 | 0 | CPLAssert(false); |
1167 | 0 | psWarpOptions->eResampleAlg = GRA_NearestNeighbour; |
1168 | 0 | break; |
1169 | 0 | } |
1170 | 0 | psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset(); |
1171 | 0 | psWarpOptions->hDstDS = poMEMDS; |
1172 | 0 | psWarpOptions->nBandCount = 1; |
1173 | 0 | int nSrcBandNumber = hVRTDS ? 1 : nBand; |
1174 | 0 | int nDstBandNumber = 1; |
1175 | 0 | psWarpOptions->panSrcBands = &nSrcBandNumber; |
1176 | 0 | psWarpOptions->panDstBands = &nDstBandNumber; |
1177 | 0 | psWarpOptions->pfnProgress = psExtraArg->pfnProgress |
1178 | 0 | ? psExtraArg->pfnProgress |
1179 | 0 | : GDALDummyProgress; |
1180 | 0 | psWarpOptions->pProgressArg = psExtraArg->pProgressData; |
1181 | 0 | psWarpOptions->pfnTransformer = GDALRasterIOTransformer; |
1182 | 0 | if (bHasNoData) |
1183 | 0 | { |
1184 | 0 | psWarpOptions->papszWarpOptions = CSLSetNameValue( |
1185 | 0 | psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA"); |
1186 | 0 | if (psWarpOptions->padfSrcNoDataReal == nullptr) |
1187 | 0 | { |
1188 | 0 | psWarpOptions->padfSrcNoDataReal = |
1189 | 0 | static_cast<double *>(CPLMalloc(sizeof(double))); |
1190 | 0 | psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue; |
1191 | 0 | } |
1192 | |
|
1193 | 0 | if (psWarpOptions->padfDstNoDataReal == nullptr) |
1194 | 0 | { |
1195 | 0 | psWarpOptions->padfDstNoDataReal = |
1196 | 0 | static_cast<double *>(CPLMalloc(sizeof(double))); |
1197 | 0 | psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue; |
1198 | 0 | } |
1199 | 0 | } |
1200 | |
|
1201 | 0 | GDALRasterIOTransformerStruct sTransformer; |
1202 | 0 | sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff; |
1203 | 0 | sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff; |
1204 | 0 | sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1205 | 0 | sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1206 | 0 | psWarpOptions->pTransformerArg = &sTransformer; |
1207 | |
|
1208 | 0 | GDALWarpOperationH hWarpOperation = |
1209 | 0 | GDALCreateWarpOperation(psWarpOptions); |
1210 | 0 | eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual, |
1211 | 0 | nDestYOffVirtual, nBufXSize, nBufYSize); |
1212 | 0 | GDALDestroyWarpOperation(hWarpOperation); |
1213 | |
|
1214 | 0 | psWarpOptions->panSrcBands = nullptr; |
1215 | 0 | psWarpOptions->panDstBands = nullptr; |
1216 | 0 | GDALDestroyWarpOptions(psWarpOptions); |
1217 | |
|
1218 | 0 | if (hVRTDS) |
1219 | 0 | GDALClose(hVRTDS); |
1220 | 0 | } |
1221 | 0 | else |
1222 | 0 | { |
1223 | 0 | const char *pszResampling = |
1224 | 0 | (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" |
1225 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" |
1226 | 0 | : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" |
1227 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" |
1228 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" |
1229 | 0 | : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" |
1230 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" |
1231 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" |
1232 | 0 | : "UNKNOWN"; |
1233 | |
|
1234 | 0 | int nKernelRadius = 0; |
1235 | 0 | GDALResampleFunction pfnResampleFunc = |
1236 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius); |
1237 | 0 | CPLAssert(pfnResampleFunc); |
1238 | 0 | GDALDataType eWrkDataType = |
1239 | 0 | GDALGetOvrWorkDataType(pszResampling, eDataType); |
1240 | 0 | int nHasNoData = 0; |
1241 | 0 | double dfNoDataValue = GetNoDataValue(&nHasNoData); |
1242 | 0 | const bool bHasNoData = CPL_TO_BOOL(nHasNoData); |
1243 | 0 | if (!bHasNoData) |
1244 | 0 | dfNoDataValue = 0.0; |
1245 | |
|
1246 | 0 | int nDstBlockXSize = nBufXSize; |
1247 | 0 | int nDstBlockYSize = nBufYSize; |
1248 | 0 | int nFullResXChunk = 0; |
1249 | 0 | int nFullResYChunk = 0; |
1250 | 0 | while (true) |
1251 | 0 | { |
1252 | 0 | nFullResXChunk = |
1253 | 0 | 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc); |
1254 | 0 | nFullResYChunk = |
1255 | 0 | 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc); |
1256 | 0 | if (nFullResXChunk > nRasterXSize) |
1257 | 0 | nFullResXChunk = nRasterXSize; |
1258 | 0 | if (nFullResYChunk > nRasterYSize) |
1259 | 0 | nFullResYChunk = nRasterYSize; |
1260 | 0 | if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) || |
1261 | 0 | (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= |
1262 | 0 | 1024 * 1024)) |
1263 | 0 | break; |
1264 | | // When operating on the full width of a raster whose block width is |
1265 | | // the raster width, prefer doing chunks in height. |
1266 | 0 | if (nFullResXChunk >= nXSize && nXSize == nBlockXSize && |
1267 | 0 | nDstBlockYSize > 1) |
1268 | 0 | nDstBlockYSize /= 2; |
1269 | | /* Otherwise cut the maximal dimension */ |
1270 | 0 | else if (nDstBlockXSize > 1 && |
1271 | 0 | (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1)) |
1272 | 0 | nDstBlockXSize /= 2; |
1273 | 0 | else |
1274 | 0 | nDstBlockYSize /= 2; |
1275 | 0 | } |
1276 | |
|
1277 | 0 | int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc); |
1278 | 0 | int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc); |
1279 | 0 | if (nOvrXFactor == 0) |
1280 | 0 | nOvrXFactor = 1; |
1281 | 0 | if (nOvrYFactor == 0) |
1282 | 0 | nOvrYFactor = 1; |
1283 | 0 | int nFullResXSizeQueried = |
1284 | 0 | nFullResXChunk + 2 * nKernelRadius * nOvrXFactor; |
1285 | 0 | int nFullResYSizeQueried = |
1286 | 0 | nFullResYChunk + 2 * nKernelRadius * nOvrYFactor; |
1287 | |
|
1288 | 0 | if (nFullResXSizeQueried > nRasterXSize) |
1289 | 0 | nFullResXSizeQueried = nRasterXSize; |
1290 | 0 | if (nFullResYSizeQueried > nRasterYSize) |
1291 | 0 | nFullResYSizeQueried = nRasterYSize; |
1292 | |
|
1293 | 0 | void *pChunk = |
1294 | 0 | VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType), |
1295 | 0 | nFullResXSizeQueried, nFullResYSizeQueried); |
1296 | 0 | GByte *pabyChunkNoDataMask = nullptr; |
1297 | |
|
1298 | 0 | GDALRasterBand *poMaskBand = GetMaskBand(); |
1299 | 0 | int l_nMaskFlags = GetMaskFlags(); |
1300 | |
|
1301 | 0 | bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0); |
1302 | 0 | if (bUseNoDataMask) |
1303 | 0 | { |
1304 | 0 | pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE( |
1305 | 0 | nFullResXSizeQueried, nFullResYSizeQueried)); |
1306 | 0 | } |
1307 | 0 | if (pChunk == nullptr || |
1308 | 0 | (bUseNoDataMask && pabyChunkNoDataMask == nullptr)) |
1309 | 0 | { |
1310 | 0 | GDALClose(poMEMDS); |
1311 | 0 | CPLFree(pChunk); |
1312 | 0 | CPLFree(pabyChunkNoDataMask); |
1313 | 0 | VSIFree(pTempBuffer); |
1314 | 0 | return CE_Failure; |
1315 | 0 | } |
1316 | | |
1317 | 0 | const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) * |
1318 | 0 | DIV_ROUND_UP(nBufYSize, nDstBlockYSize); |
1319 | 0 | int nBlocksDone = 0; |
1320 | |
|
1321 | 0 | int nDstYOff; |
1322 | 0 | for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None; |
1323 | 0 | nDstYOff += nDstBlockYSize) |
1324 | 0 | { |
1325 | 0 | int nDstYCount; |
1326 | 0 | if (nDstYOff + nDstBlockYSize <= nBufYSize) |
1327 | 0 | nDstYCount = nDstBlockYSize; |
1328 | 0 | else |
1329 | 0 | nDstYCount = nBufYSize - nDstYOff; |
1330 | |
|
1331 | 0 | int nChunkYOff = |
1332 | 0 | nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc); |
1333 | 0 | int nChunkYOff2 = nYOff + 1 + |
1334 | 0 | static_cast<int>(ceil((nDstYOff + nDstYCount) * |
1335 | 0 | dfYRatioDstToSrc)); |
1336 | 0 | if (nChunkYOff2 > nRasterYSize) |
1337 | 0 | nChunkYOff2 = nRasterYSize; |
1338 | 0 | int nYCount = nChunkYOff2 - nChunkYOff; |
1339 | 0 | CPLAssert(nYCount <= nFullResYChunk); |
1340 | | |
1341 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor; |
1342 | 0 | int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor; |
1343 | 0 | if (nChunkYOffQueried < 0) |
1344 | 0 | { |
1345 | 0 | nChunkYSizeQueried += nChunkYOffQueried; |
1346 | 0 | nChunkYOffQueried = 0; |
1347 | 0 | } |
1348 | 0 | if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize) |
1349 | 0 | nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried; |
1350 | 0 | CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried); |
1351 | | |
1352 | 0 | int nDstXOff = 0; |
1353 | 0 | for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None; |
1354 | 0 | nDstXOff += nDstBlockXSize) |
1355 | 0 | { |
1356 | 0 | int nDstXCount = 0; |
1357 | 0 | if (nDstXOff + nDstBlockXSize <= nBufXSize) |
1358 | 0 | nDstXCount = nDstBlockXSize; |
1359 | 0 | else |
1360 | 0 | nDstXCount = nBufXSize - nDstXOff; |
1361 | |
|
1362 | 0 | int nChunkXOff = |
1363 | 0 | nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc); |
1364 | 0 | int nChunkXOff2 = |
1365 | 0 | nXOff + 1 + |
1366 | 0 | static_cast<int>( |
1367 | 0 | ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc)); |
1368 | 0 | if (nChunkXOff2 > nRasterXSize) |
1369 | 0 | nChunkXOff2 = nRasterXSize; |
1370 | 0 | int nXCount = nChunkXOff2 - nChunkXOff; |
1371 | 0 | CPLAssert(nXCount <= nFullResXChunk); |
1372 | | |
1373 | 0 | int nChunkXOffQueried = |
1374 | 0 | nChunkXOff - nKernelRadius * nOvrXFactor; |
1375 | 0 | int nChunkXSizeQueried = |
1376 | 0 | nXCount + 2 * nKernelRadius * nOvrXFactor; |
1377 | 0 | if (nChunkXOffQueried < 0) |
1378 | 0 | { |
1379 | 0 | nChunkXSizeQueried += nChunkXOffQueried; |
1380 | 0 | nChunkXOffQueried = 0; |
1381 | 0 | } |
1382 | 0 | if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize) |
1383 | 0 | nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried; |
1384 | 0 | CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried); |
1385 | | |
1386 | | // Read the source buffers. |
1387 | 0 | eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1388 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, pChunk, |
1389 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
1390 | 0 | eWrkDataType, 0, 0, nullptr); |
1391 | |
|
1392 | 0 | bool bSkipResample = false; |
1393 | 0 | bool bNoDataMaskFullyOpaque = false; |
1394 | 0 | if (eErr == CE_None && bUseNoDataMask) |
1395 | 0 | { |
1396 | 0 | eErr = poMaskBand->RasterIO( |
1397 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1398 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
1399 | 0 | pabyChunkNoDataMask, nChunkXSizeQueried, |
1400 | 0 | nChunkYSizeQueried, GDT_UInt8, 0, 0, nullptr); |
1401 | | |
1402 | | /* Optimizations if mask if fully opaque or transparent */ |
1403 | 0 | int nPixels = nChunkXSizeQueried * nChunkYSizeQueried; |
1404 | 0 | GByte bVal = pabyChunkNoDataMask[0]; |
1405 | 0 | int i = 1; |
1406 | 0 | for (; i < nPixels; i++) |
1407 | 0 | { |
1408 | 0 | if (pabyChunkNoDataMask[i] != bVal) |
1409 | 0 | break; |
1410 | 0 | } |
1411 | 0 | if (i == nPixels) |
1412 | 0 | { |
1413 | 0 | if (bVal == 0) |
1414 | 0 | { |
1415 | 0 | for (int j = 0; j < nDstYCount; j++) |
1416 | 0 | { |
1417 | 0 | GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0, |
1418 | 0 | static_cast<GByte *>(pDataMem) + |
1419 | 0 | nLSMem * (j + nDstYOff) + |
1420 | 0 | nDstXOff * nPSMem, |
1421 | 0 | eDTMem, |
1422 | 0 | static_cast<int>(nPSMem), |
1423 | 0 | nDstXCount); |
1424 | 0 | } |
1425 | 0 | bSkipResample = true; |
1426 | 0 | } |
1427 | 0 | else |
1428 | 0 | { |
1429 | 0 | bNoDataMaskFullyOpaque = true; |
1430 | 0 | } |
1431 | 0 | } |
1432 | 0 | } |
1433 | |
|
1434 | 0 | if (!bSkipResample && eErr == CE_None) |
1435 | 0 | { |
1436 | 0 | const bool bPropagateNoData = false; |
1437 | 0 | void *pDstBuffer = nullptr; |
1438 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown; |
1439 | 0 | GDALRasterBand *poMEMBand = |
1440 | 0 | GDALRasterBand::FromHandle(hMEMBand); |
1441 | 0 | GDALOverviewResampleArgs args; |
1442 | 0 | args.eSrcDataType = eDataType; |
1443 | 0 | args.eOvrDataType = poMEMBand->GetRasterDataType(); |
1444 | 0 | args.nOvrXSize = poMEMBand->GetXSize(); |
1445 | 0 | args.nOvrYSize = poMEMBand->GetYSize(); |
1446 | 0 | args.nOvrNBITS = nNBITS; |
1447 | 0 | args.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1448 | 0 | args.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1449 | 0 | args.dfSrcXDelta = |
1450 | 0 | dfXOff - nXOff; /* == 0 if bHasXOffVirtual */ |
1451 | 0 | args.dfSrcYDelta = |
1452 | 0 | dfYOff - nYOff; /* == 0 if bHasYOffVirtual */ |
1453 | 0 | args.eWrkDataType = eWrkDataType; |
1454 | 0 | args.pabyChunkNodataMask = |
1455 | 0 | bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask; |
1456 | 0 | args.nChunkXOff = |
1457 | 0 | nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff); |
1458 | 0 | args.nChunkXSize = nChunkXSizeQueried; |
1459 | 0 | args.nChunkYOff = |
1460 | 0 | nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff); |
1461 | 0 | args.nChunkYSize = nChunkYSizeQueried; |
1462 | 0 | args.nDstXOff = nDstXOff + nDestXOffVirtual; |
1463 | 0 | args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount; |
1464 | 0 | args.nDstYOff = nDstYOff + nDestYOffVirtual; |
1465 | 0 | args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount; |
1466 | 0 | args.pszResampling = pszResampling; |
1467 | 0 | args.bHasNoData = bHasNoData; |
1468 | 0 | args.dfNoDataValue = dfNoDataValue; |
1469 | 0 | args.poColorTable = GetColorTable(); |
1470 | 0 | args.bPropagateNoData = bPropagateNoData; |
1471 | 0 | eErr = pfnResampleFunc(args, pChunk, &pDstBuffer, |
1472 | 0 | &eDstBufferDataType); |
1473 | 0 | if (eErr == CE_None) |
1474 | 0 | { |
1475 | 0 | eErr = poMEMBand->RasterIO( |
1476 | 0 | GF_Write, nDstXOff + nDestXOffVirtual, |
1477 | 0 | nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount, |
1478 | 0 | pDstBuffer, nDstXCount, nDstYCount, |
1479 | 0 | eDstBufferDataType, 0, 0, nullptr); |
1480 | 0 | } |
1481 | 0 | CPLFree(pDstBuffer); |
1482 | 0 | } |
1483 | |
|
1484 | 0 | nBlocksDone++; |
1485 | 0 | if (eErr == CE_None && psExtraArg->pfnProgress != nullptr && |
1486 | 0 | !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks, |
1487 | 0 | "", psExtraArg->pProgressData)) |
1488 | 0 | { |
1489 | 0 | eErr = CE_Failure; |
1490 | 0 | } |
1491 | 0 | } |
1492 | 0 | } |
1493 | | |
1494 | 0 | CPLFree(pChunk); |
1495 | 0 | CPLFree(pabyChunkNoDataMask); |
1496 | 0 | } |
1497 | | |
1498 | 0 | if (eBufType != eDataType) |
1499 | 0 | { |
1500 | 0 | CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO( |
1501 | 0 | GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize, |
1502 | 0 | pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace, |
1503 | 0 | nullptr)); |
1504 | 0 | } |
1505 | 0 | GDALClose(poMEMDS); |
1506 | 0 | VSIFree(pTempBuffer); |
1507 | |
|
1508 | 0 | return eErr; |
1509 | 0 | } |
1510 | | |
1511 | | /************************************************************************/ |
1512 | | /* RasterIOResampled() */ |
1513 | | /************************************************************************/ |
1514 | | |
1515 | | CPLErr GDALDataset::RasterIOResampled( |
1516 | | GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize, |
1517 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
1518 | | int nBandCount, const int *panBandMap, GSpacing nPixelSpace, |
1519 | | GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg) |
1520 | | |
1521 | 0 | { |
1522 | | #if 0 |
1523 | | // Determine if we use warping resampling or overview resampling |
1524 | | bool bUseWarp = false; |
1525 | | if( GDALDataTypeIsComplex( eDataType ) ) |
1526 | | bUseWarp = true; |
1527 | | #endif |
1528 | |
|
1529 | 0 | double dfXOff = nXOff; |
1530 | 0 | double dfYOff = nYOff; |
1531 | 0 | double dfXSize = nXSize; |
1532 | 0 | double dfYSize = nYSize; |
1533 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
1534 | 0 | { |
1535 | 0 | dfXOff = psExtraArg->dfXOff; |
1536 | 0 | dfYOff = psExtraArg->dfYOff; |
1537 | 0 | dfXSize = psExtraArg->dfXSize; |
1538 | 0 | dfYSize = psExtraArg->dfYSize; |
1539 | 0 | } |
1540 | |
|
1541 | 0 | const double dfXRatioDstToSrc = dfXSize / nBufXSize; |
1542 | 0 | const double dfYRatioDstToSrc = dfYSize / nBufYSize; |
1543 | | |
1544 | | // Determine the coordinates in the "virtual" output raster to see |
1545 | | // if there are not integers, in which case we will use them as a shift |
1546 | | // so that subwindow extracts give the exact same results as entire raster |
1547 | | // scaling. |
1548 | 0 | double dfDestXOff = dfXOff / dfXRatioDstToSrc; |
1549 | 0 | bool bHasXOffVirtual = false; |
1550 | 0 | int nDestXOffVirtual = 0; |
1551 | 0 | if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8) |
1552 | 0 | { |
1553 | 0 | bHasXOffVirtual = true; |
1554 | 0 | dfXOff = nXOff; |
1555 | 0 | nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5); |
1556 | 0 | } |
1557 | |
|
1558 | 0 | double dfDestYOff = dfYOff / dfYRatioDstToSrc; |
1559 | 0 | bool bHasYOffVirtual = false; |
1560 | 0 | int nDestYOffVirtual = 0; |
1561 | 0 | if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8) |
1562 | 0 | { |
1563 | 0 | bHasYOffVirtual = true; |
1564 | 0 | dfYOff = nYOff; |
1565 | 0 | nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5); |
1566 | 0 | } |
1567 | | |
1568 | | // Create a MEM dataset that wraps the output buffer. |
1569 | 0 | GDALDataset *poMEMDS = |
1570 | 0 | MEMDataset::Create("", nDestXOffVirtual + nBufXSize, |
1571 | 0 | nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr); |
1572 | 0 | GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>( |
1573 | 0 | CPLMalloc(nBandCount * sizeof(GDALRasterBand *))); |
1574 | 0 | int nNBITS = 0; |
1575 | 0 | for (int i = 0; i < nBandCount; i++) |
1576 | 0 | { |
1577 | 0 | char szBuffer[32] = {'\0'}; |
1578 | 0 | int nRet = CPLPrintPointer( |
1579 | 0 | szBuffer, |
1580 | 0 | static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual - |
1581 | 0 | nLineSpace * nDestYOffVirtual + nBandSpace * i, |
1582 | 0 | sizeof(szBuffer)); |
1583 | 0 | szBuffer[nRet] = 0; |
1584 | |
|
1585 | 0 | char szBuffer0[64] = {'\0'}; |
1586 | 0 | snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer); |
1587 | |
|
1588 | 0 | char szBuffer1[64] = {'\0'}; |
1589 | 0 | snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB, |
1590 | 0 | static_cast<GIntBig>(nPixelSpace)); |
1591 | |
|
1592 | 0 | char szBuffer2[64] = {'\0'}; |
1593 | 0 | snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB, |
1594 | 0 | static_cast<GIntBig>(nLineSpace)); |
1595 | |
|
1596 | 0 | char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr}; |
1597 | |
|
1598 | 0 | poMEMDS->AddBand(eBufType, apszOptions); |
1599 | |
|
1600 | 0 | GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]); |
1601 | 0 | papoDstBands[i] = poMEMDS->GetRasterBand(i + 1); |
1602 | 0 | const char *pszNBITS = |
1603 | 0 | poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE"); |
1604 | 0 | if (pszNBITS) |
1605 | 0 | { |
1606 | 0 | nNBITS = atoi(pszNBITS); |
1607 | 0 | poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS, |
1608 | 0 | "IMAGE_STRUCTURE"); |
1609 | 0 | } |
1610 | 0 | } |
1611 | |
|
1612 | 0 | CPLErr eErr = CE_None; |
1613 | | |
1614 | | // TODO(schwehr): Why disabled? Why not just delete? |
1615 | | // Looks like this code was initially added as disable by copying |
1616 | | // from RasterIO here: |
1617 | | // https://trac.osgeo.org/gdal/changeset/29572 |
1618 | | #if 0 |
1619 | | // Do the resampling. |
1620 | | if( bUseWarp ) |
1621 | | { |
1622 | | VRTDatasetH hVRTDS = nullptr; |
1623 | | GDALRasterBandH hVRTBand = nullptr; |
1624 | | if( GetDataset() == nullptr ) |
1625 | | { |
1626 | | /* Create VRT dataset that wraps the whole dataset */ |
1627 | | hVRTDS = VRTCreate(nRasterXSize, nRasterYSize); |
1628 | | VRTAddBand( hVRTDS, eDataType, nullptr ); |
1629 | | hVRTBand = GDALGetRasterBand(hVRTDS, 1); |
1630 | | VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand, |
1631 | | (GDALRasterBandH)this, |
1632 | | 0, 0, |
1633 | | nRasterXSize, nRasterYSize, |
1634 | | 0, 0, |
1635 | | nRasterXSize, nRasterYSize, |
1636 | | nullptr, VRT_NODATA_UNSET ); |
1637 | | |
1638 | | /* Add a mask band if needed */ |
1639 | | if( GetMaskFlags() != GMF_ALL_VALID ) |
1640 | | { |
1641 | | ((GDALDataset*)hVRTDS)->CreateMaskBand(0); |
1642 | | VRTSourcedRasterBand* poVRTMaskBand = |
1643 | | (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand()); |
1644 | | poVRTMaskBand-> |
1645 | | AddMaskBandSource( this, |
1646 | | 0, 0, |
1647 | | nRasterXSize, nRasterYSize, |
1648 | | 0, 0, |
1649 | | nRasterXSize, nRasterYSize); |
1650 | | } |
1651 | | } |
1652 | | |
1653 | | GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions(); |
1654 | | psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg; |
1655 | | psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset()); |
1656 | | psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS; |
1657 | | psWarpOptions->nBandCount = 1; |
1658 | | int nSrcBandNumber = (hVRTDS ? 1 : nBand); |
1659 | | int nDstBandNumber = 1; |
1660 | | psWarpOptions->panSrcBands = &nSrcBandNumber; |
1661 | | psWarpOptions->panDstBands = &nDstBandNumber; |
1662 | | psWarpOptions->pfnProgress = psExtraArg->pfnProgress ? |
1663 | | psExtraArg->pfnProgress : GDALDummyProgress; |
1664 | | psWarpOptions->pProgressArg = psExtraArg->pProgressData; |
1665 | | psWarpOptions->pfnTransformer = GDALRasterIOTransformer; |
1666 | | GDALRasterIOTransformerStruct sTransformer; |
1667 | | sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff; |
1668 | | sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff; |
1669 | | sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1670 | | sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1671 | | psWarpOptions->pTransformerArg = &sTransformer; |
1672 | | |
1673 | | GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions); |
1674 | | eErr = GDALChunkAndWarpImage( hWarpOperation, |
1675 | | nDestXOffVirtual, nDestYOffVirtual, |
1676 | | nBufXSize, nBufYSize ); |
1677 | | GDALDestroyWarpOperation( hWarpOperation ); |
1678 | | |
1679 | | psWarpOptions->panSrcBands = nullptr; |
1680 | | psWarpOptions->panDstBands = nullptr; |
1681 | | GDALDestroyWarpOptions( psWarpOptions ); |
1682 | | |
1683 | | if( hVRTDS ) |
1684 | | GDALClose(hVRTDS); |
1685 | | } |
1686 | | else |
1687 | | #endif |
1688 | 0 | { |
1689 | 0 | const char *pszResampling = |
1690 | 0 | (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" |
1691 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" |
1692 | 0 | : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" |
1693 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" |
1694 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" |
1695 | 0 | : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" |
1696 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" |
1697 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" |
1698 | 0 | : "UNKNOWN"; |
1699 | |
|
1700 | 0 | GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]); |
1701 | 0 | GDALDataType eDataType = poFirstSrcBand->GetRasterDataType(); |
1702 | 0 | int nBlockXSize, nBlockYSize; |
1703 | 0 | poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize); |
1704 | |
|
1705 | 0 | int nKernelRadius; |
1706 | 0 | GDALResampleFunction pfnResampleFunc = |
1707 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius); |
1708 | 0 | CPLAssert(pfnResampleFunc); |
1709 | | #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND |
1710 | | GDALResampleFunctionMultiBands pfnResampleFuncMultiBands = |
1711 | | GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius); |
1712 | | #endif |
1713 | 0 | GDALDataType eWrkDataType = |
1714 | 0 | GDALGetOvrWorkDataType(pszResampling, eDataType); |
1715 | |
|
1716 | 0 | int nDstBlockXSize = nBufXSize; |
1717 | 0 | int nDstBlockYSize = nBufYSize; |
1718 | 0 | int nFullResXChunk, nFullResYChunk; |
1719 | 0 | while (true) |
1720 | 0 | { |
1721 | 0 | nFullResXChunk = |
1722 | 0 | 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc); |
1723 | 0 | nFullResYChunk = |
1724 | 0 | 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc); |
1725 | 0 | if (nFullResXChunk > nRasterXSize) |
1726 | 0 | nFullResXChunk = nRasterXSize; |
1727 | 0 | if (nFullResYChunk > nRasterYSize) |
1728 | 0 | nFullResYChunk = nRasterYSize; |
1729 | 0 | if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) || |
1730 | 0 | (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= |
1731 | 0 | 1024 * 1024)) |
1732 | 0 | break; |
1733 | | // When operating on the full width of a raster whose block width is |
1734 | | // the raster width, prefer doing chunks in height. |
1735 | 0 | if (nFullResXChunk >= nXSize && nXSize == nBlockXSize && |
1736 | 0 | nDstBlockYSize > 1) |
1737 | 0 | nDstBlockYSize /= 2; |
1738 | | /* Otherwise cut the maximal dimension */ |
1739 | 0 | else if (nDstBlockXSize > 1 && |
1740 | 0 | (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1)) |
1741 | 0 | nDstBlockXSize /= 2; |
1742 | 0 | else |
1743 | 0 | nDstBlockYSize /= 2; |
1744 | 0 | } |
1745 | |
|
1746 | 0 | int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc), |
1747 | 0 | static_cast<int>(0.5 + dfYRatioDstToSrc)); |
1748 | 0 | if (nOvrFactor == 0) |
1749 | 0 | nOvrFactor = 1; |
1750 | 0 | int nFullResXSizeQueried = |
1751 | 0 | nFullResXChunk + 2 * nKernelRadius * nOvrFactor; |
1752 | 0 | int nFullResYSizeQueried = |
1753 | 0 | nFullResYChunk + 2 * nKernelRadius * nOvrFactor; |
1754 | |
|
1755 | 0 | if (nFullResXSizeQueried > nRasterXSize) |
1756 | 0 | nFullResXSizeQueried = nRasterXSize; |
1757 | 0 | if (nFullResYSizeQueried > nRasterYSize) |
1758 | 0 | nFullResYSizeQueried = nRasterYSize; |
1759 | |
|
1760 | 0 | void *pChunk = VSI_MALLOC3_VERBOSE( |
1761 | 0 | cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) * |
1762 | 0 | nBandCount), |
1763 | 0 | nFullResXSizeQueried, nFullResYSizeQueried); |
1764 | 0 | GByte *pabyChunkNoDataMask = nullptr; |
1765 | |
|
1766 | 0 | GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand(); |
1767 | 0 | int nMaskFlags = poFirstSrcBand->GetMaskFlags(); |
1768 | |
|
1769 | 0 | bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0); |
1770 | 0 | if (bUseNoDataMask) |
1771 | 0 | { |
1772 | 0 | pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE( |
1773 | 0 | nFullResXSizeQueried, nFullResYSizeQueried)); |
1774 | 0 | } |
1775 | 0 | if (pChunk == nullptr || |
1776 | 0 | (bUseNoDataMask && pabyChunkNoDataMask == nullptr)) |
1777 | 0 | { |
1778 | 0 | GDALClose(poMEMDS); |
1779 | 0 | CPLFree(pChunk); |
1780 | 0 | CPLFree(pabyChunkNoDataMask); |
1781 | 0 | CPLFree(papoDstBands); |
1782 | 0 | return CE_Failure; |
1783 | 0 | } |
1784 | | |
1785 | 0 | const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) * |
1786 | 0 | DIV_ROUND_UP(nBufYSize, nDstBlockYSize); |
1787 | 0 | int nBlocksDone = 0; |
1788 | |
|
1789 | 0 | int nDstYOff; |
1790 | 0 | for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None; |
1791 | 0 | nDstYOff += nDstBlockYSize) |
1792 | 0 | { |
1793 | 0 | int nDstYCount; |
1794 | 0 | if (nDstYOff + nDstBlockYSize <= nBufYSize) |
1795 | 0 | nDstYCount = nDstBlockYSize; |
1796 | 0 | else |
1797 | 0 | nDstYCount = nBufYSize - nDstYOff; |
1798 | |
|
1799 | 0 | int nChunkYOff = |
1800 | 0 | nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc); |
1801 | 0 | int nChunkYOff2 = nYOff + 1 + |
1802 | 0 | static_cast<int>(ceil((nDstYOff + nDstYCount) * |
1803 | 0 | dfYRatioDstToSrc)); |
1804 | 0 | if (nChunkYOff2 > nRasterYSize) |
1805 | 0 | nChunkYOff2 = nRasterYSize; |
1806 | 0 | int nYCount = nChunkYOff2 - nChunkYOff; |
1807 | 0 | CPLAssert(nYCount <= nFullResYChunk); |
1808 | | |
1809 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor; |
1810 | 0 | int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor; |
1811 | 0 | if (nChunkYOffQueried < 0) |
1812 | 0 | { |
1813 | 0 | nChunkYSizeQueried += nChunkYOffQueried; |
1814 | 0 | nChunkYOffQueried = 0; |
1815 | 0 | } |
1816 | 0 | if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize) |
1817 | 0 | nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried; |
1818 | 0 | CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried); |
1819 | | |
1820 | 0 | int nDstXOff; |
1821 | 0 | for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None; |
1822 | 0 | nDstXOff += nDstBlockXSize) |
1823 | 0 | { |
1824 | 0 | int nDstXCount; |
1825 | 0 | if (nDstXOff + nDstBlockXSize <= nBufXSize) |
1826 | 0 | nDstXCount = nDstBlockXSize; |
1827 | 0 | else |
1828 | 0 | nDstXCount = nBufXSize - nDstXOff; |
1829 | |
|
1830 | 0 | int nChunkXOff = |
1831 | 0 | nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc); |
1832 | 0 | int nChunkXOff2 = |
1833 | 0 | nXOff + 1 + |
1834 | 0 | static_cast<int>( |
1835 | 0 | ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc)); |
1836 | 0 | if (nChunkXOff2 > nRasterXSize) |
1837 | 0 | nChunkXOff2 = nRasterXSize; |
1838 | 0 | int nXCount = nChunkXOff2 - nChunkXOff; |
1839 | 0 | CPLAssert(nXCount <= nFullResXChunk); |
1840 | | |
1841 | 0 | int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor; |
1842 | 0 | int nChunkXSizeQueried = |
1843 | 0 | nXCount + 2 * nKernelRadius * nOvrFactor; |
1844 | 0 | if (nChunkXOffQueried < 0) |
1845 | 0 | { |
1846 | 0 | nChunkXSizeQueried += nChunkXOffQueried; |
1847 | 0 | nChunkXOffQueried = 0; |
1848 | 0 | } |
1849 | 0 | if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize) |
1850 | 0 | nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried; |
1851 | 0 | CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried); |
1852 | | |
1853 | 0 | bool bSkipResample = false; |
1854 | 0 | bool bNoDataMaskFullyOpaque = false; |
1855 | 0 | if (eErr == CE_None && bUseNoDataMask) |
1856 | 0 | { |
1857 | 0 | eErr = poMaskBand->RasterIO( |
1858 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1859 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
1860 | 0 | pabyChunkNoDataMask, nChunkXSizeQueried, |
1861 | 0 | nChunkYSizeQueried, GDT_UInt8, 0, 0, nullptr); |
1862 | | |
1863 | | /* Optimizations if mask is fully opaque or transparent */ |
1864 | 0 | const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried; |
1865 | 0 | const GByte bVal = pabyChunkNoDataMask[0]; |
1866 | 0 | int i = 1; // Used after for. |
1867 | 0 | for (; i < nPixels; i++) |
1868 | 0 | { |
1869 | 0 | if (pabyChunkNoDataMask[i] != bVal) |
1870 | 0 | break; |
1871 | 0 | } |
1872 | 0 | if (i == nPixels) |
1873 | 0 | { |
1874 | 0 | if (bVal == 0) |
1875 | 0 | { |
1876 | 0 | GByte abyZero[16] = {0}; |
1877 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
1878 | 0 | { |
1879 | 0 | for (int j = 0; j < nDstYCount; j++) |
1880 | 0 | { |
1881 | 0 | GDALCopyWords64( |
1882 | 0 | abyZero, GDT_UInt8, 0, |
1883 | 0 | static_cast<GByte *>(pData) + |
1884 | 0 | iBand * nBandSpace + |
1885 | 0 | nLineSpace * (j + nDstYOff) + |
1886 | 0 | nDstXOff * nPixelSpace, |
1887 | 0 | eBufType, static_cast<int>(nPixelSpace), |
1888 | 0 | nDstXCount); |
1889 | 0 | } |
1890 | 0 | } |
1891 | 0 | bSkipResample = true; |
1892 | 0 | } |
1893 | 0 | else |
1894 | 0 | { |
1895 | 0 | bNoDataMaskFullyOpaque = true; |
1896 | 0 | } |
1897 | 0 | } |
1898 | 0 | } |
1899 | |
|
1900 | 0 | if (!bSkipResample && eErr == CE_None) |
1901 | 0 | { |
1902 | | /* Read the source buffers */ |
1903 | 0 | eErr = RasterIO( |
1904 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1905 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, pChunk, |
1906 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType, |
1907 | 0 | nBandCount, panBandMap, 0, 0, 0, nullptr); |
1908 | 0 | } |
1909 | |
|
1910 | | #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND |
1911 | | if (pfnResampleFuncMultiBands && !bSkipResample && |
1912 | | eErr == CE_None) |
1913 | | { |
1914 | | eErr = pfnResampleFuncMultiBands( |
1915 | | dfXRatioDstToSrc, dfYRatioDstToSrc, |
1916 | | dfXOff - nXOff, /* == 0 if bHasXOffVirtual */ |
1917 | | dfYOff - nYOff, /* == 0 if bHasYOffVirtual */ |
1918 | | eWrkDataType, (GByte *)pChunk, nBandCount, |
1919 | | bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask, |
1920 | | nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff), |
1921 | | nChunkXSizeQueried, |
1922 | | nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff), |
1923 | | nChunkYSizeQueried, nDstXOff + nDestXOffVirtual, |
1924 | | nDstXOff + nDestXOffVirtual + nDstXCount, |
1925 | | nDstYOff + nDestYOffVirtual, |
1926 | | nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands, |
1927 | | pszResampling, FALSE /*bHasNoData*/, |
1928 | | 0.0 /* dfNoDataValue */, nullptr /* color table*/, |
1929 | | eDataType); |
1930 | | } |
1931 | | else |
1932 | | #endif |
1933 | 0 | { |
1934 | 0 | size_t nChunkBandOffset = |
1935 | 0 | static_cast<size_t>(nChunkXSizeQueried) * |
1936 | 0 | nChunkYSizeQueried * |
1937 | 0 | GDALGetDataTypeSizeBytes(eWrkDataType); |
1938 | 0 | for (int i = 0; |
1939 | 0 | i < nBandCount && !bSkipResample && eErr == CE_None; |
1940 | 0 | i++) |
1941 | 0 | { |
1942 | 0 | const bool bPropagateNoData = false; |
1943 | 0 | void *pDstBuffer = nullptr; |
1944 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown; |
1945 | 0 | GDALRasterBand *poMEMBand = |
1946 | 0 | poMEMDS->GetRasterBand(i + 1); |
1947 | 0 | GDALOverviewResampleArgs args; |
1948 | 0 | args.eSrcDataType = eDataType; |
1949 | 0 | args.eOvrDataType = poMEMBand->GetRasterDataType(); |
1950 | 0 | args.nOvrXSize = poMEMBand->GetXSize(); |
1951 | 0 | args.nOvrYSize = poMEMBand->GetYSize(); |
1952 | 0 | args.nOvrNBITS = nNBITS; |
1953 | 0 | args.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1954 | 0 | args.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1955 | 0 | args.dfSrcXDelta = |
1956 | 0 | dfXOff - nXOff; /* == 0 if bHasXOffVirtual */ |
1957 | 0 | args.dfSrcYDelta = |
1958 | 0 | dfYOff - nYOff; /* == 0 if bHasYOffVirtual */ |
1959 | 0 | args.eWrkDataType = eWrkDataType; |
1960 | 0 | args.pabyChunkNodataMask = bNoDataMaskFullyOpaque |
1961 | 0 | ? nullptr |
1962 | 0 | : pabyChunkNoDataMask; |
1963 | 0 | args.nChunkXOff = |
1964 | 0 | nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff); |
1965 | 0 | args.nChunkXSize = nChunkXSizeQueried; |
1966 | 0 | args.nChunkYOff = |
1967 | 0 | nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff); |
1968 | 0 | args.nChunkYSize = nChunkYSizeQueried; |
1969 | 0 | args.nDstXOff = nDstXOff + nDestXOffVirtual; |
1970 | 0 | args.nDstXOff2 = |
1971 | 0 | nDstXOff + nDestXOffVirtual + nDstXCount; |
1972 | 0 | args.nDstYOff = nDstYOff + nDestYOffVirtual; |
1973 | 0 | args.nDstYOff2 = |
1974 | 0 | nDstYOff + nDestYOffVirtual + nDstYCount; |
1975 | 0 | args.pszResampling = pszResampling; |
1976 | 0 | args.bHasNoData = false; |
1977 | 0 | args.dfNoDataValue = 0.0; |
1978 | 0 | args.poColorTable = nullptr; |
1979 | 0 | args.bPropagateNoData = bPropagateNoData; |
1980 | |
|
1981 | 0 | eErr = |
1982 | 0 | pfnResampleFunc(args, |
1983 | 0 | reinterpret_cast<GByte *>(pChunk) + |
1984 | 0 | i * nChunkBandOffset, |
1985 | 0 | &pDstBuffer, &eDstBufferDataType); |
1986 | 0 | if (eErr == CE_None) |
1987 | 0 | { |
1988 | 0 | eErr = poMEMBand->RasterIO( |
1989 | 0 | GF_Write, nDstXOff + nDestXOffVirtual, |
1990 | 0 | nDstYOff + nDestYOffVirtual, nDstXCount, |
1991 | 0 | nDstYCount, pDstBuffer, nDstXCount, nDstYCount, |
1992 | 0 | eDstBufferDataType, 0, 0, nullptr); |
1993 | 0 | } |
1994 | 0 | CPLFree(pDstBuffer); |
1995 | 0 | } |
1996 | 0 | } |
1997 | |
|
1998 | 0 | nBlocksDone++; |
1999 | 0 | if (eErr == CE_None && psExtraArg->pfnProgress != nullptr && |
2000 | 0 | !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks, |
2001 | 0 | "", psExtraArg->pProgressData)) |
2002 | 0 | { |
2003 | 0 | eErr = CE_Failure; |
2004 | 0 | } |
2005 | 0 | } |
2006 | 0 | } |
2007 | | |
2008 | 0 | CPLFree(pChunk); |
2009 | 0 | CPLFree(pabyChunkNoDataMask); |
2010 | 0 | } |
2011 | | |
2012 | 0 | CPLFree(papoDstBands); |
2013 | 0 | GDALClose(poMEMDS); |
2014 | |
|
2015 | 0 | return eErr; |
2016 | 0 | } |
2017 | | |
2018 | | //! @endcond |
2019 | | |
2020 | | /************************************************************************/ |
2021 | | /* GDALSwapWords() */ |
2022 | | /************************************************************************/ |
2023 | | |
2024 | | /** |
2025 | | * Byte swap words in-place. |
2026 | | * |
2027 | | * This function will byte swap a set of 2, 4 or 8 byte words "in place" in |
2028 | | * a memory array. No assumption is made that the words being swapped are |
2029 | | * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h |
2030 | | * to determine if the current platform is big endian or little endian. Use |
2031 | | * The macros like CPL_SWAP32() to byte swap single values without the overhead |
2032 | | * of a function call. |
2033 | | * |
2034 | | * @param pData pointer to start of data buffer. |
2035 | | * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8. |
2036 | | * @param nWordCount the number of words to be swapped in this call. |
2037 | | * @param nWordSkip the byte offset from the start of one word to the start of |
2038 | | * the next. For packed buffers this is the same as nWordSize. |
2039 | | */ |
2040 | | |
2041 | | void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount, |
2042 | | int nWordSkip) |
2043 | | |
2044 | 0 | { |
2045 | 0 | if (nWordCount > 0) |
2046 | 0 | VALIDATE_POINTER0(pData, "GDALSwapWords"); |
2047 | | |
2048 | 0 | GByte *pabyData = static_cast<GByte *>(pData); |
2049 | |
|
2050 | 0 | switch (nWordSize) |
2051 | 0 | { |
2052 | 0 | case 1: |
2053 | 0 | break; |
2054 | | |
2055 | 0 | case 2: |
2056 | 0 | CPLAssert(nWordSkip >= 2 || nWordCount == 1); |
2057 | 0 | for (int i = 0; i < nWordCount; i++) |
2058 | 0 | { |
2059 | 0 | CPL_SWAP16PTR(pabyData); |
2060 | 0 | pabyData += nWordSkip; |
2061 | 0 | } |
2062 | 0 | break; |
2063 | | |
2064 | 0 | case 4: |
2065 | 0 | CPLAssert(nWordSkip >= 4 || nWordCount == 1); |
2066 | 0 | if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0) |
2067 | 0 | { |
2068 | 0 | for (int i = 0; i < nWordCount; i++) |
2069 | 0 | { |
2070 | 0 | *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32( |
2071 | 0 | *reinterpret_cast<const GUInt32 *>(pabyData)); |
2072 | 0 | pabyData += nWordSkip; |
2073 | 0 | } |
2074 | 0 | } |
2075 | 0 | else |
2076 | 0 | { |
2077 | 0 | for (int i = 0; i < nWordCount; i++) |
2078 | 0 | { |
2079 | 0 | CPL_SWAP32PTR(pabyData); |
2080 | 0 | pabyData += nWordSkip; |
2081 | 0 | } |
2082 | 0 | } |
2083 | 0 | break; |
2084 | | |
2085 | 0 | case 8: |
2086 | 0 | CPLAssert(nWordSkip >= 8 || nWordCount == 1); |
2087 | 0 | if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0) |
2088 | 0 | { |
2089 | 0 | for (int i = 0; i < nWordCount; i++) |
2090 | 0 | { |
2091 | 0 | *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64( |
2092 | 0 | *reinterpret_cast<const GUInt64 *>(pabyData)); |
2093 | 0 | pabyData += nWordSkip; |
2094 | 0 | } |
2095 | 0 | } |
2096 | 0 | else |
2097 | 0 | { |
2098 | 0 | for (int i = 0; i < nWordCount; i++) |
2099 | 0 | { |
2100 | 0 | CPL_SWAP64PTR(pabyData); |
2101 | 0 | pabyData += nWordSkip; |
2102 | 0 | } |
2103 | 0 | } |
2104 | 0 | break; |
2105 | | |
2106 | 0 | default: |
2107 | 0 | CPLAssert(false); |
2108 | 0 | } |
2109 | 0 | } |
2110 | | |
2111 | | /************************************************************************/ |
2112 | | /* GDALSwapWordsEx() */ |
2113 | | /************************************************************************/ |
2114 | | |
2115 | | /** |
2116 | | * Byte swap words in-place. |
2117 | | * |
2118 | | * This function will byte swap a set of 2, 4 or 8 byte words "in place" in |
2119 | | * a memory array. No assumption is made that the words being swapped are |
2120 | | * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h |
2121 | | * to determine if the current platform is big endian or little endian. Use |
2122 | | * The macros like CPL_SWAP32() to byte swap single values without the overhead |
2123 | | * of a function call. |
2124 | | * |
2125 | | * @param pData pointer to start of data buffer. |
2126 | | * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8. |
2127 | | * @param nWordCount the number of words to be swapped in this call. |
2128 | | * @param nWordSkip the byte offset from the start of one word to the start of |
2129 | | * the next. For packed buffers this is the same as nWordSize. |
2130 | | */ |
2131 | | void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount, |
2132 | | int nWordSkip) |
2133 | 0 | { |
2134 | 0 | GByte *pabyData = static_cast<GByte *>(pData); |
2135 | 0 | while (nWordCount) |
2136 | 0 | { |
2137 | | // Pick-up a multiple of 8 as max chunk size. |
2138 | 0 | const int nWordCountSmall = |
2139 | 0 | (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount); |
2140 | 0 | GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip); |
2141 | 0 | pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall; |
2142 | 0 | nWordCount -= nWordCountSmall; |
2143 | 0 | } |
2144 | 0 | } |
2145 | | |
2146 | | // Place the new GDALCopyWords helpers in an anonymous namespace |
2147 | | namespace |
2148 | | { |
2149 | | |
2150 | | /************************************************************************/ |
2151 | | /* GDALCopyWordsT() */ |
2152 | | /************************************************************************/ |
2153 | | /** |
2154 | | * Template function, used to copy data from pSrcData into buffer |
2155 | | * pDstData, with stride nSrcPixelStride in the source data and |
2156 | | * stride nDstPixelStride in the destination data. This template can |
2157 | | * deal with the case where the input data type is real or complex and |
2158 | | * the output is real. |
2159 | | * |
2160 | | * @param pSrcData the source data buffer |
2161 | | * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels |
2162 | | * of interest. |
2163 | | * @param pDstData the destination buffer. |
2164 | | * @param nDstPixelStride the stride in the buffer pDstData for pixels of |
2165 | | * interest. |
2166 | | * @param nWordCount the total number of pixel words to copy |
2167 | | * |
2168 | | * @code |
2169 | | * // Assume an input buffer of type GUInt16 named pBufferIn |
2170 | | * GByte *pBufferOut = new GByte[numBytesOut]; |
2171 | | * GDALCopyWordsT<GUInt16, GByte>(pSrcData, 2, pDstData, 1, numBytesOut); |
2172 | | * @endcode |
2173 | | * @note |
2174 | | * This is a private function, and should not be exposed outside of |
2175 | | * rasterio.cpp. External users should call the GDALCopyWords driver function. |
2176 | | */ |
2177 | | |
2178 | | template <class Tin, class Tout> |
2179 | | static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData, |
2180 | | int nSrcPixelStride, |
2181 | | Tout *const CPL_RESTRICT pDstData, |
2182 | | int nDstPixelStride, |
2183 | | GPtrDiff_t nWordCount) |
2184 | 0 | { |
2185 | 0 | decltype(nWordCount) nDstOffset = 0; |
2186 | |
|
2187 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); |
2188 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
2189 | 0 | for (decltype(nWordCount) n = 0; n < nWordCount; n++) |
2190 | 0 | { |
2191 | 0 | const Tin tValue = |
2192 | 0 | *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride)); |
2193 | 0 | Tout *const pOutPixel = |
2194 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2195 | |
|
2196 | 0 | GDALCopyWord(tValue, *pOutPixel); |
2197 | |
|
2198 | 0 | nDstOffset += nDstPixelStride; |
2199 | 0 | } |
2200 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, signed char>(unsigned char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned short>(unsigned char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, short>(unsigned char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned int>(unsigned char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, int>(unsigned char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, long>(unsigned char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, float>(unsigned char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, double>(unsigned char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned 
char>(signed char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, signed char>(signed char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, long>(signed char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned char>(unsigned short const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, 
signed char>(unsigned short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, short>(unsigned short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, int>(unsigned short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, long>(unsigned short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, float>(unsigned short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, double>(unsigned short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned char>(short const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, signed char>(short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<short, unsigned short>(short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned int>(short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned long>(short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, long>(short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, short>(unsigned int const*, int, short*, int, long 
long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, long>(unsigned int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, double>(unsigned int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned char>(int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, signed char>(int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned short>(int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned int>(int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned long>(int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, long>(int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned long>(unsigned long const*, int, 
unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, long>(unsigned long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned char>(long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, signed char>(long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned short>(long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned int>(long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned long>(long const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, long>(long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<float, unsigned int>(float const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned long>(float const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, long>(float const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, signed char>(double const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, short>(double const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned int>(double const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned long>(double const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, long>(double const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, double>(double const*, int, double*, int, long long) |
2201 | | |
// GDALCopyWordsT: generic (unspecialized) word-copy entry point for a
// source element type Tin and a destination element type Tout.
//
// This version does no work of its own: it forwards all five arguments,
// unchanged and in the same order, to GDALCopyWordsGenericT().
//
// Parameters (all passed straight through):
//   pSrcData        - source buffer of Tin values (CPL_RESTRICT-qualified).
//   nSrcPixelStride - stride applied when stepping through the source
//                     (presumably a byte offset between consecutive words;
//                     confirm against GDALCopyWordsGenericT).
//   pDstData        - destination buffer of Tout values
//                     (CPL_RESTRICT-qualified).
//   nDstPixelStride - stride applied when stepping through the destination
//                     (same caveat as nSrcPixelStride).
//   nWordCount      - number of words to process.
//
// NOTE(review): both pointers carry CPL_RESTRICT, so callers are presumably
// expected to pass non-overlapping buffers; confirm the macro's expansion.
// NOTE(review): the function is marked CPL_NOINLINE; the reason is not
// visible here. Type pairs that need a dedicated fast path are presumably
// handled by specializations/overloads elsewhere in the file; verify.
2202 | | template <class Tin, class Tout> |
2203 | | static void CPL_NOINLINE GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData, |
2204 | | int nSrcPixelStride, |
2205 | | Tout *const CPL_RESTRICT pDstData, |
2206 | | int nDstPixelStride, |
2207 | | GPtrDiff_t nWordCount) |
2208 | 0 | { |
2209 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride, |
2210 | 0 | nWordCount); |
2211 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, long>(unsigned char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, signed char>(signed char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, long>(signed char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void 
(anonymous namespace)::GDALCopyWordsT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, signed char>(unsigned short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, int>(unsigned short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, long>(unsigned short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned char>(short const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, signed char>(short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned int>(short const*, int, 
unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned long>(short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, long>(short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, short>(unsigned int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, long>(unsigned int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned 
int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, double>(unsigned int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned char>(int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, signed char>(int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned short>(int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned int>(int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned long>(int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, long>(int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void 
(anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned long>(unsigned long const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, long>(unsigned long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned char>(long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, signed 
char>(long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned short>(long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned int>(long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned long>(long const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, long>(long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long 
long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned int>(float const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned long>(float const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, long>(float const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, signed char>(double const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, short>(double const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned int>(double const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, int>(double const*, int, int*, int, long long) Unexecuted 
instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned long>(double const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, long>(double const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, double>(double const*, int, double*, int, long long) |
2212 | | |
2213 | | template <class Tin, class Tout> |
2214 | | static void inline GDALCopyWordsT_8atatime( |
2215 | | const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride, |
2216 | | Tout *const CPL_RESTRICT pDstData, int nDstPixelStride, |
2217 | | GPtrDiff_t nWordCount) |
2218 | 0 | { |
2219 | 0 | decltype(nWordCount) nDstOffset = 0; |
2220 | |
|
2221 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); |
2222 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
2223 | 0 | decltype(nWordCount) n = 0; |
2224 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) && |
2225 | 0 | nDstPixelStride == static_cast<int>(sizeof(Tout))) |
2226 | 0 | { |
2227 | 0 | for (; n < nWordCount - 7; n += 8) |
2228 | 0 | { |
2229 | 0 | const Tin *pInValues = reinterpret_cast<const Tin *>( |
2230 | 0 | pSrcDataPtr + (n * nSrcPixelStride)); |
2231 | 0 | Tout *const pOutPixels = |
2232 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2233 | |
|
2234 | 0 | GDALCopy8Words(pInValues, pOutPixels); |
2235 | |
|
2236 | 0 | nDstOffset += 8 * nDstPixelStride; |
2237 | 0 | } |
2238 | 0 | } |
2239 | 0 | for (; n < nWordCount; n++) |
2240 | 0 | { |
2241 | 0 | const Tin tValue = |
2242 | 0 | *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride)); |
2243 | 0 | Tout *const pOutPixel = |
2244 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2245 | |
|
2246 | 0 | GDALCopyWord(tValue, *pOutPixel); |
2247 | |
|
2248 | 0 | nDstOffset += nDstPixelStride; |
2249 | 0 | } |
2250 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned char>(float const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, signed char>(float const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned short>(float const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, short>(float const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, double>(float const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, unsigned char>(double const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, unsigned short>(double const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT_8atatime<double, float>(double const*, int, float*, int, long long) |
2251 | | |
2252 | | #ifdef HAVE_SSE2 |
2253 | | |
2254 | | template <class Tout> |
2255 | | void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData, |
2256 | | int nSrcPixelStride, |
2257 | | Tout *const CPL_RESTRICT pDstData, |
2258 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2259 | 0 | { |
2260 | 0 | static_assert(std::is_integral<Tout>::value && |
2261 | 0 | sizeof(Tout) == sizeof(uint16_t), |
2262 | 0 | "Bad Tout"); |
2263 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2264 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2265 | 0 | { |
2266 | 0 | decltype(nWordCount) n = 0; |
2267 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2268 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2269 | 0 | reinterpret_cast<GByte *>(pDstData); |
2270 | 0 | for (; n < nWordCount - 15; n += 16) |
2271 | 0 | { |
2272 | 0 | __m128i xmm = _mm_loadu_si128( |
2273 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2274 | 0 | __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero); |
2275 | 0 | __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero); |
2276 | 0 | _mm_storeu_si128( |
2277 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0); |
2278 | 0 | _mm_storeu_si128( |
2279 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1); |
2280 | 0 | } |
2281 | 0 | for (; n < nWordCount; n++) |
2282 | 0 | { |
2283 | 0 | pDstData[n] = pSrcData[n]; |
2284 | 0 | } |
2285 | 0 | } |
2286 | 0 | else |
2287 | 0 | { |
2288 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2289 | 0 | nDstPixelStride, nWordCount); |
2290 | 0 | } |
2291 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<unsigned short>(unsigned char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<short>(unsigned char const*, int, short*, int, long long) |
2292 | | |
2293 | | template <> |
2294 | | CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2295 | | int nSrcPixelStride, |
2296 | | GUInt16 *const CPL_RESTRICT pDstData, |
2297 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2298 | 0 | { |
2299 | 0 | GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData, |
2300 | 0 | nDstPixelStride, nWordCount); |
2301 | 0 | } |
2302 | | |
2303 | | template <> |
2304 | | CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2305 | | int nSrcPixelStride, |
2306 | | GInt16 *const CPL_RESTRICT pDstData, |
2307 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2308 | 0 | { |
2309 | 0 | GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData, |
2310 | 0 | nDstPixelStride, nWordCount); |
2311 | 0 | } |
2312 | | |
2313 | | template <class Tout> |
2314 | | void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData, |
2315 | | int nSrcPixelStride, |
2316 | | Tout *const CPL_RESTRICT pDstData, |
2317 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2318 | 0 | { |
2319 | 0 | static_assert(std::is_integral<Tout>::value && |
2320 | 0 | sizeof(Tout) == sizeof(uint32_t), |
2321 | 0 | "Bad Tout"); |
2322 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2323 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2324 | 0 | { |
2325 | 0 | decltype(nWordCount) n = 0; |
2326 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2327 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2328 | 0 | reinterpret_cast<GByte *>(pDstData); |
2329 | 0 | for (; n < nWordCount - 15; n += 16) |
2330 | 0 | { |
2331 | 0 | __m128i xmm = _mm_loadu_si128( |
2332 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2333 | 0 | __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero); |
2334 | 0 | __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero); |
2335 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero); |
2336 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero); |
2337 | 0 | __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero); |
2338 | 0 | __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero); |
2339 | 0 | _mm_storeu_si128( |
2340 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0); |
2341 | 0 | _mm_storeu_si128( |
2342 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1); |
2343 | 0 | _mm_storeu_si128( |
2344 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2); |
2345 | 0 | _mm_storeu_si128( |
2346 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3); |
2347 | 0 | } |
2348 | 0 | for (; n < nWordCount; n++) |
2349 | 0 | { |
2350 | 0 | pDstData[n] = pSrcData[n]; |
2351 | 0 | } |
2352 | 0 | } |
2353 | 0 | else |
2354 | 0 | { |
2355 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2356 | 0 | nDstPixelStride, nWordCount); |
2357 | 0 | } |
2358 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<unsigned int>(unsigned char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<int>(unsigned char const*, int, int*, int, long long) |
2359 | | |
2360 | | template <> |
2361 | | CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2362 | | int nSrcPixelStride, |
2363 | | GUInt32 *const CPL_RESTRICT pDstData, |
2364 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2365 | 0 | { |
2366 | 0 | GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData, |
2367 | 0 | nDstPixelStride, nWordCount); |
2368 | 0 | } |
2369 | | |
2370 | | template <> |
2371 | | CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2372 | | int nSrcPixelStride, |
2373 | | GInt32 *const CPL_RESTRICT pDstData, |
2374 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2375 | 0 | { |
2376 | 0 | GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData, |
2377 | 0 | nDstPixelStride, nWordCount); |
2378 | 0 | } |
2379 | | |
2380 | | template <> |
2381 | | CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2382 | | int nSrcPixelStride, |
2383 | | float *const CPL_RESTRICT pDstData, |
2384 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2385 | 0 | { |
2386 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2387 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2388 | 0 | { |
2389 | 0 | decltype(nWordCount) n = 0; |
2390 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2391 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2392 | 0 | reinterpret_cast<GByte *>(pDstData); |
2393 | 0 | for (; n < nWordCount - 15; n += 16) |
2394 | 0 | { |
2395 | 0 | __m128i xmm = _mm_loadu_si128( |
2396 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2397 | 0 | __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero); |
2398 | 0 | __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero); |
2399 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero); |
2400 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero); |
2401 | 0 | __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero); |
2402 | 0 | __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero); |
2403 | 0 | __m128 xmm0_f = _mm_cvtepi32_ps(xmm0); |
2404 | 0 | __m128 xmm1_f = _mm_cvtepi32_ps(xmm1); |
2405 | 0 | __m128 xmm2_f = _mm_cvtepi32_ps(xmm2); |
2406 | 0 | __m128 xmm3_f = _mm_cvtepi32_ps(xmm3); |
2407 | 0 | _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4), |
2408 | 0 | xmm0_f); |
2409 | 0 | _mm_storeu_ps( |
2410 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f); |
2411 | 0 | _mm_storeu_ps( |
2412 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f); |
2413 | 0 | _mm_storeu_ps( |
2414 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f); |
2415 | 0 | } |
2416 | 0 | for (; n < nWordCount; n++) |
2417 | 0 | { |
2418 | 0 | pDstData[n] = pSrcData[n]; |
2419 | 0 | } |
2420 | 0 | } |
2421 | 0 | else |
2422 | 0 | { |
2423 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2424 | 0 | nDstPixelStride, nWordCount); |
2425 | 0 | } |
2426 | 0 | } |
2427 | | |
2428 | | template <> |
2429 | | CPL_NOINLINE void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2430 | | int nSrcPixelStride, |
2431 | | double *const CPL_RESTRICT pDstData, |
2432 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2433 | 0 | { |
2434 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2435 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2436 | 0 | { |
2437 | 0 | decltype(nWordCount) n = 0; |
2438 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2439 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2440 | 0 | reinterpret_cast<GByte *>(pDstData); |
2441 | 0 | for (; n < nWordCount - 15; n += 16) |
2442 | 0 | { |
2443 | 0 | __m128i xmm = _mm_loadu_si128( |
2444 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2445 | 0 | __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero); |
2446 | 0 | __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero); |
2447 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero); |
2448 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero); |
2449 | 0 | __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero); |
2450 | 0 | __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero); |
2451 | |
|
2452 | | #if defined(__AVX2__) && defined(slightly_slower_than_SSE2) |
2453 | | _mm256_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8), |
2454 | | _mm256_cvtepi32_pd(xmm0)); |
2455 | | _mm256_storeu_pd( |
2456 | | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32), |
2457 | | _mm256_cvtepi32_pd(xmm1)); |
2458 | | _mm256_storeu_pd( |
2459 | | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64), |
2460 | | _mm256_cvtepi32_pd(xmm2)); |
2461 | | _mm256_storeu_pd( |
2462 | | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96), |
2463 | | _mm256_cvtepi32_pd(xmm3)); |
2464 | | #else |
2465 | 0 | __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0); |
2466 | 0 | __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1); |
2467 | 0 | __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2); |
2468 | 0 | __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3); |
2469 | 0 | xmm0 = _mm_srli_si128(xmm0, 8); |
2470 | 0 | xmm1 = _mm_srli_si128(xmm1, 8); |
2471 | 0 | xmm2 = _mm_srli_si128(xmm2, 8); |
2472 | 0 | xmm3 = _mm_srli_si128(xmm3, 8); |
2473 | 0 | __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0); |
2474 | 0 | __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1); |
2475 | 0 | __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2); |
2476 | 0 | __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3); |
2477 | |
|
2478 | 0 | _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8), |
2479 | 0 | xmm0_low_d); |
2480 | 0 | _mm_storeu_pd( |
2481 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16), |
2482 | 0 | xmm0_high_d); |
2483 | 0 | _mm_storeu_pd( |
2484 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32), |
2485 | 0 | xmm1_low_d); |
2486 | 0 | _mm_storeu_pd( |
2487 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48), |
2488 | 0 | xmm1_high_d); |
2489 | 0 | _mm_storeu_pd( |
2490 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64), |
2491 | 0 | xmm2_low_d); |
2492 | 0 | _mm_storeu_pd( |
2493 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80), |
2494 | 0 | xmm2_high_d); |
2495 | 0 | _mm_storeu_pd( |
2496 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96), |
2497 | 0 | xmm3_low_d); |
2498 | 0 | _mm_storeu_pd( |
2499 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112), |
2500 | 0 | xmm3_high_d); |
2501 | 0 | #endif |
2502 | 0 | } |
2503 | 0 | for (; n < nWordCount; n++) |
2504 | 0 | { |
2505 | 0 | pDstData[n] = pSrcData[n]; |
2506 | 0 | } |
2507 | 0 | } |
2508 | 0 | else |
2509 | 0 | { |
2510 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2511 | 0 | nDstPixelStride, nWordCount); |
2512 | 0 | } |
2513 | 0 | } |
2514 | | |
2515 | | template <> |
2516 | | CPL_NOINLINE void GDALCopyWordsT(const uint8_t *const CPL_RESTRICT pSrcData, |
2517 | | int nSrcPixelStride, |
2518 | | int8_t *const CPL_RESTRICT pDstData, |
2519 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2520 | 0 | { |
2521 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2522 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2523 | 0 | { |
2524 | 0 | decltype(nWordCount) n = 0; |
2525 | 0 | const __m128i xmm_127 = _mm_set1_epi8(127); |
2526 | 0 | for (; n < nWordCount - 31; n += 32) |
2527 | 0 | { |
2528 | 0 | __m128i xmm0 = _mm_loadu_si128( |
2529 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2530 | 0 | __m128i xmm1 = _mm_loadu_si128( |
2531 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n + 16)); |
2532 | 0 | xmm0 = _mm_min_epu8(xmm0, xmm_127); |
2533 | 0 | xmm1 = _mm_min_epu8(xmm1, xmm_127); |
2534 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0); |
2535 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 16), |
2536 | 0 | xmm1); |
2537 | 0 | } |
2538 | 0 | for (; n < nWordCount; n++) |
2539 | 0 | { |
2540 | 0 | pDstData[n] = |
2541 | 0 | pSrcData[n] >= 127 ? 127 : static_cast<int8_t>(pSrcData[n]); |
2542 | 0 | } |
2543 | 0 | } |
2544 | 0 | else |
2545 | 0 | { |
2546 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2547 | 0 | nDstPixelStride, nWordCount); |
2548 | 0 | } |
2549 | 0 | } |
2550 | | |
2551 | | template <> |
2552 | | CPL_NOINLINE void GDALCopyWordsT(const int8_t *const CPL_RESTRICT pSrcData, |
2553 | | int nSrcPixelStride, |
2554 | | uint8_t *const CPL_RESTRICT pDstData, |
2555 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2556 | 0 | { |
2557 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2558 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2559 | 0 | { |
2560 | 0 | decltype(nWordCount) n = 0; |
2561 | 0 | #if !(defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS)) |
2562 | 0 | const __m128i xmm_INT8_to_UINT8 = _mm_set1_epi8(-128); |
2563 | 0 | #endif |
2564 | 0 | for (; n < nWordCount - 31; n += 32) |
2565 | 0 | { |
2566 | 0 | __m128i xmm0 = _mm_loadu_si128( |
2567 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2568 | 0 | __m128i xmm1 = _mm_loadu_si128( |
2569 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n + 16)); |
2570 | | #if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
2571 | | xmm0 = _mm_max_epi8(xmm0, _mm_setzero_si128()); |
2572 | | xmm1 = _mm_max_epi8(xmm1, _mm_setzero_si128()); |
2573 | | #else |
2574 | 0 | xmm0 = _mm_add_epi8(xmm0, xmm_INT8_to_UINT8); |
2575 | 0 | xmm1 = _mm_add_epi8(xmm1, xmm_INT8_to_UINT8); |
2576 | 0 | xmm0 = _mm_max_epu8(xmm0, xmm_INT8_to_UINT8); |
2577 | 0 | xmm1 = _mm_max_epu8(xmm1, xmm_INT8_to_UINT8); |
2578 | 0 | xmm0 = _mm_sub_epi8(xmm0, xmm_INT8_to_UINT8); |
2579 | 0 | xmm1 = _mm_sub_epi8(xmm1, xmm_INT8_to_UINT8); |
2580 | 0 | #endif |
2581 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0); |
2582 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 16), |
2583 | 0 | xmm1); |
2584 | 0 | } |
2585 | 0 | for (; n < nWordCount; n++) |
2586 | 0 | { |
2587 | 0 | pDstData[n] = |
2588 | 0 | pSrcData[n] < 0 ? 0 : static_cast<uint8_t>(pSrcData[n]); |
2589 | 0 | } |
2590 | 0 | } |
2591 | 0 | else |
2592 | 0 | { |
2593 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2594 | 0 | nDstPixelStride, nWordCount); |
2595 | 0 | } |
2596 | 0 | } |
2597 | | |
2598 | | template <> |
2599 | | CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData, |
2600 | | int nSrcPixelStride, |
2601 | | uint8_t *const CPL_RESTRICT pDstData, |
2602 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2603 | 0 | { |
2604 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2605 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2606 | 0 | { |
2607 | 0 | decltype(nWordCount) n = 0; |
2608 | | #if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
2609 | | const auto xmm_MAX_INT16 = _mm_set1_epi16(32767); |
2610 | | #else |
2611 | | // In SSE2, min_epu16 does not exist, so shift from |
2612 | | // UInt16 to SInt16 to be able to use min_epi16 |
2613 | 0 | const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768); |
2614 | 0 | const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768); |
2615 | 0 | #endif |
2616 | 0 | for (; n < nWordCount - 15; n += 16) |
2617 | 0 | { |
2618 | 0 | __m128i xmm0 = _mm_loadu_si128( |
2619 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2620 | 0 | __m128i xmm1 = _mm_loadu_si128( |
2621 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n + 8)); |
2622 | | #if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
2623 | | xmm0 = _mm_min_epu16(xmm0, xmm_MAX_INT16); |
2624 | | xmm1 = _mm_min_epu16(xmm1, xmm_MAX_INT16); |
2625 | | #else |
2626 | 0 | xmm0 = _mm_add_epi16(xmm0, xmm_UINT16_to_INT16); |
2627 | 0 | xmm1 = _mm_add_epi16(xmm1, xmm_UINT16_to_INT16); |
2628 | 0 | xmm0 = _mm_min_epi16(xmm0, xmm_m255_shifted); |
2629 | 0 | xmm1 = _mm_min_epi16(xmm1, xmm_m255_shifted); |
2630 | 0 | xmm0 = _mm_sub_epi16(xmm0, xmm_UINT16_to_INT16); |
2631 | 0 | xmm1 = _mm_sub_epi16(xmm1, xmm_UINT16_to_INT16); |
2632 | 0 | #endif |
2633 | 0 | xmm0 = _mm_packus_epi16(xmm0, xmm1); |
2634 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0); |
2635 | 0 | } |
2636 | 0 | for (; n < nWordCount; n++) |
2637 | 0 | { |
2638 | 0 | pDstData[n] = |
2639 | 0 | pSrcData[n] >= 255 ? 255 : static_cast<uint8_t>(pSrcData[n]); |
2640 | 0 | } |
2641 | 0 | } |
2642 | 0 | else |
2643 | 0 | { |
2644 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2645 | 0 | nDstPixelStride, nWordCount); |
2646 | 0 | } |
2647 | 0 | } |
2648 | | |
2649 | | template <> |
2650 | | CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData, |
2651 | | int nSrcPixelStride, |
2652 | | int16_t *const CPL_RESTRICT pDstData, |
2653 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2654 | 0 | { |
2655 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2656 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2657 | 0 | { |
2658 | 0 | decltype(nWordCount) n = 0; |
2659 | | #if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
2660 | | const __m128i xmm_MAX_INT16 = _mm_set1_epi16(32767); |
2661 | | #else |
2662 | | // In SSE2, min_epu16 does not exist, so shift from |
2663 | | // UInt16 to SInt16 to be able to use min_epi16 |
2664 | 0 | const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768); |
2665 | 0 | const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768); |
2666 | 0 | #endif |
2667 | 0 | for (; n < nWordCount - 15; n += 16) |
2668 | 0 | { |
2669 | 0 | __m128i xmm0 = _mm_loadu_si128( |
2670 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2671 | 0 | __m128i xmm1 = _mm_loadu_si128( |
2672 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n + 8)); |
2673 | | #if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
2674 | | xmm0 = _mm_min_epu16(xmm0, xmm_MAX_INT16); |
2675 | | xmm1 = _mm_min_epu16(xmm1, xmm_MAX_INT16); |
2676 | | #else |
2677 | 0 | xmm0 = _mm_add_epi16(xmm0, xmm_UINT16_to_INT16); |
2678 | 0 | xmm1 = _mm_add_epi16(xmm1, xmm_UINT16_to_INT16); |
2679 | 0 | xmm0 = _mm_min_epi16(xmm0, xmm_32767_shifted); |
2680 | 0 | xmm1 = _mm_min_epi16(xmm1, xmm_32767_shifted); |
2681 | 0 | xmm0 = _mm_sub_epi16(xmm0, xmm_UINT16_to_INT16); |
2682 | 0 | xmm1 = _mm_sub_epi16(xmm1, xmm_UINT16_to_INT16); |
2683 | 0 | #endif |
2684 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0); |
2685 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 8), |
2686 | 0 | xmm1); |
2687 | 0 | } |
2688 | 0 | for (; n < nWordCount; n++) |
2689 | 0 | { |
2690 | 0 | pDstData[n] = pSrcData[n] >= 32767 |
2691 | 0 | ? 32767 |
2692 | 0 | : static_cast<int16_t>(pSrcData[n]); |
2693 | 0 | } |
2694 | 0 | } |
2695 | 0 | else |
2696 | 0 | { |
2697 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2698 | 0 | nDstPixelStride, nWordCount); |
2699 | 0 | } |
2700 | 0 | } |
2701 | | |
2702 | | template <> |
2703 | | CPL_NOINLINE void GDALCopyWordsT(const int16_t *const CPL_RESTRICT pSrcData, |
2704 | | int nSrcPixelStride, |
2705 | | uint16_t *const CPL_RESTRICT pDstData, |
2706 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2707 | 0 | { |
2708 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2709 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2710 | 0 | { |
2711 | 0 | decltype(nWordCount) n = 0; |
2712 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2713 | 0 | for (; n < nWordCount - 15; n += 16) |
2714 | 0 | { |
2715 | 0 | __m128i xmm0 = _mm_loadu_si128( |
2716 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2717 | 0 | __m128i xmm1 = _mm_loadu_si128( |
2718 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n + 8)); |
2719 | 0 | xmm0 = _mm_max_epi16(xmm0, xmm_zero); |
2720 | 0 | xmm1 = _mm_max_epi16(xmm1, xmm_zero); |
2721 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0); |
2722 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 8), |
2723 | 0 | xmm1); |
2724 | 0 | } |
2725 | 0 | for (; n < nWordCount; n++) |
2726 | 0 | { |
2727 | 0 | pDstData[n] = |
2728 | 0 | pSrcData[n] < 0 ? 0 : static_cast<uint16_t>(pSrcData[n]); |
2729 | 0 | } |
2730 | 0 | } |
2731 | 0 | else |
2732 | 0 | { |
2733 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2734 | 0 | nDstPixelStride, nWordCount); |
2735 | 0 | } |
2736 | 0 | } |
2737 | | |
2738 | | #if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
2739 | | |
2740 | | template <> |
2741 | | CPL_NOINLINE void GDALCopyWordsT(const uint32_t *const CPL_RESTRICT pSrcData, |
2742 | | int nSrcPixelStride, |
2743 | | int32_t *const CPL_RESTRICT pDstData, |
2744 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2745 | | { |
2746 | | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2747 | | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2748 | | { |
2749 | | decltype(nWordCount) n = 0; |
2750 | | const __m128i xmm_MAX_INT = _mm_set1_epi32(INT_MAX); |
2751 | | for (; n < nWordCount - 8; n += 7) |
2752 | | { |
2753 | | __m128i xmm0 = _mm_loadu_si128( |
2754 | | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2755 | | __m128i xmm1 = _mm_loadu_si128( |
2756 | | reinterpret_cast<const __m128i *>(pSrcData + n + 4)); |
2757 | | xmm0 = _mm_min_epu32(xmm0, xmm_MAX_INT); |
2758 | | xmm1 = _mm_min_epu32(xmm1, xmm_MAX_INT); |
2759 | | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0); |
2760 | | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 4), |
2761 | | xmm1); |
2762 | | } |
2763 | | for (; n < nWordCount; n++) |
2764 | | { |
2765 | | pDstData[n] = pSrcData[n] >= INT_MAX |
2766 | | ? INT_MAX |
2767 | | : static_cast<int32_t>(pSrcData[n]); |
2768 | | } |
2769 | | } |
2770 | | else |
2771 | | { |
2772 | | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2773 | | nDstPixelStride, nWordCount); |
2774 | | } |
2775 | | } |
2776 | | |
2777 | | template <> |
2778 | | CPL_NOINLINE void GDALCopyWordsT(const int32_t *const CPL_RESTRICT pSrcData, |
2779 | | int nSrcPixelStride, |
2780 | | uint32_t *const CPL_RESTRICT pDstData, |
2781 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2782 | | { |
2783 | | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2784 | | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2785 | | { |
2786 | | decltype(nWordCount) n = 0; |
2787 | | const __m128i xmm_zero = _mm_setzero_si128(); |
2788 | | for (; n < nWordCount - 7; n += 8) |
2789 | | { |
2790 | | __m128i xmm0 = _mm_loadu_si128( |
2791 | | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2792 | | __m128i xmm1 = _mm_loadu_si128( |
2793 | | reinterpret_cast<const __m128i *>(pSrcData + n + 4)); |
2794 | | xmm0 = _mm_max_epi32(xmm0, xmm_zero); |
2795 | | xmm1 = _mm_max_epi32(xmm1, xmm_zero); |
2796 | | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm0); |
2797 | | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n + 4), |
2798 | | xmm1); |
2799 | | } |
2800 | | for (; n < nWordCount; n++) |
2801 | | { |
2802 | | pDstData[n] = |
2803 | | pSrcData[n] < 0 ? 0 : static_cast<uint32_t>(pSrcData[n]); |
2804 | | } |
2805 | | } |
2806 | | else |
2807 | | { |
2808 | | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2809 | | nDstPixelStride, nWordCount); |
2810 | | } |
2811 | | } |
2812 | | |
2813 | | #endif // defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
2814 | | |
2815 | | template <> |
2816 | | CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData, |
2817 | | int nSrcPixelStride, |
2818 | | float *const CPL_RESTRICT pDstData, |
2819 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2820 | 0 | { |
2821 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2822 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2823 | 0 | { |
2824 | 0 | decltype(nWordCount) n = 0; |
2825 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2826 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2827 | 0 | reinterpret_cast<GByte *>(pDstData); |
2828 | 0 | for (; n < nWordCount - 7; n += 8) |
2829 | 0 | { |
2830 | 0 | __m128i xmm = _mm_loadu_si128( |
2831 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2832 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero); |
2833 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero); |
2834 | 0 | __m128 xmm0_f = _mm_cvtepi32_ps(xmm0); |
2835 | 0 | __m128 xmm1_f = _mm_cvtepi32_ps(xmm1); |
2836 | 0 | _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4), |
2837 | 0 | xmm0_f); |
2838 | 0 | _mm_storeu_ps( |
2839 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f); |
2840 | 0 | } |
2841 | 0 | for (; n < nWordCount; n++) |
2842 | 0 | { |
2843 | 0 | pDstData[n] = pSrcData[n]; |
2844 | 0 | } |
2845 | 0 | } |
2846 | 0 | else |
2847 | 0 | { |
2848 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2849 | 0 | nDstPixelStride, nWordCount); |
2850 | 0 | } |
2851 | 0 | } |
2852 | | |
2853 | | template <> |
2854 | | CPL_NOINLINE void GDALCopyWordsT(const int16_t *const CPL_RESTRICT pSrcData, |
2855 | | int nSrcPixelStride, |
2856 | | float *const CPL_RESTRICT pDstData, |
2857 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2858 | 0 | { |
2859 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2860 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2861 | 0 | { |
2862 | 0 | decltype(nWordCount) n = 0; |
2863 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2864 | 0 | reinterpret_cast<GByte *>(pDstData); |
2865 | 0 | for (; n < nWordCount - 7; n += 8) |
2866 | 0 | { |
2867 | 0 | __m128i xmm = _mm_loadu_si128( |
2868 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2869 | 0 | const auto sign = _mm_srai_epi16(xmm, 15); |
2870 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm, sign); |
2871 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm, sign); |
2872 | 0 | __m128 xmm0_f = _mm_cvtepi32_ps(xmm0); |
2873 | 0 | __m128 xmm1_f = _mm_cvtepi32_ps(xmm1); |
2874 | 0 | _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4), |
2875 | 0 | xmm0_f); |
2876 | 0 | _mm_storeu_ps( |
2877 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f); |
2878 | 0 | } |
2879 | 0 | for (; n < nWordCount; n++) |
2880 | 0 | { |
2881 | 0 | pDstData[n] = pSrcData[n]; |
2882 | 0 | } |
2883 | 0 | } |
2884 | 0 | else |
2885 | 0 | { |
2886 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2887 | 0 | nDstPixelStride, nWordCount); |
2888 | 0 | } |
2889 | 0 | } |
2890 | | |
2891 | | template <> |
2892 | | CPL_NOINLINE void GDALCopyWordsT(const uint16_t *const CPL_RESTRICT pSrcData, |
2893 | | int nSrcPixelStride, |
2894 | | double *const CPL_RESTRICT pDstData, |
2895 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2896 | 0 | { |
2897 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2898 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2899 | 0 | { |
2900 | 0 | decltype(nWordCount) n = 0; |
2901 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2902 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2903 | 0 | reinterpret_cast<GByte *>(pDstData); |
2904 | 0 | for (; n < nWordCount - 7; n += 8) |
2905 | 0 | { |
2906 | 0 | __m128i xmm = _mm_loadu_si128( |
2907 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2908 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero); |
2909 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero); |
2910 | |
|
2911 | 0 | __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0); |
2912 | 0 | __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1); |
2913 | 0 | xmm0 = _mm_srli_si128(xmm0, 8); |
2914 | 0 | xmm1 = _mm_srli_si128(xmm1, 8); |
2915 | 0 | __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0); |
2916 | 0 | __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1); |
2917 | |
|
2918 | 0 | _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8), |
2919 | 0 | xmm0_low_d); |
2920 | 0 | _mm_storeu_pd( |
2921 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16), |
2922 | 0 | xmm0_high_d); |
2923 | 0 | _mm_storeu_pd( |
2924 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32), |
2925 | 0 | xmm1_low_d); |
2926 | 0 | _mm_storeu_pd( |
2927 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48), |
2928 | 0 | xmm1_high_d); |
2929 | 0 | } |
2930 | 0 | for (; n < nWordCount; n++) |
2931 | 0 | { |
2932 | 0 | pDstData[n] = pSrcData[n]; |
2933 | 0 | } |
2934 | 0 | } |
2935 | 0 | else |
2936 | 0 | { |
2937 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2938 | 0 | nDstPixelStride, nWordCount); |
2939 | 0 | } |
2940 | 0 | } |
2941 | | |
2942 | | template <> |
2943 | | CPL_NOINLINE void GDALCopyWordsT(const int16_t *const CPL_RESTRICT pSrcData, |
2944 | | int nSrcPixelStride, |
2945 | | double *const CPL_RESTRICT pDstData, |
2946 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2947 | 0 | { |
2948 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2949 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2950 | 0 | { |
2951 | 0 | decltype(nWordCount) n = 0; |
2952 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2953 | 0 | reinterpret_cast<GByte *>(pDstData); |
2954 | 0 | for (; n < nWordCount - 7; n += 8) |
2955 | 0 | { |
2956 | 0 | __m128i xmm = _mm_loadu_si128( |
2957 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2958 | 0 | const auto sign = _mm_srai_epi16(xmm, 15); |
2959 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm, sign); |
2960 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm, sign); |
2961 | |
|
2962 | 0 | __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0); |
2963 | 0 | __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1); |
2964 | 0 | xmm0 = _mm_srli_si128(xmm0, 8); |
2965 | 0 | xmm1 = _mm_srli_si128(xmm1, 8); |
2966 | 0 | __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0); |
2967 | 0 | __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1); |
2968 | |
|
2969 | 0 | _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8), |
2970 | 0 | xmm0_low_d); |
2971 | 0 | _mm_storeu_pd( |
2972 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16), |
2973 | 0 | xmm0_high_d); |
2974 | 0 | _mm_storeu_pd( |
2975 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32), |
2976 | 0 | xmm1_low_d); |
2977 | 0 | _mm_storeu_pd( |
2978 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48), |
2979 | 0 | xmm1_high_d); |
2980 | 0 | } |
2981 | 0 | for (; n < nWordCount; n++) |
2982 | 0 | { |
2983 | 0 | pDstData[n] = pSrcData[n]; |
2984 | 0 | } |
2985 | 0 | } |
2986 | 0 | else |
2987 | 0 | { |
2988 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2989 | 0 | nDstPixelStride, nWordCount); |
2990 | 0 | } |
2991 | 0 | } |
2992 | | |
2993 | | #endif // HAVE_SSE2 |
2994 | | |
2995 | | template <> |
2996 | | CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData, |
2997 | | int nSrcPixelStride, |
2998 | | GByte *const CPL_RESTRICT pDstData, |
2999 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3000 | 0 | { |
3001 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3002 | 0 | nDstPixelStride, nWordCount); |
3003 | 0 | } |
3004 | | |
3005 | | template <> |
3006 | | CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData, |
3007 | | int nSrcPixelStride, |
3008 | | GUInt16 *const CPL_RESTRICT pDstData, |
3009 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3010 | 0 | { |
3011 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3012 | 0 | nDstPixelStride, nWordCount); |
3013 | 0 | } |
3014 | | |
3015 | | template <> |
3016 | | CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
3017 | | int nSrcPixelStride, |
3018 | | double *const CPL_RESTRICT pDstData, |
3019 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3020 | 0 | { |
3021 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3022 | 0 | nDstPixelStride, nWordCount); |
3023 | 0 | } |
3024 | | |
3025 | | template <> |
3026 | | CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData, |
3027 | | int nSrcPixelStride, |
3028 | | float *const CPL_RESTRICT pDstData, |
3029 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3030 | 0 | { |
3031 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3032 | 0 | nDstPixelStride, nWordCount); |
3033 | 0 | } |
3034 | | |
3035 | | template <> |
3036 | | CPL_NOINLINE void GDALCopyWordsT(const GFloat16 *const CPL_RESTRICT pSrcData, |
3037 | | int nSrcPixelStride, |
3038 | | float *const CPL_RESTRICT pDstData, |
3039 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3040 | 0 | { |
3041 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3042 | 0 | nDstPixelStride, nWordCount); |
3043 | 0 | } |
3044 | | |
3045 | | template <> |
3046 | | CPL_NOINLINE void GDALCopyWordsT(const GFloat16 *const CPL_RESTRICT pSrcData, |
3047 | | int nSrcPixelStride, |
3048 | | double *const CPL_RESTRICT pDstData, |
3049 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3050 | 0 | { |
3051 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3052 | 0 | nDstPixelStride, nWordCount); |
3053 | 0 | } |
3054 | | |
3055 | | template <> |
3056 | | CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
3057 | | int nSrcPixelStride, |
3058 | | GByte *const CPL_RESTRICT pDstData, |
3059 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3060 | 0 | { |
3061 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3062 | 0 | nDstPixelStride, nWordCount); |
3063 | 0 | } |
3064 | | |
3065 | | template <> |
3066 | | CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
3067 | | int nSrcPixelStride, |
3068 | | GInt8 *const CPL_RESTRICT pDstData, |
3069 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3070 | 0 | { |
3071 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3072 | 0 | nDstPixelStride, nWordCount); |
3073 | 0 | } |
3074 | | |
3075 | | template <> |
3076 | | CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
3077 | | int nSrcPixelStride, |
3078 | | GInt16 *const CPL_RESTRICT pDstData, |
3079 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3080 | 0 | { |
3081 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3082 | 0 | nDstPixelStride, nWordCount); |
3083 | 0 | } |
3084 | | |
3085 | | template <> |
3086 | | CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
3087 | | int nSrcPixelStride, |
3088 | | GUInt16 *const CPL_RESTRICT pDstData, |
3089 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3090 | 0 | { |
3091 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3092 | 0 | nDstPixelStride, nWordCount); |
3093 | 0 | } |
3094 | | |
3095 | | template <> |
3096 | | CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
3097 | | int nSrcPixelStride, |
3098 | | GInt32 *const CPL_RESTRICT pDstData, |
3099 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3100 | 0 | { |
3101 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3102 | 0 | nDstPixelStride, nWordCount); |
3103 | 0 | } |
3104 | | |
3105 | | template <> |
3106 | | CPL_NOINLINE void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
3107 | | int nSrcPixelStride, |
3108 | | GFloat16 *const CPL_RESTRICT pDstData, |
3109 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3110 | 0 | { |
3111 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3112 | 0 | nDstPixelStride, nWordCount); |
3113 | 0 | } |
3114 | | |
3115 | | template <> |
3116 | | CPL_NOINLINE void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData, |
3117 | | int nSrcPixelStride, |
3118 | | GFloat16 *const CPL_RESTRICT pDstData, |
3119 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3120 | 0 | { |
3121 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, |
3122 | 0 | nDstPixelStride, nWordCount); |
3123 | 0 | } |
3124 | | |
3125 | | /************************************************************************/ |
3126 | | /* GDALCopyWordsComplexT() */ |
3127 | | /************************************************************************/ |
3128 | | /** |
3129 | | * Template function, used to copy data from pSrcData into buffer |
3130 | | * pDstData, with stride nSrcPixelStride in the source data and |
3131 | | * stride nDstPixelStride in the destination data. Deals with the |
3132 | | * complex case, where input is complex and output is complex. |
3133 | | * |
3134 | | * @param pSrcData the source data buffer |
3135 | | * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels |
3136 | | * of interest. |
3137 | | * @param pDstData the destination buffer. |
3138 | | * @param nDstPixelStride the stride in the buffer pDstData for pixels of |
3139 | | * interest. |
3140 | | * @param nWordCount the total number of pixel words to copy |
3141 | | * |
3142 | | */ |
3143 | | template <class Tin, class Tout> |
3144 | | inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData, |
3145 | | int nSrcPixelStride, |
3146 | | Tout *const CPL_RESTRICT pDstData, |
3147 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3148 | 0 | { |
3149 | 0 | decltype(nWordCount) nDstOffset = 0; |
3150 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); |
3151 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
3152 | |
|
3153 | 0 | for (decltype(nWordCount) n = 0; n < nWordCount; n++) |
3154 | 0 | { |
3155 | 0 | const Tin *const pPixelIn = |
3156 | 0 | reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride); |
3157 | 0 | Tout *const pPixelOut = |
3158 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
3159 | |
|
3160 | 0 | GDALCopyWord(pPixelIn[0], pPixelOut[0]); |
3161 | 0 | GDALCopyWord(pPixelIn[1], pPixelOut[1]); |
3162 | |
|
3163 | 0 | nDstOffset += nDstPixelStride; |
3164 | 0 | } |
3165 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, short>(unsigned char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, int>(unsigned char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, float>(unsigned char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, double>(unsigned char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, short>(unsigned short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, int>(unsigned short const*, int, int*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, float>(unsigned short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, double>(unsigned short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, short>(unsigned int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, double>(unsigned int 
const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, cpl::Float16>(long const*, int, 
cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, short>(float const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, double>(float const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, short>(double const*, int, 
short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, float>(double const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, double>(double const*, int, double*, int, long long) |
3166 | | |
3167 | | /************************************************************************/ |
3168 | | /* GDALCopyWordsComplexOutT() */ |
3169 | | /************************************************************************/ |
3170 | | /** |
3171 | | * Template function, used to copy data from pSrcData into buffer |
3172 | | * pDstData, with stride nSrcPixelStride in the source data and |
3173 | | * stride nDstPixelStride in the destination data. Deals with the |
3174 | | * case where the value is real coming in, but complex going out. |
3175 | | * |
3176 | | * @param pSrcData the source data buffer |
3177 | | * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels |
3178 | | * of interest, in bytes. |
3179 | | * @param pDstData the destination buffer. |
3180 | | * @param nDstPixelStride the stride in the buffer pDstData for pixels of |
3181 | | * interest, in bytes. |
3182 | | * @param nWordCount the total number of pixel words to copy |
3183 | | * |
3184 | | */ |
3185 | | template <class Tin, class Tout> |
3186 | | inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData, |
3187 | | int nSrcPixelStride, |
3188 | | Tout *const CPL_RESTRICT pDstData, |
3189 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3190 | 0 | { |
3191 | 0 | decltype(nWordCount) nDstOffset = 0; |
3192 | |
|
3193 | 0 | const Tout tOutZero = static_cast<Tout>(0); |
3194 | |
|
3195 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); |
3196 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
3197 | |
|
3198 | 0 | for (decltype(nWordCount) n = 0; n < nWordCount; n++) |
3199 | 0 | { |
3200 | 0 | const Tin tValue = |
3201 | 0 | *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride); |
3202 | 0 | Tout *const pPixelOut = |
3203 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
3204 | 0 | GDALCopyWord(tValue, *pPixelOut); |
3205 | |
|
3206 | 0 | pPixelOut[1] = tOutZero; |
3207 | |
|
3208 | 0 | nDstOffset += nDstPixelStride; |
3209 | 0 | } |
3210 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, short>(unsigned char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, int>(unsigned char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, float>(unsigned char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, double>(unsigned char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, short>(unsigned short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, int>(unsigned short const*, int, int*, int, long long) 
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, float>(unsigned short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, double>(unsigned short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, short>(unsigned int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsComplexOutT<unsigned int, double>(unsigned int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, short>(float const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, double>(float const*, int, double*, 
int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, short>(double const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, float>(double const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, double>(double const*, int, double*, int, long long) |
3211 | | |
3212 | | /************************************************************************/ |
3213 | | /* GDALCopyWordsFromT() */ |
3214 | | /************************************************************************/ |
3215 | | /** |
3216 | | * Template driver function. Given the input type T, call the appropriate |
3217 | | * GDALCopyWordsT function template for the desired output type. You should |
3218 | | * never call this function directly (call GDALCopyWords instead). |
3219 | | * |
3220 | | * @param pSrcData source data buffer |
3221 | | * @param nSrcPixelStride pixel stride in input buffer, in pixel words |
3222 | | * @param bInComplex input is complex |
3223 | | * @param pDstData destination data buffer |
3224 | | * @param eDstType destination data type |
3225 | | * @param nDstPixelStride pixel stride in output buffer, in pixel words |
3226 | | * @param nWordCount number of pixel words to be copied |
3227 | | */ |
3228 | | template <class T> |
3229 | | inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData, |
3230 | | int nSrcPixelStride, bool bInComplex, |
3231 | | void *CPL_RESTRICT pDstData, |
3232 | | GDALDataType eDstType, int nDstPixelStride, |
3233 | | GPtrDiff_t nWordCount) |
3234 | 0 | { |
3235 | 0 | switch (eDstType) |
3236 | 0 | { |
3237 | 0 | case GDT_UInt8: |
3238 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3239 | 0 | static_cast<unsigned char *>(pDstData), |
3240 | 0 | nDstPixelStride, nWordCount); |
3241 | 0 | break; |
3242 | 0 | case GDT_Int8: |
3243 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3244 | 0 | static_cast<signed char *>(pDstData), |
3245 | 0 | nDstPixelStride, nWordCount); |
3246 | 0 | break; |
3247 | 0 | case GDT_UInt16: |
3248 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3249 | 0 | static_cast<unsigned short *>(pDstData), |
3250 | 0 | nDstPixelStride, nWordCount); |
3251 | 0 | break; |
3252 | 0 | case GDT_Int16: |
3253 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3254 | 0 | static_cast<short *>(pDstData), nDstPixelStride, |
3255 | 0 | nWordCount); |
3256 | 0 | break; |
3257 | 0 | case GDT_UInt32: |
3258 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3259 | 0 | static_cast<unsigned int *>(pDstData), |
3260 | 0 | nDstPixelStride, nWordCount); |
3261 | 0 | break; |
3262 | 0 | case GDT_Int32: |
3263 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3264 | 0 | static_cast<int *>(pDstData), nDstPixelStride, |
3265 | 0 | nWordCount); |
3266 | 0 | break; |
3267 | 0 | case GDT_UInt64: |
3268 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3269 | 0 | static_cast<std::uint64_t *>(pDstData), |
3270 | 0 | nDstPixelStride, nWordCount); |
3271 | 0 | break; |
3272 | 0 | case GDT_Int64: |
3273 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3274 | 0 | static_cast<std::int64_t *>(pDstData), |
3275 | 0 | nDstPixelStride, nWordCount); |
3276 | 0 | break; |
3277 | 0 | case GDT_Float16: |
3278 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3279 | 0 | static_cast<GFloat16 *>(pDstData), nDstPixelStride, |
3280 | 0 | nWordCount); |
3281 | 0 | break; |
3282 | 0 | case GDT_Float32: |
3283 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3284 | 0 | static_cast<float *>(pDstData), nDstPixelStride, |
3285 | 0 | nWordCount); |
3286 | 0 | break; |
3287 | 0 | case GDT_Float64: |
3288 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
3289 | 0 | static_cast<double *>(pDstData), nDstPixelStride, |
3290 | 0 | nWordCount); |
3291 | 0 | break; |
3292 | 0 | case GDT_CInt16: |
3293 | 0 | if (bInComplex) |
3294 | 0 | { |
3295 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
3296 | 0 | static_cast<short *>(pDstData), |
3297 | 0 | nDstPixelStride, nWordCount); |
3298 | 0 | } |
3299 | 0 | else // input is not complex, so we need to promote to a complex |
3300 | | // buffer |
3301 | 0 | { |
3302 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
3303 | 0 | static_cast<short *>(pDstData), |
3304 | 0 | nDstPixelStride, nWordCount); |
3305 | 0 | } |
3306 | 0 | break; |
3307 | 0 | case GDT_CInt32: |
3308 | 0 | if (bInComplex) |
3309 | 0 | { |
3310 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
3311 | 0 | static_cast<int *>(pDstData), |
3312 | 0 | nDstPixelStride, nWordCount); |
3313 | 0 | } |
3314 | 0 | else // input is not complex, so we need to promote to a complex |
3315 | | // buffer |
3316 | 0 | { |
3317 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
3318 | 0 | static_cast<int *>(pDstData), |
3319 | 0 | nDstPixelStride, nWordCount); |
3320 | 0 | } |
3321 | 0 | break; |
3322 | 0 | case GDT_CFloat16: |
3323 | 0 | if (bInComplex) |
3324 | 0 | { |
3325 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
3326 | 0 | static_cast<GFloat16 *>(pDstData), |
3327 | 0 | nDstPixelStride, nWordCount); |
3328 | 0 | } |
3329 | 0 | else // input is not complex, so we need to promote to a complex |
3330 | | // buffer |
3331 | 0 | { |
3332 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
3333 | 0 | static_cast<GFloat16 *>(pDstData), |
3334 | 0 | nDstPixelStride, nWordCount); |
3335 | 0 | } |
3336 | 0 | break; |
3337 | 0 | case GDT_CFloat32: |
3338 | 0 | if (bInComplex) |
3339 | 0 | { |
3340 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
3341 | 0 | static_cast<float *>(pDstData), |
3342 | 0 | nDstPixelStride, nWordCount); |
3343 | 0 | } |
3344 | 0 | else // input is not complex, so we need to promote to a complex |
3345 | | // buffer |
3346 | 0 | { |
3347 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
3348 | 0 | static_cast<float *>(pDstData), |
3349 | 0 | nDstPixelStride, nWordCount); |
3350 | 0 | } |
3351 | 0 | break; |
3352 | 0 | case GDT_CFloat64: |
3353 | 0 | if (bInComplex) |
3354 | 0 | { |
3355 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
3356 | 0 | static_cast<double *>(pDstData), |
3357 | 0 | nDstPixelStride, nWordCount); |
3358 | 0 | } |
3359 | 0 | else // input is not complex, so we need to promote to a complex |
3360 | | // buffer |
3361 | 0 | { |
3362 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
3363 | 0 | static_cast<double *>(pDstData), |
3364 | 0 | nDstPixelStride, nWordCount); |
3365 | 0 | } |
3366 | 0 | break; |
3367 | 0 | case GDT_Unknown: |
3368 | 0 | case GDT_TypeCount: |
3369 | 0 | CPLAssert(false); |
3370 | 0 | } |
3371 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned char>(unsigned char const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<signed char>(signed char const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned short>(unsigned short const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<short>(short const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned int>(unsigned int const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<int>(int const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned long>(unsigned long const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<long>(long const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<cpl::Float16>(cpl::Float16 const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<float>(float const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<double>(double const*, int, bool, void*, GDALDataType, int, long long) |
3372 | | |
3373 | | } // end anonymous namespace |
3374 | | |
3375 | | /************************************************************************/ |
3376 | | /* GDALReplicateWord() */ |
3377 | | /************************************************************************/ |
3378 | | |
3379 | | template <class T> |
3380 | | inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride, |
3381 | | GPtrDiff_t nWordCount) |
3382 | 0 | { |
3383 | 0 | const T valSet = *static_cast<const T *>(pDstData); |
3384 | 0 | if (nDstPixelStride == static_cast<int>(sizeof(T))) |
3385 | 0 | { |
3386 | 0 | T *pDstPtr = static_cast<T *>(pDstData) + 1; |
3387 | 0 | while (nWordCount >= 4) |
3388 | 0 | { |
3389 | 0 | nWordCount -= 4; |
3390 | 0 | pDstPtr[0] = valSet; |
3391 | 0 | pDstPtr[1] = valSet; |
3392 | 0 | pDstPtr[2] = valSet; |
3393 | 0 | pDstPtr[3] = valSet; |
3394 | 0 | pDstPtr += 4; |
3395 | 0 | } |
3396 | 0 | while (nWordCount > 0) |
3397 | 0 | { |
3398 | 0 | --nWordCount; |
3399 | 0 | *pDstPtr = valSet; |
3400 | 0 | pDstPtr++; |
3401 | 0 | } |
3402 | 0 | } |
3403 | 0 | else |
3404 | 0 | { |
3405 | 0 | GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride; |
3406 | 0 | while (nWordCount > 0) |
3407 | 0 | { |
3408 | 0 | --nWordCount; |
3409 | 0 | *reinterpret_cast<T *>(pabyDstPtr) = valSet; |
3410 | 0 | pabyDstPtr += nDstPixelStride; |
3411 | 0 | } |
3412 | 0 | } |
3413 | 0 | } Unexecuted instantiation: void GDALReplicateWordT<unsigned short>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<short>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<unsigned int>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<int>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<unsigned long>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<long>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<cpl::Float16>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<float>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<double>(void*, int, long long) |
3414 | | |
3415 | | static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData, |
3416 | | GDALDataType eSrcType, |
3417 | | void *CPL_RESTRICT pDstData, |
3418 | | GDALDataType eDstType, int nDstPixelStride, |
3419 | | GPtrDiff_t nWordCount) |
3420 | 0 | { |
3421 | | /* ----------------------------------------------------------------------- |
3422 | | */ |
3423 | | /* Special case when the source data is always the same value */ |
3424 | | /* (for VRTSourcedRasterBand::IRasterIO and |
3425 | | * VRTDerivedRasterBand::IRasterIO*/ |
3426 | | /* for example) */ |
3427 | | /* ----------------------------------------------------------------------- |
3428 | | */ |
3429 | | // Let the general translation case do the necessary conversions |
3430 | | // on the first destination element. |
3431 | 0 | GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1); |
3432 | | |
3433 | | // Now copy the first element to the nWordCount - 1 following destination |
3434 | | // elements. |
3435 | 0 | nWordCount--; |
3436 | 0 | GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride; |
3437 | |
|
3438 | 0 | switch (eDstType) |
3439 | 0 | { |
3440 | 0 | case GDT_UInt8: |
3441 | 0 | case GDT_Int8: |
3442 | 0 | { |
3443 | 0 | if (nDstPixelStride == 1) |
3444 | 0 | { |
3445 | 0 | if (nWordCount > 0) |
3446 | 0 | memset(pabyDstWord, |
3447 | 0 | *reinterpret_cast<const GByte *>(pDstData), |
3448 | 0 | nWordCount); |
3449 | 0 | } |
3450 | 0 | else |
3451 | 0 | { |
3452 | 0 | GByte valSet = *reinterpret_cast<const GByte *>(pDstData); |
3453 | 0 | while (nWordCount > 0) |
3454 | 0 | { |
3455 | 0 | --nWordCount; |
3456 | 0 | *pabyDstWord = valSet; |
3457 | 0 | pabyDstWord += nDstPixelStride; |
3458 | 0 | } |
3459 | 0 | } |
3460 | 0 | break; |
3461 | 0 | } |
3462 | | |
3463 | 0 | #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \ |
3464 | 0 | case enum_type: \ |
3465 | 0 | { \ |
3466 | 0 | GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \ |
3467 | 0 | break; \ |
3468 | 0 | } |
3469 | | |
3470 | 0 | CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16) |
3471 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16) |
3472 | 0 | CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32) |
3473 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32) |
3474 | 0 | CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t) |
3475 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t) |
3476 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Float16, GFloat16) |
3477 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Float32, float) |
3478 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Float64, double) |
3479 | | |
3480 | 0 | #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \ |
3481 | 0 | case enum_type: \ |
3482 | 0 | { \ |
3483 | 0 | c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0]; \ |
3484 | 0 | c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1]; \ |
3485 | 0 | while (nWordCount > 0) \ |
3486 | 0 | { \ |
3487 | 0 | --nWordCount; \ |
3488 | 0 | reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1; \ |
3489 | 0 | reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2; \ |
3490 | 0 | pabyDstWord += nDstPixelStride; \ |
3491 | 0 | } \ |
3492 | 0 | break; \ |
3493 | 0 | } |
3494 | | |
3495 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16) |
3496 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32) |
3497 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CFloat16, GFloat16) |
3498 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float) |
3499 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double) |
3500 | | |
3501 | 0 | case GDT_Unknown: |
3502 | 0 | case GDT_TypeCount: |
3503 | 0 | CPLAssert(false); |
3504 | 0 | } |
3505 | 0 | } |
3506 | | |
3507 | | /************************************************************************/ |
3508 | | /* GDALUnrolledCopy() */ |
3509 | | /************************************************************************/ |
3510 | | |
3511 | | template <class T, int srcStride, int dstStride> |
3512 | | #if defined(__GNUC__) && defined(__AVX2__) |
3513 | | __attribute__((optimize("tree-vectorize"))) |
3514 | | #endif |
3515 | | static inline void |
3516 | | GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest, const T *CPL_RESTRICT pSrc, |
3517 | | GPtrDiff_t nIters) |
3518 | 0 | { |
3519 | 0 | #if !(defined(__GNUC__) && defined(__AVX2__)) |
3520 | 0 | if (nIters >= 16) |
3521 | 0 | { |
3522 | 0 | for (GPtrDiff_t i = nIters / 16; i != 0; i--) |
3523 | 0 | { |
3524 | 0 | pDest[0 * dstStride] = pSrc[0 * srcStride]; |
3525 | 0 | pDest[1 * dstStride] = pSrc[1 * srcStride]; |
3526 | 0 | pDest[2 * dstStride] = pSrc[2 * srcStride]; |
3527 | 0 | pDest[3 * dstStride] = pSrc[3 * srcStride]; |
3528 | 0 | pDest[4 * dstStride] = pSrc[4 * srcStride]; |
3529 | 0 | pDest[5 * dstStride] = pSrc[5 * srcStride]; |
3530 | 0 | pDest[6 * dstStride] = pSrc[6 * srcStride]; |
3531 | 0 | pDest[7 * dstStride] = pSrc[7 * srcStride]; |
3532 | 0 | pDest[8 * dstStride] = pSrc[8 * srcStride]; |
3533 | 0 | pDest[9 * dstStride] = pSrc[9 * srcStride]; |
3534 | 0 | pDest[10 * dstStride] = pSrc[10 * srcStride]; |
3535 | 0 | pDest[11 * dstStride] = pSrc[11 * srcStride]; |
3536 | 0 | pDest[12 * dstStride] = pSrc[12 * srcStride]; |
3537 | 0 | pDest[13 * dstStride] = pSrc[13 * srcStride]; |
3538 | 0 | pDest[14 * dstStride] = pSrc[14 * srcStride]; |
3539 | 0 | pDest[15 * dstStride] = pSrc[15 * srcStride]; |
3540 | 0 | pDest += 16 * dstStride; |
3541 | 0 | pSrc += 16 * srcStride; |
3542 | 0 | } |
3543 | 0 | nIters = nIters % 16; |
3544 | 0 | } |
3545 | | #else |
3546 | | #pragma GCC unroll 4 |
3547 | | #endif |
3548 | 0 | for (GPtrDiff_t i = 0; i < nIters; i++) |
3549 | 0 | { |
3550 | 0 | pDest[i * dstStride] = *pSrc; |
3551 | 0 | pSrc += srcStride; |
3552 | 0 | } |
3553 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 2, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 3, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 4, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 2>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 3>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 4>(short*, short const*, long long) |
3554 | | |
3555 | | template <class T, int srcStride, int dstStride> |
3556 | | static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest, |
3557 | | const T *CPL_RESTRICT pSrc, |
3558 | | GPtrDiff_t nIters) |
3559 | 0 | { |
3560 | 0 | GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters); |
3561 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 2, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 3, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 4, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 2>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 3>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 4>(short*, short const*, long long) |
3562 | | |
3563 | | #if defined(__AVX2__) && defined(HAVE_SSSE3_AT_COMPILE_TIME) && \ |
3564 | | (defined(__x86_64) || defined(_M_X64) || defined(USE_NEON_OPTIMIZATIONS)) |
3565 | | |
3566 | | template <> |
3567 | | void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest, |
3568 | | const GByte *CPL_RESTRICT pSrc, |
3569 | | GPtrDiff_t nIters) |
3570 | | { |
3571 | | if (nIters > 16) |
3572 | | { |
3573 | | // The SSSE3 variant is slightly faster than what the gcc autovectorizer |
3574 | | // generates |
3575 | | GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters); |
3576 | | } |
3577 | | else |
3578 | | { |
3579 | | for (GPtrDiff_t i = 0; i < nIters; i++) |
3580 | | { |
3581 | | pDest[i] = *pSrc; |
3582 | | pSrc += 3; |
3583 | | } |
3584 | | } |
3585 | | } |
3586 | | |
3587 | | #elif defined(HAVE_SSE2) && !(defined(__GNUC__) && defined(__AVX2__)) |
3588 | | |
3589 | | template <> |
3590 | | void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest, |
3591 | | const GByte *CPL_RESTRICT pSrc, |
3592 | | GPtrDiff_t nIters) |
3593 | 0 | { |
3594 | 0 | decltype(nIters) i = 0; |
3595 | 0 | if (nIters > 16) |
3596 | 0 | { |
3597 | 0 | const __m128i xmm_mask = _mm_set1_epi16(0xff); |
3598 | | // If we were sure that there would always be 1 trailing byte, we could |
3599 | | // check against nIters - 15 |
3600 | 0 | for (; i < nIters - 16; i += 16) |
3601 | 0 | { |
3602 | 0 | __m128i xmm0 = |
3603 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0)); |
3604 | 0 | __m128i xmm1 = |
3605 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16)); |
3606 | | // Set higher 8bit of each int16 packed word to 0 |
3607 | 0 | xmm0 = _mm_and_si128(xmm0, xmm_mask); |
3608 | 0 | xmm1 = _mm_and_si128(xmm1, xmm_mask); |
3609 | | // Pack int16 to uint8 and merge back both vector |
3610 | 0 | xmm0 = _mm_packus_epi16(xmm0, xmm1); |
3611 | | |
3612 | | // Store result |
3613 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0); |
3614 | |
|
3615 | 0 | pSrc += 2 * 16; |
3616 | 0 | } |
3617 | 0 | } |
3618 | 0 | for (; i < nIters; i++) |
3619 | 0 | { |
3620 | 0 | pDest[i] = *pSrc; |
3621 | 0 | pSrc += 2; |
3622 | 0 | } |
3623 | 0 | } |
3624 | | |
3625 | | static void GDALUnrolledCopy_GByte_3_1_SSE2(GByte *CPL_RESTRICT pDest, |
3626 | | const GByte *CPL_RESTRICT pSrc, |
3627 | | GPtrDiff_t nIters) |
3628 | 0 | { |
3629 | 0 | decltype(nIters) i = 0; |
3630 | 0 | const __m128i xmm_mask_ori = _mm_set_epi32(0, 0, 0, 255); |
3631 | | // If we were sure that there would always be 2 trailing bytes, we could |
3632 | | // check against nIters - 15 |
3633 | 0 | for (; i < nIters - 16; i += 16) |
3634 | 0 | { |
3635 | 0 | __m128i xmm0 = |
3636 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0)); |
3637 | 0 | __m128i xmm1 = |
3638 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16)); |
3639 | 0 | __m128i xmm2 = |
3640 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32)); |
3641 | |
|
3642 | 0 | auto xmm_mask0 = xmm_mask_ori; |
3643 | 0 | auto xmm_mask1 = _mm_slli_si128(xmm_mask_ori, 6); |
3644 | 0 | auto xmm_mask2 = _mm_slli_si128(xmm_mask_ori, 11); |
3645 | |
|
3646 | 0 | auto xmm = _mm_and_si128(xmm0, xmm_mask0); |
3647 | 0 | auto xmm_res1 = _mm_and_si128(_mm_slli_si128(xmm1, 4), xmm_mask1); |
3648 | |
|
3649 | 0 | xmm_mask0 = _mm_slli_si128(xmm_mask0, 1); |
3650 | 0 | xmm_mask1 = _mm_slli_si128(xmm_mask1, 1); |
3651 | 0 | xmm0 = _mm_srli_si128(xmm0, 2); |
3652 | 0 | xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0)); |
3653 | 0 | xmm_res1 = _mm_or_si128( |
3654 | 0 | xmm_res1, _mm_and_si128(_mm_slli_si128(xmm1, 2), xmm_mask1)); |
3655 | |
|
3656 | 0 | xmm_mask0 = _mm_slli_si128(xmm_mask0, 1); |
3657 | 0 | xmm_mask1 = _mm_slli_si128(xmm_mask1, 1); |
3658 | 0 | xmm0 = _mm_srli_si128(xmm0, 2); |
3659 | 0 | xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0)); |
3660 | 0 | xmm_res1 = _mm_or_si128(xmm_res1, _mm_and_si128(xmm1, xmm_mask1)); |
3661 | |
|
3662 | 0 | xmm_mask0 = _mm_slli_si128(xmm_mask0, 1); |
3663 | 0 | xmm_mask1 = _mm_slli_si128(xmm_mask1, 1); |
3664 | 0 | xmm0 = _mm_srli_si128(xmm0, 2); |
3665 | 0 | xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0)); |
3666 | 0 | xmm_res1 = _mm_or_si128( |
3667 | 0 | xmm_res1, _mm_and_si128(_mm_srli_si128(xmm1, 2), xmm_mask1)); |
3668 | |
|
3669 | 0 | xmm_mask0 = _mm_slli_si128(xmm_mask0, 1); |
3670 | 0 | xmm_mask1 = _mm_slli_si128(xmm_mask1, 1); |
3671 | 0 | xmm0 = _mm_srli_si128(xmm0, 2); |
3672 | 0 | xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0)); |
3673 | 0 | xmm_res1 = _mm_or_si128( |
3674 | 0 | xmm_res1, _mm_and_si128(_mm_srli_si128(xmm1, 4), xmm_mask1)); |
3675 | 0 | xmm = _mm_or_si128(xmm, xmm_res1); |
3676 | |
|
3677 | 0 | xmm_mask0 = _mm_slli_si128(xmm_mask0, 1); |
3678 | 0 | xmm0 = _mm_srli_si128(xmm0, 2); |
3679 | 0 | xmm = _mm_or_si128(xmm, _mm_and_si128(xmm0, xmm_mask0)); |
3680 | |
|
3681 | 0 | xmm = _mm_or_si128(xmm, |
3682 | 0 | _mm_and_si128(_mm_slli_si128(xmm2, 10), xmm_mask2)); |
3683 | |
|
3684 | 0 | xmm_mask2 = _mm_slli_si128(xmm_mask2, 1); |
3685 | 0 | xmm = _mm_or_si128(xmm, |
3686 | 0 | _mm_and_si128(_mm_slli_si128(xmm2, 8), xmm_mask2)); |
3687 | |
|
3688 | 0 | xmm_mask2 = _mm_slli_si128(xmm_mask2, 1); |
3689 | 0 | xmm = _mm_or_si128(xmm, |
3690 | 0 | _mm_and_si128(_mm_slli_si128(xmm2, 6), xmm_mask2)); |
3691 | |
|
3692 | 0 | xmm_mask2 = _mm_slli_si128(xmm_mask2, 1); |
3693 | 0 | xmm = _mm_or_si128(xmm, |
3694 | 0 | _mm_and_si128(_mm_slli_si128(xmm2, 4), xmm_mask2)); |
3695 | |
|
3696 | 0 | xmm_mask2 = _mm_slli_si128(xmm_mask2, 1); |
3697 | 0 | xmm = _mm_or_si128(xmm, |
3698 | 0 | _mm_and_si128(_mm_slli_si128(xmm2, 2), xmm_mask2)); |
3699 | |
|
3700 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm); |
3701 | |
|
3702 | 0 | pSrc += 3 * 16; |
3703 | 0 | } |
3704 | 0 | for (; i < nIters; i++) |
3705 | 0 | { |
3706 | 0 | pDest[i] = *pSrc; |
3707 | 0 | pSrc += 3; |
3708 | 0 | } |
3709 | 0 | } |
3710 | | |
3711 | | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
3712 | | |
3713 | | template <> |
3714 | | void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest, |
3715 | | const GByte *CPL_RESTRICT pSrc, |
3716 | | GPtrDiff_t nIters) |
3717 | 0 | { |
3718 | 0 | if (nIters > 16) |
3719 | 0 | { |
3720 | 0 | if (CPLHaveRuntimeSSSE3()) |
3721 | 0 | { |
3722 | 0 | GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters); |
3723 | 0 | } |
3724 | 0 | else |
3725 | 0 | { |
3726 | 0 | GDALUnrolledCopy_GByte_3_1_SSE2(pDest, pSrc, nIters); |
3727 | 0 | } |
3728 | 0 | } |
3729 | 0 | else |
3730 | 0 | { |
3731 | 0 | for (GPtrDiff_t i = 0; i < nIters; i++) |
3732 | 0 | { |
3733 | 0 | pDest[i] = *pSrc; |
3734 | 0 | pSrc += 3; |
3735 | 0 | } |
3736 | 0 | } |
3737 | 0 | } |
3738 | | |
3739 | | #else |
3740 | | |
3741 | | template <> |
3742 | | void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest, |
3743 | | const GByte *CPL_RESTRICT pSrc, |
3744 | | GPtrDiff_t nIters) |
3745 | | { |
3746 | | GDALUnrolledCopy_GByte_3_1_SSE2(pDest, pSrc, nIters); |
3747 | | } |
3748 | | #endif |
3749 | | |
3750 | | template <> |
3751 | | void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest, |
3752 | | const GByte *CPL_RESTRICT pSrc, |
3753 | | GPtrDiff_t nIters) |
3754 | 0 | { |
3755 | 0 | decltype(nIters) i = 0; |
3756 | 0 | if (nIters > 16) |
3757 | 0 | { |
3758 | 0 | const __m128i xmm_mask = _mm_set1_epi32(0xff); |
3759 | | // If we were sure that there would always be 3 trailing bytes, we could |
3760 | | // check against nIters - 15 |
3761 | 0 | for (; i < nIters - 16; i += 16) |
3762 | 0 | { |
3763 | 0 | __m128i xmm0 = |
3764 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0)); |
3765 | 0 | __m128i xmm1 = |
3766 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16)); |
3767 | 0 | __m128i xmm2 = |
3768 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32)); |
3769 | 0 | __m128i xmm3 = |
3770 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48)); |
3771 | | // Set higher 24bit of each int32 packed word to 0 |
3772 | 0 | xmm0 = _mm_and_si128(xmm0, xmm_mask); |
3773 | 0 | xmm1 = _mm_and_si128(xmm1, xmm_mask); |
3774 | 0 | xmm2 = _mm_and_si128(xmm2, xmm_mask); |
3775 | 0 | xmm3 = _mm_and_si128(xmm3, xmm_mask); |
3776 | | // Pack int32 to int16 |
3777 | 0 | xmm0 = _mm_packs_epi32(xmm0, xmm1); |
3778 | 0 | xmm2 = _mm_packs_epi32(xmm2, xmm3); |
3779 | | // Pack int16 to uint8 |
3780 | 0 | xmm0 = _mm_packus_epi16(xmm0, xmm2); |
3781 | | |
3782 | | // Store result |
3783 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0); |
3784 | |
|
3785 | 0 | pSrc += 4 * 16; |
3786 | 0 | } |
3787 | 0 | } |
3788 | 0 | for (; i < nIters; i++) |
3789 | 0 | { |
3790 | 0 | pDest[i] = *pSrc; |
3791 | 0 | pSrc += 4; |
3792 | 0 | } |
3793 | 0 | } |
3794 | | #endif // HAVE_SSE2 |
3795 | | |
3796 | | /************************************************************************/ |
3797 | | /* GDALFastCopy() */ |
3798 | | /************************************************************************/ |
3799 | | |
3800 | | template <class T> |
3801 | | static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride, |
3802 | | const T *CPL_RESTRICT pSrc, int nSrcStride, |
3803 | | GPtrDiff_t nIters) |
3804 | 0 | { |
3805 | 0 | constexpr int sizeofT = static_cast<int>(sizeof(T)); |
3806 | 0 | if (nIters == 1) |
3807 | 0 | { |
3808 | 0 | *pDest = *pSrc; |
3809 | 0 | } |
3810 | 0 | else if (nDestStride == sizeofT) |
3811 | 0 | { |
3812 | 0 | if (nSrcStride == sizeofT) |
3813 | 0 | { |
3814 | 0 | memcpy(pDest, pSrc, nIters * sizeof(T)); |
3815 | 0 | } |
3816 | 0 | else if (nSrcStride == 2 * sizeofT) |
3817 | 0 | { |
3818 | 0 | GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters); |
3819 | 0 | } |
3820 | 0 | else if (nSrcStride == 3 * sizeofT) |
3821 | 0 | { |
3822 | 0 | GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters); |
3823 | 0 | } |
3824 | 0 | else if (nSrcStride == 4 * sizeofT) |
3825 | 0 | { |
3826 | 0 | GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters); |
3827 | 0 | } |
3828 | 0 | else |
3829 | 0 | { |
3830 | 0 | while (nIters-- > 0) |
3831 | 0 | { |
3832 | 0 | *pDest = *pSrc; |
3833 | 0 | pSrc += nSrcStride / sizeofT; |
3834 | 0 | pDest++; |
3835 | 0 | } |
3836 | 0 | } |
3837 | 0 | } |
3838 | 0 | else if (nSrcStride == sizeofT) |
3839 | 0 | { |
3840 | 0 | if (nDestStride == 2 * sizeofT) |
3841 | 0 | { |
3842 | 0 | GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters); |
3843 | 0 | } |
3844 | 0 | else if (nDestStride == 3 * sizeofT) |
3845 | 0 | { |
3846 | 0 | GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters); |
3847 | 0 | } |
3848 | 0 | else if (nDestStride == 4 * sizeofT) |
3849 | 0 | { |
3850 | 0 | GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters); |
3851 | 0 | } |
3852 | 0 | else |
3853 | 0 | { |
3854 | 0 | while (nIters-- > 0) |
3855 | 0 | { |
3856 | 0 | *pDest = *pSrc; |
3857 | 0 | pSrc++; |
3858 | 0 | pDest += nDestStride / sizeofT; |
3859 | 0 | } |
3860 | 0 | } |
3861 | 0 | } |
3862 | 0 | else |
3863 | 0 | { |
3864 | 0 | while (nIters-- > 0) |
3865 | 0 | { |
3866 | 0 | *pDest = *pSrc; |
3867 | 0 | pSrc += nSrcStride / sizeofT; |
3868 | 0 | pDest += nDestStride / sizeofT; |
3869 | 0 | } |
3870 | 0 | } |
3871 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<unsigned char>(unsigned char*, int, unsigned char const*, int, long long) Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<short>(short*, int, short const*, int, long long) |
3872 | | |
3873 | | /************************************************************************/ |
3874 | | /* GDALFastCopyByte() */ |
3875 | | /************************************************************************/ |
3876 | | |
3877 | | static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData, |
3878 | | int nSrcPixelStride, GByte *CPL_RESTRICT pDstData, |
3879 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3880 | 0 | { |
3881 | 0 | GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride, |
3882 | 0 | nWordCount); |
3883 | 0 | } |
3884 | | |
3885 | | /************************************************************************/ |
3886 | | /* GDALCopyWords() */ |
3887 | | /************************************************************************/ |
3888 | | |
3889 | | /** |
3890 | | * Copy pixel words from buffer to buffer. |
3891 | | * |
3892 | | * @see GDALCopyWords64() |
3893 | | */ |
3894 | | void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData, |
3895 | | GDALDataType eSrcType, int nSrcPixelStride, |
3896 | | void *CPL_RESTRICT pDstData, |
3897 | | GDALDataType eDstType, int nDstPixelStride, |
3898 | | int nWordCount) |
3899 | 0 | { |
3900 | 0 | GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType, |
3901 | 0 | nDstPixelStride, nWordCount); |
3902 | 0 | } |
3903 | | |
3904 | | /************************************************************************/ |
3905 | | /* GDALCopyWords64() */ |
3906 | | /************************************************************************/ |
3907 | | |
3908 | | /** |
3909 | | * Copy pixel words from buffer to buffer. |
3910 | | * |
3911 | | * This function is used to copy pixel word values from one memory buffer |
3912 | | * to another, with support for conversion between data types, and differing |
3913 | | * step factors. The data type conversion is done using the following |
3914 | | * rules: |
3915 | | * <ul> |
3916 | | * <li>Values assigned to a lower range integer type are clipped. For |
3917 | | * instance assigning GDT_Int16 values to a GDT_UInt8 buffer will cause values |
3918 | | * less the 0 to be set to 0, and values larger than 255 to be set to 255. |
3919 | | * </li> |
3920 | | * <li> |
3921 | | * Assignment from floating point to integer rounds to closest integer. |
3922 | | * +Infinity is mapped to the largest integer. -Infinity is mapped to the |
3923 | | * smallest integer. NaN is mapped to 0. |
3924 | | * </li> |
3925 | | * <li> |
3926 | | * Assignment from non-complex to complex will result in the imaginary part |
3927 | | * being set to zero on output. |
3928 | | * </li> |
3929 | | * <li> Assignment from complex to |
3930 | | * non-complex will result in the complex portion being lost and the real |
3931 | | * component being preserved (<i>not magnitude!</i>). |
3932 | | * </li> |
3933 | | * </ul> |
3934 | | * |
3935 | | * No assumptions are made about the source or destination words occurring |
3936 | | * on word boundaries. It is assumed that all values are in native machine |
3937 | | * byte order. |
3938 | | * |
3939 | | * @param pSrcData Pointer to source data to be converted. |
3940 | | * @param eSrcType the source data type (see GDALDataType enum) |
3941 | | * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words), |
3942 | | * in bytes |
3943 | | * @param pDstData Pointer to buffer where destination data should go |
3944 | | * @param eDstType the destination data type (see GDALDataType enum) |
3945 | | * @param nDstPixelStride Destination pixel stride (i.e. distance between 2 |
3946 | | * words), in bytes |
3947 | | * @param nWordCount number of words to be copied |
3948 | | * |
3949 | | * @note |
3950 | | * When adding a new data type to GDAL, you must do the following to |
3951 | | * support it properly within the GDALCopyWords function: |
3952 | | * 1. Add the data type to the switch on eSrcType in GDALCopyWords. |
3953 | | * This should invoke the appropriate GDALCopyWordsFromT wrapper. |
3954 | | * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT. |
3955 | | * This should call the appropriate GDALCopyWordsT template. |
3956 | | * 3. If appropriate, overload the appropriate CopyWord template in the |
3957 | | * above namespace. This will ensure that any conversion issues are |
3958 | | * handled (cases like the float -> int32 case, where the min/max) |
3959 | | * values are subject to roundoff error. |
3960 | | */ |
3961 | | |
3962 | | void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData, |
3963 | | GDALDataType eSrcType, int nSrcPixelStride, |
3964 | | void *CPL_RESTRICT pDstData, |
3965 | | GDALDataType eDstType, int nDstPixelStride, |
3966 | | GPtrDiff_t nWordCount) |
3967 | | |
3968 | 0 | { |
3969 | | // On platforms where alignment matters, be careful |
3970 | 0 | const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType); |
3971 | 0 | const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType); |
3972 | 0 | if (CPL_UNLIKELY(nSrcDataTypeSize == 0 || nDstDataTypeSize == 0)) |
3973 | 0 | { |
3974 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
3975 | 0 | "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount " |
3976 | 0 | "argument"); |
3977 | 0 | return; |
3978 | 0 | } |
3979 | 0 | if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) && |
3980 | 0 | ((reinterpret_cast<uintptr_t>(pSrcData) % nSrcDataTypeSize) != 0 || |
3981 | 0 | (reinterpret_cast<uintptr_t>(pDstData) % nDstDataTypeSize) != 0 || |
3982 | 0 | (nSrcPixelStride % nSrcDataTypeSize) != 0 || |
3983 | 0 | (nDstPixelStride % nDstDataTypeSize) != 0)) |
3984 | 0 | { |
3985 | 0 | if (eSrcType == eDstType) |
3986 | 0 | { |
3987 | 0 | for (decltype(nWordCount) i = 0; i < nWordCount; i++) |
3988 | 0 | { |
3989 | 0 | memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i, |
3990 | 0 | static_cast<const GByte *>(pSrcData) + |
3991 | 0 | nSrcPixelStride * i, |
3992 | 0 | nDstDataTypeSize); |
3993 | 0 | } |
3994 | 0 | } |
3995 | 0 | else |
3996 | 0 | { |
3997 | 0 | const auto getAlignedPtr = [](GByte *ptr, int align) |
3998 | 0 | { |
3999 | 0 | return ptr + |
4000 | 0 | ((align - (reinterpret_cast<uintptr_t>(ptr) % align)) % |
4001 | 0 | align); |
4002 | 0 | }; |
4003 | | |
4004 | | // The largest we need is for CFloat64 (16 bytes), so 32 bytes to |
4005 | | // be sure to get correctly aligned pointer. |
4006 | 0 | constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double); |
4007 | 0 | GByte abySrcBuffer[2 * SIZEOF_CFLOAT64]; |
4008 | 0 | GByte abyDstBuffer[2 * SIZEOF_CFLOAT64]; |
4009 | 0 | GByte *pabySrcBuffer = |
4010 | 0 | getAlignedPtr(abySrcBuffer, nSrcDataTypeSize); |
4011 | 0 | GByte *pabyDstBuffer = |
4012 | 0 | getAlignedPtr(abyDstBuffer, nDstDataTypeSize); |
4013 | 0 | for (decltype(nWordCount) i = 0; i < nWordCount; i++) |
4014 | 0 | { |
4015 | 0 | memcpy(pabySrcBuffer, |
4016 | 0 | static_cast<const GByte *>(pSrcData) + |
4017 | 0 | nSrcPixelStride * i, |
4018 | 0 | nSrcDataTypeSize); |
4019 | 0 | GDALCopyWords64(pabySrcBuffer, eSrcType, 0, pabyDstBuffer, |
4020 | 0 | eDstType, 0, 1); |
4021 | 0 | memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i, |
4022 | 0 | pabyDstBuffer, nDstDataTypeSize); |
4023 | 0 | } |
4024 | 0 | } |
4025 | 0 | return; |
4026 | 0 | } |
4027 | | |
4028 | | // Deal with the case where we're replicating a single word into the |
4029 | | // provided buffer |
4030 | 0 | if (nSrcPixelStride == 0 && nWordCount > 1) |
4031 | 0 | { |
4032 | 0 | GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType, |
4033 | 0 | nDstPixelStride, nWordCount); |
4034 | 0 | return; |
4035 | 0 | } |
4036 | | |
4037 | 0 | if (eSrcType == eDstType) |
4038 | 0 | { |
4039 | 0 | if (eSrcType == GDT_UInt8 || eSrcType == GDT_Int8) |
4040 | 0 | { |
4041 | 0 | GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride, |
4042 | 0 | static_cast<const GByte *>(pSrcData), nSrcPixelStride, |
4043 | 0 | nWordCount); |
4044 | 0 | return; |
4045 | 0 | } |
4046 | | |
4047 | 0 | if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 && |
4048 | 0 | (nDstPixelStride % 2) == 0) |
4049 | 0 | { |
4050 | 0 | GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride, |
4051 | 0 | static_cast<const short *>(pSrcData), nSrcPixelStride, |
4052 | 0 | nWordCount); |
4053 | 0 | return; |
4054 | 0 | } |
4055 | | |
4056 | 0 | if (nWordCount == 1) |
4057 | 0 | { |
4058 | | #if defined(CSA_BUILD) || defined(__COVERITY__) |
4059 | | // Avoid false positives... |
4060 | | memcpy(pDstData, pSrcData, nSrcDataTypeSize); |
4061 | | #else |
4062 | 0 | if (nSrcDataTypeSize == 2) |
4063 | 0 | memcpy(pDstData, pSrcData, 2); |
4064 | 0 | else if (nSrcDataTypeSize == 4) |
4065 | 0 | memcpy(pDstData, pSrcData, 4); |
4066 | 0 | else if (nSrcDataTypeSize == 8) |
4067 | 0 | memcpy(pDstData, pSrcData, 8); |
4068 | 0 | else /* if( eSrcType == GDT_CFloat64 ) */ |
4069 | 0 | memcpy(pDstData, pSrcData, 16); |
4070 | 0 | #endif |
4071 | 0 | return; |
4072 | 0 | } |
4073 | | |
4074 | | // Let memcpy() handle the case where we're copying a packed buffer |
4075 | | // of pixels. |
4076 | 0 | if (nSrcPixelStride == nDstPixelStride) |
4077 | 0 | { |
4078 | 0 | if (nSrcPixelStride == nSrcDataTypeSize) |
4079 | 0 | { |
4080 | 0 | memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize); |
4081 | 0 | return; |
4082 | 0 | } |
4083 | 0 | } |
4084 | 0 | } |
4085 | | |
4086 | | // Handle the more general case -- deals with conversion of data types |
4087 | | // directly. |
4088 | 0 | switch (eSrcType) |
4089 | 0 | { |
4090 | 0 | case GDT_UInt8: |
4091 | 0 | GDALCopyWordsFromT<unsigned char>( |
4092 | 0 | static_cast<const unsigned char *>(pSrcData), nSrcPixelStride, |
4093 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
4094 | 0 | break; |
4095 | 0 | case GDT_Int8: |
4096 | 0 | GDALCopyWordsFromT<signed char>( |
4097 | 0 | static_cast<const signed char *>(pSrcData), nSrcPixelStride, |
4098 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
4099 | 0 | break; |
4100 | 0 | case GDT_UInt16: |
4101 | 0 | GDALCopyWordsFromT<unsigned short>( |
4102 | 0 | static_cast<const unsigned short *>(pSrcData), nSrcPixelStride, |
4103 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
4104 | 0 | break; |
4105 | 0 | case GDT_Int16: |
4106 | 0 | GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData), |
4107 | 0 | nSrcPixelStride, false, pDstData, |
4108 | 0 | eDstType, nDstPixelStride, nWordCount); |
4109 | 0 | break; |
4110 | 0 | case GDT_UInt32: |
4111 | 0 | GDALCopyWordsFromT<unsigned int>( |
4112 | 0 | static_cast<const unsigned int *>(pSrcData), nSrcPixelStride, |
4113 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
4114 | 0 | break; |
4115 | 0 | case GDT_Int32: |
4116 | 0 | GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData), |
4117 | 0 | nSrcPixelStride, false, pDstData, eDstType, |
4118 | 0 | nDstPixelStride, nWordCount); |
4119 | 0 | break; |
4120 | 0 | case GDT_UInt64: |
4121 | 0 | GDALCopyWordsFromT<std::uint64_t>( |
4122 | 0 | static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride, |
4123 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
4124 | 0 | break; |
4125 | 0 | case GDT_Int64: |
4126 | 0 | GDALCopyWordsFromT<std::int64_t>( |
4127 | 0 | static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride, |
4128 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
4129 | 0 | break; |
4130 | 0 | case GDT_Float16: |
4131 | 0 | GDALCopyWordsFromT<GFloat16>( |
4132 | 0 | static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, false, |
4133 | 0 | pDstData, eDstType, nDstPixelStride, nWordCount); |
4134 | 0 | break; |
4135 | 0 | case GDT_Float32: |
4136 | 0 | GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData), |
4137 | 0 | nSrcPixelStride, false, pDstData, |
4138 | 0 | eDstType, nDstPixelStride, nWordCount); |
4139 | 0 | break; |
4140 | 0 | case GDT_Float64: |
4141 | 0 | GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData), |
4142 | 0 | nSrcPixelStride, false, pDstData, |
4143 | 0 | eDstType, nDstPixelStride, nWordCount); |
4144 | 0 | break; |
4145 | 0 | case GDT_CInt16: |
4146 | 0 | GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData), |
4147 | 0 | nSrcPixelStride, true, pDstData, eDstType, |
4148 | 0 | nDstPixelStride, nWordCount); |
4149 | 0 | break; |
4150 | 0 | case GDT_CInt32: |
4151 | 0 | GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData), |
4152 | 0 | nSrcPixelStride, true, pDstData, eDstType, |
4153 | 0 | nDstPixelStride, nWordCount); |
4154 | 0 | break; |
4155 | 0 | case GDT_CFloat16: |
4156 | 0 | GDALCopyWordsFromT<GFloat16>( |
4157 | 0 | static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, true, |
4158 | 0 | pDstData, eDstType, nDstPixelStride, nWordCount); |
4159 | 0 | break; |
4160 | 0 | case GDT_CFloat32: |
4161 | 0 | GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData), |
4162 | 0 | nSrcPixelStride, true, pDstData, eDstType, |
4163 | 0 | nDstPixelStride, nWordCount); |
4164 | 0 | break; |
4165 | 0 | case GDT_CFloat64: |
4166 | 0 | GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData), |
4167 | 0 | nSrcPixelStride, true, pDstData, |
4168 | 0 | eDstType, nDstPixelStride, nWordCount); |
4169 | 0 | break; |
4170 | 0 | case GDT_Unknown: |
4171 | 0 | case GDT_TypeCount: |
4172 | 0 | CPLAssert(false); |
4173 | 0 | } |
4174 | 0 | } |
4175 | | |
4176 | | /************************************************************************/ |
4177 | | /* GDALCopyBits() */ |
4178 | | /************************************************************************/ |
4179 | | |
4180 | | /** |
4181 | | * Bitwise word copying. |
4182 | | * |
4183 | | * A function for moving sets of partial bytes around. Loosely |
4184 | | * speaking this is a bitwise analog to GDALCopyWords(). |
4185 | | * |
4186 | | * It copies nStepCount "words" where each word is nBitCount bits long. |
4187 | | * The nSrcStep and nDstStep are the number of bits from the start of one |
4188 | | * word to the next (same as nBitCount if they are packed). The nSrcOffset |
4189 | | * and nDstOffset are the offset into the source and destination buffers |
4190 | | * to start at, also measured in bits. |
4191 | | * |
4192 | | * All bit offsets are assumed to start from the high order bit in a byte |
4193 | | * (i.e. most significant bit first). Currently this function is not very |
4194 | | * optimized, but it may be improved for some common cases in the future |
4195 | | * as needed. |
4196 | | * |
4197 | | * @param pabySrcData the source data buffer. |
4198 | | * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the |
4199 | | * first word to copy. |
4200 | | * @param nSrcStep the offset in bits from the start one source word to the |
4201 | | * start of the next. |
4202 | | * @param pabyDstData the destination data buffer. |
4203 | | * @param nDstOffset the offset (in bits) in pabyDstData to the start of the |
4204 | | * first word to copy over. |
4205 | | * @param nDstStep the offset in bits from the start one word to the |
4206 | | * start of the next. |
4207 | | * @param nBitCount the number of bits in a word to be copied. |
4208 | | * @param nStepCount the number of words to copy. |
4209 | | */ |
4210 | | |
4211 | | void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep, |
4212 | | GByte *pabyDstData, int nDstOffset, int nDstStep, |
4213 | | int nBitCount, int nStepCount) |
4214 | | |
4215 | 0 | { |
4216 | 0 | VALIDATE_POINTER0(pabySrcData, "GDALCopyBits"); |
4217 | | |
4218 | 0 | for (int iStep = 0; iStep < nStepCount; iStep++) |
4219 | 0 | { |
4220 | 0 | for (int iBit = 0; iBit < nBitCount; iBit++) |
4221 | 0 | { |
4222 | 0 | if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7))) |
4223 | 0 | pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7)); |
4224 | 0 | else |
4225 | 0 | pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7)); |
4226 | |
|
4227 | 0 | nSrcOffset++; |
4228 | 0 | nDstOffset++; |
4229 | 0 | } |
4230 | |
|
4231 | 0 | nSrcOffset += (nSrcStep - nBitCount); |
4232 | 0 | nDstOffset += (nDstStep - nBitCount); |
4233 | 0 | } |
4234 | 0 | } |
4235 | | |
4236 | | /************************************************************************/ |
4237 | | /* GDALGetBestOverviewLevel() */ |
4238 | | /* */ |
4239 | | /* Returns the best overview level to satisfy the query or -1 if none */ |
4240 | | /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when */ |
4241 | | /* returning a valid overview level */ |
4242 | | /************************************************************************/ |
4243 | | |
4244 | | int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff, |
4245 | | int &nXSize, int &nYSize, int nBufXSize, |
4246 | | int nBufYSize) |
4247 | 0 | { |
4248 | 0 | return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize, |
4249 | 0 | nBufXSize, nBufYSize, nullptr); |
4250 | 0 | } |
4251 | | |
4252 | | int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff, |
4253 | | int &nYOff, int &nXSize, int &nYSize, |
4254 | | int nBufXSize, int nBufYSize, |
4255 | | GDALRasterIOExtraArg *psExtraArg) |
4256 | 0 | { |
4257 | 0 | if (psExtraArg != nullptr && psExtraArg->nVersion > 1 && |
4258 | 0 | psExtraArg->bUseOnlyThisScale) |
4259 | 0 | return -1; |
4260 | | /* -------------------------------------------------------------------- */ |
4261 | | /* Compute the desired downsampling factor. It is */ |
4262 | | /* based on the least reduced axis, and represents the number */ |
4263 | | /* of source pixels to one destination pixel. */ |
4264 | | /* -------------------------------------------------------------------- */ |
4265 | 0 | const double dfDesiredDownsamplingFactor = |
4266 | 0 | ((nXSize / static_cast<double>(nBufXSize)) < |
4267 | 0 | (nYSize / static_cast<double>(nBufYSize)) || |
4268 | 0 | nBufYSize == 1) |
4269 | 0 | ? nXSize / static_cast<double>(nBufXSize) |
4270 | 0 | : nYSize / static_cast<double>(nBufYSize); |
4271 | | |
4272 | | /* -------------------------------------------------------------------- */ |
4273 | | /* Find the overview level that largest downsampling factor (most */ |
4274 | | /* downsampled) that is still less than (or only a little more) */ |
4275 | | /* downsampled than the request. */ |
4276 | | /* -------------------------------------------------------------------- */ |
4277 | 0 | const int nOverviewCount = poBand->GetOverviewCount(); |
4278 | 0 | GDALRasterBand *poBestOverview = nullptr; |
4279 | 0 | double dfBestDownsamplingFactor = 0; |
4280 | 0 | int nBestOverviewLevel = -1; |
4281 | |
|
4282 | 0 | const char *pszOversampligThreshold = |
4283 | 0 | CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr); |
4284 | | |
4285 | | // Note: keep this logic for overview selection in sync between |
4286 | | // gdalwarp_lib.cpp and rasterio.cpp |
4287 | | // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693 |
4288 | 0 | const double dfOversamplingThreshold = |
4289 | 0 | pszOversampligThreshold ? CPLAtof(pszOversampligThreshold) |
4290 | 0 | : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour |
4291 | 0 | ? 1.0 |
4292 | 0 | : 1.2; |
4293 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; iOverview++) |
4294 | 0 | { |
4295 | 0 | GDALRasterBand *poOverview = poBand->GetOverview(iOverview); |
4296 | 0 | if (poOverview == nullptr || |
4297 | 0 | poOverview->GetXSize() > poBand->GetXSize() || |
4298 | 0 | poOverview->GetYSize() > poBand->GetYSize()) |
4299 | 0 | { |
4300 | 0 | continue; |
4301 | 0 | } |
4302 | | |
4303 | | // Compute downsampling factor of this overview |
4304 | 0 | const double dfDownsamplingFactor = std::min( |
4305 | 0 | poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()), |
4306 | 0 | poBand->GetYSize() / static_cast<double>(poOverview->GetYSize())); |
4307 | | |
4308 | | // Is it nearly the requested factor and better (lower) than |
4309 | | // the current best factor? |
4310 | | // Use an epsilon because of numerical instability. |
4311 | 0 | constexpr double EPSILON = 1e-1; |
4312 | 0 | if (dfDownsamplingFactor >= |
4313 | 0 | dfDesiredDownsamplingFactor * dfOversamplingThreshold + |
4314 | 0 | EPSILON || |
4315 | 0 | dfDownsamplingFactor <= dfBestDownsamplingFactor) |
4316 | 0 | { |
4317 | 0 | continue; |
4318 | 0 | } |
4319 | | |
4320 | | // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes. |
4321 | 0 | const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING"); |
4322 | |
|
4323 | 0 | if (pszResampling != nullptr && |
4324 | 0 | STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2")) |
4325 | 0 | continue; |
4326 | | |
4327 | | // OK, this is our new best overview. |
4328 | 0 | poBestOverview = poOverview; |
4329 | 0 | nBestOverviewLevel = iOverview; |
4330 | 0 | dfBestDownsamplingFactor = dfDownsamplingFactor; |
4331 | |
|
4332 | 0 | if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) < |
4333 | 0 | EPSILON) |
4334 | 0 | { |
4335 | 0 | break; |
4336 | 0 | } |
4337 | 0 | } |
4338 | | |
4339 | | /* -------------------------------------------------------------------- */ |
4340 | | /* If we didn't find an overview that helps us, just return */ |
4341 | | /* indicating failure and the full resolution image will be used. */ |
4342 | | /* -------------------------------------------------------------------- */ |
4343 | 0 | if (nBestOverviewLevel < 0) |
4344 | 0 | return -1; |
4345 | | |
4346 | | /* -------------------------------------------------------------------- */ |
4347 | | /* Recompute the source window in terms of the selected */ |
4348 | | /* overview. */ |
4349 | | /* -------------------------------------------------------------------- */ |
4350 | 0 | const double dfXFactor = |
4351 | 0 | poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize()); |
4352 | 0 | const double dfYFactor = |
4353 | 0 | poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize()); |
4354 | 0 | CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(), |
4355 | 0 | poBestOverview->GetYSize()); |
4356 | |
|
4357 | 0 | const int nOXOff = std::min(poBestOverview->GetXSize() - 1, |
4358 | 0 | static_cast<int>(nXOff / dfXFactor + 0.5)); |
4359 | 0 | const int nOYOff = std::min(poBestOverview->GetYSize() - 1, |
4360 | 0 | static_cast<int>(nYOff / dfYFactor + 0.5)); |
4361 | 0 | int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5)); |
4362 | 0 | int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5)); |
4363 | 0 | if (nOXOff + nOXSize > poBestOverview->GetXSize()) |
4364 | 0 | nOXSize = poBestOverview->GetXSize() - nOXOff; |
4365 | 0 | if (nOYOff + nOYSize > poBestOverview->GetYSize()) |
4366 | 0 | nOYSize = poBestOverview->GetYSize() - nOYOff; |
4367 | |
|
4368 | 0 | if (psExtraArg) |
4369 | 0 | { |
4370 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
4371 | 0 | { |
4372 | 0 | psExtraArg->dfXOff /= dfXFactor; |
4373 | 0 | psExtraArg->dfXSize /= dfXFactor; |
4374 | 0 | psExtraArg->dfYOff /= dfYFactor; |
4375 | 0 | psExtraArg->dfYSize /= dfYFactor; |
4376 | 0 | } |
4377 | 0 | else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour) |
4378 | 0 | { |
4379 | 0 | psExtraArg->bFloatingPointWindowValidity = true; |
4380 | 0 | psExtraArg->dfXOff = nXOff / dfXFactor; |
4381 | 0 | psExtraArg->dfXSize = nXSize / dfXFactor; |
4382 | 0 | psExtraArg->dfYOff = nYOff / dfYFactor; |
4383 | 0 | psExtraArg->dfYSize = nYSize / dfYFactor; |
4384 | 0 | } |
4385 | 0 | } |
4386 | |
|
4387 | 0 | nXOff = nOXOff; |
4388 | 0 | nYOff = nOYOff; |
4389 | 0 | nXSize = nOXSize; |
4390 | 0 | nYSize = nOYSize; |
4391 | |
|
4392 | 0 | return nBestOverviewLevel; |
4393 | 0 | } |
4394 | | |
4395 | | /************************************************************************/ |
4396 | | /* OverviewRasterIO() */ |
4397 | | /* */ |
4398 | | /* Special work function to utilize available overviews to */ |
4399 | | /* more efficiently satisfy downsampled requests. It will */ |
4400 | | /* return CE_Failure if there are no appropriate overviews */ |
4401 | | /* available but it doesn't emit any error messages. */ |
4402 | | /************************************************************************/ |
4403 | | |
4404 | | //! @cond Doxygen_Suppress |
4405 | | CPLErr GDALRasterBand::OverviewRasterIO( |
4406 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
4407 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
4408 | | GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg) |
4409 | | |
4410 | 0 | { |
4411 | 0 | GDALRasterIOExtraArg sExtraArg; |
4412 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
4413 | |
|
4414 | 0 | const int nOverview = GDALBandGetBestOverviewLevel2( |
4415 | 0 | this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, &sExtraArg); |
4416 | 0 | if (nOverview < 0) |
4417 | 0 | return CE_Failure; |
4418 | | |
4419 | | /* -------------------------------------------------------------------- */ |
4420 | | /* Recast the call in terms of the new raster layer. */ |
4421 | | /* -------------------------------------------------------------------- */ |
4422 | 0 | GDALRasterBand *poOverviewBand = GetOverview(nOverview); |
4423 | 0 | if (poOverviewBand == nullptr) |
4424 | 0 | return CE_Failure; |
4425 | | |
4426 | 0 | return poOverviewBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
4427 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
4428 | 0 | nPixelSpace, nLineSpace, &sExtraArg); |
4429 | 0 | } |
4430 | | |
4431 | | /************************************************************************/ |
4432 | | /* TryOverviewRasterIO() */ |
4433 | | /************************************************************************/ |
4434 | | |
4435 | | CPLErr GDALRasterBand::TryOverviewRasterIO( |
4436 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
4437 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
4438 | | GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg, |
4439 | | int *pbTried) |
4440 | 0 | { |
4441 | 0 | int nXOffMod = nXOff; |
4442 | 0 | int nYOffMod = nYOff; |
4443 | 0 | int nXSizeMod = nXSize; |
4444 | 0 | int nYSizeMod = nYSize; |
4445 | 0 | GDALRasterIOExtraArg sExtraArg; |
4446 | |
|
4447 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
4448 | |
|
4449 | 0 | int iOvrLevel = GDALBandGetBestOverviewLevel2( |
4450 | 0 | this, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, nBufYSize, |
4451 | 0 | &sExtraArg); |
4452 | |
|
4453 | 0 | if (iOvrLevel >= 0) |
4454 | 0 | { |
4455 | 0 | GDALRasterBand *poOverviewBand = GetOverview(iOvrLevel); |
4456 | 0 | if (poOverviewBand) |
4457 | 0 | { |
4458 | 0 | *pbTried = TRUE; |
4459 | 0 | return poOverviewBand->RasterIO( |
4460 | 0 | eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, |
4461 | 0 | nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace, |
4462 | 0 | &sExtraArg); |
4463 | 0 | } |
4464 | 0 | } |
4465 | | |
4466 | 0 | *pbTried = FALSE; |
4467 | 0 | return CE_None; |
4468 | 0 | } |
4469 | | |
4470 | | /************************************************************************/ |
4471 | | /* TryOverviewRasterIO() */ |
4472 | | /************************************************************************/ |
4473 | | |
4474 | | CPLErr GDALDataset::TryOverviewRasterIO( |
4475 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
4476 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
4477 | | int nBandCount, const int *panBandMap, GSpacing nPixelSpace, |
4478 | | GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg, |
4479 | | int *pbTried) |
4480 | 0 | { |
4481 | 0 | int nXOffMod = nXOff; |
4482 | 0 | int nYOffMod = nYOff; |
4483 | 0 | int nXSizeMod = nXSize; |
4484 | 0 | int nYSizeMod = nYSize; |
4485 | 0 | GDALRasterIOExtraArg sExtraArg; |
4486 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
4487 | |
|
4488 | 0 | int iOvrLevel = GDALBandGetBestOverviewLevel2( |
4489 | 0 | papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, |
4490 | 0 | nBufYSize, &sExtraArg); |
4491 | |
|
4492 | 0 | if (iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr && |
4493 | 0 | papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr) |
4494 | 0 | { |
4495 | 0 | *pbTried = TRUE; |
4496 | 0 | return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO( |
4497 | 0 | eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, nBufXSize, |
4498 | 0 | nBufYSize, eBufType, nBandCount, panBandMap, nPixelSpace, |
4499 | 0 | nLineSpace, nBandSpace, &sExtraArg); |
4500 | 0 | } |
4501 | 0 | else |
4502 | 0 | { |
4503 | 0 | *pbTried = FALSE; |
4504 | 0 | return CE_None; |
4505 | 0 | } |
4506 | 0 | } |
4507 | | |
4508 | | /************************************************************************/ |
4509 | | /* GetBestOverviewLevel() */ |
4510 | | /* */ |
4511 | | /* Returns the best overview level to satisfy the query or -1 if none */ |
4512 | | /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid */ |
4513 | | /* overview level */ |
4514 | | /************************************************************************/ |
4515 | | |
4516 | | static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff, |
4517 | | int &nYOff, int &nXSize, int &nYSize, |
4518 | | int nBufXSize, int nBufYSize, |
4519 | | int nBandCount, |
4520 | | const int *panBandMap, |
4521 | | GDALRasterIOExtraArg *psExtraArg) |
4522 | 0 | { |
4523 | 0 | int nOverviewCount = 0; |
4524 | 0 | GDALRasterBand *poFirstBand = nullptr; |
4525 | | |
4526 | | /* -------------------------------------------------------------------- */ |
4527 | | /* Check that all bands have the same number of overviews and */ |
4528 | | /* that they have all the same size and block dimensions */ |
4529 | | /* -------------------------------------------------------------------- */ |
4530 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4531 | 0 | { |
4532 | 0 | GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]); |
4533 | 0 | if (poBand == nullptr) |
4534 | 0 | return -1; |
4535 | 0 | if (iBand == 0) |
4536 | 0 | { |
4537 | 0 | poFirstBand = poBand; |
4538 | 0 | nOverviewCount = poBand->GetOverviewCount(); |
4539 | 0 | } |
4540 | 0 | else if (nOverviewCount != poBand->GetOverviewCount()) |
4541 | 0 | { |
4542 | 0 | CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... " |
4543 | 0 | "mismatched overview count, use std method."); |
4544 | 0 | return -1; |
4545 | 0 | } |
4546 | 0 | else |
4547 | 0 | { |
4548 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; iOverview++) |
4549 | 0 | { |
4550 | 0 | GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview); |
4551 | 0 | GDALRasterBand *poOvrFirstBand = |
4552 | 0 | poFirstBand->GetOverview(iOverview); |
4553 | 0 | if (poOvrBand == nullptr || poOvrFirstBand == nullptr) |
4554 | 0 | continue; |
4555 | | |
4556 | 0 | if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() || |
4557 | 0 | poOvrFirstBand->GetYSize() != poOvrBand->GetYSize()) |
4558 | 0 | { |
4559 | 0 | CPLDebug("GDAL", |
4560 | 0 | "GDALDataset::GetBestOverviewLevel() ... " |
4561 | 0 | "mismatched overview sizes, use std method."); |
4562 | 0 | return -1; |
4563 | 0 | } |
4564 | 0 | int nBlockXSizeFirst = 0; |
4565 | 0 | int nBlockYSizeFirst = 0; |
4566 | 0 | poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst, |
4567 | 0 | &nBlockYSizeFirst); |
4568 | |
|
4569 | 0 | int nBlockXSizeCurrent = 0; |
4570 | 0 | int nBlockYSizeCurrent = 0; |
4571 | 0 | poOvrBand->GetBlockSize(&nBlockXSizeCurrent, |
4572 | 0 | &nBlockYSizeCurrent); |
4573 | |
|
4574 | 0 | if (nBlockXSizeFirst != nBlockXSizeCurrent || |
4575 | 0 | nBlockYSizeFirst != nBlockYSizeCurrent) |
4576 | 0 | { |
4577 | 0 | CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... " |
4578 | 0 | "mismatched block sizes, use std method."); |
4579 | 0 | return -1; |
4580 | 0 | } |
4581 | 0 | } |
4582 | 0 | } |
4583 | 0 | } |
4584 | 0 | if (poFirstBand == nullptr) |
4585 | 0 | return -1; |
4586 | | |
4587 | 0 | return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize, |
4588 | 0 | nYSize, nBufXSize, nBufYSize, |
4589 | 0 | psExtraArg); |
4590 | 0 | } |
4591 | | |
4592 | | /************************************************************************/ |
4593 | | /* BlockBasedRasterIO() */ |
4594 | | /* */ |
4595 | | /* This convenience function implements a dataset level */ |
4596 | | /* RasterIO() interface based on calling down to fetch blocks, */ |
4597 | | /* much like the GDALRasterBand::IRasterIO(), but it handles */ |
4598 | | /* all bands at once, so that a format driver that handles a */ |
4599 | | /* request for different bands of the same block efficiently */ |
4600 | | /* (i.e. without re-reading interleaved data) will run efficiently. */ |
4601 | | /* */ |
4602 | | /* This method is intended to be called by an overridden */ |
4603 | | /* IRasterIO() method in the driver specific GDALDataset */ |
4604 | | /* derived class. */ |
4605 | | /* */ |
4606 | | /* Default internal implementation of RasterIO() ... utilizes */ |
4607 | | /* the Block access methods to satisfy the request. This would */ |
4608 | | /* normally only be overridden by formats with overviews. */ |
4609 | | /* */ |
4610 | | /* To keep things relatively simple, this method does not */ |
4611 | | /* currently take advantage of some special cases addressed in */ |
4612 | | /* GDALRasterBand::IRasterIO(), so it is likely best to only */ |
4613 | | /* call it when you know it will help. That is in cases where */ |
4614 | | /* data is at 1:1 to the buffer, and you know the driver is */ |
4615 | | /* implementing interleaved IO efficiently on a block by block */ |
4616 | | /* basis. Overviews will be used when possible. */ |
4617 | | /************************************************************************/ |
4618 | | |
4619 | | CPLErr GDALDataset::BlockBasedRasterIO( |
4620 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
4621 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
4622 | | int nBandCount, const int *panBandMap, GSpacing nPixelSpace, |
4623 | | GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg) |
4624 | | |
4625 | 0 | { |
4626 | 0 | CPLAssert(nullptr != pData); |
4627 | | |
4628 | 0 | GByte **papabySrcBlock = nullptr; |
4629 | 0 | GDALRasterBlock *poBlock = nullptr; |
4630 | 0 | GDALRasterBlock **papoBlocks = nullptr; |
4631 | 0 | int nLBlockX = -1; |
4632 | 0 | int nLBlockY = -1; |
4633 | 0 | int iBufYOff; |
4634 | 0 | int iBufXOff; |
4635 | 0 | int nBlockXSize = 1; |
4636 | 0 | int nBlockYSize = 1; |
4637 | 0 | CPLErr eErr = CE_None; |
4638 | 0 | GDALDataType eDataType = GDT_UInt8; |
4639 | |
|
4640 | 0 | const bool bUseIntegerRequestCoords = |
4641 | 0 | (!psExtraArg->bFloatingPointWindowValidity || |
4642 | 0 | (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff && |
4643 | 0 | nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize)); |
4644 | | |
4645 | | /* -------------------------------------------------------------------- */ |
4646 | | /* Ensure that all bands share a common block size and data type. */ |
4647 | | /* -------------------------------------------------------------------- */ |
4648 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4649 | 0 | { |
4650 | 0 | GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]); |
4651 | |
|
4652 | 0 | if (iBand == 0) |
4653 | 0 | { |
4654 | 0 | poBand->GetBlockSize(&nBlockXSize, &nBlockYSize); |
4655 | 0 | eDataType = poBand->GetRasterDataType(); |
4656 | 0 | } |
4657 | 0 | else |
4658 | 0 | { |
4659 | 0 | int nThisBlockXSize = 0; |
4660 | 0 | int nThisBlockYSize = 0; |
4661 | 0 | poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize); |
4662 | 0 | if (nThisBlockXSize != nBlockXSize || |
4663 | 0 | nThisBlockYSize != nBlockYSize) |
4664 | 0 | { |
4665 | 0 | CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... " |
4666 | 0 | "mismatched block sizes, use std method."); |
4667 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
4668 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
4669 | 0 | nBandCount, panBandMap, nPixelSpace, |
4670 | 0 | nLineSpace, nBandSpace, psExtraArg); |
4671 | 0 | } |
4672 | | |
4673 | 0 | if (eDataType != poBand->GetRasterDataType() && |
4674 | 0 | (nXSize != nBufXSize || nYSize != nBufYSize)) |
4675 | 0 | { |
4676 | 0 | CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... " |
4677 | 0 | "mismatched band data types, use std method."); |
4678 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
4679 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
4680 | 0 | nBandCount, panBandMap, nPixelSpace, |
4681 | 0 | nLineSpace, nBandSpace, psExtraArg); |
4682 | 0 | } |
4683 | 0 | } |
4684 | 0 | } |
4685 | | |
4686 | | /* ==================================================================== */ |
4687 | | /* In this special case at full resolution we step through in */ |
4688 | | /* blocks, turning the request over to the per-band */ |
4689 | | /* IRasterIO(), but ensuring that all bands of one block are */ |
4690 | | /* called before proceeding to the next. */ |
4691 | | /* ==================================================================== */ |
4692 | | |
4693 | 0 | if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords) |
4694 | 0 | { |
4695 | 0 | GDALRasterIOExtraArg sDummyExtraArg; |
4696 | 0 | INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg); |
4697 | |
|
4698 | 0 | int nChunkYSize = 0; |
4699 | 0 | int nChunkXSize = 0; |
4700 | |
|
4701 | 0 | for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize) |
4702 | 0 | { |
4703 | 0 | const int nChunkYOff = iBufYOff + nYOff; |
4704 | 0 | nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize); |
4705 | 0 | if (nChunkYOff + nChunkYSize > nYOff + nYSize) |
4706 | 0 | nChunkYSize = (nYOff + nYSize) - nChunkYOff; |
4707 | |
|
4708 | 0 | for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize) |
4709 | 0 | { |
4710 | 0 | const int nChunkXOff = iBufXOff + nXOff; |
4711 | 0 | nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize); |
4712 | 0 | if (nChunkXOff + nChunkXSize > nXOff + nXSize) |
4713 | 0 | nChunkXSize = (nXOff + nXSize) - nChunkXOff; |
4714 | |
|
4715 | 0 | GByte *pabyChunkData = |
4716 | 0 | static_cast<GByte *>(pData) + iBufXOff * nPixelSpace + |
4717 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace; |
4718 | |
|
4719 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4720 | 0 | { |
4721 | 0 | GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]); |
4722 | |
|
4723 | 0 | eErr = poBand->IRasterIO( |
4724 | 0 | eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize, |
4725 | 0 | nChunkYSize, |
4726 | 0 | pabyChunkData + |
4727 | 0 | static_cast<GPtrDiff_t>(iBand) * nBandSpace, |
4728 | 0 | nChunkXSize, nChunkYSize, eBufType, nPixelSpace, |
4729 | 0 | nLineSpace, &sDummyExtraArg); |
4730 | 0 | if (eErr != CE_None) |
4731 | 0 | return eErr; |
4732 | 0 | } |
4733 | 0 | } |
4734 | | |
4735 | 0 | if (psExtraArg->pfnProgress != nullptr && |
4736 | 0 | !psExtraArg->pfnProgress( |
4737 | 0 | 1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) / |
4738 | 0 | nBufYSize, |
4739 | 0 | "", psExtraArg->pProgressData)) |
4740 | 0 | { |
4741 | 0 | return CE_Failure; |
4742 | 0 | } |
4743 | 0 | } |
4744 | | |
4745 | 0 | return CE_None; |
4746 | 0 | } |
4747 | | |
4748 | | /* Below code is not compatible with that case. It would need a complete */ |
4749 | | /* separate code like done in GDALRasterBand::IRasterIO. */ |
4750 | 0 | if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize)) |
4751 | 0 | { |
4752 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, |
4753 | 0 | nBufXSize, nBufYSize, eBufType, nBandCount, |
4754 | 0 | panBandMap, nPixelSpace, nLineSpace, |
4755 | 0 | nBandSpace, psExtraArg); |
4756 | 0 | } |
4757 | | |
4758 | | /* We could have a smarter implementation, but that will do for now */ |
4759 | 0 | if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour && |
4760 | 0 | (nBufXSize != nXSize || nBufYSize != nYSize)) |
4761 | 0 | { |
4762 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, |
4763 | 0 | nBufXSize, nBufYSize, eBufType, nBandCount, |
4764 | 0 | panBandMap, nPixelSpace, nLineSpace, |
4765 | 0 | nBandSpace, psExtraArg); |
4766 | 0 | } |
4767 | | |
4768 | | /* ==================================================================== */ |
4769 | | /* Loop reading required source blocks to satisfy output */ |
4770 | | /* request. This is the most general implementation. */ |
4771 | | /* ==================================================================== */ |
4772 | | |
4773 | 0 | const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType); |
4774 | |
|
4775 | 0 | papabySrcBlock = |
4776 | 0 | static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount)); |
4777 | 0 | papoBlocks = |
4778 | 0 | static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount)); |
4779 | | |
4780 | | /* -------------------------------------------------------------------- */ |
4781 | | /* Select an overview level if appropriate. */ |
4782 | | /* -------------------------------------------------------------------- */ |
4783 | |
|
4784 | 0 | GDALRasterIOExtraArg sExtraArg; |
4785 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
4786 | 0 | const int nOverviewLevel = GDALDatasetGetBestOverviewLevel( |
4787 | 0 | this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount, |
4788 | 0 | panBandMap, &sExtraArg); |
4789 | 0 | if (nOverviewLevel >= 0) |
4790 | 0 | { |
4791 | 0 | GetRasterBand(panBandMap[0]) |
4792 | 0 | ->GetOverview(nOverviewLevel) |
4793 | 0 | ->GetBlockSize(&nBlockXSize, &nBlockYSize); |
4794 | 0 | } |
4795 | |
|
4796 | 0 | double dfXOff = nXOff; |
4797 | 0 | double dfYOff = nYOff; |
4798 | 0 | double dfXSize = nXSize; |
4799 | 0 | double dfYSize = nYSize; |
4800 | 0 | if (sExtraArg.bFloatingPointWindowValidity) |
4801 | 0 | { |
4802 | 0 | dfXOff = sExtraArg.dfXOff; |
4803 | 0 | dfYOff = sExtraArg.dfYOff; |
4804 | 0 | dfXSize = sExtraArg.dfXSize; |
4805 | 0 | dfYSize = sExtraArg.dfYSize; |
4806 | 0 | } |
4807 | | |
4808 | | /* -------------------------------------------------------------------- */ |
4809 | | /* Compute stepping increment. */ |
4810 | | /* -------------------------------------------------------------------- */ |
4811 | 0 | const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize); |
4812 | 0 | const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize); |
4813 | |
|
4814 | 0 | constexpr double EPS = 1e-10; |
4815 | | /* -------------------------------------------------------------------- */ |
4816 | | /* Loop over buffer computing source locations. */ |
4817 | | /* -------------------------------------------------------------------- */ |
4818 | 0 | for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++) |
4819 | 0 | { |
4820 | 0 | GPtrDiff_t iSrcOffset; |
4821 | | |
4822 | | // Add small epsilon to avoid some numeric precision issues. |
4823 | 0 | const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS; |
4824 | 0 | const int iSrcY = static_cast<int>(std::min( |
4825 | 0 | std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1))); |
4826 | |
|
4827 | 0 | GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * |
4828 | 0 | static_cast<GPtrDiff_t>(nLineSpace); |
4829 | |
|
4830 | 0 | for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++) |
4831 | 0 | { |
4832 | 0 | const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS; |
4833 | 0 | const int iSrcX = static_cast<int>(std::min( |
4834 | 0 | std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1))); |
4835 | | |
4836 | | // FIXME: this code likely doesn't work if the dirty block gets |
4837 | | // flushed to disk before being completely written. In the meantime, |
4838 | | // bJustInitialize should probably be set to FALSE even if it is not |
4839 | | // ideal performance wise, and for lossy compression |
4840 | | |
4841 | | /* -------------------------------------------------------------------- |
4842 | | */ |
4843 | | /* Ensure we have the appropriate block loaded. */ |
4844 | | /* -------------------------------------------------------------------- |
4845 | | */ |
4846 | 0 | if (iSrcX < nLBlockX * nBlockXSize || |
4847 | 0 | iSrcX - nBlockXSize >= nLBlockX * nBlockXSize || |
4848 | 0 | iSrcY < nLBlockY * nBlockYSize || |
4849 | 0 | iSrcY - nBlockYSize >= nLBlockY * nBlockYSize) |
4850 | 0 | { |
4851 | 0 | nLBlockX = iSrcX / nBlockXSize; |
4852 | 0 | nLBlockY = iSrcY / nBlockYSize; |
4853 | |
|
4854 | 0 | const bool bJustInitialize = |
4855 | 0 | eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize && |
4856 | 0 | nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize && |
4857 | 0 | nXOff <= nLBlockX * nBlockXSize && |
4858 | 0 | nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize; |
4859 | | /*bool bMemZeroBuffer = FALSE; |
4860 | | if( eRWFlag == GF_Write && !bJustInitialize && |
4861 | | nXOff <= nLBlockX * nBlockXSize && |
4862 | | nYOff <= nLBlockY * nBlockYSize && |
4863 | | (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize || |
4864 | | (nXOff + nXSize == GetRasterXSize() && |
4865 | | (nLBlockX+1) * nBlockXSize > GetRasterXSize())) && |
4866 | | (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize || |
4867 | | (nYOff + nYSize == GetRasterYSize() && |
4868 | | (nLBlockY+1) * nBlockYSize > GetRasterYSize())) ) |
4869 | | { |
4870 | | bJustInitialize = TRUE; |
4871 | | bMemZeroBuffer = TRUE; |
4872 | | }*/ |
4873 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4874 | 0 | { |
4875 | 0 | GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]); |
4876 | 0 | if (nOverviewLevel >= 0) |
4877 | 0 | poBand = poBand->GetOverview(nOverviewLevel); |
4878 | 0 | poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, |
4879 | 0 | bJustInitialize); |
4880 | 0 | if (poBlock == nullptr) |
4881 | 0 | { |
4882 | 0 | eErr = CE_Failure; |
4883 | 0 | goto CleanupAndReturn; |
4884 | 0 | } |
4885 | | |
4886 | 0 | if (eRWFlag == GF_Write) |
4887 | 0 | poBlock->MarkDirty(); |
4888 | |
|
4889 | 0 | if (papoBlocks[iBand] != nullptr) |
4890 | 0 | papoBlocks[iBand]->DropLock(); |
4891 | |
|
4892 | 0 | papoBlocks[iBand] = poBlock; |
4893 | |
|
4894 | 0 | papabySrcBlock[iBand] = |
4895 | 0 | static_cast<GByte *>(poBlock->GetDataRef()); |
4896 | | /*if( bMemZeroBuffer ) |
4897 | | { |
4898 | | memset(papabySrcBlock[iBand], 0, |
4899 | | static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize |
4900 | | * nBlockYSize); |
4901 | | }*/ |
4902 | 0 | } |
4903 | 0 | } |
4904 | | |
4905 | | /* -------------------------------------------------------------------- |
4906 | | */ |
4907 | | /* Copy over this pixel of data. */ |
4908 | | /* -------------------------------------------------------------------- |
4909 | | */ |
4910 | 0 | iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) - |
4911 | 0 | static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize + |
4912 | 0 | (static_cast<GPtrDiff_t>(iSrcY) - |
4913 | 0 | static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) * |
4914 | 0 | nBlockXSize) * |
4915 | 0 | nBandDataSize; |
4916 | |
|
4917 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4918 | 0 | { |
4919 | 0 | GByte *pabySrcBlock = papabySrcBlock[iBand]; |
4920 | 0 | GPtrDiff_t iBandBufOffset = |
4921 | 0 | iBufOffset + static_cast<GPtrDiff_t>(iBand) * |
4922 | 0 | static_cast<GPtrDiff_t>(nBandSpace); |
4923 | |
|
4924 | 0 | if (eDataType == eBufType) |
4925 | 0 | { |
4926 | 0 | if (eRWFlag == GF_Read) |
4927 | 0 | memcpy(static_cast<GByte *>(pData) + iBandBufOffset, |
4928 | 0 | pabySrcBlock + iSrcOffset, nBandDataSize); |
4929 | 0 | else |
4930 | 0 | memcpy(pabySrcBlock + iSrcOffset, |
4931 | 0 | static_cast<const GByte *>(pData) + |
4932 | 0 | iBandBufOffset, |
4933 | 0 | nBandDataSize); |
4934 | 0 | } |
4935 | 0 | else |
4936 | 0 | { |
4937 | | /* type to type conversion ... ouch, this is expensive way |
4938 | | of handling single words */ |
4939 | |
|
4940 | 0 | if (eRWFlag == GF_Read) |
4941 | 0 | GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0, |
4942 | 0 | static_cast<GByte *>(pData) + |
4943 | 0 | iBandBufOffset, |
4944 | 0 | eBufType, 0, 1); |
4945 | 0 | else |
4946 | 0 | GDALCopyWords64(static_cast<const GByte *>(pData) + |
4947 | 0 | iBandBufOffset, |
4948 | 0 | eBufType, 0, pabySrcBlock + iSrcOffset, |
4949 | 0 | eDataType, 0, 1); |
4950 | 0 | } |
4951 | 0 | } |
4952 | |
|
4953 | 0 | iBufOffset += static_cast<int>(nPixelSpace); |
4954 | 0 | } |
4955 | 0 | } |
4956 | | |
4957 | | /* -------------------------------------------------------------------- */ |
4958 | | /* CleanupAndReturn. */ |
4959 | | /* -------------------------------------------------------------------- */ |
4960 | 0 | CleanupAndReturn: |
4961 | 0 | CPLFree(papabySrcBlock); |
4962 | 0 | if (papoBlocks != nullptr) |
4963 | 0 | { |
4964 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4965 | 0 | { |
4966 | 0 | if (papoBlocks[iBand] != nullptr) |
4967 | 0 | papoBlocks[iBand]->DropLock(); |
4968 | 0 | } |
4969 | 0 | CPLFree(papoBlocks); |
4970 | 0 | } |
4971 | |
|
4972 | 0 | return eErr; |
4973 | 0 | } |
4974 | | |
4975 | | //! @endcond |
4976 | | |
4977 | | /************************************************************************/ |
4978 | | /* GDALCopyWholeRasterGetSwathSize() */ |
4979 | | /************************************************************************/ |
4980 | | |
4981 | | static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand, |
4982 | | GDALRasterBand *poDstPrototypeBand, |
4983 | | int nBandCount, |
4984 | | int bDstIsCompressed, |
4985 | | int bInterleave, int *pnSwathCols, |
4986 | | int *pnSwathLines) |
4987 | 0 | { |
4988 | 0 | GDALDataType eDT = poDstPrototypeBand->GetRasterDataType(); |
4989 | 0 | int nSrcBlockXSize = 0; |
4990 | 0 | int nSrcBlockYSize = 0; |
4991 | 0 | int nBlockXSize = 0; |
4992 | 0 | int nBlockYSize = 0; |
4993 | |
|
4994 | 0 | int nXSize = poSrcPrototypeBand->GetXSize(); |
4995 | 0 | int nYSize = poSrcPrototypeBand->GetYSize(); |
4996 | |
|
4997 | 0 | poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize); |
4998 | 0 | poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize); |
4999 | |
|
5000 | 0 | const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize); |
5001 | 0 | const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize); |
5002 | |
|
5003 | 0 | int nPixelSize = GDALGetDataTypeSizeBytes(eDT); |
5004 | 0 | if (bInterleave) |
5005 | 0 | nPixelSize *= nBandCount; |
5006 | | |
5007 | | // aim for one row of blocks. Do not settle for less. |
5008 | 0 | int nSwathCols = nXSize; |
5009 | 0 | int nSwathLines = nMaxBlockYSize; |
5010 | |
|
5011 | 0 | const char *pszSrcCompression = |
5012 | 0 | poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE"); |
5013 | 0 | if (pszSrcCompression == nullptr) |
5014 | 0 | { |
5015 | 0 | auto poSrcDS = poSrcPrototypeBand->GetDataset(); |
5016 | 0 | if (poSrcDS) |
5017 | 0 | pszSrcCompression = |
5018 | 0 | poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE"); |
5019 | 0 | } |
5020 | | |
5021 | | /* -------------------------------------------------------------------- */ |
5022 | | /* What will our swath size be? */ |
5023 | | /* -------------------------------------------------------------------- */ |
5024 | | // When writing interleaved data in a compressed format, we want to be sure |
5025 | | // that each block will only be written once, so the swath size must not be |
5026 | | // greater than the block cache. |
5027 | 0 | const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr); |
5028 | 0 | int nTargetSwathSize; |
5029 | 0 | if (pszSwathSize != nullptr) |
5030 | 0 | nTargetSwathSize = static_cast<int>( |
5031 | 0 | std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize))); |
5032 | 0 | else |
5033 | 0 | { |
5034 | | // As a default, take one 1/4 of the cache size. |
5035 | 0 | nTargetSwathSize = static_cast<int>( |
5036 | 0 | std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4)); |
5037 | | |
5038 | | // but if the minimum idal swath buf size is less, then go for it to |
5039 | | // avoid unnecessarily abusing RAM usage. |
5040 | | // but try to use 10 MB at least. |
5041 | 0 | GIntBig nIdealSwathBufSize = |
5042 | 0 | static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize; |
5043 | 0 | int nMinTargetSwathSize = 10 * 1000 * 1000; |
5044 | |
|
5045 | 0 | if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() & |
5046 | 0 | GSBAP_LARGEST_CHUNK_POSSIBLE) != 0) |
5047 | 0 | { |
5048 | 0 | nMinTargetSwathSize = nTargetSwathSize; |
5049 | 0 | } |
5050 | |
|
5051 | 0 | if (nIdealSwathBufSize < nTargetSwathSize && |
5052 | 0 | nIdealSwathBufSize < nMinTargetSwathSize) |
5053 | 0 | { |
5054 | 0 | nIdealSwathBufSize = nMinTargetSwathSize; |
5055 | 0 | } |
5056 | |
|
5057 | 0 | if (pszSrcCompression != nullptr && |
5058 | 0 | EQUAL(pszSrcCompression, "JPEG2000") && |
5059 | 0 | (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 && |
5060 | 0 | (nSrcBlockYSize % nBlockYSize) == 0))) |
5061 | 0 | { |
5062 | 0 | nIdealSwathBufSize = |
5063 | 0 | std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) * |
5064 | 0 | nSrcBlockYSize * nPixelSize); |
5065 | 0 | } |
5066 | 0 | if (nTargetSwathSize > nIdealSwathBufSize) |
5067 | 0 | nTargetSwathSize = static_cast<int>( |
5068 | 0 | std::min(GIntBig(INT_MAX), nIdealSwathBufSize)); |
5069 | 0 | } |
5070 | |
|
5071 | 0 | if (nTargetSwathSize < 1000000) |
5072 | 0 | nTargetSwathSize = 1000000; |
5073 | | |
5074 | | /* But let's check that */ |
5075 | 0 | if (bDstIsCompressed && bInterleave && |
5076 | 0 | nTargetSwathSize > GDALGetCacheMax64()) |
5077 | 0 | { |
5078 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
5079 | 0 | "When translating into a compressed interleave format, " |
5080 | 0 | "the block cache size (" CPL_FRMT_GIB ") " |
5081 | 0 | "should be at least the size of the swath (%d) " |
5082 | 0 | "(GDAL_SWATH_SIZE config. option)", |
5083 | 0 | GDALGetCacheMax64(), nTargetSwathSize); |
5084 | 0 | } |
5085 | |
|
5086 | 0 | #define IS_DIVIDER_OF(x, y) ((y) % (x) == 0) |
5087 | 0 | #define ROUND_TO(x, y) (((x) / (y)) * (y)) |
5088 | | |
5089 | | // if both input and output datasets are tiled, that the tile dimensions |
5090 | | // are "compatible", try to stick to a swath dimension that is a multiple |
5091 | | // of input and output block dimensions. |
5092 | 0 | if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize && |
5093 | 0 | IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) && |
5094 | 0 | IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) && |
5095 | 0 | IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) && |
5096 | 0 | IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize)) |
5097 | 0 | { |
5098 | 0 | if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize * |
5099 | 0 | nPixelSize <= |
5100 | 0 | static_cast<GIntBig>(nTargetSwathSize)) |
5101 | 0 | { |
5102 | 0 | nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize); |
5103 | 0 | nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize); |
5104 | 0 | if (nSwathCols == 0) |
5105 | 0 | nSwathCols = nMaxBlockXSize; |
5106 | 0 | if (nSwathCols > nXSize) |
5107 | 0 | nSwathCols = nXSize; |
5108 | 0 | nSwathLines = nMaxBlockYSize; |
5109 | |
|
5110 | 0 | if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize > |
5111 | 0 | static_cast<GIntBig>(nTargetSwathSize)) |
5112 | 0 | { |
5113 | 0 | nSwathCols = nXSize; |
5114 | 0 | nSwathLines = nBlockYSize; |
5115 | 0 | } |
5116 | 0 | } |
5117 | 0 | } |
5118 | |
|
5119 | 0 | const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize; |
5120 | 0 | const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines; |
5121 | 0 | if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize)) |
5122 | 0 | { |
5123 | 0 | nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol); |
5124 | 0 | if (nSwathLines == 0) |
5125 | 0 | nSwathLines = 1; |
5126 | |
|
5127 | 0 | CPLDebug( |
5128 | 0 | "GDAL", |
5129 | 0 | "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath " |
5130 | 0 | "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath " |
5131 | 0 | "size (%d bytes) (GDAL_SWATH_SIZE config. option)", |
5132 | 0 | nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize); |
5133 | 0 | } |
5134 | | // If we are processing single scans, try to handle several at once. |
5135 | | // If we are handling swaths already, only grow the swath if a row |
5136 | | // of blocks is substantially less than our target buffer size. |
5137 | 0 | else if (nSwathLines == 1 || |
5138 | 0 | nMemoryPerCol * nSwathLines < |
5139 | 0 | static_cast<GIntBig>(nTargetSwathSize) / 10) |
5140 | 0 | { |
5141 | 0 | nSwathLines = std::min( |
5142 | 0 | nYSize, |
5143 | 0 | std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol))); |
5144 | | |
5145 | | /* If possible try to align to source and target block height */ |
5146 | 0 | if ((nSwathLines % nMaxBlockYSize) != 0 && |
5147 | 0 | nSwathLines > nMaxBlockYSize && |
5148 | 0 | IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) && |
5149 | 0 | IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize)) |
5150 | 0 | nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize); |
5151 | 0 | } |
5152 | |
|
5153 | 0 | if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") && |
5154 | 0 | (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) && |
5155 | 0 | IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize)))) |
5156 | 0 | { |
5157 | | // Typical use case: converting from Pleaiades that is 2048x2048 tiled. |
5158 | 0 | if (nSwathLines < nSrcBlockYSize) |
5159 | 0 | { |
5160 | 0 | nSwathLines = nSrcBlockYSize; |
5161 | | |
5162 | | // Number of pixels that can be read/write simultaneously. |
5163 | 0 | nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize); |
5164 | 0 | nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize); |
5165 | 0 | if (nSwathCols == 0) |
5166 | 0 | nSwathCols = nSrcBlockXSize; |
5167 | 0 | if (nSwathCols > nXSize) |
5168 | 0 | nSwathCols = nXSize; |
5169 | |
|
5170 | 0 | CPLDebug( |
5171 | 0 | "GDAL", |
5172 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression and " |
5173 | 0 | "too high block, " |
5174 | 0 | "use partial width at one time"); |
5175 | 0 | } |
5176 | 0 | else if ((nSwathLines % nSrcBlockYSize) != 0) |
5177 | 0 | { |
5178 | | /* Round on a multiple of nSrcBlockYSize */ |
5179 | 0 | nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize); |
5180 | 0 | CPLDebug( |
5181 | 0 | "GDAL", |
5182 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression, " |
5183 | 0 | "round nSwathLines to block height : %d", |
5184 | 0 | nSwathLines); |
5185 | 0 | } |
5186 | 0 | } |
5187 | 0 | else if (bDstIsCompressed) |
5188 | 0 | { |
5189 | 0 | if (nSwathLines < nBlockYSize) |
5190 | 0 | { |
5191 | 0 | nSwathLines = nBlockYSize; |
5192 | | |
5193 | | // Number of pixels that can be read/write simultaneously. |
5194 | 0 | nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize); |
5195 | 0 | nSwathCols = ROUND_TO(nSwathCols, nBlockXSize); |
5196 | 0 | if (nSwathCols == 0) |
5197 | 0 | nSwathCols = nBlockXSize; |
5198 | 0 | if (nSwathCols > nXSize) |
5199 | 0 | nSwathCols = nXSize; |
5200 | |
|
5201 | 0 | CPLDebug( |
5202 | 0 | "GDAL", |
5203 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression and " |
5204 | 0 | "too high block, " |
5205 | 0 | "use partial width at one time"); |
5206 | 0 | } |
5207 | 0 | else if ((nSwathLines % nBlockYSize) != 0) |
5208 | 0 | { |
5209 | | // Round on a multiple of nBlockYSize. |
5210 | 0 | nSwathLines = ROUND_TO(nSwathLines, nBlockYSize); |
5211 | 0 | CPLDebug( |
5212 | 0 | "GDAL", |
5213 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression, " |
5214 | 0 | "round nSwathLines to block height : %d", |
5215 | 0 | nSwathLines); |
5216 | 0 | } |
5217 | 0 | } |
5218 | |
|
5219 | 0 | *pnSwathCols = nSwathCols; |
5220 | 0 | *pnSwathLines = nSwathLines; |
5221 | 0 | } |
5222 | | |
5223 | | /************************************************************************/ |
5224 | | /* GDALDatasetCopyWholeRaster() */ |
5225 | | /************************************************************************/ |
5226 | | |
5227 | | /** |
5228 | | * \brief Copy all dataset raster data. |
5229 | | * |
5230 | | * This function copies the complete raster contents of one dataset to |
5231 | | * another similarly configured dataset. The source and destination |
5232 | | * dataset must have the same number of bands, and the same width |
5233 | | * and height. The bands do not have to have the same data type: the |
     | | * transfer buffer uses the data type of the first destination band. |
5234 | | * |
5235 | | * This function is primarily intended to support implementation of |
5236 | | * driver specific CreateCopy() functions. It implements efficient copying, |
5237 | | * in particular "chunking" the copy in substantial blocks and, if appropriate, |
5238 | | * performing the transfer in a pixel interleaved fashion. |
5239 | | * |
5240 | | * The papszOptions values currently supported are: |
5241 | | * <ul> |
5242 | | * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and |
5243 | | * write access pattern (this does not modify the layout of the destination |
5244 | | * data). "INTERLEAVE=ATTRIBUTES" (specific to the TileDB driver) is handled |
     | | * like PIXEL, and any other value only triggers a warning. Without this |
     | | * option, the interleaved pattern is used when the source or the destination |
     | | * dataset reports INTERLEAVE=PIXEL or INTERLEAVE=LINE in its IMAGE_STRUCTURE |
     | | * metadata domain.</li> |
5245 | | * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to |
     | | * achieve best compression.</li> |
5246 | | * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus() |
5247 | | * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li> |
5248 | | * </ul> |
5249 | | * More options may be supported in the future. |
5250 | | * |
5251 | | * @param hSrcDS the source dataset |
5252 | | * @param hDstDS the destination dataset |
5253 | | * @param papszOptions transfer hints in "StringList" Name=Value format. |
5254 | | * @param pfnProgress progress reporting function. May be NULL, in which case |
     | | * GDALDummyProgress is used. |
5255 | | * @param pProgressData callback data for progress function. |
5256 | | * |
5257 | | * @return CE_None on success (datasets without any band are a successful |
     | | * no-op), or CE_Failure on failure: mismatching dimensions or band counts, |
     | | * failed allocation of the swath buffer, or cancellation requested by |
     | | * pfnProgress. An error returned by a RasterIO() call is propagated as is. |
5258 | | */ |
5259 | | |
5260 | | CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS, |
5261 | | GDALDatasetH hDstDS, |
5262 | | CSLConstList papszOptions, |
5263 | | GDALProgressFunc pfnProgress, |
5264 | | void *pProgressData) |
5265 | | |
5266 | 0 | { |
5267 | 0 | VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure); |
5268 | 0 | VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure); |
5269 | | |
5270 | 0 | GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS); |
5271 | 0 | GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS); |
5272 | |

5273 | 0 | if (pfnProgress == nullptr) |
5274 | 0 | pfnProgress = GDALDummyProgress; |
5275 | | |
5276 | | /* -------------------------------------------------------------------- */ |
5277 | | /* Confirm the datasets match in size and band counts. */ |
5278 | | /* -------------------------------------------------------------------- */ |
5279 | 0 | const int nXSize = poDstDS->GetRasterXSize(); |
5280 | 0 | const int nYSize = poDstDS->GetRasterYSize(); |
5281 | 0 | const int nBandCount = poDstDS->GetRasterCount(); |
5282 | |

5283 | 0 | if (poSrcDS->GetRasterXSize() != nXSize || |
5284 | 0 | poSrcDS->GetRasterYSize() != nYSize || |
5285 | 0 | poSrcDS->GetRasterCount() != nBandCount) |
5286 | 0 | { |
5287 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
5288 | 0 | "Input and output dataset sizes or band counts do not\n" |
5289 | 0 | "match in GDALDatasetCopyWholeRaster()"); |
5290 | 0 | return CE_Failure; |
5291 | 0 | } |
5292 | | |
5293 | | /* -------------------------------------------------------------------- */ |
5294 | | /* Report preliminary (0) progress. */ |
5295 | | /* -------------------------------------------------------------------- */ |
5296 | 0 | if (!pfnProgress(0.0, nullptr, pProgressData)) |
5297 | 0 | { |
5298 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5299 | 0 | "User terminated CreateCopy()"); |
5300 | 0 | return CE_Failure; |
5301 | 0 | } |
5302 | | |
5303 | | /* -------------------------------------------------------------------- */ |
5304 | | /* Get our prototype band, and assume the others are similarly */ |
5305 | | /* configured. Nothing is left to do when there is no band. */ |
5306 | | /* -------------------------------------------------------------------- */ |
5307 | 0 | if (nBandCount == 0) |
5308 | 0 | return CE_None; |
5309 | | |
5310 | 0 | GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1); |
5311 | 0 | GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1); |
5312 | 0 | GDALDataType eDT = poDstPrototypeBand->GetRasterDataType(); |
5313 | | |
5314 | | /* -------------------------------------------------------------------- */ |
5315 | | /* Do we want to try and do the operation in a pixel */ |
5316 | | /* interleaved fashion? The dataset metadata gives the default, */ |
     | | /* which an explicit INTERLEAVE option then overrides. */ |
5317 | | /* -------------------------------------------------------------------- */ |
5318 | 0 | bool bInterleave = false; |
5319 | 0 | const char *pszInterleave = |
5320 | 0 | poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE"); |
5321 | 0 | if (pszInterleave != nullptr && |
5322 | 0 | (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE"))) |
5323 | 0 | bInterleave = true; |
5324 | |

5325 | 0 | pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE"); |
5326 | 0 | if (pszInterleave != nullptr && |
5327 | 0 | (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE"))) |
5328 | 0 | bInterleave = true; |
5329 | |

5330 | 0 | pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE"); |
5331 | 0 | if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL")) |
5332 | 0 | bInterleave = true; |
5333 | 0 | else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND")) |
5334 | 0 | bInterleave = false; |
5335 | | // INTERLEAVE=ATTRIBUTES is specific to the TileDB driver |
5336 | 0 | else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES")) |
5337 | 0 | bInterleave = true; |
5338 | 0 | else if (pszInterleave != nullptr) |
5339 | 0 | { |
5340 | 0 | CPLError(CE_Warning, CPLE_NotSupported, |
5341 | 0 | "Unsupported value for option INTERLEAVE"); |
5342 | 0 | } |
5343 | | |
5344 | | // If the destination is compressed, we must try to write blocks just once, |
5345 | | // to save disk space (GTiff case for example), and to avoid data loss |
5346 | | // (JPEG compression for example). |
5347 | 0 | bool bDstIsCompressed = false; |
5348 | 0 | const char *pszDstCompressed = |
5349 | 0 | CSLFetchNameValue(papszOptions, "COMPRESSED"); |
5350 | 0 | if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed)) |
5351 | 0 | bDstIsCompressed = true; |
5352 | | |
5353 | | /* -------------------------------------------------------------------- */ |
5354 | | /* What will our swath size be? */ |
5355 | | /* -------------------------------------------------------------------- */ |
5356 | |

5357 | 0 | int nSwathCols = 0; |
5358 | 0 | int nSwathLines = 0; |
5359 | 0 | GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand, |
5360 | 0 | nBandCount, bDstIsCompressed, bInterleave, |
5361 | 0 | &nSwathCols, &nSwathLines); |
5362 | | // In interleaved mode one swath holds all bands at once, so the size of a pixel is multiplied by the band count. |

5363 | 0 | int nPixelSize = GDALGetDataTypeSizeBytes(eDT); |
5364 | 0 | if (bInterleave) |
5365 | 0 | nPixelSize *= nBandCount; |
5366 | |

5367 | 0 | void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize); |
5368 | 0 | if (pSwathBuf == nullptr) |
5369 | 0 | { |
5370 | 0 | return CE_Failure; |
5371 | 0 | } |
5372 | | |
5373 | 0 | CPLDebug("GDAL", |
5374 | 0 | "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d", |
5375 | 0 | nSwathCols, nSwathLines, static_cast<int>(bInterleave)); |
5376 | | |
5377 | | // Advise the source raster that we are going to read it completely |
5378 | | // Note: this might already have been done by GDALCreateCopy() in the |
5379 | | // likely case this function is indirectly called by it |
5380 | 0 | poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount, |
5381 | 0 | nullptr, nullptr); |
5382 | | |
5383 | | /* ==================================================================== */ |
5384 | | /* Band oriented (uninterleaved) case. */ |
5385 | | /* ==================================================================== */ |
5386 | 0 | CPLErr eErr = CE_None; |
5387 | 0 | const bool bCheckHoles = |
5388 | 0 | CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO")); |
5389 | |

5390 | 0 | if (!bInterleave) |
5391 | 0 | { |
5392 | 0 | GDALRasterIOExtraArg sExtraArg; |
5393 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
5394 | 0 | CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy |
5395 | | // A block is one swath-sized chunk of one band; the total is only used to scale progress reporting. |

5396 | 0 | const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) * |
5397 | 0 | DIV_ROUND_UP(nYSize, nSwathLines) * |
5398 | 0 | DIV_ROUND_UP(nXSize, nSwathCols); |
5399 | 0 | GIntBig nBlocksDone = 0; |
5400 | |

5401 | 0 | for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++) |
5402 | 0 | { |
5403 | 0 | int nBand = iBand + 1; |
5404 | |

5405 | 0 | for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines) |
5406 | 0 | { |
5407 | 0 | int nThisLines = nSwathLines; |
5408 | |

5409 | 0 | if (iY + nThisLines > nYSize) |
5410 | 0 | nThisLines = nYSize - iY; |
5411 | |

5412 | 0 | for (int iX = 0; iX < nXSize && eErr == CE_None; |
5413 | 0 | iX += nSwathCols) |
5414 | 0 | { |
5415 | 0 | int nThisCols = nSwathCols; |
5416 | |

5417 | 0 | if (iX + nThisCols > nXSize) |
5418 | 0 | nThisCols = nXSize - iX; |
5419 | | // Without SKIP_HOLES every chunk is assumed to contain data. |

5420 | 0 | int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA; |
5421 | 0 | if (bCheckHoles) |
5422 | 0 | { |
5423 | 0 | nStatus = poSrcDS->GetRasterBand(nBand) |
5424 | 0 | ->GetDataCoverageStatus( |
5425 | 0 | iX, iY, nThisCols, nThisLines, |
5426 | 0 | GDAL_DATA_COVERAGE_STATUS_DATA); |
5427 | 0 | } |
5428 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
5429 | 0 | { |
5430 | 0 | sExtraArg.pfnProgress = GDALScaledProgress; |
5431 | 0 | sExtraArg.pProgressData = GDALCreateScaledProgress( |
5432 | 0 | nBlocksDone / static_cast<double>(nTotalBlocks), |
5433 | 0 | (nBlocksDone + 0.5) / |
5434 | 0 | static_cast<double>(nTotalBlocks), |
5435 | 0 | pfnProgress, pProgressData); |
5436 | 0 | if (sExtraArg.pProgressData == nullptr) |
5437 | 0 | sExtraArg.pfnProgress = nullptr; |
5438 | | // The read reports progress over the first half of the range of this block; the write below reports none. |

5439 | 0 | eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols, |
5440 | 0 | nThisLines, pSwathBuf, |
5441 | 0 | nThisCols, nThisLines, eDT, 1, |
5442 | 0 | &nBand, 0, 0, 0, &sExtraArg); |
5443 | |

5444 | 0 | GDALDestroyScaledProgress(sExtraArg.pProgressData); |
5445 | |

5446 | 0 | if (eErr == CE_None) |
5447 | 0 | eErr = poDstDS->RasterIO( |
5448 | 0 | GF_Write, iX, iY, nThisCols, nThisLines, |
5449 | 0 | pSwathBuf, nThisCols, nThisLines, eDT, 1, |
5450 | 0 | &nBand, 0, 0, 0, nullptr); |
5451 | 0 | } |
5452 | | // Chunks skipped as holes are counted as done too. |

5453 | 0 | nBlocksDone++; |
5454 | 0 | if (eErr == CE_None && |
5455 | 0 | !pfnProgress(nBlocksDone / |
5456 | 0 | static_cast<double>(nTotalBlocks), |
5457 | 0 | nullptr, pProgressData)) |
5458 | 0 | { |
5459 | 0 | eErr = CE_Failure; |
5460 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5461 | 0 | "User terminated CreateCopy()"); |
5462 | 0 | } |
5463 | 0 | } |
5464 | 0 | } |
5465 | 0 | } |
5466 | 0 | } |
5467 | | |
5468 | | /* ==================================================================== */ |
5469 | | /* Pixel interleaved case. */ |
5470 | | /* ==================================================================== */ |
5471 | 0 | else /* if( bInterleave ) */ |
5472 | 0 | { |
5473 | 0 | GDALRasterIOExtraArg sExtraArg; |
5474 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
5475 | 0 | CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy |
5476 | | // All bands of a chunk are transferred together, so blocks are counted per chunk only. |

5477 | 0 | const GIntBig nTotalBlocks = |
5478 | 0 | static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) * |
5479 | 0 | DIV_ROUND_UP(nXSize, nSwathCols); |
5480 | 0 | GIntBig nBlocksDone = 0; |
5481 | |

5482 | 0 | for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines) |
5483 | 0 | { |
5484 | 0 | int nThisLines = nSwathLines; |
5485 | |

5486 | 0 | if (iY + nThisLines > nYSize) |
5487 | 0 | nThisLines = nYSize - iY; |
5488 | |

5489 | 0 | for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols) |
5490 | 0 | { |
5491 | 0 | int nThisCols = nSwathCols; |
5492 | |

5493 | 0 | if (iX + nThisCols > nXSize) |
5494 | 0 | nThisCols = nXSize - iX; |
5495 | | // With SKIP_HOLES the chunk is copied as soon as one band reports data for it. |

5496 | 0 | int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA; |
5497 | 0 | if (bCheckHoles) |
5498 | 0 | { |
5499 | 0 | nStatus = 0; |
5500 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
5501 | 0 | { |
5502 | 0 | nStatus |= poSrcDS->GetRasterBand(iBand + 1) |
5503 | 0 | ->GetDataCoverageStatus( |
5504 | 0 | iX, iY, nThisCols, nThisLines, |
5505 | 0 | GDAL_DATA_COVERAGE_STATUS_DATA); |
5506 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
5507 | 0 | break; |
5508 | 0 | } |
5509 | 0 | } |
5510 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
5511 | 0 | { |
5512 | 0 | sExtraArg.pfnProgress = GDALScaledProgress; |
5513 | 0 | sExtraArg.pProgressData = GDALCreateScaledProgress( |
5514 | 0 | nBlocksDone / static_cast<double>(nTotalBlocks), |
5515 | 0 | (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks), |
5516 | 0 | pfnProgress, pProgressData); |
5517 | 0 | if (sExtraArg.pProgressData == nullptr) |
5518 | 0 | sExtraArg.pfnProgress = nullptr; |
5519 | | // The read reports progress over the first half of the range of this block; the write below reports none. |

5520 | 0 | eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols, |
5521 | 0 | nThisLines, pSwathBuf, nThisCols, |
5522 | 0 | nThisLines, eDT, nBandCount, |
5523 | 0 | nullptr, 0, 0, 0, &sExtraArg); |
5524 | |

5525 | 0 | GDALDestroyScaledProgress(sExtraArg.pProgressData); |
5526 | |

5527 | 0 | if (eErr == CE_None) |
5528 | 0 | eErr = poDstDS->RasterIO( |
5529 | 0 | GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf, |
5530 | 0 | nThisCols, nThisLines, eDT, nBandCount, nullptr, 0, |
5531 | 0 | 0, 0, nullptr); |
5532 | 0 | } |
5533 | | // Chunks skipped as holes are counted as done too. |

5534 | 0 | nBlocksDone++; |
5535 | 0 | if (eErr == CE_None && |
5536 | 0 | !pfnProgress(nBlocksDone / |
5537 | 0 | static_cast<double>(nTotalBlocks), |
5538 | 0 | nullptr, pProgressData)) |
5539 | 0 | { |
5540 | 0 | eErr = CE_Failure; |
5541 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5542 | 0 | "User terminated CreateCopy()"); |
5543 | 0 | } |
5544 | 0 | } |
5545 | 0 | } |
5546 | 0 | } |
5547 | | |
5548 | | /* -------------------------------------------------------------------- */ |
5549 | | /* Cleanup */ |
5550 | | /* -------------------------------------------------------------------- */ |
5551 | 0 | CPLFree(pSwathBuf); |
5552 | |

5553 | 0 | return eErr; |
5554 | 0 | } |
5555 | | |
5556 | | /************************************************************************/ |
5557 | | /* GDALRasterBandCopyWholeRaster() */ |
5558 | | /************************************************************************/ |
5559 | | |
5560 | | /** |
5561 | | * \brief Copy a whole raster band |
5562 | | * |
5563 | | * This function copies the complete raster contents of one band to |
5564 | | * another similarly configured band. The source and destination |
5565 | | * bands must have the same width and height. The bands do not have |
5566 | | * to have the same data type: the transfer buffer uses the data type of |
     | | * the destination band. |
5567 | | * |
5568 | | * It implements efficient copying, in particular "chunking" the copy in |
5569 | | * substantial blocks. |
5570 | | * |
5571 | | * The papszOptions values currently supported are: |
5572 | | * <ul> |
5573 | | * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to |
5574 | | * achieve best compression.</li> |
5575 | | * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus() |
5576 | | * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li> |
5577 | | * </ul> |
5578 | | * |
5579 | | * @param hSrcBand the source band |
5580 | | * @param hDstBand the destination band |
5581 | | * @param papszOptions transfer hints in "StringList" Name=Value format. |
5582 | | * @param pfnProgress progress reporting function. May be NULL, in which case |
     | | * GDALDummyProgress is used. It receives the fraction of lines processed. |
5583 | | * @param pProgressData callback data for progress function. |
5584 | | * |
5585 | | * @return CE_None on success, or CE_Failure on failure: mismatching band |
     | | * dimensions, failed allocation of the swath buffer, or cancellation |
     | | * requested by pfnProgress. An error returned by a RasterIO() call is |
     | | * propagated as is. |
5586 | | */ |
5587 | | |
5588 | | CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster( |
5589 | | GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand, |
5590 | | const char *const *const papszOptions, GDALProgressFunc pfnProgress, |
5591 | | void *pProgressData) |
5592 | | |
5593 | 0 | { |
5594 | 0 | VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure); |
5595 | 0 | VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure); |
5596 | | |
5597 | 0 | GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand); |
5598 | 0 | GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand); |
5599 | 0 | CPLErr eErr = CE_None; |
5600 | |

5601 | 0 | if (pfnProgress == nullptr) |
5602 | 0 | pfnProgress = GDALDummyProgress; |
5603 | | |
5604 | | /* -------------------------------------------------------------------- */ |
5605 | | /* Confirm the source and destination bands match in size. */ |
5606 | | /* -------------------------------------------------------------------- */ |
5607 | 0 | int nXSize = poSrcBand->GetXSize(); |
5608 | 0 | int nYSize = poSrcBand->GetYSize(); |
5609 | |

5610 | 0 | if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize) |
5611 | 0 | { |
5612 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
5613 | 0 | "Input and output band sizes do not\n" |
5614 | 0 | "match in GDALRasterBandCopyWholeRaster()"); |
5615 | 0 | return CE_Failure; |
5616 | 0 | } |
5617 | | |
5618 | | /* -------------------------------------------------------------------- */ |
5619 | | /* Report preliminary (0) progress. */ |
5620 | | /* -------------------------------------------------------------------- */ |
5621 | 0 | if (!pfnProgress(0.0, nullptr, pProgressData)) |
5622 | 0 | { |
5623 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5624 | 0 | "User terminated CreateCopy()"); |
5625 | 0 | return CE_Failure; |
5626 | 0 | } |
5627 | | |
5628 | 0 | GDALDataType eDT = poDstBand->GetRasterDataType(); |
5629 | | |
5630 | | // If the destination is compressed, we must try to write blocks just once, |
5631 | | // to save disk space (GTiff case for example), and to avoid data loss |
5632 | | // (JPEG compression for example). |
5633 | 0 | bool bDstIsCompressed = false; |
5634 | 0 | const char *pszDstCompressed = |
5635 | 0 | CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED"); |
5636 | 0 | if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed)) |
5637 | 0 | bDstIsCompressed = true; |
5638 | | |
5639 | | /* -------------------------------------------------------------------- */ |
5640 | | /* What will our swath size be? */ |
5641 | | /* -------------------------------------------------------------------- */ |
5642 | |

5643 | 0 | int nSwathCols = 0; |
5644 | 0 | int nSwathLines = 0; |
5645 | 0 | GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed, |
5646 | 0 | FALSE, &nSwathCols, &nSwathLines); |
5647 | |

5648 | 0 | const int nPixelSize = GDALGetDataTypeSizeBytes(eDT); |
5649 | |

5650 | 0 | void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize); |
5651 | 0 | if (pSwathBuf == nullptr) |
5652 | 0 | { |
5653 | 0 | return CE_Failure; |
5654 | 0 | } |
5655 | | |
5656 | 0 | CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths", |
5657 | 0 | nSwathCols, nSwathLines); |
5658 | |

5659 | 0 | const bool bCheckHoles = |
5660 | 0 | CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO")); |
5661 | | |
5662 | | // Advise the source raster that we are going to read it completely |
5663 | 0 | poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr); |
5664 | | |
5665 | | /* ==================================================================== */ |
5666 | | /* Band oriented (uninterleaved) case. */ |
5667 | | /* ==================================================================== */ |
5668 | |

5669 | 0 | for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines) |
5670 | 0 | { |
5671 | 0 | int nThisLines = nSwathLines; |
5672 | |

5673 | 0 | if (iY + nThisLines > nYSize) |
5674 | 0 | nThisLines = nYSize - iY; |
5675 | |

5676 | 0 | for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols) |
5677 | 0 | { |
5678 | 0 | int nThisCols = nSwathCols; |
5679 | |

5680 | 0 | if (iX + nThisCols > nXSize) |
5681 | 0 | nThisCols = nXSize - iX; |
5682 | | // Without SKIP_HOLES every chunk is assumed to contain data. |

5683 | 0 | int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA; |
5684 | 0 | if (bCheckHoles) |
5685 | 0 | { |
5686 | 0 | nStatus = poSrcBand->GetDataCoverageStatus( |
5687 | 0 | iX, iY, nThisCols, nThisLines, |
5688 | 0 | GDAL_DATA_COVERAGE_STATUS_DATA); |
5689 | 0 | } |
5690 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
5691 | 0 | { |
5692 | 0 | eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols, |
5693 | 0 | nThisLines, pSwathBuf, nThisCols, |
5694 | 0 | nThisLines, eDT, 0, 0, nullptr); |
5695 | |

5696 | 0 | if (eErr == CE_None) |
5697 | 0 | eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols, |
5698 | 0 | nThisLines, pSwathBuf, nThisCols, |
5699 | 0 | nThisLines, eDT, 0, 0, nullptr); |
5700 | 0 | } |
5701 | | // Progress is the fraction of lines reached by the current swath row; it is reported after each chunk of that row. |

5702 | 0 | if (eErr == CE_None && !pfnProgress(double(iY + nThisLines) / |
5703 | 0 | static_cast<double>(nYSize), |
5704 | 0 | nullptr, pProgressData)) |
5705 | 0 | { |
5706 | 0 | eErr = CE_Failure; |
5707 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5708 | 0 | "User terminated CreateCopy()"); |
5709 | 0 | } |
5710 | 0 | } |
5711 | 0 | } |
5712 | | |
5713 | | /* -------------------------------------------------------------------- */ |
5714 | | /* Cleanup */ |
5715 | | /* -------------------------------------------------------------------- */ |
5716 | 0 | CPLFree(pSwathBuf); |
5717 | |

5718 | 0 | return eErr; |
5719 | 0 | } |
5720 | | |
5721 | | /************************************************************************/ |
5722 | | /* GDALCopyRasterIOExtraArg () */ |
5723 | | /************************************************************************/ |
5724 | | /** |
     | | * \brief Copy the content of a GDALRasterIOExtraArg structure. |
     | | * |
     | | * The destination is first reset with INIT_RASTERIO_EXTRA_ARG(). When |
     | | * psSrcArg is not null, the resampling algorithm, the progress callback and |
     | | * its data, and the bFloatingPointWindowValidity flag are then copied. The |
     | | * floating point window (dfXOff, dfYOff, dfXSize, dfYSize) is copied only when |
     | | * that flag is set, and bUseOnlyThisScale only when the source structure has |
     | | * nVersion >= 2. |
     | | * |
     | | * @param psDestArg structure to fill. It is dereferenced unconditionally. |
     | | * @param psSrcArg structure to copy from, or null to only get the defaults. |
     | | */ |
5725 | | void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg, |
5726 | | GDALRasterIOExtraArg *psSrcArg) |
5727 | 0 | { |
5728 | 0 | INIT_RASTERIO_EXTRA_ARG(*psDestArg); |
5729 | 0 | if (psSrcArg) |
5730 | 0 | { |
5731 | 0 | psDestArg->eResampleAlg = psSrcArg->eResampleAlg; |
5732 | 0 | psDestArg->pfnProgress = psSrcArg->pfnProgress; |
5733 | 0 | psDestArg->pProgressData = psSrcArg->pProgressData; |
5734 | 0 | psDestArg->bFloatingPointWindowValidity = |
5735 | 0 | psSrcArg->bFloatingPointWindowValidity; |
5736 | 0 | if (psSrcArg->bFloatingPointWindowValidity) |
5737 | 0 | { |
5738 | 0 | psDestArg->dfXOff = psSrcArg->dfXOff; |
5739 | 0 | psDestArg->dfYOff = psSrcArg->dfYOff; |
5740 | 0 | psDestArg->dfXSize = psSrcArg->dfXSize; |
5741 | 0 | psDestArg->dfYSize = psSrcArg->dfYSize; |
5742 | 0 | } |
5743 | 0 | if (psSrcArg->nVersion >= 2) |
5744 | 0 | { |
5745 | 0 | psDestArg->bUseOnlyThisScale = psSrcArg->bUseOnlyThisScale; |
5746 | 0 | } |
5747 | 0 | } |
5748 | 0 | } |
5749 | | |
5750 | | /************************************************************************/ |
5751 | | /* HasOnlyNoData() */ |
5752 | | /************************************************************************/ |
5753 | | /** |
     | | * \brief Tell whether a sample matches the nodata value. |
     | | * |
     | | * Generic version: plain equality. Specializations for GFloat16, float and |
     | | * double are defined right after it. |
     | | * |
     | | * @param value sample to test. |
     | | * @param noDataValue nodata value to compare against. |
     | | * @return true if the sample is considered equal to the nodata value. |
     | | */ |
5754 | | template <class T> static inline bool IsEqualToNoData(T value, T noDataValue) |
5755 | 0 | { |
5756 | 0 | return value == noDataValue; |
5757 | 0 | } Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned char>(unsigned char, unsigned char) Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned short>(unsigned short, unsigned short) Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned int>(unsigned int, unsigned int) Unexecuted instantiation: rasterio.cpp:bool IsEqualToNoData<unsigned long>(unsigned long, unsigned long) |
5758 | | /* Floating-point specializations: when noDataValue is NaN, a sample matches |
     | | * if it is NaN as well (plain == never matches a NaN); otherwise plain |
     | | * equality is used. */ |
5759 | | template <> bool IsEqualToNoData<GFloat16>(GFloat16 value, GFloat16 noDataValue) |
5760 | 0 | { |
5761 | 0 | using std::isnan; |
5762 | 0 | return isnan(noDataValue) ? isnan(value) : value == noDataValue; |
5763 | 0 | } |
5764 | | |
5765 | | template <> bool IsEqualToNoData<float>(float value, float noDataValue) |
5766 | 0 | { |
5767 | 0 | return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue; |
5768 | 0 | } |
5769 | | |
5770 | | template <> bool IsEqualToNoData<double>(double value, double noDataValue) |
5771 | 0 | { |
5772 | 0 | return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue; |
5773 | 0 | } |
5774 | | /** |
     | | * \brief Test whether every examined sample of a buffer equals the nodata value. |
     | | * |
     | | * Samples are compared with IsEqualToNoData(). The buffer is read as lines of |
     | | * nLineStride pixels, each pixel made of nComponents consecutive samples; only |
     | | * the first nWidth pixels of each of the nHeight lines are examined. |
     | | * |
     | | * NOTE(review): nWidth and nHeight are presumably expected to be at least 1, |
     | | * as the quick test evaluates (nWidth - 1) and (nHeight - 1) on unsigned |
     | | * values - confirm against callers. |
     | | * |
     | | * @param pBuffer first sample of the buffer. |
     | | * @param noDataValue value every sample is compared against. |
     | | * @param nWidth number of pixels examined per line. |
     | | * @param nHeight number of lines. |
     | | * @param nLineStride offset between the starts of two consecutive lines, in pixels. |
     | | * @param nComponents number of samples per pixel. |
     | | * @return true if all examined samples match noDataValue, false otherwise. |
     | | */ |
5775 | | template <class T> |
5776 | | static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth, |
5777 | | size_t nHeight, size_t nLineStride, |
5778 | | size_t nComponents) |
5779 | 0 | { |
5780 | | // Fast test: for each component, check the 4 corners and the middle pixel, so that most buffers holding data are rejected before the full scan. |
5781 | 0 | for (size_t iBand = 0; iBand < nComponents; iBand++) |
5782 | 0 | { |
5783 | 0 | if (!(IsEqualToNoData(pBuffer[iBand], noDataValue) && |
5784 | 0 | IsEqualToNoData(pBuffer[(nWidth - 1) * nComponents + iBand], |
5785 | 0 | noDataValue) && |
5786 | 0 | IsEqualToNoData( |
5787 | 0 | pBuffer[((nHeight - 1) / 2 * nLineStride + (nWidth - 1) / 2) * |
5788 | 0 | nComponents + |
5789 | 0 | iBand], |
5790 | 0 | noDataValue) && |
5791 | 0 | IsEqualToNoData( |
5792 | 0 | pBuffer[(nHeight - 1) * nLineStride * nComponents + iBand], |
5793 | 0 | noDataValue) && |
5794 | 0 | IsEqualToNoData( |
5795 | 0 | pBuffer[((nHeight - 1) * nLineStride + nWidth - 1) * |
5796 | 0 | nComponents + |
5797 | 0 | iBand], |
5798 | 0 | noDataValue))) |
5799 | 0 | { |
5800 | 0 | return false; |
5801 | 0 | } |
5802 | 0 | } |
5803 | | |
5804 | | // Test all pixels: every sample of the first nWidth pixels of each line; samples beyond nWidth up to the line stride are not read. |
5805 | 0 | for (size_t iY = 0; iY < nHeight; iY++) |
5806 | 0 | { |
5807 | 0 | const T *pBufferLine = pBuffer + iY * nLineStride * nComponents; |
5808 | 0 | for (size_t iX = 0; iX < nWidth * nComponents; iX++) |
5809 | 0 | { |
5810 | 0 | if (!IsEqualToNoData(pBufferLine[iX], noDataValue)) |
5811 | 0 | { |
5812 | 0 | return false; |
5813 | 0 | } |
5814 | 0 | } |
5815 | 0 | } |
5816 | 0 | return true; |
5817 | 0 | } Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned char>(unsigned char const*, unsigned char, unsigned long, unsigned long, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned short>(unsigned short const*, unsigned short, unsigned long, unsigned long, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned int>(unsigned int const*, unsigned int, unsigned long, unsigned long, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<unsigned long>(unsigned long const*, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<cpl::Float16>(cpl::Float16 const*, cpl::Float16, unsigned long, unsigned long, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<float>(float const*, float, unsigned long, unsigned long, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:bool HasOnlyNoDataT<double>(double const*, double, unsigned long, unsigned long, unsigned long, unsigned long) |
5818 | | |
5819 | | /************************************************************************/ |
5820 | | /* GDALBufferHasOnlyNoData() */ |
5821 | | /************************************************************************/ |
5822 | | |
5823 | | bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue, |
5824 | | size_t nWidth, size_t nHeight, size_t nLineStride, |
5825 | | size_t nComponents, int nBitsPerSample, |
5826 | | GDALBufferSampleFormat nSampleFormat) |
5827 | 0 | { |
5828 | | // In the case where the nodata is 0, we can compare several bytes at |
5829 | | // once. Select the largest natural integer type for the architecture. |
5830 | 0 | if (dfNoDataValue == 0.0 && nWidth == nLineStride && |
5831 | | // Do not use this optimized code path for floating point numbers, |
5832 | | // as it can't detect negative zero. |
5833 | 0 | nSampleFormat != GSF_FLOATING_POINT) |
5834 | 0 | { |
5835 | 0 | const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer); |
5836 | 0 | const size_t nSize = |
5837 | 0 | static_cast<size_t>((static_cast<uint64_t>(nWidth) * nHeight * |
5838 | 0 | nComponents * nBitsPerSample + |
5839 | 0 | 7) / |
5840 | 0 | 8); |
5841 | 0 | #ifdef HAVE_SSE2 |
5842 | 0 | size_t n = nSize; |
5843 | | // Align to 16 bytes |
5844 | 0 | while ((reinterpret_cast<uintptr_t>(pabyBuffer) & 15) != 0 && n > 0) |
5845 | 0 | { |
5846 | 0 | --n; |
5847 | 0 | if (*pabyBuffer) |
5848 | 0 | return false; |
5849 | 0 | pabyBuffer++; |
5850 | 0 | } |
5851 | | |
5852 | 0 | const auto zero = _mm_setzero_si128(); |
5853 | 0 | constexpr int UNROLLING = 4; |
5854 | 0 | while (n >= UNROLLING * sizeof(zero)) |
5855 | 0 | { |
5856 | 0 | const auto v0 = _mm_load_si128(reinterpret_cast<const __m128i *>( |
5857 | 0 | pabyBuffer + 0 * sizeof(zero))); |
5858 | 0 | const auto v1 = _mm_load_si128(reinterpret_cast<const __m128i *>( |
5859 | 0 | pabyBuffer + 1 * sizeof(zero))); |
5860 | 0 | const auto v2 = _mm_load_si128(reinterpret_cast<const __m128i *>( |
5861 | 0 | pabyBuffer + 2 * sizeof(zero))); |
5862 | 0 | const auto v3 = _mm_load_si128(reinterpret_cast<const __m128i *>( |
5863 | 0 | pabyBuffer + 3 * sizeof(zero))); |
5864 | 0 | const auto v = |
5865 | 0 | _mm_or_si128(_mm_or_si128(v0, v1), _mm_or_si128(v2, v3)); |
5866 | | #if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
5867 | | if (!_mm_test_all_zeros(v, v)) |
5868 | | #else |
5869 | 0 | if (_mm_movemask_epi8(_mm_cmpeq_epi8(v, zero)) != 0xFFFF) |
5870 | 0 | #endif |
5871 | 0 | { |
5872 | 0 | return false; |
5873 | 0 | } |
5874 | 0 | pabyBuffer += UNROLLING * sizeof(zero); |
5875 | 0 | n -= UNROLLING * sizeof(zero); |
5876 | 0 | } |
5877 | | |
5878 | 0 | while (n > 0) |
5879 | 0 | { |
5880 | 0 | --n; |
5881 | 0 | if (*pabyBuffer) |
5882 | 0 | return false; |
5883 | 0 | pabyBuffer++; |
5884 | 0 | } |
5885 | | #else |
5886 | | #if SIZEOF_VOIDP >= 8 || defined(__x86_64__) |
5887 | | // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4 |
5888 | | typedef std::uint64_t WordType; |
5889 | | #else |
5890 | | typedef std::uint32_t WordType; |
5891 | | #endif |
5892 | | |
5893 | | const size_t nInitialIters = |
5894 | | std::min(sizeof(WordType) - |
5895 | | static_cast<size_t>( |
5896 | | reinterpret_cast<std::uintptr_t>(pabyBuffer) % |
5897 | | sizeof(WordType)), |
5898 | | nSize); |
5899 | | size_t i = 0; |
5900 | | for (; i < nInitialIters; i++) |
5901 | | { |
5902 | | if (pabyBuffer[i]) |
5903 | | return false; |
5904 | | } |
5905 | | for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType)) |
5906 | | { |
5907 | | if (*(reinterpret_cast<const WordType *>(pabyBuffer + i))) |
5908 | | return false; |
5909 | | } |
5910 | | for (; i < nSize; i++) |
5911 | | { |
5912 | | if (pabyBuffer[i]) |
5913 | | return false; |
5914 | | } |
5915 | | #endif |
5916 | 0 | return true; |
5917 | 0 | } |
5918 | | |
5919 | 0 | #ifdef HAVE_SSE2 |
5920 | 0 | else if (dfNoDataValue == 0.0 && nWidth == nLineStride && |
5921 | 0 | nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT) |
5922 | 0 | { |
5923 | 0 | const auto signMask = _mm_set1_epi32(0x7FFFFFFF); |
5924 | 0 | const auto zero = _mm_setzero_si128(); |
5925 | 0 | const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer); |
5926 | 0 | const size_t n = nWidth * nHeight * nComponents; |
5927 | |
|
5928 | 0 | size_t i = 0; |
5929 | 0 | constexpr int UNROLLING = 4; |
5930 | 0 | constexpr size_t VALUES_PER_ITER = |
5931 | 0 | UNROLLING * sizeof(zero) / sizeof(float); |
5932 | 0 | for (; i + VALUES_PER_ITER <= n; i += VALUES_PER_ITER) |
5933 | 0 | { |
5934 | 0 | const auto v0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>( |
5935 | 0 | pabyBuffer + 0 * sizeof(zero))); |
5936 | 0 | const auto v1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>( |
5937 | 0 | pabyBuffer + 1 * sizeof(zero))); |
5938 | 0 | const auto v2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>( |
5939 | 0 | pabyBuffer + 2 * sizeof(zero))); |
5940 | 0 | const auto v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>( |
5941 | 0 | pabyBuffer + 3 * sizeof(zero))); |
5942 | 0 | auto v = _mm_or_si128(_mm_or_si128(v0, v1), _mm_or_si128(v2, v3)); |
5943 | | // Clear the sign bit (makes -0.0 become +0.0) |
5944 | 0 | v = _mm_and_si128(v, signMask); |
5945 | | #if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
5946 | | if (!_mm_test_all_zeros(v, v)) |
5947 | | #else |
5948 | 0 | if (_mm_movemask_epi8(_mm_cmpeq_epi8(v, zero)) != 0xFFFF) |
5949 | 0 | #endif |
5950 | 0 | { |
5951 | 0 | return false; |
5952 | 0 | } |
5953 | 0 | pabyBuffer += UNROLLING * sizeof(zero); |
5954 | 0 | } |
5955 | | |
5956 | 0 | for (; i < n; i++) |
5957 | 0 | { |
5958 | 0 | uint32_t bits; |
5959 | 0 | memcpy(&bits, pabyBuffer, sizeof(bits)); |
5960 | 0 | pabyBuffer += sizeof(bits); |
5961 | 0 | if ((bits & 0x7FFFFFFF) != 0) |
5962 | 0 | return false; |
5963 | 0 | } |
5964 | | |
5965 | 0 | return true; |
5966 | 0 | } |
5967 | | |
5968 | 0 | else if (dfNoDataValue == 0.0 && nWidth == nLineStride && |
5969 | 0 | nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT) |
5970 | 0 | { |
5971 | 0 | const auto signMask = _mm_set1_epi64x(0x7FFFFFFFFFFFFFFFLL); |
5972 | 0 | const auto zero = _mm_setzero_si128(); |
5973 | 0 | const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer); |
5974 | 0 | const size_t n = nWidth * nHeight * nComponents; |
5975 | |
|
5976 | 0 | size_t i = 0; |
5977 | 0 | constexpr int UNROLLING = 4; |
5978 | 0 | constexpr size_t VALUES_PER_ITER = |
5979 | 0 | UNROLLING * sizeof(zero) / sizeof(double); |
5980 | 0 | for (; i + VALUES_PER_ITER <= n; i += VALUES_PER_ITER) |
5981 | 0 | { |
5982 | 0 | const auto v0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>( |
5983 | 0 | pabyBuffer + 0 * sizeof(zero))); |
5984 | 0 | const auto v1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>( |
5985 | 0 | pabyBuffer + 1 * sizeof(zero))); |
5986 | 0 | const auto v2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>( |
5987 | 0 | pabyBuffer + 2 * sizeof(zero))); |
5988 | 0 | const auto v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>( |
5989 | 0 | pabyBuffer + 3 * sizeof(zero))); |
5990 | 0 | auto v = _mm_or_si128(_mm_or_si128(v0, v1), _mm_or_si128(v2, v3)); |
5991 | | // Clear the sign bit (makes -0.0 become +0.0) |
5992 | 0 | v = _mm_and_si128(v, signMask); |
5993 | | #if defined(__SSE4_1__) || defined(USE_NEON_OPTIMIZATIONS) |
5994 | | if (!_mm_test_all_zeros(v, v)) |
5995 | | #else |
5996 | 0 | if (_mm_movemask_epi8(_mm_cmpeq_epi8(v, zero)) != 0xFFFF) |
5997 | 0 | #endif |
5998 | 0 | { |
5999 | 0 | return false; |
6000 | 0 | } |
6001 | 0 | pabyBuffer += UNROLLING * sizeof(zero); |
6002 | 0 | } |
6003 | | |
6004 | 0 | for (; i < n; i++) |
6005 | 0 | { |
6006 | 0 | uint64_t bits; |
6007 | 0 | memcpy(&bits, pabyBuffer, sizeof(bits)); |
6008 | 0 | pabyBuffer += sizeof(bits); |
6009 | 0 | if ((bits & 0x7FFFFFFFFFFFFFFFULL) != 0) |
6010 | 0 | return false; |
6011 | 0 | } |
6012 | | |
6013 | 0 | return true; |
6014 | 0 | } |
6015 | 0 | #endif |
6016 | | |
6017 | 0 | if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT) |
6018 | 0 | { |
6019 | 0 | return GDALIsValueInRange<uint8_t>(dfNoDataValue) && |
6020 | 0 | HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer), |
6021 | 0 | static_cast<uint8_t>(dfNoDataValue), nWidth, |
6022 | 0 | nHeight, nLineStride, nComponents); |
6023 | 0 | } |
6024 | 0 | if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT) |
6025 | 0 | { |
6026 | | // Use unsigned implementation by converting the nodatavalue to |
6027 | | // unsigned |
6028 | 0 | return GDALIsValueInRange<int8_t>(dfNoDataValue) && |
6029 | 0 | HasOnlyNoDataT( |
6030 | 0 | static_cast<const uint8_t *>(pBuffer), |
6031 | 0 | static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)), |
6032 | 0 | nWidth, nHeight, nLineStride, nComponents); |
6033 | 0 | } |
6034 | 0 | if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT) |
6035 | 0 | { |
6036 | 0 | return GDALIsValueInRange<uint16_t>(dfNoDataValue) && |
6037 | 0 | HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer), |
6038 | 0 | static_cast<uint16_t>(dfNoDataValue), nWidth, |
6039 | 0 | nHeight, nLineStride, nComponents); |
6040 | 0 | } |
6041 | 0 | if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT) |
6042 | 0 | { |
6043 | | // Use unsigned implementation by converting the nodatavalue to |
6044 | | // unsigned |
6045 | 0 | return GDALIsValueInRange<int16_t>(dfNoDataValue) && |
6046 | 0 | HasOnlyNoDataT( |
6047 | 0 | static_cast<const uint16_t *>(pBuffer), |
6048 | 0 | static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)), |
6049 | 0 | nWidth, nHeight, nLineStride, nComponents); |
6050 | 0 | } |
6051 | 0 | if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT) |
6052 | 0 | { |
6053 | 0 | return GDALIsValueInRange<uint32_t>(dfNoDataValue) && |
6054 | 0 | HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer), |
6055 | 0 | static_cast<uint32_t>(dfNoDataValue), nWidth, |
6056 | 0 | nHeight, nLineStride, nComponents); |
6057 | 0 | } |
6058 | 0 | if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT) |
6059 | 0 | { |
6060 | | // Use unsigned implementation by converting the nodatavalue to |
6061 | | // unsigned |
6062 | 0 | return GDALIsValueInRange<int32_t>(dfNoDataValue) && |
6063 | 0 | HasOnlyNoDataT( |
6064 | 0 | static_cast<const uint32_t *>(pBuffer), |
6065 | 0 | static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)), |
6066 | 0 | nWidth, nHeight, nLineStride, nComponents); |
6067 | 0 | } |
6068 | 0 | if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT) |
6069 | 0 | { |
6070 | 0 | return GDALIsValueInRange<uint64_t>(dfNoDataValue) && |
6071 | 0 | HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer), |
6072 | 0 | static_cast<uint64_t>(dfNoDataValue), nWidth, |
6073 | 0 | nHeight, nLineStride, nComponents); |
6074 | 0 | } |
6075 | 0 | if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT) |
6076 | 0 | { |
6077 | | // Use unsigned implementation by converting the nodatavalue to |
6078 | | // unsigned |
6079 | 0 | return GDALIsValueInRange<int64_t>(dfNoDataValue) && |
6080 | 0 | HasOnlyNoDataT( |
6081 | 0 | static_cast<const uint64_t *>(pBuffer), |
6082 | 0 | static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)), |
6083 | 0 | nWidth, nHeight, nLineStride, nComponents); |
6084 | 0 | } |
6085 | 0 | if (nBitsPerSample == 16 && nSampleFormat == GSF_FLOATING_POINT) |
6086 | 0 | { |
6087 | 0 | return (std::isnan(dfNoDataValue) || |
6088 | 0 | GDALIsValueInRange<GFloat16>(dfNoDataValue)) && |
6089 | 0 | HasOnlyNoDataT(static_cast<const GFloat16 *>(pBuffer), |
6090 | 0 | static_cast<GFloat16>(dfNoDataValue), nWidth, |
6091 | 0 | nHeight, nLineStride, nComponents); |
6092 | 0 | } |
6093 | 0 | if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT) |
6094 | 0 | { |
6095 | 0 | return (std::isnan(dfNoDataValue) || |
6096 | 0 | GDALIsValueInRange<float>(dfNoDataValue)) && |
6097 | 0 | HasOnlyNoDataT(static_cast<const float *>(pBuffer), |
6098 | 0 | static_cast<float>(dfNoDataValue), nWidth, |
6099 | 0 | nHeight, nLineStride, nComponents); |
6100 | 0 | } |
6101 | 0 | if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT) |
6102 | 0 | { |
6103 | 0 | return HasOnlyNoDataT(static_cast<const double *>(pBuffer), |
6104 | 0 | dfNoDataValue, nWidth, nHeight, nLineStride, |
6105 | 0 | nComponents); |
6106 | 0 | } |
6107 | 0 | return false; |
6108 | 0 | } |
6109 | | |
6110 | | #ifdef HAVE_SSE2 |
6111 | | |
6112 | | /************************************************************************/ |
6113 | | /* GDALDeinterleave3Byte() */ |
6114 | | /************************************************************************/ |
6115 | | |
6116 | | #if defined(__GNUC__) && !defined(__clang__) |
6117 | | __attribute__((optimize("no-tree-vectorize"))) |
6118 | | #endif |
6119 | | static void |
6120 | | GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc, |
6121 | | GByte *CPL_RESTRICT pabyDest0, |
6122 | | GByte *CPL_RESTRICT pabyDest1, |
6123 | | GByte *CPL_RESTRICT pabyDest2, size_t nIters) |
6124 | | #ifdef USE_NEON_OPTIMIZATIONS |
6125 | | { |
6126 | | return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
6127 | | nIters); |
6128 | | } |
6129 | | #else |
6130 | 0 | { |
6131 | 0 | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
6132 | 0 | if (CPLHaveRuntimeSSSE3()) |
6133 | 0 | { |
6134 | 0 | return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, |
6135 | 0 | pabyDest2, nIters); |
6136 | 0 | } |
6137 | 0 | #endif |
6138 | | |
6139 | 0 | size_t i = 0; |
6140 | 0 | if (((reinterpret_cast<uintptr_t>(pabySrc) | |
6141 | 0 | reinterpret_cast<uintptr_t>(pabyDest0) | |
6142 | 0 | reinterpret_cast<uintptr_t>(pabyDest1) | |
6143 | 0 | reinterpret_cast<uintptr_t>(pabyDest2)) % |
6144 | 0 | sizeof(unsigned int)) == 0) |
6145 | 0 | { |
6146 | | // Slightly better than GCC autovectorizer |
6147 | 0 | for (size_t j = 0; i + 3 < nIters; i += 4, ++j) |
6148 | 0 | { |
6149 | 0 | unsigned int word0 = |
6150 | 0 | *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i); |
6151 | 0 | unsigned int word1 = |
6152 | 0 | *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4); |
6153 | 0 | unsigned int word2 = |
6154 | 0 | *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8); |
6155 | 0 | reinterpret_cast<unsigned int *>(pabyDest0)[j] = |
6156 | 0 | (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) | |
6157 | 0 | ((word2 >> 8) << 24); |
6158 | 0 | reinterpret_cast<unsigned int *>(pabyDest1)[j] = |
6159 | 0 | ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) | |
6160 | 0 | (((word1 >> 24)) << 16) | ((word2 >> 16) << 24); |
6161 | 0 | pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16); |
6162 | 0 | pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8); |
6163 | 0 | pabyDest2[j * 4 + 2] = static_cast<GByte>(word2); |
6164 | 0 | pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24); |
6165 | 0 | } |
6166 | 0 | } |
6167 | 0 | #if defined(__clang__) |
6168 | 0 | #pragma clang loop vectorize(disable) |
6169 | 0 | #endif |
6170 | 0 | for (; i < nIters; ++i) |
6171 | 0 | { |
6172 | 0 | pabyDest0[i] = pabySrc[3 * i + 0]; |
6173 | 0 | pabyDest1[i] = pabySrc[3 * i + 1]; |
6174 | 0 | pabyDest2[i] = pabySrc[3 * i + 2]; |
6175 | 0 | } |
6176 | 0 | } |
6177 | | #endif |
6178 | | |
6179 | | /************************************************************************/ |
6180 | | /* GDALDeinterleave4Byte() */ |
6181 | | /************************************************************************/ |
6182 | | |
6183 | | #if !defined(__GNUC__) || defined(__clang__) |
6184 | | |
6185 | | /************************************************************************/ |
6186 | | /* deinterleave() */ |
6187 | | /************************************************************************/ |
6188 | | |
/** Gather one byte from each 32-bit lane of four SSE2 registers into a single
 * register of 16 bytes.
 *
 * The four input registers are taken by reference because, when SHIFT is
 * true, they are updated in place: successive calls therefore walk through
 * the successive bytes of each 32-bit lane.
 *
 * @tparam SHIFT If true, every 32-bit lane of the four inputs is first
 *               logically shifted right by 8 bits (in place).
 * @tparam MASK  If true, only the low 8 bits of each lane are kept before
 *               packing. If false, the lanes are packed as they are.
 * @return the 16 packed bytes, lanes of xmm0_ori first, xmm3_ori last.
 */
template <bool SHIFT, bool MASK>
inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
                            __m128i &xmm2_ori, __m128i &xmm3_ori)
{
    if (SHIFT)
    {
        // Bring the next byte of each int32 lane into the low 8 bits
        xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
        xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
        xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
        xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
    }

    // When masking, set the higher 24 bits of each int32 lane to 0
    const __m128i lowByteMask = _mm_set1_epi32(0xff);
    const __m128i lanes0 =
        MASK ? _mm_and_si128(xmm0_ori, lowByteMask) : xmm0_ori;
    const __m128i lanes1 =
        MASK ? _mm_and_si128(xmm1_ori, lowByteMask) : xmm1_ori;
    const __m128i lanes2 =
        MASK ? _mm_and_si128(xmm2_ori, lowByteMask) : xmm2_ori;
    const __m128i lanes3 =
        MASK ? _mm_and_si128(xmm3_ori, lowByteMask) : xmm3_ori;

    // Pack int32 to int16 ...
    const __m128i packed01 = _mm_packs_epi32(lanes0, lanes1);
    const __m128i packed23 = _mm_packs_epi32(lanes2, lanes3);
    // ... and then int16 to uint8
    return _mm_packus_epi16(packed01, packed23);
}
6227 | | |
6228 | | static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc, |
6229 | | GByte *CPL_RESTRICT pabyDest0, |
6230 | | GByte *CPL_RESTRICT pabyDest1, |
6231 | | GByte *CPL_RESTRICT pabyDest2, |
6232 | | GByte *CPL_RESTRICT pabyDest3, size_t nIters) |
6233 | | #ifdef USE_NEON_OPTIMIZATIONS |
6234 | | { |
6235 | | return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
6236 | | pabyDest3, nIters); |
6237 | | } |
6238 | | #else |
6239 | 0 | { |
6240 | 0 | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
6241 | 0 | if (CPLHaveRuntimeSSSE3()) |
6242 | 0 | { |
6243 | 0 | return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, |
6244 | 0 | pabyDest2, pabyDest3, nIters); |
6245 | 0 | } |
6246 | 0 | #endif |
6247 | | |
6248 | | // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to |
6249 | | // do something slightly better. |
6250 | 0 | size_t i = 0; |
6251 | 0 | for (; i + 15 < nIters; i += 16) |
6252 | 0 | { |
6253 | 0 | __m128i xmm0_ori = _mm_loadu_si128( |
6254 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0)); |
6255 | 0 | __m128i xmm1_ori = _mm_loadu_si128( |
6256 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16)); |
6257 | 0 | __m128i xmm2_ori = _mm_loadu_si128( |
6258 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32)); |
6259 | 0 | __m128i xmm3_ori = _mm_loadu_si128( |
6260 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48)); |
6261 | |
|
6262 | 0 | _mm_storeu_si128( |
6263 | 0 | reinterpret_cast<__m128i *>(pabyDest0 + i), |
6264 | 0 | deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
6265 | 0 | _mm_storeu_si128( |
6266 | 0 | reinterpret_cast<__m128i *>(pabyDest1 + i), |
6267 | 0 | deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
6268 | 0 | _mm_storeu_si128( |
6269 | 0 | reinterpret_cast<__m128i *>(pabyDest2 + i), |
6270 | 0 | deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
6271 | 0 | _mm_storeu_si128( |
6272 | 0 | reinterpret_cast<__m128i *>(pabyDest3 + i), |
6273 | 0 | deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
6274 | 0 | } |
6275 | |
|
6276 | 0 | #if defined(__clang__) |
6277 | 0 | #pragma clang loop vectorize(disable) |
6278 | 0 | #endif |
6279 | 0 | for (; i < nIters; ++i) |
6280 | 0 | { |
6281 | 0 | pabyDest0[i] = pabySrc[4 * i + 0]; |
6282 | 0 | pabyDest1[i] = pabySrc[4 * i + 1]; |
6283 | 0 | pabyDest2[i] = pabySrc[4 * i + 2]; |
6284 | 0 | pabyDest3[i] = pabySrc[4 * i + 3]; |
6285 | 0 | } |
6286 | 0 | } |
6287 | | #endif |
6288 | | #else |
6289 | | // GCC autovectorizer does an excellent job |
6290 | | __attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte( |
6291 | | const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0, |
6292 | | GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2, |
6293 | | GByte *CPL_RESTRICT pabyDest3, size_t nIters) |
6294 | | { |
6295 | | for (size_t i = 0; i < nIters; ++i) |
6296 | | { |
6297 | | pabyDest0[i] = pabySrc[4 * i + 0]; |
6298 | | pabyDest1[i] = pabySrc[4 * i + 1]; |
6299 | | pabyDest2[i] = pabySrc[4 * i + 2]; |
6300 | | pabyDest3[i] = pabySrc[4 * i + 3]; |
6301 | | } |
6302 | | } |
6303 | | #endif |
6304 | | |
6305 | | #else |
6306 | | |
6307 | | /************************************************************************/ |
6308 | | /* GDALDeinterleave3Byte() */ |
6309 | | /************************************************************************/ |
6310 | | |
6311 | | // TODO: Enabling below could help on non-Intel architectures where GCC knows |
6312 | | // how to auto-vectorize |
6313 | | // #if defined(__GNUC__) |
6314 | | //__attribute__((optimize("tree-vectorize"))) |
6315 | | // #endif |
6316 | | static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc, |
6317 | | GByte *CPL_RESTRICT pabyDest0, |
6318 | | GByte *CPL_RESTRICT pabyDest1, |
6319 | | GByte *CPL_RESTRICT pabyDest2, size_t nIters) |
6320 | | { |
6321 | | for (size_t i = 0; i < nIters; ++i) |
6322 | | { |
6323 | | pabyDest0[i] = pabySrc[3 * i + 0]; |
6324 | | pabyDest1[i] = pabySrc[3 * i + 1]; |
6325 | | pabyDest2[i] = pabySrc[3 * i + 2]; |
6326 | | } |
6327 | | } |
6328 | | |
6329 | | /************************************************************************/ |
6330 | | /* GDALDeinterleave4Byte() */ |
6331 | | /************************************************************************/ |
6332 | | |
6333 | | // TODO: Enabling below could help on non-Intel architectures where gcc knows |
6334 | | // how to auto-vectorize |
6335 | | // #if defined(__GNUC__) |
6336 | | //__attribute__((optimize("tree-vectorize"))) |
6337 | | // #endif |
6338 | | static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc, |
6339 | | GByte *CPL_RESTRICT pabyDest0, |
6340 | | GByte *CPL_RESTRICT pabyDest1, |
6341 | | GByte *CPL_RESTRICT pabyDest2, |
6342 | | GByte *CPL_RESTRICT pabyDest3, size_t nIters) |
6343 | | { |
6344 | | for (size_t i = 0; i < nIters; ++i) |
6345 | | { |
6346 | | pabyDest0[i] = pabySrc[4 * i + 0]; |
6347 | | pabyDest1[i] = pabySrc[4 * i + 1]; |
6348 | | pabyDest2[i] = pabySrc[4 * i + 2]; |
6349 | | pabyDest3[i] = pabySrc[4 * i + 3]; |
6350 | | } |
6351 | | } |
6352 | | |
6353 | | #endif |
6354 | | |
6355 | | /************************************************************************/ |
6356 | | /* GDALDeinterleave() */ |
6357 | | /************************************************************************/ |
6358 | | |
6359 | | /*! Copy values from a pixel-interleave buffer to multiple per-component |
6360 | | buffers. |
6361 | | |
6362 | | In pseudo-code |
6363 | | \verbatim |
6364 | | for(size_t i = 0; i < nIters; ++i) |
6365 | | for(int iComp = 0; iComp < nComponents; iComp++ ) |
6366 | | ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp] |
6367 | | \endverbatim |
6368 | | |
6369 | | The implementation is optimized for a few cases, like de-interleaving |
6370 | | of 3 or 4-components Byte buffers. |
6371 | | |
6372 | | \since GDAL 3.6 |
6373 | | */ |
6374 | | void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT, |
6375 | | int nComponents, void **ppDestBuffer, |
6376 | | GDALDataType eDestDT, size_t nIters) |
6377 | 0 | { |
6378 | 0 | if (eSourceDT == eDestDT) |
6379 | 0 | { |
6380 | 0 | if (eSourceDT == GDT_UInt8 || eSourceDT == GDT_Int8) |
6381 | 0 | { |
6382 | 0 | if (nComponents == 3) |
6383 | 0 | { |
6384 | 0 | const GByte *CPL_RESTRICT pabySrc = |
6385 | 0 | static_cast<const GByte *>(pSourceBuffer); |
6386 | 0 | GByte *CPL_RESTRICT pabyDest0 = |
6387 | 0 | static_cast<GByte *>(ppDestBuffer[0]); |
6388 | 0 | GByte *CPL_RESTRICT pabyDest1 = |
6389 | 0 | static_cast<GByte *>(ppDestBuffer[1]); |
6390 | 0 | GByte *CPL_RESTRICT pabyDest2 = |
6391 | 0 | static_cast<GByte *>(ppDestBuffer[2]); |
6392 | 0 | GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
6393 | 0 | nIters); |
6394 | 0 | return; |
6395 | 0 | } |
6396 | 0 | else if (nComponents == 4) |
6397 | 0 | { |
6398 | 0 | const GByte *CPL_RESTRICT pabySrc = |
6399 | 0 | static_cast<const GByte *>(pSourceBuffer); |
6400 | 0 | GByte *CPL_RESTRICT pabyDest0 = |
6401 | 0 | static_cast<GByte *>(ppDestBuffer[0]); |
6402 | 0 | GByte *CPL_RESTRICT pabyDest1 = |
6403 | 0 | static_cast<GByte *>(ppDestBuffer[1]); |
6404 | 0 | GByte *CPL_RESTRICT pabyDest2 = |
6405 | 0 | static_cast<GByte *>(ppDestBuffer[2]); |
6406 | 0 | GByte *CPL_RESTRICT pabyDest3 = |
6407 | 0 | static_cast<GByte *>(ppDestBuffer[3]); |
6408 | 0 | GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
6409 | 0 | pabyDest3, nIters); |
6410 | 0 | return; |
6411 | 0 | } |
6412 | 0 | } |
6413 | | #if ((defined(__GNUC__) && !defined(__clang__)) || \ |
6414 | | defined(__INTEL_CLANG_COMPILER)) && \ |
6415 | | defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME) |
6416 | | else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) && |
6417 | | CPLHaveRuntimeSSSE3()) |
6418 | | { |
6419 | | if (nComponents == 3) |
6420 | | { |
6421 | | const GUInt16 *CPL_RESTRICT panSrc = |
6422 | | static_cast<const GUInt16 *>(pSourceBuffer); |
6423 | | GUInt16 *CPL_RESTRICT panDest0 = |
6424 | | static_cast<GUInt16 *>(ppDestBuffer[0]); |
6425 | | GUInt16 *CPL_RESTRICT panDest1 = |
6426 | | static_cast<GUInt16 *>(ppDestBuffer[1]); |
6427 | | GUInt16 *CPL_RESTRICT panDest2 = |
6428 | | static_cast<GUInt16 *>(ppDestBuffer[2]); |
6429 | | GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1, |
6430 | | panDest2, nIters); |
6431 | | return; |
6432 | | } |
6433 | | #if !defined(__INTEL_CLANG_COMPILER) |
6434 | | // ICC autovectorizer doesn't do a good job, at least with icx |
6435 | | // 2022.1.0.20220316 |
6436 | | else if (nComponents == 4) |
6437 | | { |
6438 | | const GUInt16 *CPL_RESTRICT panSrc = |
6439 | | static_cast<const GUInt16 *>(pSourceBuffer); |
6440 | | GUInt16 *CPL_RESTRICT panDest0 = |
6441 | | static_cast<GUInt16 *>(ppDestBuffer[0]); |
6442 | | GUInt16 *CPL_RESTRICT panDest1 = |
6443 | | static_cast<GUInt16 *>(ppDestBuffer[1]); |
6444 | | GUInt16 *CPL_RESTRICT panDest2 = |
6445 | | static_cast<GUInt16 *>(ppDestBuffer[2]); |
6446 | | GUInt16 *CPL_RESTRICT panDest3 = |
6447 | | static_cast<GUInt16 *>(ppDestBuffer[3]); |
6448 | | GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1, |
6449 | | panDest2, panDest3, nIters); |
6450 | | return; |
6451 | | } |
6452 | | #endif |
6453 | | } |
6454 | | #endif |
6455 | 0 | } |
6456 | | |
6457 | 0 | const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT); |
6458 | 0 | const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT); |
6459 | 0 | for (int iComp = 0; iComp < nComponents; iComp++) |
6460 | 0 | { |
6461 | 0 | GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) + |
6462 | 0 | iComp * nSourceDTSize, |
6463 | 0 | eSourceDT, nComponents * nSourceDTSize, |
6464 | 0 | ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters); |
6465 | 0 | } |
6466 | 0 | } |
6467 | | |
6468 | | /************************************************************************/ |
6469 | | /* GDALTranspose2DSingleToSingle() */ |
6470 | | /************************************************************************/ |
6471 | | /** |
6472 | | * Transpose a 2D array of non-complex values, in an efficient (cache-oblivious) way. |
6473 | | * |
6474 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
6475 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
6476 | | * @param nSrcWidth Width of pSrc array. |
6477 | | * @param nSrcHeight Height of pSrc array. |
6478 | | */ |
6479 | | |
6480 | | template <class DST, class SRC> |
6481 | | void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc, |
6482 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
6483 | | size_t nSrcHeight) |
6484 | 0 | { |
6485 | 0 | constexpr size_t blocksize = 32; |
6486 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
6487 | 0 | { |
6488 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
6489 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
6490 | 0 | { |
6491 | | // transpose the block beginning at [i,j] |
6492 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
6493 | 0 | for (size_t k = i; k < max_k; ++k) |
6494 | 0 | { |
6495 | 0 | for (size_t l = j; l < max_l; ++l) |
6496 | 0 | { |
6497 | 0 | GDALCopyWord(pSrc[l + k * nSrcWidth], |
6498 | 0 | pDst[k + l * nSrcHeight]); |
6499 | 0 | } |
6500 | 0 | } |
6501 | 0 | } |
6502 | 0 | } |
6503 | 0 | } Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned char>(unsigned char const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, signed char>(signed char const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned short>(unsigned short const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned int>(unsigned int const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned long>(unsigned long const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, long>(long const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned char>(unsigned char const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, signed char>(signed char const*, signed char*, unsigned long, unsigned long) Unexecuted 
instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned short>(unsigned short const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned int>(unsigned int const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned long>(unsigned long const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, long>(long const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned char>(unsigned char const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, signed char>(signed char const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned short>(unsigned short const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned 
short, unsigned int>(unsigned int const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned long>(unsigned long const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, long>(long const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, float>(float const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, signed char>(signed char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void 
GDALTranspose2DSingleToSingle<short, long>(long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned char>(unsigned char const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, signed char>(signed char const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned short>(unsigned short const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned int>(unsigned int const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned long>(unsigned long const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, long>(long const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long) Unexecuted 
instantiation: void GDALTranspose2DSingleToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, signed char>(signed char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, long>(long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned char>(unsigned char const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, signed char>(signed char const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned short>(unsigned short const*, 
unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned int>(unsigned int const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned long>(unsigned long const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, long>(long const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned char>(unsigned char const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, signed char>(signed char const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned short>(unsigned short const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, short>(short const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned int>(unsigned int const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, int>(int const*, 
long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned long>(unsigned long const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, long>(long const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, float>(float const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, double>(double const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned long>(unsigned long const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, 
cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, signed char>(signed char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, long>(long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, float>(float const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void 
GDALTranspose2DSingleToSingle<double, signed char>(signed char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, long>(long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, double>(double const*, double*, unsigned long, unsigned long) |
6504 | | |
6505 | | /************************************************************************/ |
6506 | | /* GDALTranspose2DComplexToComplex() */ |
6507 | | /************************************************************************/ |
6508 | | /** |
6509 | | * Transpose a 2D array of complex values into an array of complex values, |
6510 | | * in an efficient way (the arrays are processed in tiles of 32x32 values to keep accesses cache-friendly). |
6511 | | * |
6512 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. Each value is stored as 2 consecutive SRC components. |
6513 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. Each value is stored as 2 consecutive DST components, each converted from SRC with GDALCopyWord(). |
6514 | | * @param nSrcWidth Width of pSrc array, in complex values. |
6515 | | * @param nSrcHeight Height of pSrc array, in complex values. |
6516 | | */ |
6517 | | template <class DST, class SRC> |
6518 | | void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc, |
6519 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
6520 | | size_t nSrcHeight) |
6521 | 0 | { |
6522 | 0 | constexpr size_t blocksize = 32; |
6523 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
6524 | 0 | { |
6525 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
6526 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
6527 | 0 | { |
6528 | | // transpose the tile whose top-left corner is source row i, column j (max_k / max_l clip it at the array edges) |
6529 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
6530 | 0 | for (size_t k = i; k < max_k; ++k) |
6531 | 0 | { |
6532 | 0 | for (size_t l = j; l < max_l; ++l) |
6533 | 0 | { |
6534 | 0 | GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0], |
6535 | 0 | pDst[2 * (k + l * nSrcHeight) + 0]); |
6536 | 0 | GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1], |
6537 | 0 | pDst[2 * (k + l * nSrcHeight) + 1]); |
6538 | 0 | } |
6539 | 0 | } |
6540 | 0 | } |
6541 | 0 | } |
6542 | 0 | } Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, double>(double const*, 
cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, float>(float const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, double>(double const*, double*, unsigned long, unsigned long) |
6543 | | |
6544 | | /************************************************************************/ |
6545 | | /* GDALTranspose2DComplexToSingle() */ |
6546 | | /************************************************************************/ |
6547 | | /** |
6548 | | * Transpose a 2D array of complex values into an array of non-complex values, |
6549 | | * in an efficient way (the arrays are processed in tiles of 32x32 values to keep accesses cache-friendly). |
6550 | | * Only the first component of each source pair is copied (presumably the real part - TODO confirm); the second component is never read. |
6551 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. Each value is stored as 2 consecutive SRC components. |
6552 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight, with one DST value per source value. |
6553 | | * @param nSrcWidth Width of pSrc array, in complex values. |
6554 | | * @param nSrcHeight Height of pSrc array, in complex values. |
6555 | | */ |
6556 | | template <class DST, class SRC> |
6557 | | void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc, |
6558 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
6559 | | size_t nSrcHeight) |
6560 | 0 | { |
6561 | 0 | constexpr size_t blocksize = 32; |
6562 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
6563 | 0 | { |
6564 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
6565 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
6566 | 0 | { |
6567 | | // transpose the tile whose top-left corner is source row i, column j (max_k / max_l clip it at the array edges) |
6568 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
6569 | 0 | for (size_t k = i; k < max_k; ++k) |
6570 | 0 | { |
6571 | 0 | for (size_t l = j; l < max_l; ++l) |
6572 | 0 | { |
6573 | 0 | GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0], |
6574 | 0 | pDst[k + l * nSrcHeight]); |
6575 | 0 | } |
6576 | 0 | } |
6577 | 0 | } |
6578 | 0 | } |
6579 | 0 | } Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, float>(float 
const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void 
GDALTranspose2DComplexToSingle<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, short>(short const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, int>(int const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, float>(float const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, double>(double const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, 
unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, float>(float const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, double>(double const*, double*, unsigned long, unsigned long) |
6580 | | |
6581 | | /************************************************************************/ |
6582 | | /* GDALTranspose2DSingleToComplex() */ |
6583 | | /************************************************************************/ |
6584 | | /** |
6585 | | * Transpose a 2D array of non-complex values into an array of complex values, |
6586 | | * in a efficient (cache-oblivious) way. |
6587 | | * |
6588 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
6589 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
6590 | | * @param nSrcWidth Width of pSrc array. |
6591 | | * @param nSrcHeight Height of pSrc array. |
6592 | | */ |
6593 | | template <class DST, class SRC> |
6594 | | void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc, |
6595 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
6596 | | size_t nSrcHeight) |
6597 | 0 | { |
6598 | 0 | constexpr size_t blocksize = 32; |
6599 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
6600 | 0 | { |
6601 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
6602 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
6603 | 0 | { |
6604 | | // transpose the block beginning at [i,j] |
6605 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
6606 | 0 | for (size_t k = i; k < max_k; ++k) |
6607 | 0 | { |
6608 | 0 | for (size_t l = j; l < max_l; ++l) |
6609 | 0 | { |
6610 | 0 | GDALCopyWord(pSrc[l + k * nSrcWidth], |
6611 | 0 | pDst[2 * (k + l * nSrcHeight) + 0]); |
6612 | 0 | pDst[2 * (k + l * nSrcHeight) + 1] = 0; |
6613 | 0 | } |
6614 | 0 | } |
6615 | 0 | } |
6616 | 0 | } |
6617 | 0 | } Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, signed char>(signed char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, long>(long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, signed char>(signed char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, 
short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, long>(long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned long>(unsigned long const*, 
cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, signed char>(signed char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, long>(long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, float>(float const*, float*, unsigned long, unsigned long) 
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, signed char>(signed char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, long>(long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, double>(double const*, double*, unsigned long, unsigned long) |
6618 | | |
6619 | | /************************************************************************/ |
6620 | | /* GDALTranspose2D() */ |
6621 | | /************************************************************************/ |
6622 | | |
6623 | | template <class DST, bool DST_IS_COMPLEX> |
6624 | | static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst, |
6625 | | size_t nSrcWidth, size_t nSrcHeight) |
6626 | 0 | { |
6627 | 0 | #define CALL_GDALTranspose2D_internal(SRC_TYPE) \ |
6628 | 0 | do \ |
6629 | 0 | { \ |
6630 | 0 | if constexpr (DST_IS_COMPLEX) \ |
6631 | 0 | { \ |
6632 | 0 | GDALTranspose2DSingleToComplex( \ |
6633 | 0 | static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \ |
6634 | 0 | nSrcHeight); \ |
6635 | 0 | } \ |
6636 | 0 | else \ |
6637 | 0 | { \ |
6638 | 0 | GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \ |
6639 | 0 | pDst, nSrcWidth, nSrcHeight); \ |
6640 | 0 | } \ |
6641 | 0 | } while (0) |
6642 | |
|
6643 | 0 | #define CALL_GDALTranspose2DComplex_internal(SRC_TYPE) \ |
6644 | 0 | do \ |
6645 | 0 | { \ |
6646 | 0 | if constexpr (DST_IS_COMPLEX) \ |
6647 | 0 | { \ |
6648 | 0 | GDALTranspose2DComplexToComplex( \ |
6649 | 0 | static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \ |
6650 | 0 | nSrcHeight); \ |
6651 | 0 | } \ |
6652 | 0 | else \ |
6653 | 0 | { \ |
6654 | 0 | GDALTranspose2DComplexToSingle( \ |
6655 | 0 | static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \ |
6656 | 0 | nSrcHeight); \ |
6657 | 0 | } \ |
6658 | 0 | } while (0) |
6659 | | |
6660 | | // clang-format off |
6661 | 0 | switch (eSrcType) |
6662 | 0 | { |
6663 | 0 | case GDT_UInt8: CALL_GDALTranspose2D_internal(uint8_t); break; |
6664 | 0 | case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t); break; |
6665 | 0 | case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t); break; |
6666 | 0 | case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t); break; |
6667 | 0 | case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t); break; |
6668 | 0 | case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t); break; |
6669 | 0 | case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t); break; |
6670 | 0 | case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t); break; |
6671 | 0 | case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16); break; |
6672 | 0 | case GDT_Float32: CALL_GDALTranspose2D_internal(float); break; |
6673 | 0 | case GDT_Float64: CALL_GDALTranspose2D_internal(double); break; |
6674 | 0 | case GDT_CInt16: CALL_GDALTranspose2DComplex_internal(int16_t); break; |
6675 | 0 | case GDT_CInt32: CALL_GDALTranspose2DComplex_internal(int32_t); break; |
6676 | 0 | case GDT_CFloat16: CALL_GDALTranspose2DComplex_internal(GFloat16); break; |
6677 | 0 | case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break; |
6678 | 0 | case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break; |
6679 | 0 | case GDT_Unknown: |
6680 | 0 | case GDT_TypeCount: |
6681 | 0 | break; |
6682 | 0 | } |
6683 | | // clang-format on |
6684 | |
|
6685 | 0 | #undef CALL_GDALTranspose2D_internal |
6686 | 0 | #undef CALL_GDALTranspose2DComplex_internal |
6687 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned char, false>(void const*, GDALDataType, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<signed char, false>(void const*, GDALDataType, signed char*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned short, false>(void const*, GDALDataType, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, false>(void const*, GDALDataType, short*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned int, false>(void const*, GDALDataType, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, false>(void const*, GDALDataType, int*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned long, false>(void const*, GDALDataType, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<long, false>(void const*, GDALDataType, long*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, false>(void const*, GDALDataType, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, false>(void const*, GDALDataType, float*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, false>(void const*, GDALDataType, double*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, true>(void const*, GDALDataType, short*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, true>(void const*, GDALDataType, int*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, true>(void const*, 
GDALDataType, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, true>(void const*, GDALDataType, float*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, true>(void const*, GDALDataType, double*, unsigned long, unsigned long) |
6688 | | |
6689 | | /************************************************************************/ |
6690 | | /* GDALInterleave2Byte() */ |
6691 | | /************************************************************************/ |
6692 | | |
6693 | | #if defined(HAVE_SSE2) && \ |
6694 | | (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER)) |
6695 | | |
6696 | | // ICC autovectorizer doesn't do a good job at generating good SSE code, |
6697 | | // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop. |
6698 | | #if defined(__GNUC__) |
6699 | | __attribute__((noinline)) |
6700 | | #endif |
6701 | | static void |
6702 | | GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc, |
6703 | | uint8_t *CPL_RESTRICT pDst, size_t nIters) |
6704 | | { |
6705 | | size_t i = 0; |
6706 | | constexpr size_t VALS_PER_ITER = 16; |
6707 | | for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER) |
6708 | | { |
6709 | | __m128i xmm0 = |
6710 | | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i)); |
6711 | | __m128i xmm1 = _mm_loadu_si128( |
6712 | | reinterpret_cast<__m128i const *>(pSrc + i + nIters)); |
6713 | | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i), |
6714 | | _mm_unpacklo_epi8(xmm0, xmm1)); |
6715 | | _mm_storeu_si128( |
6716 | | reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER), |
6717 | | _mm_unpackhi_epi8(xmm0, xmm1)); |
6718 | | } |
6719 | | #if defined(__clang__) |
6720 | | #pragma clang loop vectorize(disable) |
6721 | | #endif |
6722 | | for (; i < nIters; ++i) |
6723 | | { |
6724 | | pDst[2 * i + 0] = pSrc[i + 0 * nIters]; |
6725 | | pDst[2 * i + 1] = pSrc[i + 1 * nIters]; |
6726 | | } |
6727 | | } |
6728 | | |
6729 | | #else |
6730 | | |
6731 | | #if defined(__GNUC__) && !defined(__clang__) |
6732 | | __attribute__((optimize("tree-vectorize"))) |
6733 | | #endif |
6734 | | #if defined(__GNUC__) |
6735 | | __attribute__((noinline)) |
6736 | | #endif |
6737 | | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6738 | | // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning |
6739 | | #pragma clang diagnostic push |
6740 | | #pragma clang diagnostic ignored "-Wpass-failed" |
6741 | | #endif |
6742 | | static void |
6743 | | GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc, |
6744 | | uint8_t *CPL_RESTRICT pDst, size_t nIters) |
6745 | 0 | { |
6746 | 0 | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6747 | 0 | #pragma clang loop vectorize(enable) |
6748 | 0 | #endif |
6749 | 0 | for (size_t i = 0; i < nIters; ++i) |
6750 | 0 | { |
6751 | 0 | pDst[2 * i + 0] = pSrc[i + 0 * nIters]; |
6752 | 0 | pDst[2 * i + 1] = pSrc[i + 1 * nIters]; |
6753 | 0 | } |
6754 | 0 | } |
6755 | | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6756 | | #pragma clang diagnostic pop |
6757 | | #endif |
6758 | | |
6759 | | #endif |
6760 | | |
6761 | | /************************************************************************/ |
6762 | | /* GDALInterleave4Byte() */ |
6763 | | /************************************************************************/ |
6764 | | |
6765 | | #if defined(HAVE_SSE2) && \ |
6766 | | (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER)) |
6767 | | |
6768 | | // ICC autovectorizer doesn't do a good job at generating good SSE code, |
6769 | | // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop. |
6770 | | #if defined(__GNUC__) |
6771 | | __attribute__((noinline)) |
6772 | | #endif |
6773 | | static void |
6774 | | GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc, |
6775 | | uint8_t *CPL_RESTRICT pDst, size_t nIters) |
6776 | | { |
6777 | | size_t i = 0; |
6778 | | constexpr size_t VALS_PER_ITER = 16; |
6779 | | for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER) |
6780 | | { |
6781 | | __m128i xmm0 = _mm_loadu_si128( |
6782 | | reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters)); |
6783 | | __m128i xmm1 = _mm_loadu_si128( |
6784 | | reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters)); |
6785 | | __m128i xmm2 = _mm_loadu_si128( |
6786 | | reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters)); |
6787 | | __m128i xmm3 = _mm_loadu_si128( |
6788 | | reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters)); |
6789 | | auto tmp0 = _mm_unpacklo_epi8( |
6790 | | xmm0, |
6791 | | xmm1); // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...) |
6792 | | auto tmp1 = _mm_unpackhi_epi8( |
6793 | | xmm0, |
6794 | | xmm1); // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...) |
6795 | | auto tmp2 = _mm_unpacklo_epi8( |
6796 | | xmm2, |
6797 | | xmm3); // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...) |
6798 | | auto tmp3 = _mm_unpackhi_epi8( |
6799 | | xmm2, |
6800 | | xmm3); // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...) |
6801 | | auto tmp2_0 = _mm_unpacklo_epi16( |
6802 | | tmp0, |
6803 | | tmp2); // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...) |
6804 | | auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2); |
6805 | | auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3); |
6806 | | auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3); |
6807 | | _mm_storeu_si128( |
6808 | | reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER), |
6809 | | tmp2_0); |
6810 | | _mm_storeu_si128( |
6811 | | reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER), |
6812 | | tmp2_1); |
6813 | | _mm_storeu_si128( |
6814 | | reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER), |
6815 | | tmp2_2); |
6816 | | _mm_storeu_si128( |
6817 | | reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER), |
6818 | | tmp2_3); |
6819 | | } |
6820 | | #if defined(__clang__) |
6821 | | #pragma clang loop vectorize(disable) |
6822 | | #endif |
6823 | | for (; i < nIters; ++i) |
6824 | | { |
6825 | | pDst[4 * i + 0] = pSrc[i + 0 * nIters]; |
6826 | | pDst[4 * i + 1] = pSrc[i + 1 * nIters]; |
6827 | | pDst[4 * i + 2] = pSrc[i + 2 * nIters]; |
6828 | | pDst[4 * i + 3] = pSrc[i + 3 * nIters]; |
6829 | | } |
6830 | | } |
6831 | | |
6832 | | #else |
6833 | | |
6834 | | #if defined(__GNUC__) && !defined(__clang__) |
6835 | | __attribute__((optimize("tree-vectorize"))) |
6836 | | #endif |
6837 | | #if defined(__GNUC__) |
6838 | | __attribute__((noinline)) |
6839 | | #endif |
6840 | | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6841 | | // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning |
6842 | | #pragma clang diagnostic push |
6843 | | #pragma clang diagnostic ignored "-Wpass-failed" |
6844 | | #endif |
6845 | | static void |
6846 | | GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc, |
6847 | | uint8_t *CPL_RESTRICT pDst, size_t nIters) |
6848 | 0 | { |
6849 | 0 | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6850 | 0 | #pragma clang loop vectorize(enable) |
6851 | 0 | #endif |
6852 | 0 | for (size_t i = 0; i < nIters; ++i) |
6853 | 0 | { |
6854 | 0 | pDst[4 * i + 0] = pSrc[i + 0 * nIters]; |
6855 | 0 | pDst[4 * i + 1] = pSrc[i + 1 * nIters]; |
6856 | 0 | pDst[4 * i + 2] = pSrc[i + 2 * nIters]; |
6857 | 0 | pDst[4 * i + 3] = pSrc[i + 3 * nIters]; |
6858 | 0 | } |
6859 | 0 | } |
6860 | | #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER) |
6861 | | #pragma clang diagnostic pop |
6862 | | #endif |
6863 | | |
6864 | | #endif |
6865 | | |
6866 | | /************************************************************************/ |
6867 | | /* GDALTranspose2D() */ |
6868 | | /************************************************************************/ |
6869 | | |
6870 | | /** |
6871 | | * Transpose a 2D array in a efficient (cache-oblivious) way. |
6872 | | * |
6873 | | * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight. |
6874 | | * @param eSrcType Data type of pSrc. |
6875 | | * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth. |
6876 | | * @param eDstType Data type of pDst. |
6877 | | * @param nSrcWidth Width of pSrc array. |
6878 | | * @param nSrcHeight Height of pSrc array. |
6879 | | * @since GDAL 3.11 |
6880 | | */ |
6881 | | |
6882 | | void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst, |
6883 | | GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight) |
6884 | 0 | { |
6885 | 0 | if (eSrcType == eDstType && (eSrcType == GDT_UInt8 || eSrcType == GDT_Int8)) |
6886 | 0 | { |
6887 | 0 | if (nSrcHeight == 2) |
6888 | 0 | { |
6889 | 0 | GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc), |
6890 | 0 | static_cast<uint8_t *>(pDst), nSrcWidth); |
6891 | 0 | return; |
6892 | 0 | } |
6893 | 0 | if (nSrcHeight == 4) |
6894 | 0 | { |
6895 | 0 | GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc), |
6896 | 0 | static_cast<uint8_t *>(pDst), nSrcWidth); |
6897 | 0 | return; |
6898 | 0 | } |
6899 | 0 | #if (defined(HAVE_SSSE3_AT_COMPILE_TIME) && \ |
6900 | 0 | (defined(__x86_64) || defined(_M_X64))) |
6901 | 0 | if (CPLHaveRuntimeSSSE3()) |
6902 | 0 | { |
6903 | 0 | GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc), |
6904 | 0 | static_cast<uint8_t *>(pDst), nSrcWidth, |
6905 | 0 | nSrcHeight); |
6906 | 0 | return; |
6907 | 0 | } |
6908 | | #elif defined(USE_NEON_OPTIMIZATIONS) |
6909 | | { |
6910 | | GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc), |
6911 | | static_cast<uint8_t *>(pDst), nSrcWidth, |
6912 | | nSrcHeight); |
6913 | | return; |
6914 | | } |
6915 | | #endif |
6916 | 0 | } |
6917 | | |
6918 | 0 | #define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX) \ |
6919 | 0 | GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>( \ |
6920 | 0 | pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight) |
6921 | | |
6922 | | // clang-format off |
6923 | 0 | switch (eDstType) |
6924 | 0 | { |
6925 | 0 | case GDT_UInt8: CALL_GDALTranspose2D_internal(uint8_t, false); break; |
6926 | 0 | case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t, false); break; |
6927 | 0 | case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t, false); break; |
6928 | 0 | case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t, false); break; |
6929 | 0 | case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t, false); break; |
6930 | 0 | case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t, false); break; |
6931 | 0 | case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t, false); break; |
6932 | 0 | case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t, false); break; |
6933 | 0 | case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16, false); break; |
6934 | 0 | case GDT_Float32: CALL_GDALTranspose2D_internal(float, false); break; |
6935 | 0 | case GDT_Float64: CALL_GDALTranspose2D_internal(double, false); break; |
6936 | 0 | case GDT_CInt16: CALL_GDALTranspose2D_internal(int16_t, true); break; |
6937 | 0 | case GDT_CInt32: CALL_GDALTranspose2D_internal(int32_t, true); break; |
6938 | 0 | case GDT_CFloat16: CALL_GDALTranspose2D_internal(GFloat16, true); break; |
6939 | 0 | case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break; |
6940 | 0 | case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break; |
6941 | 0 | case GDT_Unknown: |
6942 | 0 | case GDT_TypeCount: |
6943 | 0 | break; |
6944 | 0 | } |
6945 | | // clang-format on |
6946 | |
|
6947 | 0 | #undef CALL_GDALTranspose2D_internal |
6948 | 0 | } |
6949 | | |
6950 | | /************************************************************************/ |
6951 | | /* ExtractBitAndConvertTo255() */ |
6952 | | /************************************************************************/ |
6953 | | |
6954 | | #if defined(__GNUC__) || defined(_MSC_VER) |
6955 | | // Signedness of char implementation dependent, so be explicit. |
6956 | | // Assumes 2-complement integer types and sign extension of right shifting |
6957 | | // GCC guarantees such: |
6958 | | // https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation |
6959 | | static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit) |
6960 | 0 | { |
6961 | 0 | return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >> |
6962 | 0 | 7); |
6963 | 0 | } |
6964 | | #else |
6965 | | // Portable way |
6966 | | static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit) |
6967 | | { |
6968 | | return (byVal & (1 << nBit)) ? 255 : 0; |
6969 | | } |
6970 | | #endif |
6971 | | |
6972 | | /************************************************************************/ |
6973 | | /* ExpandEightPackedBitsToByteAt255() */ |
6974 | | /************************************************************************/ |
6975 | | |
6976 | | static inline void ExpandEightPackedBitsToByteAt255(GByte byVal, |
6977 | | GByte abyOutput[8]) |
6978 | 0 | { |
6979 | 0 | abyOutput[0] = ExtractBitAndConvertTo255(byVal, 7); |
6980 | 0 | abyOutput[1] = ExtractBitAndConvertTo255(byVal, 6); |
6981 | 0 | abyOutput[2] = ExtractBitAndConvertTo255(byVal, 5); |
6982 | 0 | abyOutput[3] = ExtractBitAndConvertTo255(byVal, 4); |
6983 | 0 | abyOutput[4] = ExtractBitAndConvertTo255(byVal, 3); |
6984 | 0 | abyOutput[5] = ExtractBitAndConvertTo255(byVal, 2); |
6985 | 0 | abyOutput[6] = ExtractBitAndConvertTo255(byVal, 1); |
6986 | 0 | abyOutput[7] = ExtractBitAndConvertTo255(byVal, 0); |
6987 | 0 | } |
6988 | | |
6989 | | /************************************************************************/ |
6990 | | /* GDALExpandPackedBitsToByteAt0Or255() */ |
6991 | | /************************************************************************/ |
6992 | | |
6993 | | /** Expand packed-bits (ordered from most-significant bit to least one) |
6994 | | into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit |
6995 | | at 1 to a byte at 255. |
6996 | | |
6997 | | The function does (in a possibly more optimized way) the following: |
6998 | | \code{.cpp} |
6999 | | for (size_t i = 0; i < nInputBits; ++i ) |
7000 | | { |
7001 | | pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0; |
7002 | | } |
7003 | | \endcode |
7004 | | |
7005 | | @param pabyInput Input array of (nInputBits + 7) / 8 bytes. |
7006 | | @param pabyOutput Output array of nInputBits bytes. |
7007 | | @param nInputBits Number of valid bits in pabyInput. |
7008 | | |
7009 | | @since 3.11 |
7010 | | */ |
7011 | | |
7012 | | void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput, |
7013 | | GByte *CPL_RESTRICT pabyOutput, |
7014 | | size_t nInputBits) |
7015 | 0 | { |
7016 | 0 | const size_t nInputWholeBytes = nInputBits / 8; |
7017 | 0 | size_t iByte = 0; |
7018 | |
|
7019 | 0 | #ifdef HAVE_SSE2 |
7020 | | // Mask to isolate each bit |
7021 | 0 | const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4, |
7022 | 0 | 8, 16, 32, 64, -128); |
7023 | 0 | const __m128i zero = _mm_setzero_si128(); |
7024 | 0 | const __m128i all_ones = _mm_set1_epi8(-1); |
7025 | | #ifdef __SSSE3__ |
7026 | | const __m128i dispatch_two_bytes = |
7027 | | _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); |
7028 | | #endif |
7029 | 0 | constexpr size_t SSE_REG_SIZE = sizeof(bit_mask); |
7030 | 0 | for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE) |
7031 | 0 | { |
7032 | 0 | __m128i reg_ori = _mm_loadu_si128( |
7033 | 0 | reinterpret_cast<const __m128i *>(pabyInput + iByte)); |
7034 | |
|
7035 | 0 | constexpr int NUM_PROCESSED_BYTES_PER_REG = 2; |
7036 | 0 | for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k) |
7037 | 0 | { |
7038 | | // Given reg_ori = (A, B, ... 14 other bytes ...), |
7039 | | // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B) |
7040 | | #ifdef __SSSE3__ |
7041 | | __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes); |
7042 | | #else |
7043 | 0 | __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori); |
7044 | 0 | reg = _mm_unpacklo_epi16(reg, reg); |
7045 | 0 | reg = _mm_unpacklo_epi32(reg, reg); |
7046 | 0 | #endif |
7047 | | |
7048 | | // Test if bits of interest are set |
7049 | 0 | reg = _mm_and_si128(reg, bit_mask); |
7050 | | |
7051 | | // Now test if those bits are set, by comparing to zero. So the |
7052 | | // result will be that bytes where bits are set will be at 0, and |
7053 | | // ones where they are cleared will be at 0xFF. So the inverse of |
7054 | | // the end result we want! |
7055 | 0 | reg = _mm_cmpeq_epi8(reg, zero); |
7056 | | |
7057 | | // Invert the result |
7058 | 0 | reg = _mm_andnot_si128(reg, all_ones); |
7059 | |
|
7060 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg); |
7061 | |
|
7062 | 0 | pabyOutput += SSE_REG_SIZE; |
7063 | | |
7064 | | // Right-shift of 2 bytes |
7065 | 0 | reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG); |
7066 | 0 | } |
7067 | 0 | } |
7068 | |
|
7069 | 0 | #endif // HAVE_SSE2 |
7070 | |
|
7071 | 0 | for (; iByte < nInputWholeBytes; ++iByte) |
7072 | 0 | { |
7073 | 0 | ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput); |
7074 | 0 | pabyOutput += 8; |
7075 | 0 | } |
7076 | 0 | for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit) |
7077 | 0 | { |
7078 | 0 | *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit); |
7079 | 0 | ++pabyOutput; |
7080 | 0 | } |
7081 | 0 | } |
7082 | | |
7083 | | /************************************************************************/ |
7084 | | /* ExpandEightPackedBitsToByteAt1() */ |
7085 | | /************************************************************************/ |
7086 | | |
7087 | | static inline void ExpandEightPackedBitsToByteAt1(GByte byVal, |
7088 | | GByte abyOutput[8]) |
7089 | 0 | { |
7090 | 0 | abyOutput[0] = (byVal >> 7) & 0x1; |
7091 | 0 | abyOutput[1] = (byVal >> 6) & 0x1; |
7092 | 0 | abyOutput[2] = (byVal >> 5) & 0x1; |
7093 | 0 | abyOutput[3] = (byVal >> 4) & 0x1; |
7094 | 0 | abyOutput[4] = (byVal >> 3) & 0x1; |
7095 | 0 | abyOutput[5] = (byVal >> 2) & 0x1; |
7096 | 0 | abyOutput[6] = (byVal >> 1) & 0x1; |
7097 | 0 | abyOutput[7] = (byVal >> 0) & 0x1; |
7098 | 0 | } |
7099 | | |
7100 | | /************************************************************************/ |
7101 | | /* GDALExpandPackedBitsToByteAt0Or1() */ |
7102 | | /************************************************************************/ |
7103 | | |
7104 | | /** Expand packed-bits (ordered from most-significant bit to least one) |
7105 | | into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit |
7106 | | at 1 to a byte at 1. |
7107 | | |
7108 | | The function does (in a possibly more optimized way) the following: |
7109 | | \code{.cpp} |
7110 | | for (size_t i = 0; i < nInputBits; ++i ) |
7111 | | { |
7112 | | pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0; |
7113 | | } |
7114 | | \endcode |
7115 | | |
7116 | | @param pabyInput Input array of (nInputBits + 7) / 8 bytes. |
7117 | | @param pabyOutput Output array of nInputBits bytes. |
7118 | | @param nInputBits Number of valid bits in pabyInput. |
7119 | | |
7120 | | @since 3.11 |
7121 | | */ |
7122 | | |
7123 | | void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput, |
7124 | | GByte *CPL_RESTRICT pabyOutput, |
7125 | | size_t nInputBits) |
7126 | 0 | { |
7127 | 0 | const size_t nInputWholeBytes = nInputBits / 8; |
7128 | 0 | size_t iByte = 0; |
7129 | 0 | for (; iByte < nInputWholeBytes; ++iByte) |
7130 | 0 | { |
7131 | 0 | ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput); |
7132 | 0 | pabyOutput += 8; |
7133 | 0 | } |
7134 | 0 | for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit) |
7135 | 0 | { |
7136 | 0 | *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1; |
7137 | 0 | ++pabyOutput; |
7138 | 0 | } |
7139 | 0 | } |