/src/gdal/gcore/rasterio.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: GDAL Core |
4 | | * Purpose: Contains default implementation of GDALRasterBand::IRasterIO() |
5 | | * and supporting functions of broader utility. |
6 | | * Author: Frank Warmerdam, warmerdam@pobox.com |
7 | | * |
8 | | ****************************************************************************** |
9 | | * Copyright (c) 1998, Frank Warmerdam |
10 | | * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com> |
11 | | * |
12 | | * SPDX-License-Identifier: MIT |
13 | | ****************************************************************************/ |
14 | | |
15 | | #include "cpl_port.h" |
16 | | #include "gdal.h" |
17 | | #include "gdal_priv.h" |
18 | | |
19 | | #include <cassert> |
20 | | #include <climits> |
21 | | #include <cmath> |
22 | | #include <cstddef> |
23 | | #include <cstdio> |
24 | | #include <cstdlib> |
25 | | #include <cstring> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <limits> |
29 | | #include <stdexcept> |
30 | | #include <type_traits> |
31 | | |
32 | | #include "cpl_conv.h" |
33 | | #include "cpl_cpu_features.h" |
34 | | #include "cpl_error.h" |
35 | | #include "cpl_float.h" |
36 | | #include "cpl_progress.h" |
37 | | #include "cpl_string.h" |
38 | | #include "cpl_vsi.h" |
39 | | #include "gdal_priv_templates.hpp" |
40 | | #include "gdal_vrt.h" |
41 | | #include "gdalwarper.h" |
42 | | #include "memdataset.h" |
43 | | #include "vrtdataset.h" |
44 | | |
45 | | #if defined(__x86_64) || defined(_M_X64) |
46 | | #include <emmintrin.h> |
47 | | #define HAVE_SSE2 |
48 | | #elif defined(USE_NEON_OPTIMIZATIONS) |
49 | | #include "include_sse2neon.h" |
50 | | #define HAVE_SSE2 |
51 | | #endif |
52 | | |
53 | | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
54 | | #include "rasterio_ssse3.h" |
55 | | #ifdef __SSSE3__ |
56 | | #include <tmmintrin.h> |
57 | | #endif |
58 | | #endif |
59 | | |
// Forward declaration of the strided byte copy helper, needed because
// DownsamplingIntegerXFactor() below calls it before its definition (the
// definition is not visible in this section of the file).
// As used by that caller: copies nWordCount bytes from pSrcData to pDstData,
// stepping the source by nSrcPixelStride and the destination by
// nDstPixelStride between successive bytes.
// NOTE(review): stride units are presumed to be bytes - confirm against the
// definition.
60 | | static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData, |
61 | | int nSrcPixelStride, GByte *CPL_RESTRICT pDstData, |
62 | | int nDstPixelStride, GPtrDiff_t nWordCount); |
63 | | |
64 | | /************************************************************************/ |
65 | | /* DownsamplingIntegerXFactor() */ |
66 | | /************************************************************************/ |
67 | | |
// Copies one output line of nBufXSize pixels into pabyDstData, picking single
// source pixels (no averaging) from the blocks of poBand located on block row
// nLBlockY. The source X position advances by nSrcXInc pixels and the
// destination pointer by nPixelSpace bytes for each output pixel.
//
// Template parameters:
//   bSameDataType   true when source and destination pixels are copied
//                   verbatim (memcpy / GDALFastCopyByte); false when
//                   GDALCopyWords64() converts from eDataType to eBufType.
//   DATA_TYPE_SIZE  pixel size in bytes, only used when bSameDataType is true
//                   (otherwise the size comes from
//                   GDALGetDataTypeSizeBytes(eDataType)).
//
// Parameters:
//   poBand         band used for GetLockedBlockRef() and GetXSize().
//   iSrcX          source X of the first pixel to sample.
//   nSrcXInc       source X step, in pixels, between output pixels.
//   iSrcOffsetCst  offset, in pixels, added to the in-block column before
//                  scaling by the pixel size (the visible caller passes the
//                  in-block row multiplied by nBlockXSize).
//   pabyDstData    destination of the first output pixel.
//   nPixelSpace    byte distance between output pixels.
//   nBufXSize      number of output pixels to produce.
//   eDataType      source data type (used when bSameDataType is false).
//   eBufType       destination data type (used when bSameDataType is false).
//   nStartBlockX   [in/out] X of the first column of the block held in
//                  poBlock; rewritten each time another block is loaded.
//   nBlockXSize    block width in pixels.
//   poBlock        [in/out] currently locked block. When another block is
//                  needed the previous lock is dropped and the new block is
//                  stored here. The lock on the last block used is NOT
//                  dropped by this function.
//   nLBlockY       block row index passed to GetLockedBlockRef().
//
// Returns true on success, false when GetLockedBlockRef() returns nullptr
// (poBlock is then nullptr).
68 | | template <bool bSameDataType, int DATA_TYPE_SIZE> |
69 | | static bool DownsamplingIntegerXFactor( |
70 | | GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst, |
71 | | GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize, |
72 | | GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX, |
73 | | int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY) |
74 | 0 | { |
75 | 0 | const int nBandDataSize = |
76 | 0 | bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType); |
// Number of output pixels still to produce after the current one. The value
// 0 is reserved for the final pixel, which is handled after the loop.
77 | 0 | int nOuterLoopIters = nBufXSize - 1; |
// Byte distance between two sampled source pixels inside a block row.
78 | 0 | const int nIncSrcOffset = nSrcXInc * nBandDataSize; |
// Deliberately left unassigned here: both gotos below reach the
// no_reload_block label, which assigns it, before any read.
79 | 0 | const GByte *CPL_RESTRICT pabySrcData; |
// One past the last column of the block described by nStartBlockX.
80 | 0 | int nEndBlockX = nBlockXSize + nStartBlockX; |
81 |

// Entry: the loop below is never entered from its top. Both branches jump
// into the middle of its body so that the first pixel is processed without
// the pointer/position advance done at the top of each iteration.
// Only the upper bound of the current block is tested here; the visible
// caller resets nStartBlockX to -nBlockXSize when a reload must be forced.
82 | 0 | if (iSrcX < nEndBlockX) |
83 | 0 | { |
84 | 0 | CPLAssert(poBlock); |
85 | 0 | goto no_reload_block; |
86 | 0 | } |
87 | 0 | goto reload_block; |
88 | | |
89 | | // Don't do the last iteration in the loop, as iSrcX might go beyond |
90 | | // nRasterXSize - 1 |
91 | 0 | while (--nOuterLoopIters >= 1) |
92 | 0 | { |
// Advance to the next sample. pabySrcData is recomputed below if the new
// position falls outside the current block.
93 | 0 | iSrcX += nSrcXInc; |
94 | 0 | pabySrcData += nIncSrcOffset; |
95 | 0 | pabyDstData += nPixelSpace; |
96 | | |
97 | | /* -------------------------------------------------------------------- |
98 | | */ |
99 | | /* Ensure we have the appropriate block loaded. */ |
100 | | /* -------------------------------------------------------------------- |
101 | | */ |
102 | 0 | if (iSrcX >= nEndBlockX) |
103 | 0 | { |
// Jump target: release the current block (if any) and lock the block that
// contains column iSrcX, updating the block bounds accordingly.
104 | 0 | reload_block: |
105 | 0 | { |
106 | 0 | const int nLBlockX = iSrcX / nBlockXSize; |
107 | 0 | nStartBlockX = nLBlockX * nBlockXSize; |
108 | 0 | nEndBlockX = nStartBlockX + nBlockXSize; |
109 |

110 | 0 | if (poBlock != nullptr) |
111 | 0 | poBlock->DropLock(); |
112 |

113 | 0 | poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE); |
114 | 0 | if (poBlock == nullptr) |
115 | 0 | { |
116 | 0 | return false; |
117 | 0 | } |
118 | 0 | } |
119 | | |
// Jump target: poBlock already covers iSrcX. Recompute the source pointer
// from the block data, the in-block column and the constant pixel offset.
120 | 0 | no_reload_block: |
121 | 0 | const GByte *pabySrcBlock = |
122 | 0 | static_cast<const GByte *>(poBlock->GetDataRef()); |
123 | 0 | GPtrDiff_t iSrcOffset = |
124 | 0 | (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize; |
125 | 0 | pabySrcData = pabySrcBlock + iSrcOffset; |
126 | 0 | } |
127 | | |
128 | | /* -------------------------------------------------------------------- |
129 | | */ |
130 | | /* Copy the maximum run of pixels. */ |
131 | | /* -------------------------------------------------------------------- |
132 | | */ |
133 | | |
// Number of samples that can be taken from the current block starting at
// iSrcX (rounded up division of the remaining block width by the step),
// capped by nOuterLoopIters so that the final pixel is left for the code
// after the loop. The cap makes nIters 0 when nOuterLoopIters is 0.
134 | 0 | const int nIters = std::min( |
135 | 0 | (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters); |
136 | 0 | if (bSameDataType) |
137 | 0 | { |
// The current pixel is always copied, even when nIters is 0.
138 | 0 | memcpy(pabyDstData, pabySrcData, nBandDataSize); |
139 | 0 | if (nIters > 1) |
140 | 0 | { |
141 | 0 | if (DATA_TYPE_SIZE == 1) |
142 | 0 | { |
// Byte case: copy the nIters - 1 remaining samples in one call. The
// pointers advance by 1 step before the call and nIters - 2 steps after
// it, i.e. nIters - 1 steps in total, so they end on the last pixel
// copied, as in the generic branch below.
143 | 0 | pabySrcData += nIncSrcOffset; |
144 | 0 | pabyDstData += nPixelSpace; |
145 | 0 | GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData, |
146 | 0 | nPixelSpace, nIters - 1); |
147 | 0 | pabySrcData += |
148 | 0 | static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2); |
149 | 0 | pabyDstData += |
150 | 0 | static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2); |
151 | 0 | } |
152 | 0 | else |
153 | 0 | { |
154 | 0 | for (int i = 0; i < nIters - 1; i++) |
155 | 0 | { |
156 | 0 | pabySrcData += nIncSrcOffset; |
157 | 0 | pabyDstData += nPixelSpace; |
158 | 0 | memcpy(pabyDstData, pabySrcData, nBandDataSize); |
159 | 0 | } |
160 | 0 | } |
// Account for the extra samples consumed in this pass.
161 | 0 | iSrcX += nSrcXInc * (nIters - 1); |
162 | 0 | nOuterLoopIters -= nIters - 1; |
163 | 0 | } |
164 | 0 | } |
165 | 0 | else |
166 | 0 | { |
167 | | // Type to type conversion ... |
// std::max(1, nIters): at least the current pixel must be converted, even
// when the cap above made nIters 0.
168 | 0 | GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData, |
169 | 0 | eBufType, nPixelSpace, std::max(1, nIters)); |
170 | 0 | if (nIters > 1) |
171 | 0 | { |
172 | 0 | pabySrcData += |
173 | 0 | static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1); |
174 | 0 | pabyDstData += |
175 | 0 | static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1); |
176 | 0 | iSrcX += nSrcXInc * (nIters - 1); |
177 | 0 | nOuterLoopIters -= nIters - 1; |
178 | 0 | } |
179 | 0 | } |
180 | 0 | } |
181 | | |
182 | | // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1 |
// nOuterLoopIters is exactly 0 when the loop exited with one output pixel
// left. Its source column is clamped to the last raster column, then control
// jumps back into the loop body to copy it. At the end of that pass the loop
// condition decrements the counter to -1, so the loop ends, this branch is
// skipped and the function returns true.
183 | 0 | if (nOuterLoopIters == 0) |
184 | 0 | { |
185 | 0 | const int nRasterXSize = poBand->GetXSize(); |
// The sum is done in 64 bits before clamping.
186 | 0 | iSrcX = |
187 | 0 | static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc, |
188 | 0 | static_cast<GInt64>(nRasterXSize - 1))); |
189 | 0 | pabyDstData += nPixelSpace; |
190 | 0 | if (iSrcX < nEndBlockX) |
191 | 0 | { |
192 | 0 | goto no_reload_block; |
193 | 0 | } |
194 | 0 | goto reload_block; |
195 | 0 | } |
196 | 0 | return true; |
197 | 0 | } Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 1>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 2>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 4>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 8>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<true, 16>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) Unexecuted instantiation: rasterio.cpp:bool DownsamplingIntegerXFactor<false, 0>(GDALRasterBand*, int, int, long long, unsigned char*, int, int, GDALDataType, GDALDataType, int&, int, GDALRasterBlock*&, int) |
198 | | |
199 | | template <class A, class B> |
200 | | CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW inline auto CPLUnsanitizedMul(A a, B b) |
201 | 0 | { |
202 | 0 | return a * b; |
203 | 0 | } |
204 | | |
205 | | /************************************************************************/ |
206 | | /* IRasterIO() */ |
207 | | /* */ |
208 | | /* Default internal implementation of RasterIO() ... utilizes */ |
209 | | /* the Block access methods to satisfy the request. This would */ |
210 | | /* normally only be overridden by formats with overviews. */ |
211 | | /************************************************************************/ |
212 | | |
213 | | CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff, |
214 | | int nXSize, int nYSize, void *pData, |
215 | | int nBufXSize, int nBufYSize, |
216 | | GDALDataType eBufType, GSpacing nPixelSpace, |
217 | | GSpacing nLineSpace, |
218 | | GDALRasterIOExtraArg *psExtraArg) |
219 | | |
220 | 0 | { |
221 | 0 | if (eRWFlag == GF_Write && eFlushBlockErr != CE_None) |
222 | 0 | { |
223 | 0 | CPLError(eFlushBlockErr, CPLE_AppDefined, |
224 | 0 | "An error occurred while writing a dirty block " |
225 | 0 | "from GDALRasterBand::IRasterIO"); |
226 | 0 | CPLErr eErr = eFlushBlockErr; |
227 | 0 | eFlushBlockErr = CE_None; |
228 | 0 | return eErr; |
229 | 0 | } |
230 | 0 | if (nBlockXSize <= 0 || nBlockYSize <= 0) |
231 | 0 | { |
232 | 0 | CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size"); |
233 | 0 | return CE_Failure; |
234 | 0 | } |
235 | | |
236 | 0 | const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType); |
237 | 0 | const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType); |
238 | 0 | GByte dummyBlock[2] = {0, 0}; |
239 | 0 | GByte *pabySrcBlock = |
240 | 0 | dummyBlock; /* to avoid Coverity warning about nullptr dereference */ |
241 | 0 | GDALRasterBlock *poBlock = nullptr; |
242 | 0 | const bool bUseIntegerRequestCoords = |
243 | 0 | (!psExtraArg->bFloatingPointWindowValidity || |
244 | 0 | (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff && |
245 | 0 | nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize)); |
246 | | |
247 | | /* ==================================================================== */ |
248 | | /* A common case is the data requested with the destination */ |
249 | | /* is packed, and the block width is the raster width. */ |
250 | | /* ==================================================================== */ |
251 | 0 | if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize && |
252 | 0 | nBlockXSize == GetXSize() && nBufXSize == nXSize && |
253 | 0 | nBufYSize == nYSize && bUseIntegerRequestCoords) |
254 | 0 | { |
255 | 0 | CPLErr eErr = CE_None; |
256 | 0 | int nLBlockY = -1; |
257 | |
|
258 | 0 | for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++) |
259 | 0 | { |
260 | 0 | const int iSrcY = iBufYOff + nYOff; |
261 | |
|
262 | 0 | if (iSrcY < nLBlockY * nBlockYSize || |
263 | 0 | iSrcY - nBlockYSize >= nLBlockY * nBlockYSize) |
264 | 0 | { |
265 | 0 | nLBlockY = iSrcY / nBlockYSize; |
266 | 0 | bool bJustInitialize = |
267 | 0 | eRWFlag == GF_Write && nXOff == 0 && |
268 | 0 | nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize && |
269 | 0 | nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize; |
270 | | |
271 | | // Is this a partial tile at right and/or bottom edges of |
272 | | // the raster, and that is going to be completely written? |
273 | | // If so, do not load it from storage, but zero it so that |
274 | | // the content outside of the validity area is initialized. |
275 | 0 | bool bMemZeroBuffer = false; |
276 | 0 | if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 && |
277 | 0 | nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize && |
278 | 0 | nYOff + nYSize == GetYSize() && |
279 | 0 | nLBlockY * nBlockYSize > GetYSize() - nBlockYSize) |
280 | 0 | { |
281 | 0 | bJustInitialize = true; |
282 | 0 | bMemZeroBuffer = true; |
283 | 0 | } |
284 | |
|
285 | 0 | if (poBlock) |
286 | 0 | poBlock->DropLock(); |
287 | |
|
288 | 0 | const GUInt32 nErrorCounter = CPLGetErrorCounter(); |
289 | 0 | poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize); |
290 | 0 | if (poBlock == nullptr) |
291 | 0 | { |
292 | 0 | if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == |
293 | 0 | nullptr) |
294 | 0 | { |
295 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
296 | 0 | "GetBlockRef failed at X block offset %d, " |
297 | 0 | "Y block offset %d%s", |
298 | 0 | 0, nLBlockY, |
299 | 0 | (nErrorCounter != CPLGetErrorCounter()) |
300 | 0 | ? CPLSPrintf(": %s", CPLGetLastErrorMsg()) |
301 | 0 | : ""); |
302 | 0 | } |
303 | 0 | eErr = CE_Failure; |
304 | 0 | break; |
305 | 0 | } |
306 | | |
307 | 0 | if (eRWFlag == GF_Write) |
308 | 0 | poBlock->MarkDirty(); |
309 | |
|
310 | 0 | pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef()); |
311 | 0 | if (bMemZeroBuffer) |
312 | 0 | { |
313 | 0 | memset(pabySrcBlock, 0, |
314 | 0 | static_cast<GPtrDiff_t>(nBandDataSize) * |
315 | 0 | nBlockXSize * nBlockYSize); |
316 | 0 | } |
317 | 0 | } |
318 | | |
319 | 0 | const auto nSrcByteOffset = |
320 | 0 | (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) * |
321 | 0 | nBlockXSize + |
322 | 0 | nXOff) * |
323 | 0 | nBandDataSize; |
324 | |
|
325 | 0 | if (eDataType == eBufType) |
326 | 0 | { |
327 | 0 | if (eRWFlag == GF_Read) |
328 | 0 | memcpy(static_cast<GByte *>(pData) + |
329 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace, |
330 | 0 | pabySrcBlock + nSrcByteOffset, |
331 | 0 | static_cast<size_t>(nLineSpace)); |
332 | 0 | else |
333 | 0 | memcpy(pabySrcBlock + nSrcByteOffset, |
334 | 0 | static_cast<GByte *>(pData) + |
335 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace, |
336 | 0 | static_cast<size_t>(nLineSpace)); |
337 | 0 | } |
338 | 0 | else |
339 | 0 | { |
340 | | // Type to type conversion. |
341 | 0 | if (eRWFlag == GF_Read) |
342 | 0 | GDALCopyWords64( |
343 | 0 | pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize, |
344 | 0 | static_cast<GByte *>(pData) + |
345 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace, |
346 | 0 | eBufType, static_cast<int>(nPixelSpace), nBufXSize); |
347 | 0 | else |
348 | 0 | GDALCopyWords64(static_cast<GByte *>(pData) + |
349 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * |
350 | 0 | nLineSpace, |
351 | 0 | eBufType, static_cast<int>(nPixelSpace), |
352 | 0 | pabySrcBlock + nSrcByteOffset, eDataType, |
353 | 0 | nBandDataSize, nBufXSize); |
354 | 0 | } |
355 | |
|
356 | 0 | if (psExtraArg->pfnProgress != nullptr && |
357 | 0 | !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "", |
358 | 0 | psExtraArg->pProgressData)) |
359 | 0 | { |
360 | 0 | eErr = CE_Failure; |
361 | 0 | break; |
362 | 0 | } |
363 | 0 | } |
364 | |
|
365 | 0 | if (poBlock) |
366 | 0 | poBlock->DropLock(); |
367 | |
|
368 | 0 | return eErr; |
369 | 0 | } |
370 | | |
371 | | /* ==================================================================== */ |
372 | | /* Do we have overviews that would be appropriate to satisfy */ |
373 | | /* this request? */ |
374 | | /* ==================================================================== */ |
375 | 0 | if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 && |
376 | 0 | eRWFlag == GF_Read) |
377 | 0 | { |
378 | 0 | GDALRasterIOExtraArg sExtraArg; |
379 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
380 | |
|
381 | 0 | const int nOverview = |
382 | 0 | GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize, |
383 | 0 | nBufXSize, nBufYSize, &sExtraArg); |
384 | 0 | if (nOverview >= 0) |
385 | 0 | { |
386 | 0 | GDALRasterBand *poOverviewBand = GetOverview(nOverview); |
387 | 0 | if (poOverviewBand == nullptr) |
388 | 0 | return CE_Failure; |
389 | | |
390 | 0 | return poOverviewBand->RasterIO( |
391 | 0 | eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize, |
392 | 0 | nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg); |
393 | 0 | } |
394 | 0 | } |
395 | | |
396 | 0 | if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 && |
397 | 0 | nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize && |
398 | 0 | nLineSpace == nPixelSpace * nBufXSize && |
399 | 0 | CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO"))) |
400 | 0 | { |
401 | 0 | memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize)); |
402 | 0 | return CE_None; |
403 | 0 | } |
404 | | |
405 | | /* ==================================================================== */ |
406 | | /* The second case when we don't need subsample data but likely */ |
407 | | /* need data type conversion. */ |
408 | | /* ==================================================================== */ |
409 | 0 | if ( // nPixelSpace == nBufDataSize && |
410 | 0 | nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords) |
411 | 0 | { |
412 | | #if DEBUG_VERBOSE |
413 | | printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/ |
414 | | nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag)); |
415 | | #endif |
416 | | |
417 | | /* -------------------------------------------------------------------- |
418 | | */ |
419 | | /* Loop over buffer computing source locations. */ |
420 | | /* -------------------------------------------------------------------- |
421 | | */ |
422 | | // Calculate starting values out of loop |
423 | 0 | const int nLBlockXStart = nXOff / nBlockXSize; |
424 | 0 | const int nXSpanEnd = nBufXSize + nXOff; |
425 | |
|
426 | 0 | int nYInc = 0; |
427 | 0 | for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize; |
428 | 0 | iBufYOff += nYInc, iSrcY += nYInc) |
429 | 0 | { |
430 | 0 | GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * |
431 | 0 | static_cast<GPtrDiff_t>(nLineSpace); |
432 | 0 | int nLBlockY = iSrcY / nBlockYSize; |
433 | 0 | int nLBlockX = nLBlockXStart; |
434 | 0 | int iSrcX = nXOff; |
435 | 0 | while (iSrcX < nXSpanEnd) |
436 | 0 | { |
437 | 0 | int nXSpan = nLBlockX * nBlockXSize; |
438 | 0 | if (nXSpan < INT_MAX - nBlockXSize) |
439 | 0 | nXSpan += nBlockXSize; |
440 | 0 | else |
441 | 0 | nXSpan = INT_MAX; |
442 | 0 | const int nXRight = nXSpan; |
443 | 0 | nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX; |
444 | |
|
445 | 0 | const size_t nXSpanSize = |
446 | 0 | CPLUnsanitizedMul(nXSpan, static_cast<size_t>(nPixelSpace)); |
447 | |
|
448 | 0 | bool bJustInitialize = |
449 | 0 | eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize && |
450 | 0 | nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize && |
451 | 0 | nXOff <= nLBlockX * nBlockXSize && |
452 | 0 | nXOff + nXSize >= nXRight; |
453 | | |
454 | | // Is this a partial tile at right and/or bottom edges of |
455 | | // the raster, and that is going to be completely written? |
456 | | // If so, do not load it from storage, but zero it so that |
457 | | // the content outside of the validity area is initialized. |
458 | 0 | bool bMemZeroBuffer = false; |
459 | 0 | if (eRWFlag == GF_Write && !bJustInitialize && |
460 | 0 | nXOff <= nLBlockX * nBlockXSize && |
461 | 0 | nYOff <= nLBlockY * nBlockYSize && |
462 | 0 | (nXOff + nXSize >= nXRight || |
463 | | // cppcheck-suppress knownConditionTrueFalse |
464 | 0 | (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) && |
465 | 0 | (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize || |
466 | 0 | (nYOff + nYSize == GetYSize() && |
467 | 0 | nLBlockY * nBlockYSize > GetYSize() - nBlockYSize))) |
468 | 0 | { |
469 | 0 | bJustInitialize = true; |
470 | 0 | bMemZeroBuffer = true; |
471 | 0 | } |
472 | | |
473 | | /* -------------------------------------------------------------------- |
474 | | */ |
475 | | /* Ensure we have the appropriate block loaded. */ |
476 | | /* -------------------------------------------------------------------- |
477 | | */ |
478 | 0 | const GUInt32 nErrorCounter = CPLGetErrorCounter(); |
479 | 0 | poBlock = |
480 | 0 | GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize); |
481 | 0 | if (!poBlock) |
482 | 0 | { |
483 | 0 | if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") == |
484 | 0 | nullptr) |
485 | 0 | { |
486 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
487 | 0 | "GetBlockRef failed at X block offset %d, " |
488 | 0 | "Y block offset %d%s", |
489 | 0 | nLBlockX, nLBlockY, |
490 | 0 | (nErrorCounter != CPLGetErrorCounter()) |
491 | 0 | ? CPLSPrintf(": %s", CPLGetLastErrorMsg()) |
492 | 0 | : ""); |
493 | 0 | } |
494 | 0 | return (CE_Failure); |
495 | 0 | } |
496 | | |
497 | 0 | if (eRWFlag == GF_Write) |
498 | 0 | poBlock->MarkDirty(); |
499 | |
|
500 | 0 | pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef()); |
501 | 0 | if (bMemZeroBuffer) |
502 | 0 | { |
503 | 0 | memset(pabySrcBlock, 0, |
504 | 0 | static_cast<GPtrDiff_t>(nBandDataSize) * |
505 | 0 | nBlockXSize * nBlockYSize); |
506 | 0 | } |
507 | | /* -------------------------------------------------------------------- |
508 | | */ |
509 | | /* Copy over this chunk of data. */ |
510 | | /* -------------------------------------------------------------------- |
511 | | */ |
512 | 0 | GPtrDiff_t iSrcOffset = |
513 | 0 | (static_cast<GPtrDiff_t>(iSrcX) - |
514 | 0 | static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) + |
515 | 0 | (static_cast<GPtrDiff_t>(iSrcY) - |
516 | 0 | static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) * |
517 | 0 | nBlockXSize) * |
518 | 0 | nBandDataSize; |
519 | | // Fill up as many rows as possible for the loaded block. |
520 | 0 | const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize), |
521 | 0 | nBufYSize - iBufYOff); |
522 | 0 | for (int k = 0; k < kmax; k++) |
523 | 0 | { |
524 | 0 | if (eDataType == eBufType && nPixelSpace == nBufDataSize) |
525 | 0 | { |
526 | 0 | if (eRWFlag == GF_Read) |
527 | 0 | memcpy(static_cast<GByte *>(pData) + iBufOffset + |
528 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
529 | 0 | pabySrcBlock + iSrcOffset, nXSpanSize); |
530 | 0 | else |
531 | 0 | memcpy(pabySrcBlock + iSrcOffset, |
532 | 0 | static_cast<GByte *>(pData) + iBufOffset + |
533 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
534 | 0 | nXSpanSize); |
535 | 0 | } |
536 | 0 | else |
537 | 0 | { |
538 | | /* type to type conversion */ |
539 | 0 | if (eRWFlag == GF_Read) |
540 | 0 | GDALCopyWords64( |
541 | 0 | pabySrcBlock + iSrcOffset, eDataType, |
542 | 0 | nBandDataSize, |
543 | 0 | static_cast<GByte *>(pData) + iBufOffset + |
544 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
545 | 0 | eBufType, static_cast<int>(nPixelSpace), |
546 | 0 | nXSpan); |
547 | 0 | else |
548 | 0 | GDALCopyWords64( |
549 | 0 | static_cast<GByte *>(pData) + iBufOffset + |
550 | 0 | static_cast<GPtrDiff_t>(k) * nLineSpace, |
551 | 0 | eBufType, static_cast<int>(nPixelSpace), |
552 | 0 | pabySrcBlock + iSrcOffset, eDataType, |
553 | 0 | nBandDataSize, nXSpan); |
554 | 0 | } |
555 | |
|
556 | 0 | iSrcOffset += |
557 | 0 | static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize; |
558 | 0 | } |
559 | |
|
560 | 0 | iBufOffset = |
561 | 0 | CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize); |
562 | 0 | nLBlockX++; |
563 | 0 | iSrcX += nXSpan; |
564 | |
|
565 | 0 | poBlock->DropLock(); |
566 | 0 | poBlock = nullptr; |
567 | 0 | } |
568 | | |
569 | | /* Compute the increment to go on a block boundary */ |
570 | 0 | nYInc = nBlockYSize - (iSrcY % nBlockYSize); |
571 | |
|
572 | 0 | if (psExtraArg->pfnProgress != nullptr && |
573 | 0 | !psExtraArg->pfnProgress( |
574 | 0 | 1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "", |
575 | 0 | psExtraArg->pProgressData)) |
576 | 0 | { |
577 | 0 | return CE_Failure; |
578 | 0 | } |
579 | 0 | } |
580 | | |
581 | 0 | return CE_None; |
582 | 0 | } |
583 | | |
584 | | /* ==================================================================== */ |
585 | | /* Loop reading required source blocks to satisfy output */ |
586 | | /* request. This is the most general implementation. */ |
587 | | /* ==================================================================== */ |
588 | | |
589 | 0 | double dfXOff = nXOff; |
590 | 0 | double dfYOff = nYOff; |
591 | 0 | double dfXSize = nXSize; |
592 | 0 | double dfYSize = nYSize; |
593 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
594 | 0 | { |
595 | 0 | dfXOff = psExtraArg->dfXOff; |
596 | 0 | dfYOff = psExtraArg->dfYOff; |
597 | 0 | dfXSize = psExtraArg->dfXSize; |
598 | 0 | dfYSize = psExtraArg->dfYSize; |
599 | 0 | } |
600 | | |
601 | | /* -------------------------------------------------------------------- */ |
602 | | /* Compute stepping increment. */ |
603 | | /* -------------------------------------------------------------------- */ |
604 | 0 | const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize); |
605 | 0 | const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize); |
606 | 0 | CPLErr eErr = CE_None; |
607 | |
|
608 | 0 | if (eRWFlag == GF_Write) |
609 | 0 | { |
610 | | /* -------------------------------------------------------------------- |
611 | | */ |
612 | | /* Write case */ |
613 | | /* Loop over raster window computing source locations in the buffer. |
614 | | */ |
615 | | /* -------------------------------------------------------------------- |
616 | | */ |
617 | 0 | GByte *pabyDstBlock = nullptr; |
618 | 0 | int nLBlockX = -1; |
619 | 0 | int nLBlockY = -1; |
620 | |
|
621 | 0 | for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++) |
622 | 0 | { |
623 | 0 | const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc); |
624 | |
|
625 | 0 | for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++) |
626 | 0 | { |
627 | 0 | const int iBufXOff = |
628 | 0 | static_cast<int>((iDstX - nXOff) / dfSrcXInc); |
629 | 0 | GPtrDiff_t iBufOffset = |
630 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * |
631 | 0 | static_cast<GPtrDiff_t>(nLineSpace) + |
632 | 0 | iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace); |
633 | | |
634 | | // FIXME: this code likely doesn't work if the dirty block gets |
635 | | // flushed to disk before being completely written. |
636 | | // In the meantime, bJustInitialize should probably be set to |
637 | | // FALSE even if it is not ideal performance wise, and for |
638 | | // lossy compression. |
639 | | |
640 | | /* -------------------------------------------------------------------- |
641 | | */ |
642 | | /* Ensure we have the appropriate block loaded. */ |
643 | | /* -------------------------------------------------------------------- |
644 | | */ |
645 | 0 | if (iDstX < nLBlockX * nBlockXSize || |
646 | 0 | iDstX - nBlockXSize >= nLBlockX * nBlockXSize || |
647 | 0 | iDstY < nLBlockY * nBlockYSize || |
648 | 0 | iDstY - nBlockYSize >= nLBlockY * nBlockYSize) |
649 | 0 | { |
650 | 0 | nLBlockX = iDstX / nBlockXSize; |
651 | 0 | nLBlockY = iDstY / nBlockYSize; |
652 | |
|
653 | 0 | const bool bJustInitialize = |
654 | 0 | nYOff <= nLBlockY * nBlockYSize && |
655 | 0 | nYOff + nYSize - nBlockYSize >= |
656 | 0 | nLBlockY * nBlockYSize && |
657 | 0 | nXOff <= nLBlockX * nBlockXSize && |
658 | 0 | nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize; |
659 | | /*bool bMemZeroBuffer = FALSE; |
660 | | if( !bJustInitialize && |
661 | | nXOff <= nLBlockX * nBlockXSize && |
662 | | nYOff <= nLBlockY * nBlockYSize && |
663 | | (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize || |
664 | | (nXOff + nXSize == GetXSize() && |
665 | | (nLBlockX+1) * nBlockXSize > GetXSize())) && |
666 | | (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize || |
667 | | (nYOff + nYSize == GetYSize() && |
668 | | (nLBlockY+1) * nBlockYSize > GetYSize())) ) |
669 | | { |
670 | | bJustInitialize = TRUE; |
671 | | bMemZeroBuffer = TRUE; |
672 | | }*/ |
673 | 0 | if (poBlock != nullptr) |
674 | 0 | poBlock->DropLock(); |
675 | |
|
676 | 0 | poBlock = |
677 | 0 | GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize); |
678 | 0 | if (poBlock == nullptr) |
679 | 0 | { |
680 | 0 | return (CE_Failure); |
681 | 0 | } |
682 | | |
683 | 0 | poBlock->MarkDirty(); |
684 | |
|
685 | 0 | pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef()); |
686 | | /*if( bMemZeroBuffer ) |
687 | | { |
688 | | memset(pabyDstBlock, 0, |
689 | | static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize |
690 | | * nBlockYSize); |
691 | | }*/ |
692 | 0 | } |
693 | | |
694 | | // To make Coverity happy. Should not happen by design. |
695 | 0 | if (pabyDstBlock == nullptr) |
696 | 0 | { |
697 | 0 | CPLAssert(false); |
698 | 0 | eErr = CE_Failure; |
699 | 0 | break; |
700 | 0 | } |
701 | | |
702 | | /* -------------------------------------------------------------------- |
703 | | */ |
704 | | /* Copy over this pixel of data. */ |
705 | | /* -------------------------------------------------------------------- |
706 | | */ |
707 | 0 | GPtrDiff_t iDstOffset = |
708 | 0 | (static_cast<GPtrDiff_t>(iDstX) - |
709 | 0 | static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize + |
710 | 0 | (static_cast<GPtrDiff_t>(iDstY) - |
711 | 0 | static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) * |
712 | 0 | nBlockXSize) * |
713 | 0 | nBandDataSize; |
714 | |
|
715 | 0 | if (eDataType == eBufType) |
716 | 0 | { |
717 | 0 | memcpy(pabyDstBlock + iDstOffset, |
718 | 0 | static_cast<GByte *>(pData) + iBufOffset, |
719 | 0 | nBandDataSize); |
720 | 0 | } |
721 | 0 | else |
722 | 0 | { |
723 | | /* type to type conversion ... ouch, this is expensive way |
724 | | of handling single words */ |
725 | 0 | GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset, |
726 | 0 | eBufType, 0, pabyDstBlock + iDstOffset, |
727 | 0 | eDataType, 0, 1); |
728 | 0 | } |
729 | 0 | } |
730 | | |
731 | 0 | if (psExtraArg->pfnProgress != nullptr && |
732 | 0 | !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "", |
733 | 0 | psExtraArg->pProgressData)) |
734 | 0 | { |
735 | 0 | eErr = CE_Failure; |
736 | 0 | break; |
737 | 0 | } |
738 | 0 | } |
739 | 0 | } |
740 | 0 | else |
741 | 0 | { |
742 | 0 | if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour) |
743 | 0 | { |
744 | 0 | if ((psExtraArg->eResampleAlg == GRIORA_Cubic || |
745 | 0 | psExtraArg->eResampleAlg == GRIORA_CubicSpline || |
746 | 0 | psExtraArg->eResampleAlg == GRIORA_Bilinear || |
747 | 0 | psExtraArg->eResampleAlg == GRIORA_Lanczos) && |
748 | 0 | GetColorTable() != nullptr) |
749 | 0 | { |
750 | 0 | CPLError(CE_Warning, CPLE_NotSupported, |
751 | 0 | "Resampling method not supported on paletted band. " |
752 | 0 | "Falling back to nearest neighbour"); |
753 | 0 | } |
754 | 0 | else if (psExtraArg->eResampleAlg == GRIORA_Gauss && |
755 | 0 | GDALDataTypeIsComplex(eDataType)) |
756 | 0 | { |
757 | 0 | CPLError(CE_Warning, CPLE_NotSupported, |
758 | 0 | "Resampling method not supported on complex data type " |
759 | 0 | "band. Falling back to nearest neighbour"); |
760 | 0 | } |
761 | 0 | else |
762 | 0 | { |
763 | 0 | return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
764 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
765 | 0 | nPixelSpace, nLineSpace, psExtraArg); |
766 | 0 | } |
767 | 0 | } |
768 | | |
769 | 0 | int nLimitBlockY = 0; |
770 | 0 | const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1; |
771 | 0 | int nStartBlockX = -nBlockXSize; |
772 | 0 | const double EPS = 1e-10; |
773 | 0 | int nLBlockY = -1; |
774 | 0 | const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS; |
775 | 0 | const bool bIntegerXFactor = |
776 | 0 | bUseIntegerRequestCoords && |
777 | 0 | static_cast<int>(dfSrcXInc) == dfSrcXInc && |
778 | 0 | static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize; |
779 | | |
780 | | /* -------------------------------------------------------------------- |
781 | | */ |
782 | | /* Read case */ |
783 | | /* Loop over buffer computing source locations. */ |
784 | | /* -------------------------------------------------------------------- |
785 | | */ |
786 | 0 | for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++) |
787 | 0 | { |
788 | | // Add small epsilon to avoid some numeric precision issues. |
789 | 0 | const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS; |
790 | 0 | const int iSrcY = static_cast<int>(std::min( |
791 | 0 | std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1))); |
792 | |
|
793 | 0 | GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * |
794 | 0 | static_cast<GPtrDiff_t>(nLineSpace); |
795 | |
|
796 | 0 | if (iSrcY >= nLimitBlockY) |
797 | 0 | { |
798 | 0 | nLBlockY = iSrcY / nBlockYSize; |
799 | 0 | nLimitBlockY = nLBlockY * nBlockYSize; |
800 | 0 | if (nLimitBlockY < INT_MAX - nBlockYSize) |
801 | 0 | nLimitBlockY += nBlockYSize; |
802 | 0 | else |
803 | 0 | nLimitBlockY = INT_MAX; |
804 | | // Make sure a new block is loaded. |
805 | 0 | nStartBlockX = -nBlockXSize; |
806 | 0 | } |
807 | 0 | else if (static_cast<int>(dfSrcXStart) < nStartBlockX) |
808 | 0 | { |
809 | | // Make sure a new block is loaded. |
810 | 0 | nStartBlockX = -nBlockXSize; |
811 | 0 | } |
812 | |
|
813 | 0 | GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) * |
814 | 0 | static_cast<GPtrDiff_t>(nBlockXSize); |
815 | |
|
816 | 0 | if (bIntegerXFactor) |
817 | 0 | { |
818 | 0 | int iSrcX = static_cast<int>(dfSrcXStart); |
819 | 0 | const int nSrcXInc = static_cast<int>(dfSrcXInc); |
820 | 0 | GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset; |
821 | 0 | bool bRet = false; |
822 | 0 | if (bByteCopy) |
823 | 0 | { |
824 | 0 | bRet = DownsamplingIntegerXFactor<true, 1>( |
825 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData, |
826 | 0 | static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte, |
827 | 0 | GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY); |
828 | 0 | } |
829 | 0 | else if (eDataType == eBufType) |
830 | 0 | { |
831 | 0 | switch (nBandDataSize) |
832 | 0 | { |
833 | 0 | case 2: |
834 | 0 | bRet = DownsamplingIntegerXFactor<true, 2>( |
835 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
836 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
837 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
838 | 0 | nBlockXSize, poBlock, nLBlockY); |
839 | 0 | break; |
840 | 0 | case 4: |
841 | 0 | bRet = DownsamplingIntegerXFactor<true, 4>( |
842 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
843 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
844 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
845 | 0 | nBlockXSize, poBlock, nLBlockY); |
846 | 0 | break; |
847 | 0 | case 8: |
848 | 0 | bRet = DownsamplingIntegerXFactor<true, 8>( |
849 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
850 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
851 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
852 | 0 | nBlockXSize, poBlock, nLBlockY); |
853 | 0 | break; |
854 | 0 | case 16: |
855 | 0 | bRet = DownsamplingIntegerXFactor<true, 16>( |
856 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, |
857 | 0 | pabyDstData, static_cast<int>(nPixelSpace), |
858 | 0 | nBufXSize, eDataType, eDataType, nStartBlockX, |
859 | 0 | nBlockXSize, poBlock, nLBlockY); |
860 | 0 | break; |
861 | 0 | default: |
862 | 0 | CPLAssert(false); |
863 | 0 | break; |
864 | 0 | } |
865 | 0 | } |
866 | 0 | else |
867 | 0 | { |
868 | 0 | bRet = DownsamplingIntegerXFactor<false, 0>( |
869 | 0 | this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData, |
870 | 0 | static_cast<int>(nPixelSpace), nBufXSize, eDataType, |
871 | 0 | eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY); |
872 | 0 | } |
873 | 0 | if (!bRet) |
874 | 0 | eErr = CE_Failure; |
875 | 0 | } |
876 | 0 | else |
877 | 0 | { |
878 | 0 | double dfSrcX = dfSrcXStart; |
879 | 0 | for (int iBufXOff = 0; iBufXOff < nBufXSize; |
880 | 0 | iBufXOff++, dfSrcX += dfSrcXInc) |
881 | 0 | { |
882 | | // TODO?: try to avoid the clamping for most iterations |
883 | 0 | const int iSrcX = static_cast<int>( |
884 | 0 | std::min(std::max(0.0, dfSrcX), |
885 | 0 | static_cast<double>(nRasterXSize - 1))); |
886 | | |
887 | | /* -------------------------------------------------------------------- |
888 | | */ |
889 | | /* Ensure we have the appropriate block loaded. */ |
890 | | /* -------------------------------------------------------------------- |
891 | | */ |
892 | 0 | if (iSrcX >= nBlockXSize + nStartBlockX) |
893 | 0 | { |
894 | 0 | const int nLBlockX = iSrcX / nBlockXSize; |
895 | 0 | nStartBlockX = nLBlockX * nBlockXSize; |
896 | |
|
897 | 0 | if (poBlock != nullptr) |
898 | 0 | poBlock->DropLock(); |
899 | |
|
900 | 0 | poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE); |
901 | 0 | if (poBlock == nullptr) |
902 | 0 | { |
903 | 0 | eErr = CE_Failure; |
904 | 0 | break; |
905 | 0 | } |
906 | | |
907 | 0 | pabySrcBlock = |
908 | 0 | static_cast<GByte *>(poBlock->GetDataRef()); |
909 | 0 | } |
910 | 0 | const GPtrDiff_t nDiffX = |
911 | 0 | static_cast<GPtrDiff_t>(iSrcX - nStartBlockX); |
912 | | |
913 | | /* -------------------------------------------------------------------- |
914 | | */ |
915 | | /* Copy over this pixel of data. */ |
916 | | /* -------------------------------------------------------------------- |
917 | | */ |
918 | |
|
919 | 0 | if (bByteCopy) |
920 | 0 | { |
921 | 0 | GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst; |
922 | 0 | static_cast<GByte *>(pData)[iBufOffset] = |
923 | 0 | pabySrcBlock[iSrcOffset]; |
924 | 0 | } |
925 | 0 | else if (eDataType == eBufType) |
926 | 0 | { |
927 | 0 | GPtrDiff_t iSrcOffset = |
928 | 0 | (nDiffX + iSrcOffsetCst) * nBandDataSize; |
929 | 0 | memcpy(static_cast<GByte *>(pData) + iBufOffset, |
930 | 0 | pabySrcBlock + iSrcOffset, nBandDataSize); |
931 | 0 | } |
932 | 0 | else |
933 | 0 | { |
934 | | // Type to type conversion ... |
935 | 0 | GPtrDiff_t iSrcOffset = |
936 | 0 | (nDiffX + iSrcOffsetCst) * nBandDataSize; |
937 | 0 | GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0, |
938 | 0 | static_cast<GByte *>(pData) + |
939 | 0 | iBufOffset, |
940 | 0 | eBufType, 0, 1); |
941 | 0 | } |
942 | |
|
943 | 0 | iBufOffset += static_cast<int>(nPixelSpace); |
944 | 0 | } |
945 | 0 | } |
946 | 0 | if (eErr == CE_Failure) |
947 | 0 | break; |
948 | | |
949 | 0 | if (psExtraArg->pfnProgress != nullptr && |
950 | 0 | !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "", |
951 | 0 | psExtraArg->pProgressData)) |
952 | 0 | { |
953 | 0 | eErr = CE_Failure; |
954 | 0 | break; |
955 | 0 | } |
956 | 0 | } |
957 | 0 | } |
958 | | |
959 | 0 | if (poBlock != nullptr) |
960 | 0 | poBlock->DropLock(); |
961 | |
|
962 | 0 | return eErr; |
963 | 0 | } |
964 | | |
965 | | /************************************************************************/ |
966 | | /* GDALRasterIOTransformer() */ |
967 | | /************************************************************************/ |
968 | | |
// Argument block handed to GDALRasterIOTransformer() through its opaque
// pTransformerArg pointer. It describes a per-axis scale-and-offset mapping:
//   src = dst * ratio + offset
// and its inverse.
struct GDALRasterIOTransformerStruct
{
    //! Offset added to the scaled X coordinate (dst -> src direction).
    double dfXOff;
    //! Offset added to the scaled Y coordinate (dst -> src direction).
    double dfYOff;
    //! Multiplier applied to X when going from destination to source.
    double dfXRatioDstToSrc;
    //! Multiplier applied to Y when going from destination to source.
    double dfYRatioDstToSrc;
};
976 | | |
977 | | static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc, |
978 | | int nPointCount, double *x, double *y, |
979 | | double * /* z */, int *panSuccess) |
980 | 0 | { |
981 | 0 | GDALRasterIOTransformerStruct *psParams = |
982 | 0 | static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg); |
983 | 0 | if (bDstToSrc) |
984 | 0 | { |
985 | 0 | for (int i = 0; i < nPointCount; i++) |
986 | 0 | { |
987 | 0 | x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff; |
988 | 0 | y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff; |
989 | 0 | panSuccess[i] = TRUE; |
990 | 0 | } |
991 | 0 | } |
992 | 0 | else |
993 | 0 | { |
994 | 0 | for (int i = 0; i < nPointCount; i++) |
995 | 0 | { |
996 | 0 | x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc; |
997 | 0 | y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc; |
998 | 0 | panSuccess[i] = TRUE; |
999 | 0 | } |
1000 | 0 | } |
1001 | 0 | return TRUE; |
1002 | 0 | } |
1003 | | |
1004 | | /************************************************************************/ |
1005 | | /* RasterIOResampled() */ |
1006 | | /************************************************************************/ |
1007 | | |
1008 | | //! @cond Doxygen_Suppress |
1009 | | CPLErr GDALRasterBand::RasterIOResampled( |
1010 | | GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize, |
1011 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
1012 | | GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg) |
1013 | 0 | { |
1014 | | // Determine if we use warping resampling or overview resampling |
1015 | 0 | const bool bUseWarp = |
1016 | 0 | (GDALDataTypeIsComplex(eDataType) && |
1017 | 0 | psExtraArg->eResampleAlg != GRIORA_NearestNeighbour && |
1018 | 0 | psExtraArg->eResampleAlg != GRIORA_Mode); |
1019 | |
|
1020 | 0 | double dfXOff = nXOff; |
1021 | 0 | double dfYOff = nYOff; |
1022 | 0 | double dfXSize = nXSize; |
1023 | 0 | double dfYSize = nYSize; |
1024 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
1025 | 0 | { |
1026 | 0 | dfXOff = psExtraArg->dfXOff; |
1027 | 0 | dfYOff = psExtraArg->dfYOff; |
1028 | 0 | dfXSize = psExtraArg->dfXSize; |
1029 | 0 | dfYSize = psExtraArg->dfYSize; |
1030 | 0 | } |
1031 | |
|
1032 | 0 | const double dfXRatioDstToSrc = dfXSize / nBufXSize; |
1033 | 0 | const double dfYRatioDstToSrc = dfYSize / nBufYSize; |
1034 | | |
1035 | | // Determine the coordinates in the "virtual" output raster to see |
1036 | | // if there are not integers, in which case we will use them as a shift |
1037 | | // so that subwindow extracts give the exact same results as entire raster |
1038 | | // scaling. |
1039 | 0 | double dfDestXOff = dfXOff / dfXRatioDstToSrc; |
1040 | 0 | bool bHasXOffVirtual = false; |
1041 | 0 | int nDestXOffVirtual = 0; |
1042 | 0 | if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8) |
1043 | 0 | { |
1044 | 0 | bHasXOffVirtual = true; |
1045 | 0 | dfXOff = nXOff; |
1046 | 0 | nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5); |
1047 | 0 | } |
1048 | |
|
1049 | 0 | double dfDestYOff = dfYOff / dfYRatioDstToSrc; |
1050 | 0 | bool bHasYOffVirtual = false; |
1051 | 0 | int nDestYOffVirtual = 0; |
1052 | 0 | if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8) |
1053 | 0 | { |
1054 | 0 | bHasYOffVirtual = true; |
1055 | 0 | dfYOff = nYOff; |
1056 | 0 | nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5); |
1057 | 0 | } |
1058 | | |
1059 | | // Create a MEM dataset that wraps the output buffer. |
1060 | 0 | GDALDataset *poMEMDS; |
1061 | 0 | void *pTempBuffer = nullptr; |
1062 | 0 | GSpacing nPSMem = nPixelSpace; |
1063 | 0 | GSpacing nLSMem = nLineSpace; |
1064 | 0 | void *pDataMem = pData; |
1065 | 0 | GDALDataType eDTMem = eBufType; |
1066 | 0 | if (eBufType != eDataType) |
1067 | 0 | { |
1068 | 0 | nPSMem = GDALGetDataTypeSizeBytes(eDataType); |
1069 | 0 | nLSMem = nPSMem * nBufXSize; |
1070 | 0 | pTempBuffer = |
1071 | 0 | VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem)); |
1072 | 0 | if (pTempBuffer == nullptr) |
1073 | 0 | return CE_Failure; |
1074 | 0 | pDataMem = pTempBuffer; |
1075 | 0 | eDTMem = eDataType; |
1076 | 0 | } |
1077 | | |
1078 | 0 | poMEMDS = |
1079 | 0 | MEMDataset::Create("", nDestXOffVirtual + nBufXSize, |
1080 | 0 | nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr); |
1081 | 0 | GByte *pabyData = static_cast<GByte *>(pDataMem) - |
1082 | 0 | nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual; |
1083 | 0 | GDALRasterBandH hMEMBand = MEMCreateRasterBandEx( |
1084 | 0 | poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false); |
1085 | 0 | poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand)); |
1086 | |
|
1087 | 0 | const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE"); |
1088 | 0 | const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0; |
1089 | 0 | if (pszNBITS) |
1090 | 0 | GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem( |
1091 | 0 | "NBITS", pszNBITS, "IMAGE_STRUCTURE"); |
1092 | |
|
1093 | 0 | CPLErr eErr = CE_None; |
1094 | | |
1095 | | // Do the resampling. |
1096 | 0 | if (bUseWarp) |
1097 | 0 | { |
1098 | 0 | int bHasNoData = FALSE; |
1099 | 0 | double dfNoDataValue = GetNoDataValue(&bHasNoData); |
1100 | |
|
1101 | 0 | VRTDatasetH hVRTDS = nullptr; |
1102 | 0 | GDALRasterBandH hVRTBand = nullptr; |
1103 | 0 | if (GetDataset() == nullptr) |
1104 | 0 | { |
1105 | | /* Create VRT dataset that wraps the whole dataset */ |
1106 | 0 | hVRTDS = VRTCreate(nRasterXSize, nRasterYSize); |
1107 | 0 | VRTAddBand(hVRTDS, eDataType, nullptr); |
1108 | 0 | hVRTBand = GDALGetRasterBand(hVRTDS, 1); |
1109 | 0 | VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize, |
1110 | 0 | 0, 0, nRasterXSize, nRasterYSize, nullptr, |
1111 | 0 | VRT_NODATA_UNSET); |
1112 | | |
1113 | | /* Add a mask band if needed */ |
1114 | 0 | if (GetMaskFlags() != GMF_ALL_VALID) |
1115 | 0 | { |
1116 | 0 | GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0); |
1117 | 0 | VRTSourcedRasterBand *poVRTMaskBand = |
1118 | 0 | reinterpret_cast<VRTSourcedRasterBand *>( |
1119 | 0 | reinterpret_cast<GDALRasterBand *>(hVRTBand) |
1120 | 0 | ->GetMaskBand()); |
1121 | 0 | poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize, |
1122 | 0 | nRasterYSize, 0, 0, |
1123 | 0 | nRasterXSize, nRasterYSize); |
1124 | 0 | } |
1125 | 0 | } |
1126 | |
|
1127 | 0 | GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions(); |
1128 | 0 | switch (psExtraArg->eResampleAlg) |
1129 | 0 | { |
1130 | 0 | case GRIORA_NearestNeighbour: |
1131 | 0 | psWarpOptions->eResampleAlg = GRA_NearestNeighbour; |
1132 | 0 | break; |
1133 | 0 | case GRIORA_Bilinear: |
1134 | 0 | psWarpOptions->eResampleAlg = GRA_Bilinear; |
1135 | 0 | break; |
1136 | 0 | case GRIORA_Cubic: |
1137 | 0 | psWarpOptions->eResampleAlg = GRA_Cubic; |
1138 | 0 | break; |
1139 | 0 | case GRIORA_CubicSpline: |
1140 | 0 | psWarpOptions->eResampleAlg = GRA_CubicSpline; |
1141 | 0 | break; |
1142 | 0 | case GRIORA_Lanczos: |
1143 | 0 | psWarpOptions->eResampleAlg = GRA_Lanczos; |
1144 | 0 | break; |
1145 | 0 | case GRIORA_Average: |
1146 | 0 | psWarpOptions->eResampleAlg = GRA_Average; |
1147 | 0 | break; |
1148 | 0 | case GRIORA_RMS: |
1149 | 0 | psWarpOptions->eResampleAlg = GRA_RMS; |
1150 | 0 | break; |
1151 | 0 | case GRIORA_Mode: |
1152 | 0 | psWarpOptions->eResampleAlg = GRA_Mode; |
1153 | 0 | break; |
1154 | 0 | default: |
1155 | 0 | CPLAssert(false); |
1156 | 0 | psWarpOptions->eResampleAlg = GRA_NearestNeighbour; |
1157 | 0 | break; |
1158 | 0 | } |
1159 | 0 | psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset(); |
1160 | 0 | psWarpOptions->hDstDS = poMEMDS; |
1161 | 0 | psWarpOptions->nBandCount = 1; |
1162 | 0 | int nSrcBandNumber = hVRTDS ? 1 : nBand; |
1163 | 0 | int nDstBandNumber = 1; |
1164 | 0 | psWarpOptions->panSrcBands = &nSrcBandNumber; |
1165 | 0 | psWarpOptions->panDstBands = &nDstBandNumber; |
1166 | 0 | psWarpOptions->pfnProgress = psExtraArg->pfnProgress |
1167 | 0 | ? psExtraArg->pfnProgress |
1168 | 0 | : GDALDummyProgress; |
1169 | 0 | psWarpOptions->pProgressArg = psExtraArg->pProgressData; |
1170 | 0 | psWarpOptions->pfnTransformer = GDALRasterIOTransformer; |
1171 | 0 | if (bHasNoData) |
1172 | 0 | { |
1173 | 0 | psWarpOptions->papszWarpOptions = CSLSetNameValue( |
1174 | 0 | psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA"); |
1175 | 0 | if (psWarpOptions->padfSrcNoDataReal == nullptr) |
1176 | 0 | { |
1177 | 0 | psWarpOptions->padfSrcNoDataReal = |
1178 | 0 | static_cast<double *>(CPLMalloc(sizeof(double))); |
1179 | 0 | psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue; |
1180 | 0 | } |
1181 | |
|
1182 | 0 | if (psWarpOptions->padfDstNoDataReal == nullptr) |
1183 | 0 | { |
1184 | 0 | psWarpOptions->padfDstNoDataReal = |
1185 | 0 | static_cast<double *>(CPLMalloc(sizeof(double))); |
1186 | 0 | psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue; |
1187 | 0 | } |
1188 | 0 | } |
1189 | |
|
1190 | 0 | GDALRasterIOTransformerStruct sTransformer; |
1191 | 0 | sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff; |
1192 | 0 | sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff; |
1193 | 0 | sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1194 | 0 | sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1195 | 0 | psWarpOptions->pTransformerArg = &sTransformer; |
1196 | |
|
1197 | 0 | GDALWarpOperationH hWarpOperation = |
1198 | 0 | GDALCreateWarpOperation(psWarpOptions); |
1199 | 0 | eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual, |
1200 | 0 | nDestYOffVirtual, nBufXSize, nBufYSize); |
1201 | 0 | GDALDestroyWarpOperation(hWarpOperation); |
1202 | |
|
1203 | 0 | psWarpOptions->panSrcBands = nullptr; |
1204 | 0 | psWarpOptions->panDstBands = nullptr; |
1205 | 0 | GDALDestroyWarpOptions(psWarpOptions); |
1206 | |
|
1207 | 0 | if (hVRTDS) |
1208 | 0 | GDALClose(hVRTDS); |
1209 | 0 | } |
1210 | 0 | else |
1211 | 0 | { |
1212 | 0 | const char *pszResampling = |
1213 | 0 | (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" |
1214 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" |
1215 | 0 | : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" |
1216 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" |
1217 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" |
1218 | 0 | : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" |
1219 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" |
1220 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" |
1221 | 0 | : "UNKNOWN"; |
1222 | |
|
1223 | 0 | int nKernelRadius = 0; |
1224 | 0 | GDALResampleFunction pfnResampleFunc = |
1225 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius); |
1226 | 0 | CPLAssert(pfnResampleFunc); |
1227 | 0 | GDALDataType eWrkDataType = |
1228 | 0 | GDALGetOvrWorkDataType(pszResampling, eDataType); |
1229 | 0 | int nHasNoData = 0; |
1230 | 0 | double dfNoDataValue = GetNoDataValue(&nHasNoData); |
1231 | 0 | const bool bHasNoData = CPL_TO_BOOL(nHasNoData); |
1232 | 0 | if (!bHasNoData) |
1233 | 0 | dfNoDataValue = 0.0; |
1234 | |
|
1235 | 0 | int nDstBlockXSize = nBufXSize; |
1236 | 0 | int nDstBlockYSize = nBufYSize; |
1237 | 0 | int nFullResXChunk = 0; |
1238 | 0 | int nFullResYChunk = 0; |
1239 | 0 | while (true) |
1240 | 0 | { |
1241 | 0 | nFullResXChunk = |
1242 | 0 | 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc); |
1243 | 0 | nFullResYChunk = |
1244 | 0 | 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc); |
1245 | 0 | if (nFullResXChunk > nRasterXSize) |
1246 | 0 | nFullResXChunk = nRasterXSize; |
1247 | 0 | if (nFullResYChunk > nRasterYSize) |
1248 | 0 | nFullResYChunk = nRasterYSize; |
1249 | 0 | if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) || |
1250 | 0 | (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= |
1251 | 0 | 1024 * 1024)) |
1252 | 0 | break; |
1253 | | // When operating on the full width of a raster whose block width is |
1254 | | // the raster width, prefer doing chunks in height. |
1255 | 0 | if (nFullResXChunk >= nXSize && nXSize == nBlockXSize && |
1256 | 0 | nDstBlockYSize > 1) |
1257 | 0 | nDstBlockYSize /= 2; |
1258 | | /* Otherwise cut the maximal dimension */ |
1259 | 0 | else if (nDstBlockXSize > 1 && |
1260 | 0 | (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1)) |
1261 | 0 | nDstBlockXSize /= 2; |
1262 | 0 | else |
1263 | 0 | nDstBlockYSize /= 2; |
1264 | 0 | } |
1265 | |
|
1266 | 0 | int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc); |
1267 | 0 | int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc); |
1268 | 0 | if (nOvrXFactor == 0) |
1269 | 0 | nOvrXFactor = 1; |
1270 | 0 | if (nOvrYFactor == 0) |
1271 | 0 | nOvrYFactor = 1; |
1272 | 0 | int nFullResXSizeQueried = |
1273 | 0 | nFullResXChunk + 2 * nKernelRadius * nOvrXFactor; |
1274 | 0 | int nFullResYSizeQueried = |
1275 | 0 | nFullResYChunk + 2 * nKernelRadius * nOvrYFactor; |
1276 | |
|
1277 | 0 | if (nFullResXSizeQueried > nRasterXSize) |
1278 | 0 | nFullResXSizeQueried = nRasterXSize; |
1279 | 0 | if (nFullResYSizeQueried > nRasterYSize) |
1280 | 0 | nFullResYSizeQueried = nRasterYSize; |
1281 | |
|
1282 | 0 | void *pChunk = |
1283 | 0 | VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType), |
1284 | 0 | nFullResXSizeQueried, nFullResYSizeQueried); |
1285 | 0 | GByte *pabyChunkNoDataMask = nullptr; |
1286 | |
|
1287 | 0 | GDALRasterBand *poMaskBand = GetMaskBand(); |
1288 | 0 | int l_nMaskFlags = GetMaskFlags(); |
1289 | |
|
1290 | 0 | bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0); |
1291 | 0 | if (bUseNoDataMask) |
1292 | 0 | { |
1293 | 0 | pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE( |
1294 | 0 | nFullResXSizeQueried, nFullResYSizeQueried)); |
1295 | 0 | } |
1296 | 0 | if (pChunk == nullptr || |
1297 | 0 | (bUseNoDataMask && pabyChunkNoDataMask == nullptr)) |
1298 | 0 | { |
1299 | 0 | GDALClose(poMEMDS); |
1300 | 0 | CPLFree(pChunk); |
1301 | 0 | CPLFree(pabyChunkNoDataMask); |
1302 | 0 | VSIFree(pTempBuffer); |
1303 | 0 | return CE_Failure; |
1304 | 0 | } |
1305 | | |
1306 | 0 | const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) * |
1307 | 0 | DIV_ROUND_UP(nBufYSize, nDstBlockYSize); |
1308 | 0 | int nBlocksDone = 0; |
1309 | |
|
1310 | 0 | int nDstYOff; |
1311 | 0 | for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None; |
1312 | 0 | nDstYOff += nDstBlockYSize) |
1313 | 0 | { |
1314 | 0 | int nDstYCount; |
1315 | 0 | if (nDstYOff + nDstBlockYSize <= nBufYSize) |
1316 | 0 | nDstYCount = nDstBlockYSize; |
1317 | 0 | else |
1318 | 0 | nDstYCount = nBufYSize - nDstYOff; |
1319 | |
|
1320 | 0 | int nChunkYOff = |
1321 | 0 | nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc); |
1322 | 0 | int nChunkYOff2 = nYOff + 1 + |
1323 | 0 | static_cast<int>(ceil((nDstYOff + nDstYCount) * |
1324 | 0 | dfYRatioDstToSrc)); |
1325 | 0 | if (nChunkYOff2 > nRasterYSize) |
1326 | 0 | nChunkYOff2 = nRasterYSize; |
1327 | 0 | int nYCount = nChunkYOff2 - nChunkYOff; |
1328 | 0 | CPLAssert(nYCount <= nFullResYChunk); |
1329 | | |
1330 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor; |
1331 | 0 | int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor; |
1332 | 0 | if (nChunkYOffQueried < 0) |
1333 | 0 | { |
1334 | 0 | nChunkYSizeQueried += nChunkYOffQueried; |
1335 | 0 | nChunkYOffQueried = 0; |
1336 | 0 | } |
1337 | 0 | if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize) |
1338 | 0 | nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried; |
1339 | 0 | CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried); |
1340 | | |
1341 | 0 | int nDstXOff = 0; |
1342 | 0 | for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None; |
1343 | 0 | nDstXOff += nDstBlockXSize) |
1344 | 0 | { |
1345 | 0 | int nDstXCount = 0; |
1346 | 0 | if (nDstXOff + nDstBlockXSize <= nBufXSize) |
1347 | 0 | nDstXCount = nDstBlockXSize; |
1348 | 0 | else |
1349 | 0 | nDstXCount = nBufXSize - nDstXOff; |
1350 | |
|
1351 | 0 | int nChunkXOff = |
1352 | 0 | nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc); |
1353 | 0 | int nChunkXOff2 = |
1354 | 0 | nXOff + 1 + |
1355 | 0 | static_cast<int>( |
1356 | 0 | ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc)); |
1357 | 0 | if (nChunkXOff2 > nRasterXSize) |
1358 | 0 | nChunkXOff2 = nRasterXSize; |
1359 | 0 | int nXCount = nChunkXOff2 - nChunkXOff; |
1360 | 0 | CPLAssert(nXCount <= nFullResXChunk); |
1361 | | |
1362 | 0 | int nChunkXOffQueried = |
1363 | 0 | nChunkXOff - nKernelRadius * nOvrXFactor; |
1364 | 0 | int nChunkXSizeQueried = |
1365 | 0 | nXCount + 2 * nKernelRadius * nOvrXFactor; |
1366 | 0 | if (nChunkXOffQueried < 0) |
1367 | 0 | { |
1368 | 0 | nChunkXSizeQueried += nChunkXOffQueried; |
1369 | 0 | nChunkXOffQueried = 0; |
1370 | 0 | } |
1371 | 0 | if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize) |
1372 | 0 | nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried; |
1373 | 0 | CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried); |
1374 | | |
1375 | | // Read the source buffers. |
1376 | 0 | eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1377 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, pChunk, |
1378 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
1379 | 0 | eWrkDataType, 0, 0, nullptr); |
1380 | |
|
1381 | 0 | bool bSkipResample = false; |
1382 | 0 | bool bNoDataMaskFullyOpaque = false; |
1383 | 0 | if (eErr == CE_None && bUseNoDataMask) |
1384 | 0 | { |
1385 | 0 | eErr = poMaskBand->RasterIO( |
1386 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1387 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
1388 | 0 | pabyChunkNoDataMask, nChunkXSizeQueried, |
1389 | 0 | nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr); |
1390 | | |
1391 | | /* Optimizations if mask if fully opaque or transparent */ |
1392 | 0 | int nPixels = nChunkXSizeQueried * nChunkYSizeQueried; |
1393 | 0 | GByte bVal = pabyChunkNoDataMask[0]; |
1394 | 0 | int i = 1; |
1395 | 0 | for (; i < nPixels; i++) |
1396 | 0 | { |
1397 | 0 | if (pabyChunkNoDataMask[i] != bVal) |
1398 | 0 | break; |
1399 | 0 | } |
1400 | 0 | if (i == nPixels) |
1401 | 0 | { |
1402 | 0 | if (bVal == 0) |
1403 | 0 | { |
1404 | 0 | for (int j = 0; j < nDstYCount; j++) |
1405 | 0 | { |
1406 | 0 | GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0, |
1407 | 0 | static_cast<GByte *>(pDataMem) + |
1408 | 0 | nLSMem * (j + nDstYOff) + |
1409 | 0 | nDstXOff * nPSMem, |
1410 | 0 | eDTMem, |
1411 | 0 | static_cast<int>(nPSMem), |
1412 | 0 | nDstXCount); |
1413 | 0 | } |
1414 | 0 | bSkipResample = true; |
1415 | 0 | } |
1416 | 0 | else |
1417 | 0 | { |
1418 | 0 | bNoDataMaskFullyOpaque = true; |
1419 | 0 | } |
1420 | 0 | } |
1421 | 0 | } |
1422 | |
|
1423 | 0 | if (!bSkipResample && eErr == CE_None) |
1424 | 0 | { |
1425 | 0 | const bool bPropagateNoData = false; |
1426 | 0 | void *pDstBuffer = nullptr; |
1427 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown; |
1428 | 0 | GDALRasterBand *poMEMBand = |
1429 | 0 | GDALRasterBand::FromHandle(hMEMBand); |
1430 | 0 | GDALOverviewResampleArgs args; |
1431 | 0 | args.eSrcDataType = eDataType; |
1432 | 0 | args.eOvrDataType = poMEMBand->GetRasterDataType(); |
1433 | 0 | args.nOvrXSize = poMEMBand->GetXSize(); |
1434 | 0 | args.nOvrYSize = poMEMBand->GetYSize(); |
1435 | 0 | args.nOvrNBITS = nNBITS; |
1436 | 0 | args.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1437 | 0 | args.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1438 | 0 | args.dfSrcXDelta = |
1439 | 0 | dfXOff - nXOff; /* == 0 if bHasXOffVirtual */ |
1440 | 0 | args.dfSrcYDelta = |
1441 | 0 | dfYOff - nYOff; /* == 0 if bHasYOffVirtual */ |
1442 | 0 | args.eWrkDataType = eWrkDataType; |
1443 | 0 | args.pabyChunkNodataMask = |
1444 | 0 | bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask; |
1445 | 0 | args.nChunkXOff = |
1446 | 0 | nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff); |
1447 | 0 | args.nChunkXSize = nChunkXSizeQueried; |
1448 | 0 | args.nChunkYOff = |
1449 | 0 | nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff); |
1450 | 0 | args.nChunkYSize = nChunkYSizeQueried; |
1451 | 0 | args.nDstXOff = nDstXOff + nDestXOffVirtual; |
1452 | 0 | args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount; |
1453 | 0 | args.nDstYOff = nDstYOff + nDestYOffVirtual; |
1454 | 0 | args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount; |
1455 | 0 | args.pszResampling = pszResampling; |
1456 | 0 | args.bHasNoData = bHasNoData; |
1457 | 0 | args.dfNoDataValue = dfNoDataValue; |
1458 | 0 | args.poColorTable = GetColorTable(); |
1459 | 0 | args.bPropagateNoData = bPropagateNoData; |
1460 | 0 | eErr = pfnResampleFunc(args, pChunk, &pDstBuffer, |
1461 | 0 | &eDstBufferDataType); |
1462 | 0 | if (eErr == CE_None) |
1463 | 0 | { |
1464 | 0 | eErr = poMEMBand->RasterIO( |
1465 | 0 | GF_Write, nDstXOff + nDestXOffVirtual, |
1466 | 0 | nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount, |
1467 | 0 | pDstBuffer, nDstXCount, nDstYCount, |
1468 | 0 | eDstBufferDataType, 0, 0, nullptr); |
1469 | 0 | } |
1470 | 0 | CPLFree(pDstBuffer); |
1471 | 0 | } |
1472 | |
|
1473 | 0 | nBlocksDone++; |
1474 | 0 | if (eErr == CE_None && psExtraArg->pfnProgress != nullptr && |
1475 | 0 | !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks, |
1476 | 0 | "", psExtraArg->pProgressData)) |
1477 | 0 | { |
1478 | 0 | eErr = CE_Failure; |
1479 | 0 | } |
1480 | 0 | } |
1481 | 0 | } |
1482 | | |
1483 | 0 | CPLFree(pChunk); |
1484 | 0 | CPLFree(pabyChunkNoDataMask); |
1485 | 0 | } |
1486 | | |
1487 | 0 | if (eBufType != eDataType) |
1488 | 0 | { |
1489 | 0 | CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO( |
1490 | 0 | GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize, |
1491 | 0 | pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace, |
1492 | 0 | nullptr)); |
1493 | 0 | } |
1494 | 0 | GDALClose(poMEMDS); |
1495 | 0 | VSIFree(pTempBuffer); |
1496 | |
|
1497 | 0 | return eErr; |
1498 | 0 | } |
1499 | | |
1500 | | /************************************************************************/ |
1501 | | /* RasterIOResampled() */ |
1502 | | /************************************************************************/ |
1503 | | |
1504 | | CPLErr GDALDataset::RasterIOResampled( |
1505 | | GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize, |
1506 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
1507 | | int nBandCount, const int *panBandMap, GSpacing nPixelSpace, |
1508 | | GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg) |
1509 | | |
1510 | 0 | { |
1511 | | #if 0 |
1512 | | // Determine if we use warping resampling or overview resampling |
1513 | | bool bUseWarp = false; |
1514 | | if( GDALDataTypeIsComplex( eDataType ) ) |
1515 | | bUseWarp = true; |
1516 | | #endif |
1517 | |
|
1518 | 0 | double dfXOff = nXOff; |
1519 | 0 | double dfYOff = nYOff; |
1520 | 0 | double dfXSize = nXSize; |
1521 | 0 | double dfYSize = nYSize; |
1522 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
1523 | 0 | { |
1524 | 0 | dfXOff = psExtraArg->dfXOff; |
1525 | 0 | dfYOff = psExtraArg->dfYOff; |
1526 | 0 | dfXSize = psExtraArg->dfXSize; |
1527 | 0 | dfYSize = psExtraArg->dfYSize; |
1528 | 0 | } |
1529 | |
|
1530 | 0 | const double dfXRatioDstToSrc = dfXSize / nBufXSize; |
1531 | 0 | const double dfYRatioDstToSrc = dfYSize / nBufYSize; |
1532 | | |
1533 | | // Determine the coordinates in the "virtual" output raster to see |
1534 | | // if there are not integers, in which case we will use them as a shift |
1535 | | // so that subwindow extracts give the exact same results as entire raster |
1536 | | // scaling. |
1537 | 0 | double dfDestXOff = dfXOff / dfXRatioDstToSrc; |
1538 | 0 | bool bHasXOffVirtual = false; |
1539 | 0 | int nDestXOffVirtual = 0; |
1540 | 0 | if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8) |
1541 | 0 | { |
1542 | 0 | bHasXOffVirtual = true; |
1543 | 0 | dfXOff = nXOff; |
1544 | 0 | nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5); |
1545 | 0 | } |
1546 | |
|
1547 | 0 | double dfDestYOff = dfYOff / dfYRatioDstToSrc; |
1548 | 0 | bool bHasYOffVirtual = false; |
1549 | 0 | int nDestYOffVirtual = 0; |
1550 | 0 | if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8) |
1551 | 0 | { |
1552 | 0 | bHasYOffVirtual = true; |
1553 | 0 | dfYOff = nYOff; |
1554 | 0 | nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5); |
1555 | 0 | } |
1556 | | |
1557 | | // Create a MEM dataset that wraps the output buffer. |
1558 | 0 | GDALDataset *poMEMDS = |
1559 | 0 | MEMDataset::Create("", nDestXOffVirtual + nBufXSize, |
1560 | 0 | nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr); |
1561 | 0 | GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>( |
1562 | 0 | CPLMalloc(nBandCount * sizeof(GDALRasterBand *))); |
1563 | 0 | int nNBITS = 0; |
1564 | 0 | for (int i = 0; i < nBandCount; i++) |
1565 | 0 | { |
1566 | 0 | char szBuffer[32] = {'\0'}; |
1567 | 0 | int nRet = CPLPrintPointer( |
1568 | 0 | szBuffer, |
1569 | 0 | static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual - |
1570 | 0 | nLineSpace * nDestYOffVirtual + nBandSpace * i, |
1571 | 0 | sizeof(szBuffer)); |
1572 | 0 | szBuffer[nRet] = 0; |
1573 | |
|
1574 | 0 | char szBuffer0[64] = {'\0'}; |
1575 | 0 | snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer); |
1576 | |
|
1577 | 0 | char szBuffer1[64] = {'\0'}; |
1578 | 0 | snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB, |
1579 | 0 | static_cast<GIntBig>(nPixelSpace)); |
1580 | |
|
1581 | 0 | char szBuffer2[64] = {'\0'}; |
1582 | 0 | snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB, |
1583 | 0 | static_cast<GIntBig>(nLineSpace)); |
1584 | |
|
1585 | 0 | char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr}; |
1586 | |
|
1587 | 0 | poMEMDS->AddBand(eBufType, apszOptions); |
1588 | |
|
1589 | 0 | GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]); |
1590 | 0 | papoDstBands[i] = poMEMDS->GetRasterBand(i + 1); |
1591 | 0 | const char *pszNBITS = |
1592 | 0 | poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE"); |
1593 | 0 | if (pszNBITS) |
1594 | 0 | { |
1595 | 0 | nNBITS = atoi(pszNBITS); |
1596 | 0 | poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS, |
1597 | 0 | "IMAGE_STRUCTURE"); |
1598 | 0 | } |
1599 | 0 | } |
1600 | |
|
1601 | 0 | CPLErr eErr = CE_None; |
1602 | | |
1603 | | // TODO(schwehr): Why disabled? Why not just delete? |
1604 | | // Looks like this code was initially added as disable by copying |
1605 | | // from RasterIO here: |
1606 | | // https://trac.osgeo.org/gdal/changeset/29572 |
1607 | | #if 0 |
1608 | | // Do the resampling. |
1609 | | if( bUseWarp ) |
1610 | | { |
1611 | | VRTDatasetH hVRTDS = nullptr; |
1612 | | GDALRasterBandH hVRTBand = nullptr; |
1613 | | if( GetDataset() == nullptr ) |
1614 | | { |
1615 | | /* Create VRT dataset that wraps the whole dataset */ |
1616 | | hVRTDS = VRTCreate(nRasterXSize, nRasterYSize); |
1617 | | VRTAddBand( hVRTDS, eDataType, nullptr ); |
1618 | | hVRTBand = GDALGetRasterBand(hVRTDS, 1); |
1619 | | VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand, |
1620 | | (GDALRasterBandH)this, |
1621 | | 0, 0, |
1622 | | nRasterXSize, nRasterYSize, |
1623 | | 0, 0, |
1624 | | nRasterXSize, nRasterYSize, |
1625 | | nullptr, VRT_NODATA_UNSET ); |
1626 | | |
1627 | | /* Add a mask band if needed */ |
1628 | | if( GetMaskFlags() != GMF_ALL_VALID ) |
1629 | | { |
1630 | | ((GDALDataset*)hVRTDS)->CreateMaskBand(0); |
1631 | | VRTSourcedRasterBand* poVRTMaskBand = |
1632 | | (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand()); |
1633 | | poVRTMaskBand-> |
1634 | | AddMaskBandSource( this, |
1635 | | 0, 0, |
1636 | | nRasterXSize, nRasterYSize, |
1637 | | 0, 0, |
1638 | | nRasterXSize, nRasterYSize); |
1639 | | } |
1640 | | } |
1641 | | |
1642 | | GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions(); |
1643 | | psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg; |
1644 | | psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset()); |
1645 | | psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS; |
1646 | | psWarpOptions->nBandCount = 1; |
1647 | | int nSrcBandNumber = (hVRTDS ? 1 : nBand); |
1648 | | int nDstBandNumber = 1; |
1649 | | psWarpOptions->panSrcBands = &nSrcBandNumber; |
1650 | | psWarpOptions->panDstBands = &nDstBandNumber; |
1651 | | psWarpOptions->pfnProgress = psExtraArg->pfnProgress ? |
1652 | | psExtraArg->pfnProgress : GDALDummyProgress; |
1653 | | psWarpOptions->pProgressArg = psExtraArg->pProgressData; |
1654 | | psWarpOptions->pfnTransformer = GDALRasterIOTransformer; |
1655 | | GDALRasterIOTransformerStruct sTransformer; |
1656 | | sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff; |
1657 | | sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff; |
1658 | | sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1659 | | sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1660 | | psWarpOptions->pTransformerArg = &sTransformer; |
1661 | | |
1662 | | GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions); |
1663 | | eErr = GDALChunkAndWarpImage( hWarpOperation, |
1664 | | nDestXOffVirtual, nDestYOffVirtual, |
1665 | | nBufXSize, nBufYSize ); |
1666 | | GDALDestroyWarpOperation( hWarpOperation ); |
1667 | | |
1668 | | psWarpOptions->panSrcBands = nullptr; |
1669 | | psWarpOptions->panDstBands = nullptr; |
1670 | | GDALDestroyWarpOptions( psWarpOptions ); |
1671 | | |
1672 | | if( hVRTDS ) |
1673 | | GDALClose(hVRTDS); |
1674 | | } |
1675 | | else |
1676 | | #endif |
1677 | 0 | { |
1678 | 0 | const char *pszResampling = |
1679 | 0 | (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR" |
1680 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC" |
1681 | 0 | : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE" |
1682 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS" |
1683 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE" |
1684 | 0 | : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS" |
1685 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE" |
1686 | 0 | : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS" |
1687 | 0 | : "UNKNOWN"; |
1688 | |
|
1689 | 0 | GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]); |
1690 | 0 | GDALDataType eDataType = poFirstSrcBand->GetRasterDataType(); |
1691 | 0 | int nBlockXSize, nBlockYSize; |
1692 | 0 | poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize); |
1693 | |
|
1694 | 0 | int nKernelRadius; |
1695 | 0 | GDALResampleFunction pfnResampleFunc = |
1696 | 0 | GDALGetResampleFunction(pszResampling, &nKernelRadius); |
1697 | 0 | CPLAssert(pfnResampleFunc); |
1698 | | #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND |
1699 | | GDALResampleFunctionMultiBands pfnResampleFuncMultiBands = |
1700 | | GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius); |
1701 | | #endif |
1702 | 0 | GDALDataType eWrkDataType = |
1703 | 0 | GDALGetOvrWorkDataType(pszResampling, eDataType); |
1704 | |
|
1705 | 0 | int nDstBlockXSize = nBufXSize; |
1706 | 0 | int nDstBlockYSize = nBufYSize; |
1707 | 0 | int nFullResXChunk, nFullResYChunk; |
1708 | 0 | while (true) |
1709 | 0 | { |
1710 | 0 | nFullResXChunk = |
1711 | 0 | 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc); |
1712 | 0 | nFullResYChunk = |
1713 | 0 | 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc); |
1714 | 0 | if (nFullResXChunk > nRasterXSize) |
1715 | 0 | nFullResXChunk = nRasterXSize; |
1716 | 0 | if (nFullResYChunk > nRasterYSize) |
1717 | 0 | nFullResYChunk = nRasterYSize; |
1718 | 0 | if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) || |
1719 | 0 | (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <= |
1720 | 0 | 1024 * 1024)) |
1721 | 0 | break; |
1722 | | // When operating on the full width of a raster whose block width is |
1723 | | // the raster width, prefer doing chunks in height. |
1724 | 0 | if (nFullResXChunk >= nXSize && nXSize == nBlockXSize && |
1725 | 0 | nDstBlockYSize > 1) |
1726 | 0 | nDstBlockYSize /= 2; |
1727 | | /* Otherwise cut the maximal dimension */ |
1728 | 0 | else if (nDstBlockXSize > 1 && |
1729 | 0 | (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1)) |
1730 | 0 | nDstBlockXSize /= 2; |
1731 | 0 | else |
1732 | 0 | nDstBlockYSize /= 2; |
1733 | 0 | } |
1734 | |
|
1735 | 0 | int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc), |
1736 | 0 | static_cast<int>(0.5 + dfYRatioDstToSrc)); |
1737 | 0 | if (nOvrFactor == 0) |
1738 | 0 | nOvrFactor = 1; |
1739 | 0 | int nFullResXSizeQueried = |
1740 | 0 | nFullResXChunk + 2 * nKernelRadius * nOvrFactor; |
1741 | 0 | int nFullResYSizeQueried = |
1742 | 0 | nFullResYChunk + 2 * nKernelRadius * nOvrFactor; |
1743 | |
|
1744 | 0 | if (nFullResXSizeQueried > nRasterXSize) |
1745 | 0 | nFullResXSizeQueried = nRasterXSize; |
1746 | 0 | if (nFullResYSizeQueried > nRasterYSize) |
1747 | 0 | nFullResYSizeQueried = nRasterYSize; |
1748 | |
|
1749 | 0 | void *pChunk = VSI_MALLOC3_VERBOSE( |
1750 | 0 | cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) * |
1751 | 0 | nBandCount), |
1752 | 0 | nFullResXSizeQueried, nFullResYSizeQueried); |
1753 | 0 | GByte *pabyChunkNoDataMask = nullptr; |
1754 | |
|
1755 | 0 | GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand(); |
1756 | 0 | int nMaskFlags = poFirstSrcBand->GetMaskFlags(); |
1757 | |
|
1758 | 0 | bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0); |
1759 | 0 | if (bUseNoDataMask) |
1760 | 0 | { |
1761 | 0 | pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE( |
1762 | 0 | nFullResXSizeQueried, nFullResYSizeQueried)); |
1763 | 0 | } |
1764 | 0 | if (pChunk == nullptr || |
1765 | 0 | (bUseNoDataMask && pabyChunkNoDataMask == nullptr)) |
1766 | 0 | { |
1767 | 0 | GDALClose(poMEMDS); |
1768 | 0 | CPLFree(pChunk); |
1769 | 0 | CPLFree(pabyChunkNoDataMask); |
1770 | 0 | CPLFree(papoDstBands); |
1771 | 0 | return CE_Failure; |
1772 | 0 | } |
1773 | | |
1774 | 0 | const int nTotalBlocks = DIV_ROUND_UP(nBufXSize, nDstBlockXSize) * |
1775 | 0 | DIV_ROUND_UP(nBufYSize, nDstBlockYSize); |
1776 | 0 | int nBlocksDone = 0; |
1777 | |
|
1778 | 0 | int nDstYOff; |
1779 | 0 | for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None; |
1780 | 0 | nDstYOff += nDstBlockYSize) |
1781 | 0 | { |
1782 | 0 | int nDstYCount; |
1783 | 0 | if (nDstYOff + nDstBlockYSize <= nBufYSize) |
1784 | 0 | nDstYCount = nDstBlockYSize; |
1785 | 0 | else |
1786 | 0 | nDstYCount = nBufYSize - nDstYOff; |
1787 | |
|
1788 | 0 | int nChunkYOff = |
1789 | 0 | nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc); |
1790 | 0 | int nChunkYOff2 = nYOff + 1 + |
1791 | 0 | static_cast<int>(ceil((nDstYOff + nDstYCount) * |
1792 | 0 | dfYRatioDstToSrc)); |
1793 | 0 | if (nChunkYOff2 > nRasterYSize) |
1794 | 0 | nChunkYOff2 = nRasterYSize; |
1795 | 0 | int nYCount = nChunkYOff2 - nChunkYOff; |
1796 | 0 | CPLAssert(nYCount <= nFullResYChunk); |
1797 | | |
1798 | 0 | int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor; |
1799 | 0 | int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor; |
1800 | 0 | if (nChunkYOffQueried < 0) |
1801 | 0 | { |
1802 | 0 | nChunkYSizeQueried += nChunkYOffQueried; |
1803 | 0 | nChunkYOffQueried = 0; |
1804 | 0 | } |
1805 | 0 | if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize) |
1806 | 0 | nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried; |
1807 | 0 | CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried); |
1808 | | |
1809 | 0 | int nDstXOff; |
1810 | 0 | for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None; |
1811 | 0 | nDstXOff += nDstBlockXSize) |
1812 | 0 | { |
1813 | 0 | int nDstXCount; |
1814 | 0 | if (nDstXOff + nDstBlockXSize <= nBufXSize) |
1815 | 0 | nDstXCount = nDstBlockXSize; |
1816 | 0 | else |
1817 | 0 | nDstXCount = nBufXSize - nDstXOff; |
1818 | |
|
1819 | 0 | int nChunkXOff = |
1820 | 0 | nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc); |
1821 | 0 | int nChunkXOff2 = |
1822 | 0 | nXOff + 1 + |
1823 | 0 | static_cast<int>( |
1824 | 0 | ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc)); |
1825 | 0 | if (nChunkXOff2 > nRasterXSize) |
1826 | 0 | nChunkXOff2 = nRasterXSize; |
1827 | 0 | int nXCount = nChunkXOff2 - nChunkXOff; |
1828 | 0 | CPLAssert(nXCount <= nFullResXChunk); |
1829 | | |
1830 | 0 | int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor; |
1831 | 0 | int nChunkXSizeQueried = |
1832 | 0 | nXCount + 2 * nKernelRadius * nOvrFactor; |
1833 | 0 | if (nChunkXOffQueried < 0) |
1834 | 0 | { |
1835 | 0 | nChunkXSizeQueried += nChunkXOffQueried; |
1836 | 0 | nChunkXOffQueried = 0; |
1837 | 0 | } |
1838 | 0 | if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize) |
1839 | 0 | nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried; |
1840 | 0 | CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried); |
1841 | | |
1842 | 0 | bool bSkipResample = false; |
1843 | 0 | bool bNoDataMaskFullyOpaque = false; |
1844 | 0 | if (eErr == CE_None && bUseNoDataMask) |
1845 | 0 | { |
1846 | 0 | eErr = poMaskBand->RasterIO( |
1847 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1848 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, |
1849 | 0 | pabyChunkNoDataMask, nChunkXSizeQueried, |
1850 | 0 | nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr); |
1851 | | |
1852 | | /* Optimizations if mask is fully opaque or transparent */ |
1853 | 0 | const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried; |
1854 | 0 | const GByte bVal = pabyChunkNoDataMask[0]; |
1855 | 0 | int i = 1; // Used after for. |
1856 | 0 | for (; i < nPixels; i++) |
1857 | 0 | { |
1858 | 0 | if (pabyChunkNoDataMask[i] != bVal) |
1859 | 0 | break; |
1860 | 0 | } |
1861 | 0 | if (i == nPixels) |
1862 | 0 | { |
1863 | 0 | if (bVal == 0) |
1864 | 0 | { |
1865 | 0 | GByte abyZero[16] = {0}; |
1866 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
1867 | 0 | { |
1868 | 0 | for (int j = 0; j < nDstYCount; j++) |
1869 | 0 | { |
1870 | 0 | GDALCopyWords64( |
1871 | 0 | abyZero, GDT_Byte, 0, |
1872 | 0 | static_cast<GByte *>(pData) + |
1873 | 0 | iBand * nBandSpace + |
1874 | 0 | nLineSpace * (j + nDstYOff) + |
1875 | 0 | nDstXOff * nPixelSpace, |
1876 | 0 | eBufType, static_cast<int>(nPixelSpace), |
1877 | 0 | nDstXCount); |
1878 | 0 | } |
1879 | 0 | } |
1880 | 0 | bSkipResample = true; |
1881 | 0 | } |
1882 | 0 | else |
1883 | 0 | { |
1884 | 0 | bNoDataMaskFullyOpaque = true; |
1885 | 0 | } |
1886 | 0 | } |
1887 | 0 | } |
1888 | |
|
1889 | 0 | if (!bSkipResample && eErr == CE_None) |
1890 | 0 | { |
1891 | | /* Read the source buffers */ |
1892 | 0 | eErr = RasterIO( |
1893 | 0 | GF_Read, nChunkXOffQueried, nChunkYOffQueried, |
1894 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, pChunk, |
1895 | 0 | nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType, |
1896 | 0 | nBandCount, panBandMap, 0, 0, 0, nullptr); |
1897 | 0 | } |
1898 | |
|
1899 | | #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND |
1900 | | if (pfnResampleFuncMultiBands && !bSkipResample && |
1901 | | eErr == CE_None) |
1902 | | { |
1903 | | eErr = pfnResampleFuncMultiBands( |
1904 | | dfXRatioDstToSrc, dfYRatioDstToSrc, |
1905 | | dfXOff - nXOff, /* == 0 if bHasXOffVirtual */ |
1906 | | dfYOff - nYOff, /* == 0 if bHasYOffVirtual */ |
1907 | | eWrkDataType, (GByte *)pChunk, nBandCount, |
1908 | | bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask, |
1909 | | nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff), |
1910 | | nChunkXSizeQueried, |
1911 | | nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff), |
1912 | | nChunkYSizeQueried, nDstXOff + nDestXOffVirtual, |
1913 | | nDstXOff + nDestXOffVirtual + nDstXCount, |
1914 | | nDstYOff + nDestYOffVirtual, |
1915 | | nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands, |
1916 | | pszResampling, FALSE /*bHasNoData*/, |
1917 | | 0.0 /* dfNoDataValue */, nullptr /* color table*/, |
1918 | | eDataType); |
1919 | | } |
1920 | | else |
1921 | | #endif |
1922 | 0 | { |
1923 | 0 | size_t nChunkBandOffset = |
1924 | 0 | static_cast<size_t>(nChunkXSizeQueried) * |
1925 | 0 | nChunkYSizeQueried * |
1926 | 0 | GDALGetDataTypeSizeBytes(eWrkDataType); |
1927 | 0 | for (int i = 0; |
1928 | 0 | i < nBandCount && !bSkipResample && eErr == CE_None; |
1929 | 0 | i++) |
1930 | 0 | { |
1931 | 0 | const bool bPropagateNoData = false; |
1932 | 0 | void *pDstBuffer = nullptr; |
1933 | 0 | GDALDataType eDstBufferDataType = GDT_Unknown; |
1934 | 0 | GDALRasterBand *poMEMBand = |
1935 | 0 | poMEMDS->GetRasterBand(i + 1); |
1936 | 0 | GDALOverviewResampleArgs args; |
1937 | 0 | args.eSrcDataType = eDataType; |
1938 | 0 | args.eOvrDataType = poMEMBand->GetRasterDataType(); |
1939 | 0 | args.nOvrXSize = poMEMBand->GetXSize(); |
1940 | 0 | args.nOvrYSize = poMEMBand->GetYSize(); |
1941 | 0 | args.nOvrNBITS = nNBITS; |
1942 | 0 | args.dfXRatioDstToSrc = dfXRatioDstToSrc; |
1943 | 0 | args.dfYRatioDstToSrc = dfYRatioDstToSrc; |
1944 | 0 | args.dfSrcXDelta = |
1945 | 0 | dfXOff - nXOff; /* == 0 if bHasXOffVirtual */ |
1946 | 0 | args.dfSrcYDelta = |
1947 | 0 | dfYOff - nYOff; /* == 0 if bHasYOffVirtual */ |
1948 | 0 | args.eWrkDataType = eWrkDataType; |
1949 | 0 | args.pabyChunkNodataMask = bNoDataMaskFullyOpaque |
1950 | 0 | ? nullptr |
1951 | 0 | : pabyChunkNoDataMask; |
1952 | 0 | args.nChunkXOff = |
1953 | 0 | nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff); |
1954 | 0 | args.nChunkXSize = nChunkXSizeQueried; |
1955 | 0 | args.nChunkYOff = |
1956 | 0 | nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff); |
1957 | 0 | args.nChunkYSize = nChunkYSizeQueried; |
1958 | 0 | args.nDstXOff = nDstXOff + nDestXOffVirtual; |
1959 | 0 | args.nDstXOff2 = |
1960 | 0 | nDstXOff + nDestXOffVirtual + nDstXCount; |
1961 | 0 | args.nDstYOff = nDstYOff + nDestYOffVirtual; |
1962 | 0 | args.nDstYOff2 = |
1963 | 0 | nDstYOff + nDestYOffVirtual + nDstYCount; |
1964 | 0 | args.pszResampling = pszResampling; |
1965 | 0 | args.bHasNoData = false; |
1966 | 0 | args.dfNoDataValue = 0.0; |
1967 | 0 | args.poColorTable = nullptr; |
1968 | 0 | args.bPropagateNoData = bPropagateNoData; |
1969 | |
|
1970 | 0 | eErr = |
1971 | 0 | pfnResampleFunc(args, |
1972 | 0 | reinterpret_cast<GByte *>(pChunk) + |
1973 | 0 | i * nChunkBandOffset, |
1974 | 0 | &pDstBuffer, &eDstBufferDataType); |
1975 | 0 | if (eErr == CE_None) |
1976 | 0 | { |
1977 | 0 | eErr = poMEMBand->RasterIO( |
1978 | 0 | GF_Write, nDstXOff + nDestXOffVirtual, |
1979 | 0 | nDstYOff + nDestYOffVirtual, nDstXCount, |
1980 | 0 | nDstYCount, pDstBuffer, nDstXCount, nDstYCount, |
1981 | 0 | eDstBufferDataType, 0, 0, nullptr); |
1982 | 0 | } |
1983 | 0 | CPLFree(pDstBuffer); |
1984 | 0 | } |
1985 | 0 | } |
1986 | |
|
1987 | 0 | nBlocksDone++; |
1988 | 0 | if (eErr == CE_None && psExtraArg->pfnProgress != nullptr && |
1989 | 0 | !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks, |
1990 | 0 | "", psExtraArg->pProgressData)) |
1991 | 0 | { |
1992 | 0 | eErr = CE_Failure; |
1993 | 0 | } |
1994 | 0 | } |
1995 | 0 | } |
1996 | | |
1997 | 0 | CPLFree(pChunk); |
1998 | 0 | CPLFree(pabyChunkNoDataMask); |
1999 | 0 | } |
2000 | | |
2001 | 0 | CPLFree(papoDstBands); |
2002 | 0 | GDALClose(poMEMDS); |
2003 | |
|
2004 | 0 | return eErr; |
2005 | 0 | } |
2006 | | |
2007 | | //! @endcond |
2008 | | |
2009 | | /************************************************************************/ |
2010 | | /* GDALSwapWords() */ |
2011 | | /************************************************************************/ |
2012 | | |
2013 | | /** |
2014 | | * Byte swap words in-place. |
2015 | | * |
2016 | | * This function will byte swap a set of 2, 4 or 8 byte words "in place" in |
2017 | | * a memory array. No assumption is made that the words being swapped are |
2018 | | * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h |
2019 | | * to determine if the current platform is big endian or little endian. Use |
2020 | | * The macros like CPL_SWAP32() to byte swap single values without the overhead |
2021 | | * of a function call. |
2022 | | * |
2023 | | * @param pData pointer to start of data buffer. |
2024 | | * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8. |
2025 | | * @param nWordCount the number of words to be swapped in this call. |
2026 | | * @param nWordSkip the byte offset from the start of one word to the start of |
2027 | | * the next. For packed buffers this is the same as nWordSize. |
2028 | | */ |
2029 | | |
2030 | | void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount, |
2031 | | int nWordSkip) |
2032 | | |
2033 | 0 | { |
2034 | 0 | if (nWordCount > 0) |
2035 | 0 | VALIDATE_POINTER0(pData, "GDALSwapWords"); |
2036 | | |
2037 | 0 | GByte *pabyData = static_cast<GByte *>(pData); |
2038 | |
|
2039 | 0 | switch (nWordSize) |
2040 | 0 | { |
2041 | 0 | case 1: |
2042 | 0 | break; |
2043 | | |
2044 | 0 | case 2: |
2045 | 0 | CPLAssert(nWordSkip >= 2 || nWordCount == 1); |
2046 | 0 | for (int i = 0; i < nWordCount; i++) |
2047 | 0 | { |
2048 | 0 | CPL_SWAP16PTR(pabyData); |
2049 | 0 | pabyData += nWordSkip; |
2050 | 0 | } |
2051 | 0 | break; |
2052 | | |
2053 | 0 | case 4: |
2054 | 0 | CPLAssert(nWordSkip >= 4 || nWordCount == 1); |
2055 | 0 | if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0) |
2056 | 0 | { |
2057 | 0 | for (int i = 0; i < nWordCount; i++) |
2058 | 0 | { |
2059 | 0 | *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32( |
2060 | 0 | *reinterpret_cast<const GUInt32 *>(pabyData)); |
2061 | 0 | pabyData += nWordSkip; |
2062 | 0 | } |
2063 | 0 | } |
2064 | 0 | else |
2065 | 0 | { |
2066 | 0 | for (int i = 0; i < nWordCount; i++) |
2067 | 0 | { |
2068 | 0 | CPL_SWAP32PTR(pabyData); |
2069 | 0 | pabyData += nWordSkip; |
2070 | 0 | } |
2071 | 0 | } |
2072 | 0 | break; |
2073 | | |
2074 | 0 | case 8: |
2075 | 0 | CPLAssert(nWordSkip >= 8 || nWordCount == 1); |
2076 | 0 | if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0) |
2077 | 0 | { |
2078 | 0 | for (int i = 0; i < nWordCount; i++) |
2079 | 0 | { |
2080 | 0 | *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64( |
2081 | 0 | *reinterpret_cast<const GUInt64 *>(pabyData)); |
2082 | 0 | pabyData += nWordSkip; |
2083 | 0 | } |
2084 | 0 | } |
2085 | 0 | else |
2086 | 0 | { |
2087 | 0 | for (int i = 0; i < nWordCount; i++) |
2088 | 0 | { |
2089 | 0 | CPL_SWAP64PTR(pabyData); |
2090 | 0 | pabyData += nWordSkip; |
2091 | 0 | } |
2092 | 0 | } |
2093 | 0 | break; |
2094 | | |
2095 | 0 | default: |
2096 | 0 | CPLAssert(false); |
2097 | 0 | } |
2098 | 0 | } |
2099 | | |
2100 | | /************************************************************************/ |
2101 | | /* GDALSwapWordsEx() */ |
2102 | | /************************************************************************/ |
2103 | | |
2104 | | /** |
2105 | | * Byte swap words in-place. |
2106 | | * |
2107 | | * This function will byte swap a set of 2, 4 or 8 byte words "in place" in |
2108 | | * a memory array. No assumption is made that the words being swapped are |
2109 | | * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h |
2110 | | * to determine if the current platform is big endian or little endian. Use |
2111 | | * The macros like CPL_SWAP32() to byte swap single values without the overhead |
2112 | | * of a function call. |
2113 | | * |
2114 | | * @param pData pointer to start of data buffer. |
2115 | | * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8. |
2116 | | * @param nWordCount the number of words to be swapped in this call. |
2117 | | * @param nWordSkip the byte offset from the start of one word to the start of |
2118 | | * the next. For packed buffers this is the same as nWordSize. |
2119 | | * @since GDAL 2.1 |
2120 | | */ |
2121 | | void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount, |
2122 | | int nWordSkip) |
2123 | 0 | { |
2124 | 0 | GByte *pabyData = static_cast<GByte *>(pData); |
2125 | 0 | while (nWordCount) |
2126 | 0 | { |
2127 | | // Pick-up a multiple of 8 as max chunk size. |
2128 | 0 | const int nWordCountSmall = |
2129 | 0 | (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount); |
2130 | 0 | GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip); |
2131 | 0 | pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall; |
2132 | 0 | nWordCount -= nWordCountSmall; |
2133 | 0 | } |
2134 | 0 | } |
2135 | | |
2136 | | // Place the new GDALCopyWords helpers in an anonymous namespace |
2137 | | namespace |
2138 | | { |
2139 | | |
2140 | | /************************************************************************/ |
2141 | | /* GDALCopyWordsT() */ |
2142 | | /************************************************************************/ |
2143 | | /** |
2144 | | * Template function, used to copy data from pSrcData into buffer |
2145 | | * pDstData, with stride nSrcPixelStride in the source data and |
2146 | | * stride nDstPixelStride in the destination data. This template can |
2147 | | * deal with the case where the input data type is real or complex and |
2148 | | * the output is real. |
2149 | | * |
2150 | | * @param pSrcData the source data buffer |
2151 | | * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels |
2152 | | * of interest. |
2153 | | * @param pDstData the destination buffer. |
2154 | | * @param nDstPixelStride the stride in the buffer pDstData for pixels of |
2155 | | * interest. |
2156 | | * @param nWordCount the total number of pixel words to copy |
2157 | | * |
2158 | | * @code |
2159 | | * // Assume an input buffer of type GUInt16 named pBufferIn |
2160 | | * GByte *pBufferOut = new GByte[numBytesOut]; |
2161 | | * GDALCopyWordsT<GUInt16, GByte>(pBufferIn, 2, pBufferOut, 1, numBytesOut); |
2162 | | * @endcode |
2163 | | * @note |
2164 | | * This is a private function, and should not be exposed outside of |
2165 | | * rasterio.cpp. External users should call the GDALCopyWords driver function. |
2166 | | */ |
2167 | | |
2168 | | template <class Tin, class Tout> |
2169 | | static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData, |
2170 | | int nSrcPixelStride, |
2171 | | Tout *const CPL_RESTRICT pDstData, |
2172 | | int nDstPixelStride, |
2173 | | GPtrDiff_t nWordCount) |
2174 | 0 | { |
2175 | 0 | decltype(nWordCount) nDstOffset = 0; |
2176 | |
|
2177 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); |
2178 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
2179 | 0 | for (decltype(nWordCount) n = 0; n < nWordCount; n++) |
2180 | 0 | { |
2181 | 0 | const Tin tValue = |
2182 | 0 | *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride)); |
2183 | 0 | Tout *const pOutPixel = |
2184 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2185 | |
|
2186 | 0 | GDALCopyWord(tValue, *pOutPixel); |
2187 | |
|
2188 | 0 | nDstOffset += nDstPixelStride; |
2189 | 0 | } |
2190 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, signed char>(unsigned char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned short>(unsigned char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, short>(unsigned char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned int>(unsigned char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, int>(unsigned char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, long>(unsigned char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, float>(unsigned char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned char, double>(unsigned char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned 
char>(signed char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, signed char>(signed char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, long>(signed char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned char>(unsigned short const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, 
signed char>(unsigned short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, short>(unsigned short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, int>(unsigned short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, long>(unsigned short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, float>(unsigned short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned short, double>(unsigned short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned char>(short const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, signed char>(short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<short, unsigned short>(short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned int>(short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, unsigned long>(short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, long>(short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, short>(unsigned int const*, int, short*, int, long 
long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, long>(unsigned int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned int, double>(unsigned int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned char>(int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, signed char>(int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned short>(int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned int>(int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, unsigned long>(int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, long>(int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, unsigned long>(unsigned long const*, int, 
unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, long>(unsigned long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned char>(long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, signed char>(long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned short>(long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned int>(long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, unsigned long>(long const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, long>(long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsGenericT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, signed char>(float const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned int>(float const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, unsigned long>(float const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, long>(float const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned char>(double const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, signed char>(double const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, short>(double const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned int>(double const*, int, unsigned int*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, unsigned long>(double const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, long>(double const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsGenericT<double, double>(double const*, int, double*, int, long long) |
2191 | | |
2192 | | template <class Tin, class Tout> |
2193 | | static void inline GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData, |
2194 | | int nSrcPixelStride, |
2195 | | Tout *const CPL_RESTRICT pDstData, |
2196 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2197 | 0 | { |
2198 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride, |
2199 | 0 | nWordCount); |
2200 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned char>(unsigned char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, signed char>(unsigned char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, unsigned long>(unsigned char const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, long>(unsigned char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned char>(signed char const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, signed char>(signed char const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned short>(signed char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned int>(signed char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, unsigned long>(signed char const*, int, unsigned long*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, long>(signed char const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, signed char>(unsigned short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned short>(unsigned short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned int>(unsigned short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, int>(unsigned short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, unsigned long>(unsigned short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, long>(unsigned short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned char>(short const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<short, signed char>(short const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned short>(short const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned int>(short const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, unsigned long>(short const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, long>(short const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned char>(unsigned int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, signed char>(unsigned int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned short>(unsigned int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<unsigned int, short>(unsigned int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned int>(unsigned int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, unsigned long>(unsigned int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, long>(unsigned int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned int, double>(unsigned int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned char>(int const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, signed char>(int const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned short>(int const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned int>(int const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, unsigned long>(int const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, long>(int const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned char>(unsigned long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, signed char>(unsigned long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned short>(unsigned long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned int>(unsigned long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, unsigned long>(unsigned long const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<unsigned long, long>(unsigned long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned char>(long const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, signed char>(long const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned short>(long const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned int>(long const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, unsigned long>(long const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, long>(long const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, float>(long 
const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned char>(cpl::Float16 const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, signed char>(cpl::Float16 const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned short>(cpl::Float16 const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned int>(cpl::Float16 const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, unsigned long>(cpl::Float16 const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, long>(cpl::Float16 const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsT<float, signed char>(float const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned int>(float const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, unsigned long>(float const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, long>(float const*, int, long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned char>(double const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, signed char>(double const*, int, signed char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, short>(double const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned int>(double const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, unsigned long>(double const*, int, unsigned long*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, long>(double const*, int, long*, int, 
long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT<double, double>(double const*, int, double*, int, long long) |
2201 | | |
2202 | | template <class Tin, class Tout> |
2203 | | static void inline GDALCopyWordsT_8atatime( |
2204 | | const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride, |
2205 | | Tout *const CPL_RESTRICT pDstData, int nDstPixelStride, |
2206 | | GPtrDiff_t nWordCount) |
2207 | 0 | { |
2208 | 0 | decltype(nWordCount) nDstOffset = 0; |
2209 | |
|
2210 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); |
2211 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
2212 | 0 | decltype(nWordCount) n = 0; |
2213 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) && |
2214 | 0 | nDstPixelStride == static_cast<int>(sizeof(Tout))) |
2215 | 0 | { |
2216 | 0 | for (; n < nWordCount - 7; n += 8) |
2217 | 0 | { |
2218 | 0 | const Tin *pInValues = reinterpret_cast<const Tin *>( |
2219 | 0 | pSrcDataPtr + (n * nSrcPixelStride)); |
2220 | 0 | Tout *const pOutPixels = |
2221 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2222 | |
|
2223 | 0 | GDALCopy8Words(pInValues, pOutPixels); |
2224 | |
|
2225 | 0 | nDstOffset += 8 * nDstPixelStride; |
2226 | 0 | } |
2227 | 0 | } |
2228 | 0 | for (; n < nWordCount; n++) |
2229 | 0 | { |
2230 | 0 | const Tin tValue = |
2231 | 0 | *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride)); |
2232 | 0 | Tout *const pOutPixel = |
2233 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2234 | |
|
2235 | 0 | GDALCopyWord(tValue, *pOutPixel); |
2236 | |
|
2237 | 0 | nDstOffset += nDstPixelStride; |
2238 | 0 | } |
2239 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned char>(float const*, int, unsigned char*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, unsigned short>(float const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, short>(float const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<float, double>(float const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, unsigned short>(double const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsT_8atatime<double, float>(double const*, int, float*, int, long long) |
2240 | | |
2241 | | #ifdef HAVE_SSE2 |
2242 | | |
2243 | | template <class Tout> |
2244 | | void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData, |
2245 | | int nSrcPixelStride, |
2246 | | Tout *const CPL_RESTRICT pDstData, |
2247 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2248 | 0 | { |
2249 | 0 | static_assert(std::is_integral<Tout>::value && |
2250 | 0 | sizeof(Tout) == sizeof(uint16_t), |
2251 | 0 | "Bad Tout"); |
2252 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2253 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2254 | 0 | { |
2255 | 0 | decltype(nWordCount) n = 0; |
2256 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2257 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2258 | 0 | reinterpret_cast<GByte *>(pDstData); |
2259 | 0 | for (; n < nWordCount - 15; n += 16) |
2260 | 0 | { |
2261 | 0 | __m128i xmm = _mm_loadu_si128( |
2262 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2263 | 0 | __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero); |
2264 | 0 | __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero); |
2265 | 0 | _mm_storeu_si128( |
2266 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0); |
2267 | 0 | _mm_storeu_si128( |
2268 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1); |
2269 | 0 | } |
2270 | 0 | for (; n < nWordCount; n++) |
2271 | 0 | { |
2272 | 0 | pDstData[n] = pSrcData[n]; |
2273 | 0 | } |
2274 | 0 | } |
2275 | 0 | else |
2276 | 0 | { |
2277 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2278 | 0 | nDstPixelStride, nWordCount); |
2279 | 0 | } |
2280 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<unsigned short>(unsigned char const*, int, unsigned short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo16Bit<short>(unsigned char const*, int, short*, int, long long) |
2281 | | |
2282 | | template <> |
2283 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2284 | | int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData, |
2285 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2286 | 0 | { |
2287 | 0 | GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData, |
2288 | 0 | nDstPixelStride, nWordCount); |
2289 | 0 | } |
2290 | | |
2291 | | template <> |
2292 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2293 | | int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData, |
2294 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2295 | 0 | { |
2296 | 0 | GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData, |
2297 | 0 | nDstPixelStride, nWordCount); |
2298 | 0 | } |
2299 | | |
2300 | | template <class Tout> |
2301 | | void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData, |
2302 | | int nSrcPixelStride, |
2303 | | Tout *const CPL_RESTRICT pDstData, |
2304 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2305 | 0 | { |
2306 | 0 | static_assert(std::is_integral<Tout>::value && |
2307 | 0 | sizeof(Tout) == sizeof(uint32_t), |
2308 | 0 | "Bad Tout"); |
2309 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && |
2310 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2311 | 0 | { |
2312 | 0 | decltype(nWordCount) n = 0; |
2313 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); |
2314 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2315 | 0 | reinterpret_cast<GByte *>(pDstData); |
2316 | 0 | for (; n < nWordCount - 15; n += 16) |
2317 | 0 | { |
2318 | 0 | __m128i xmm = _mm_loadu_si128( |
2319 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); |
2320 | 0 | __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero); |
2321 | 0 | __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero); |
2322 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero); |
2323 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero); |
2324 | 0 | __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero); |
2325 | 0 | __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero); |
2326 | 0 | _mm_storeu_si128( |
2327 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0); |
2328 | 0 | _mm_storeu_si128( |
2329 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1); |
2330 | 0 | _mm_storeu_si128( |
2331 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2); |
2332 | 0 | _mm_storeu_si128( |
2333 | 0 | reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3); |
2334 | 0 | } |
2335 | 0 | for (; n < nWordCount; n++) |
2336 | 0 | { |
2337 | 0 | pDstData[n] = pSrcData[n]; |
2338 | 0 | } |
2339 | 0 | } |
2340 | 0 | else |
2341 | 0 | { |
2342 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, |
2343 | 0 | nDstPixelStride, nWordCount); |
2344 | 0 | } |
2345 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<unsigned int>(unsigned char const*, int, unsigned int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsByteTo32Bit<int>(unsigned char const*, int, int*, int, long long) |
2346 | | |
2347 | | template <> |
2348 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2349 | | int nSrcPixelStride, GUInt32 *const CPL_RESTRICT pDstData, |
2350 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2351 | 0 | { |
2352 | 0 | GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData, |
2353 | 0 | nDstPixelStride, nWordCount); |
2354 | 0 | } |
2355 | | |
2356 | | template <> |
2357 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2358 | | int nSrcPixelStride, GInt32 *const CPL_RESTRICT pDstData, |
2359 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2360 | 0 | { |
2361 | 0 | GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData, |
2362 | 0 | nDstPixelStride, nWordCount); |
2363 | 0 | } |
2364 | | |
2365 | | template <> /* Specialization of GDALCopyWordsT for a GByte source and a float destination, with an SSE2 path for contiguous buffers. */ |
2366 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2367 | | int nSrcPixelStride, float *const CPL_RESTRICT pDstData, |
2368 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2369 | 0 | { |
2370 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && /* vector path only when each stride equals its element size, i.e. both buffers are contiguous */ |
2371 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2372 | 0 | { |
2373 | 0 | decltype(nWordCount) n = 0; /* index of the next element; shared by the vector loop and the scalar tail */ |
2374 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); /* zero register interleaved by the unpack steps, which zero-extends the values */ |
2375 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2376 | 0 | reinterpret_cast<GByte *>(pDstData); /* byte view of the destination so that store offsets are expressed in bytes */ |
2377 | 0 | for (; n < nWordCount - 15; n += 16) /* runs while at least 16 elements remain; 16 source bytes per iteration */ |
2378 | 0 | { |
2379 | 0 | __m128i xmm = _mm_loadu_si128( |
2380 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); /* unaligned load of 16 source bytes */ |
2381 | 0 | __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero); /* bytes 0..7 widened to 16-bit lanes */ |
2382 | 0 | __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero); /* bytes 8..15 widened to 16-bit lanes */ |
2383 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero); /* elements 0..3 as 32-bit integers */ |
2384 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero); /* elements 4..7 */ |
2385 | 0 | __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero); /* elements 8..11 */ |
2386 | 0 | __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero); /* elements 12..15 */ |
2387 | 0 | __m128 xmm0_f = _mm_cvtepi32_ps(xmm0); /* 32-bit integer to float conversion, four lanes per call */ |
2388 | 0 | __m128 xmm1_f = _mm_cvtepi32_ps(xmm1); |
2389 | 0 | __m128 xmm2_f = _mm_cvtepi32_ps(xmm2); |
2390 | 0 | __m128 xmm3_f = _mm_cvtepi32_ps(xmm3); |
2391 | 0 | _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4), /* n * 4 is the byte offset of element n (4-byte floats); each store writes 16 bytes */ |
2392 | 0 | xmm0_f); |
2393 | 0 | _mm_storeu_ps( |
2394 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f); |
2395 | 0 | _mm_storeu_ps( |
2396 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f); |
2397 | 0 | _mm_storeu_ps( |
2398 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f); |
2399 | 0 | } |
2400 | 0 | for (; n < nWordCount; n++) /* scalar tail for the remaining (fewer than 16) elements */ |
2401 | 0 | { |
2402 | 0 | pDstData[n] = pSrcData[n]; |
2403 | 0 | } |
2404 | 0 | } |
2405 | 0 | else |
2406 | 0 | { |
2407 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, /* any other stride combination is handed to GDALCopyWordsGenericT (defined outside this excerpt) */ |
2408 | 0 | nDstPixelStride, nWordCount); |
2409 | 0 | } |
2410 | 0 | } |
2411 | | |
2412 | | template <> /* Specialization of GDALCopyWordsT for a GByte source and a double destination, with an SSE2 path for contiguous buffers. */ |
2413 | | void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData, |
2414 | | int nSrcPixelStride, double *const CPL_RESTRICT pDstData, |
2415 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2416 | 0 | { |
2417 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && /* vector path only when each stride equals its element size, i.e. both buffers are contiguous */ |
2418 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2419 | 0 | { |
2420 | 0 | decltype(nWordCount) n = 0; /* index of the next element; shared by the vector loop and the scalar tail */ |
2421 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); /* zero register interleaved by the unpack steps, which zero-extends the values */ |
2422 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2423 | 0 | reinterpret_cast<GByte *>(pDstData); /* byte view of the destination so that store offsets are expressed in bytes */ |
2424 | 0 | for (; n < nWordCount - 15; n += 16) /* runs while at least 16 elements remain; 16 source bytes per iteration */ |
2425 | 0 | { |
2426 | 0 | __m128i xmm = _mm_loadu_si128( |
2427 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); /* unaligned load of 16 source bytes */ |
2428 | 0 | __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero); /* bytes 0..7 widened to 16-bit lanes */ |
2429 | 0 | __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero); /* bytes 8..15 widened to 16-bit lanes */ |
2430 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero); /* elements 0..3 as 32-bit integers */ |
2431 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero); /* elements 4..7 */ |
2432 | 0 | __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero); /* elements 8..11 */ |
2433 | 0 | __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero); /* elements 12..15 */ |
2434 | |

2435 | 0 | __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0); /* _mm_cvtepi32_pd converts only the two low 32-bit lanes, so each group of four needs two conversions */ |
2436 | 0 | __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1); |
2437 | 0 | __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2); |
2438 | 0 | __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3); |
2439 | 0 | xmm0 = _mm_srli_si128(xmm0, 8); /* shift right by 8 bytes: the two upper lanes move into the low half */ |
2440 | 0 | xmm1 = _mm_srli_si128(xmm1, 8); |
2441 | 0 | xmm2 = _mm_srli_si128(xmm2, 8); |
2442 | 0 | xmm3 = _mm_srli_si128(xmm3, 8); |
2443 | 0 | __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0); /* second conversion covers the former upper two lanes */ |
2444 | 0 | __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1); |
2445 | 0 | __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2); |
2446 | 0 | __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3); |
2447 | |

2448 | 0 | _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8), /* n * 8 is the byte offset of element n (8-byte doubles); eight 16-byte stores cover offsets 0..127 in element order */ |
2449 | 0 | xmm0_low_d); |
2450 | 0 | _mm_storeu_pd( |
2451 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16), |
2452 | 0 | xmm0_high_d); |
2453 | 0 | _mm_storeu_pd( |
2454 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32), |
2455 | 0 | xmm1_low_d); |
2456 | 0 | _mm_storeu_pd( |
2457 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48), |
2458 | 0 | xmm1_high_d); |
2459 | 0 | _mm_storeu_pd( |
2460 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64), |
2461 | 0 | xmm2_low_d); |
2462 | 0 | _mm_storeu_pd( |
2463 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80), |
2464 | 0 | xmm2_high_d); |
2465 | 0 | _mm_storeu_pd( |
2466 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96), |
2467 | 0 | xmm3_low_d); |
2468 | 0 | _mm_storeu_pd( |
2469 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112), |
2470 | 0 | xmm3_high_d); |
2471 | 0 | } |
2472 | 0 | for (; n < nWordCount; n++) /* scalar tail for the remaining (fewer than 16) elements */ |
2473 | 0 | { |
2474 | 0 | pDstData[n] = pSrcData[n]; |
2475 | 0 | } |
2476 | 0 | } |
2477 | 0 | else |
2478 | 0 | { |
2479 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, /* any other stride combination is handed to GDALCopyWordsGenericT (defined outside this excerpt) */ |
2480 | 0 | nDstPixelStride, nWordCount); |
2481 | 0 | } |
2482 | 0 | } |
2483 | | |
2484 | | template <> /* Specialization of GDALCopyWordsT for a GUInt16 source and a GByte destination: values above 255 are clamped to 255. */ |
2485 | | void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData, |
2486 | | int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData, |
2487 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2488 | 0 | { |
2489 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && /* vector path only when each stride equals its element size, i.e. both buffers are contiguous */ |
2490 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2491 | 0 | { |
2492 | 0 | decltype(nWordCount) n = 0; /* index of the next element; shared by the vector loop and the scalar tail */ |
2493 | | // SSE2 has no unsigned 16-bit minimum (min_epu16), so the values are biased |
2494 | | // from the UInt16 range into the Int16 range, where the signed min_epi16 applies |
2495 | 0 | const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768); /* bias added before and subtracted after the signed minimum */ |
2496 | 0 | const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768); /* the clamp limit 255 expressed in the biased domain */ |
2497 | 0 | for (; n < nWordCount - 7; n += 8) /* runs while at least 8 elements remain; 8 source values per iteration */ |
2498 | 0 | { |
2499 | 0 | __m128i xmm = _mm_loadu_si128( |
2500 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); /* unaligned load of 8 16-bit values */ |
2501 | 0 | xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16); /* apply the bias */ |
2502 | 0 | xmm = _mm_min_epi16(xmm, xmm_m255_shifted); /* clamp to 255 (biased) */ |
2503 | 0 | xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16); /* remove the bias: lanes now hold 0..255 */ |
2504 | 0 | xmm = _mm_packus_epi16(xmm, xmm); /* pack the 16-bit lanes to bytes; the same register is passed twice, so the 8 results fill the low 64 bits */ |
2505 | 0 | GDALCopyXMMToInt64(xmm, /* helper defined outside this excerpt; presumably stores the low 64 bits, i.e. the 8 packed bytes - confirm */ |
2506 | 0 | reinterpret_cast<GPtrDiff_t *>(pDstData + n)); |
2507 | 0 | } |
2508 | 0 | for (; n < nWordCount; n++) /* scalar tail for the remaining (fewer than 8) elements */ |
2509 | 0 | { |
2510 | 0 | pDstData[n] = |
2511 | 0 | pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]); /* same clamp to 255 as in the vector loop */ |
2512 | 0 | } |
2513 | 0 | } |
2514 | 0 | else |
2515 | 0 | { |
2516 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, /* any other stride combination is handed to GDALCopyWordsGenericT (defined outside this excerpt) */ |
2517 | 0 | nDstPixelStride, nWordCount); |
2518 | 0 | } |
2519 | 0 | } |
2520 | | |
2521 | | template <> /* Specialization of GDALCopyWordsT for a GUInt16 source and a GInt16 destination: values above 32767 are clamped to 32767. */ |
2522 | | void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData, |
2523 | | int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData, |
2524 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2525 | 0 | { |
2526 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && /* vector path only when each stride equals its element size, i.e. both buffers are contiguous */ |
2527 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2528 | 0 | { |
2529 | 0 | decltype(nWordCount) n = 0; /* index of the next element; shared by the vector loop and the scalar tail */ |
2530 | | // SSE2 has no unsigned 16-bit minimum (min_epu16), so the values are biased |
2531 | | // from the UInt16 range into the Int16 range, where the signed min_epi16 applies |
2532 | 0 | const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768); /* bias added before and subtracted after the signed minimum */ |
2533 | 0 | const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768); /* the clamp limit 32767 expressed in the biased domain */ |
2534 | 0 | for (; n < nWordCount - 7; n += 8) /* runs while at least 8 elements remain; 8 source values per iteration */ |
2535 | 0 | { |
2536 | 0 | __m128i xmm = _mm_loadu_si128( |
2537 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); /* unaligned load of 8 16-bit values */ |
2538 | 0 | xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16); /* apply the bias */ |
2539 | 0 | xmm = _mm_min_epi16(xmm, xmm_32767_shifted); /* clamp to 32767 (biased) */ |
2540 | 0 | xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16); /* remove the bias: lanes now hold 0..32767 */ |
2541 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm); /* unaligned store of the 8 results */ |
2542 | 0 | } |
2543 | 0 | for (; n < nWordCount; n++) /* scalar tail for the remaining (fewer than 8) elements */ |
2544 | 0 | { |
2545 | 0 | pDstData[n] = |
2546 | 0 | pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]); /* same clamp to 32767 as in the vector loop */ |
2547 | 0 | } |
2548 | 0 | } |
2549 | 0 | else |
2550 | 0 | { |
2551 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, /* any other stride combination is handed to GDALCopyWordsGenericT (defined outside this excerpt) */ |
2552 | 0 | nDstPixelStride, nWordCount); |
2553 | 0 | } |
2554 | 0 | } |
2555 | | |
2556 | | template <> /* Specialization of GDALCopyWordsT for a GUInt16 source and a float destination, with an SSE2 path for contiguous buffers. */ |
2557 | | void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData, |
2558 | | int nSrcPixelStride, float *const CPL_RESTRICT pDstData, |
2559 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2560 | 0 | { |
2561 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && /* vector path only when each stride equals its element size, i.e. both buffers are contiguous */ |
2562 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2563 | 0 | { |
2564 | 0 | decltype(nWordCount) n = 0; /* index of the next element; shared by the vector loop and the scalar tail */ |
2565 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); /* zero register interleaved by the unpack steps, which zero-extends the values */ |
2566 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2567 | 0 | reinterpret_cast<GByte *>(pDstData); /* byte view of the destination so that store offsets are expressed in bytes */ |
2568 | 0 | for (; n < nWordCount - 7; n += 8) /* runs while at least 8 elements remain; 8 source values per iteration */ |
2569 | 0 | { |
2570 | 0 | __m128i xmm = _mm_loadu_si128( |
2571 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); /* unaligned load of 8 16-bit values */ |
2572 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero); /* elements 0..3 as 32-bit integers */ |
2573 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero); /* elements 4..7 as 32-bit integers */ |
2574 | 0 | __m128 xmm0_f = _mm_cvtepi32_ps(xmm0); /* 32-bit integer to float conversion, four lanes per call */ |
2575 | 0 | __m128 xmm1_f = _mm_cvtepi32_ps(xmm1); |
2576 | 0 | _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4), /* n * 4 is the byte offset of element n (4-byte floats); each store writes 16 bytes */ |
2577 | 0 | xmm0_f); |
2578 | 0 | _mm_storeu_ps( |
2579 | 0 | reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f); |
2580 | 0 | } |
2581 | 0 | for (; n < nWordCount; n++) /* scalar tail for the remaining (fewer than 8) elements */ |
2582 | 0 | { |
2583 | 0 | pDstData[n] = pSrcData[n]; |
2584 | 0 | } |
2585 | 0 | } |
2586 | 0 | else |
2587 | 0 | { |
2588 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, /* any other stride combination is handed to GDALCopyWordsGenericT (defined outside this excerpt) */ |
2589 | 0 | nDstPixelStride, nWordCount); |
2590 | 0 | } |
2591 | 0 | } |
2592 | | |
2593 | | template <> /* Specialization of GDALCopyWordsT for a GUInt16 source and a double destination, with an SSE2 path for contiguous buffers. */ |
2594 | | void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData, |
2595 | | int nSrcPixelStride, double *const CPL_RESTRICT pDstData, |
2596 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2597 | 0 | { |
2598 | 0 | if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) && /* vector path only when each stride equals its element size, i.e. both buffers are contiguous */ |
2599 | 0 | nDstPixelStride == static_cast<int>(sizeof(*pDstData))) |
2600 | 0 | { |
2601 | 0 | decltype(nWordCount) n = 0; /* index of the next element; shared by the vector loop and the scalar tail */ |
2602 | 0 | const __m128i xmm_zero = _mm_setzero_si128(); /* zero register interleaved by the unpack steps, which zero-extends the values */ |
2603 | 0 | GByte *CPL_RESTRICT pabyDstDataPtr = |
2604 | 0 | reinterpret_cast<GByte *>(pDstData); /* byte view of the destination so that store offsets are expressed in bytes */ |
2605 | 0 | for (; n < nWordCount - 7; n += 8) /* runs while at least 8 elements remain; 8 source values per iteration */ |
2606 | 0 | { |
2607 | 0 | __m128i xmm = _mm_loadu_si128( |
2608 | 0 | reinterpret_cast<const __m128i *>(pSrcData + n)); /* unaligned load of 8 16-bit values */ |
2609 | 0 | __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero); /* elements 0..3 as 32-bit integers */ |
2610 | 0 | __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero); /* elements 4..7 as 32-bit integers */ |
2611 | |

2612 | 0 | __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0); /* _mm_cvtepi32_pd converts only the two low 32-bit lanes, so each group of four needs two conversions */ |
2613 | 0 | __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1); |
2614 | 0 | xmm0 = _mm_srli_si128(xmm0, 8); /* shift right by 8 bytes: the two upper lanes move into the low half */ |
2615 | 0 | xmm1 = _mm_srli_si128(xmm1, 8); |
2616 | 0 | __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0); /* second conversion covers the former upper two lanes */ |
2617 | 0 | __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1); |
2618 | |

2619 | 0 | _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8), /* n * 8 is the byte offset of element n (8-byte doubles); four 16-byte stores cover offsets 0..63 in element order */ |
2620 | 0 | xmm0_low_d); |
2621 | 0 | _mm_storeu_pd( |
2622 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16), |
2623 | 0 | xmm0_high_d); |
2624 | 0 | _mm_storeu_pd( |
2625 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32), |
2626 | 0 | xmm1_low_d); |
2627 | 0 | _mm_storeu_pd( |
2628 | 0 | reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48), |
2629 | 0 | xmm1_high_d); |
2630 | 0 | } |
2631 | 0 | for (; n < nWordCount; n++) /* scalar tail for the remaining (fewer than 8) elements */ |
2632 | 0 | { |
2633 | 0 | pDstData[n] = pSrcData[n]; |
2634 | 0 | } |
2635 | 0 | } |
2636 | 0 | else |
2637 | 0 | { |
2638 | 0 | GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, /* any other stride combination is handed to GDALCopyWordsGenericT (defined outside this excerpt) */ |
2639 | 0 | nDstPixelStride, nWordCount); |
2640 | 0 | } |
2641 | 0 | } |
2642 | | |
2643 | | template <> /* Specialization of GDALCopyWordsT for a double source and a GUInt16 destination; placed before the '#endif // HAVE_SSE2' line. */ |
2644 | | void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData, |
2645 | | int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData, |
2646 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2647 | 0 | { |
2648 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2649 | 0 | nDstPixelStride, nWordCount); |
2650 | 0 | } |
2651 | | |
2652 | | template <> /* Specialization of GDALCopyWordsT for a float source and a double destination; placed before the '#endif // HAVE_SSE2' line. */ |
2653 | | void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
2654 | | int nSrcPixelStride, double *const CPL_RESTRICT pDstData, |
2655 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2656 | 0 | { |
2657 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2658 | 0 | nDstPixelStride, nWordCount); |
2659 | 0 | } |
2660 | | |
2661 | | template <> /* Specialization of GDALCopyWordsT for a double source and a float destination; placed before the '#endif // HAVE_SSE2' line. */ |
2662 | | void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData, |
2663 | | int nSrcPixelStride, float *const CPL_RESTRICT pDstData, |
2664 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2665 | 0 | { |
2666 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2667 | 0 | nDstPixelStride, nWordCount); |
2668 | 0 | } |
2669 | | |
2670 | | #endif // HAVE_SSE2 |
2671 | | |
2672 | | template <> /* Specialization of GDALCopyWordsT for a float source and a GByte destination; placed after the '#endif // HAVE_SSE2' line. */ |
2673 | | void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
2674 | | int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData, |
2675 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2676 | 0 | { |
2677 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2678 | 0 | nDstPixelStride, nWordCount); |
2679 | 0 | } |
2680 | | |
2681 | | template <> /* Specialization of GDALCopyWordsT for a float source and a GInt16 destination; placed after the '#endif // HAVE_SSE2' line. */ |
2682 | | void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
2683 | | int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData, |
2684 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2685 | 0 | { |
2686 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2687 | 0 | nDstPixelStride, nWordCount); |
2688 | 0 | } |
2689 | | |
2690 | | template <> /* Specialization of GDALCopyWordsT for a float source and a GUInt16 destination; placed after the '#endif // HAVE_SSE2' line. */ |
2691 | | void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
2692 | | int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData, |
2693 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2694 | 0 | { |
2695 | 0 | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2696 | 0 | nDstPixelStride, nWordCount); |
2697 | 0 | } |
2698 | | |
2699 | | #ifdef __F16C__ |
2700 | | |
2701 | | template <> /* Specialization of GDALCopyWordsT for a float source and a GFloat16 destination; sits inside the '#ifdef __F16C__' section. */ |
2702 | | void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData, |
2703 | | int nSrcPixelStride, GFloat16 *const CPL_RESTRICT pDstData, |
2704 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2705 | | { |
2706 | | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2707 | | nDstPixelStride, nWordCount); |
2708 | | } |
2709 | | |
2710 | | template <> /* Specialization of GDALCopyWordsT for a GFloat16 source and a float destination; sits inside the '#ifdef __F16C__' section. */ |
2711 | | void GDALCopyWordsT(const GFloat16 *const CPL_RESTRICT pSrcData, |
2712 | | int nSrcPixelStride, float *const CPL_RESTRICT pDstData, |
2713 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2714 | | { |
2715 | | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2716 | | nDstPixelStride, nWordCount); |
2717 | | } |
2718 | | |
2719 | | template <> /* Specialization of GDALCopyWordsT for a double source and a GFloat16 destination; sits inside the '#ifdef __F16C__' section. */ |
2720 | | void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData, |
2721 | | int nSrcPixelStride, GFloat16 *const CPL_RESTRICT pDstData, |
2722 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2723 | | { |
2724 | | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2725 | | nDstPixelStride, nWordCount); |
2726 | | } |
2727 | | |
2728 | | template <> /* Specialization of GDALCopyWordsT for a GFloat16 source and a double destination; sits inside the '#ifdef __F16C__' section. */ |
2729 | | void GDALCopyWordsT(const GFloat16 *const CPL_RESTRICT pSrcData, |
2730 | | int nSrcPixelStride, double *const CPL_RESTRICT pDstData, |
2731 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2732 | | { |
2733 | | GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData, /* arguments forwarded unchanged; helper defined outside this excerpt, presumably converting eight values per step - confirm */ |
2734 | | nDstPixelStride, nWordCount); |
2735 | | } |
2736 | | |
2737 | | #endif |
2738 | | |
2739 | | /************************************************************************/ |
2740 | | /* GDALCopyWordsComplexT() */ |
2741 | | /************************************************************************/ |
2742 | | /** |
2743 | | * Template function used to copy nWordCount complex pixels from pSrcData |
2744 | | * into pDstData, stepping nSrcPixelStride bytes through the source and |
2745 | | * nDstPixelStride bytes through the destination. Handles the case where |
2746 | | * both input and output are complex: two components are copied per pixel. |
2747 | | * |
2748 | | * @param pSrcData the source data buffer |
2749 | | * @param nSrcPixelStride the stride, in bytes, in the buffer pSrcData |
2750 | | * between pixels of interest. |
2751 | | * @param pDstData the destination buffer. |
2752 | | * @param nDstPixelStride the stride, in bytes, in the buffer pDstData |
2753 | | * between pixels of interest. |
2754 | | * @param nWordCount the total number of pixel words to copy |
2755 | | * |
2756 | | */ |
2757 | | template <class Tin, class Tout> |
2758 | | inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData, |
2759 | | int nSrcPixelStride, |
2760 | | Tout *const CPL_RESTRICT pDstData, |
2761 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2762 | 0 | { |
2763 | 0 | decltype(nWordCount) nDstOffset = 0; /* running byte offset into the destination, advanced by nDstPixelStride per pixel */ |
2764 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); /* char views so that the strides are applied as byte offsets */ |
2765 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
2766 | |

2767 | 0 | for (decltype(nWordCount) n = 0; n < nWordCount; n++) |
2768 | 0 | { |
2769 | 0 | const Tin *const pPixelIn = |
2770 | 0 | reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride); /* source pixel n sits n * nSrcPixelStride bytes from the start */ |
2771 | 0 | Tout *const pPixelOut = |
2772 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2773 | |

2774 | 0 | GDALCopyWord(pPixelIn[0], pPixelOut[0]); /* first component; GDALCopyWord is defined outside this excerpt */ |
2775 | 0 | GDALCopyWord(pPixelIn[1], pPixelOut[1]); /* second component of the same pixel */ |
2776 | |

2777 | 0 | nDstOffset += nDstPixelStride; |
2778 | 0 | } |
2779 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, short>(unsigned char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, int>(unsigned char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, float>(unsigned char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned char, double>(unsigned char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, short>(unsigned short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, int>(unsigned short const*, int, int*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, float>(unsigned short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned short, double>(unsigned short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, short>(unsigned int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned int, double>(unsigned int 
const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, cpl::Float16>(long const*, int, 
cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, short>(float const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<float, double>(float const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, short>(double const*, int, 
short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, float>(double const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexT<double, double>(double const*, int, double*, int, long long) |
2780 | | |
2781 | | /************************************************************************/ |
2782 | | /* GDALCopyWordsComplexOutT() */ |
2783 | | /************************************************************************/ |
2784 | | /** |
2785 | | * Template function used to copy nWordCount real values from pSrcData |
2786 | | * into the complex buffer pDstData, stepping nSrcPixelStride bytes through |
2787 | | * the source and nDstPixelStride bytes through the destination. The value is |
2788 | | * real coming in and complex going out: the second output component is zero. |
2789 | | * |
2790 | | * @param pSrcData the source data buffer |
2791 | | * @param nSrcPixelStride the stride, in bytes, in the buffer pSrcData |
2792 | | * between pixels of interest. |
2793 | | * @param pDstData the destination buffer. |
2794 | | * @param nDstPixelStride the stride, in bytes, in the buffer pDstData |
2795 | | * between pixels of interest. |
2796 | | * @param nWordCount the total number of pixel words to copy |
2797 | | * |
2798 | | */ |
2799 | | template <class Tin, class Tout> |
2800 | | inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData, |
2801 | | int nSrcPixelStride, |
2802 | | Tout *const CPL_RESTRICT pDstData, |
2803 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
2804 | 0 | { |
2805 | 0 | decltype(nWordCount) nDstOffset = 0; /* running byte offset into the destination, advanced by nDstPixelStride per pixel */ |
2806 | |

2807 | 0 | const Tout tOutZero = static_cast<Tout>(0); /* value written to the second component of every output pixel */ |
2808 | |

2809 | 0 | const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData); /* char views so that the strides are applied as byte offsets */ |
2810 | 0 | char *const pDstDataPtr = reinterpret_cast<char *>(pDstData); |
2811 | |

2812 | 0 | for (decltype(nWordCount) n = 0; n < nWordCount; n++) |
2813 | 0 | { |
2814 | 0 | const Tin tValue = |
2815 | 0 | *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride); /* source value n sits n * nSrcPixelStride bytes from the start */ |
2816 | 0 | Tout *const pPixelOut = |
2817 | 0 | reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset); |
2818 | 0 | GDALCopyWord(tValue, *pPixelOut); /* first component receives the converted input; GDALCopyWord is defined outside this excerpt */ |
2819 | |

2820 | 0 | pPixelOut[1] = tOutZero; /* second component is always zero */ |
2821 | |

2822 | 0 | nDstOffset += nDstPixelStride; |
2823 | 0 | } |
2824 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, short>(unsigned char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, int>(unsigned char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, cpl::Float16>(unsigned char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, float>(unsigned char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned char, double>(unsigned char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, short>(signed char const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, int>(signed char const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, cpl::Float16>(signed char const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, float>(signed char const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<signed char, double>(signed char const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, short>(unsigned short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, int>(unsigned short const*, int, int*, int, long long) 
Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, cpl::Float16>(unsigned short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, float>(unsigned short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned short, double>(unsigned short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, short>(short const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, int>(short const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, cpl::Float16>(short const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, float>(short const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<short, double>(short const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, short>(unsigned int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, int>(unsigned int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, cpl::Float16>(unsigned int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned int, float>(unsigned int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous 
namespace)::GDALCopyWordsComplexOutT<unsigned int, double>(unsigned int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, short>(int const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, int>(int const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, cpl::Float16>(int const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, float>(int const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<int, double>(int const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, short>(unsigned long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, int>(unsigned long const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, cpl::Float16>(unsigned long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, float>(unsigned long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<unsigned long, double>(unsigned long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, short>(long const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, int>(long const*, int, int*, int, long long) Unexecuted instantiation: 
rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, cpl::Float16>(long const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, float>(long const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<long, double>(long const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, short>(cpl::Float16 const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, int>(cpl::Float16 const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, cpl::Float16>(cpl::Float16 const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, float>(cpl::Float16 const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<cpl::Float16, double>(cpl::Float16 const*, int, double*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, short>(float const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, int>(float const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, cpl::Float16>(float const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, float>(float const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<float, double>(float const*, int, double*, 
int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, short>(double const*, int, short*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, int>(double const*, int, int*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, cpl::Float16>(double const*, int, cpl::Float16*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, float>(double const*, int, float*, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsComplexOutT<double, double>(double const*, int, double*, int, long long) |
2825 | | |
2826 | | /************************************************************************/ |
2827 | | /* GDALCopyWordsFromT() */ |
2828 | | /************************************************************************/ |
2829 | | /** |
2830 | | * Template driver function. Given the input type T, call the appropriate |
2831 | | * GDALCopyWordsT function template for the desired output type. You should |
2832 | | * never call this function directly (call GDALCopyWords instead). |
2833 | | * |
2834 | | * @param pSrcData source data buffer |
2835 | | * @param nSrcPixelStride pixel stride in input buffer, in pixel words |
2836 | | * @param bInComplex input is complex |
2837 | | * @param pDstData destination data buffer |
2838 | | * @param eDstType destination data type |
2839 | | * @param nDstPixelStride pixel stride in output buffer, in pixel words |
2840 | | * @param nWordCount number of pixel words to be copied |
2841 | | */ |
2842 | | template <class T> |
2843 | | inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData, |
2844 | | int nSrcPixelStride, bool bInComplex, |
2845 | | void *CPL_RESTRICT pDstData, |
2846 | | GDALDataType eDstType, int nDstPixelStride, |
2847 | | GPtrDiff_t nWordCount) |
2848 | 0 | { |
2849 | 0 | switch (eDstType) |
2850 | 0 | { |
2851 | 0 | case GDT_Byte: |
2852 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2853 | 0 | static_cast<unsigned char *>(pDstData), |
2854 | 0 | nDstPixelStride, nWordCount); |
2855 | 0 | break; |
2856 | 0 | case GDT_Int8: |
2857 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2858 | 0 | static_cast<signed char *>(pDstData), |
2859 | 0 | nDstPixelStride, nWordCount); |
2860 | 0 | break; |
2861 | 0 | case GDT_UInt16: |
2862 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2863 | 0 | static_cast<unsigned short *>(pDstData), |
2864 | 0 | nDstPixelStride, nWordCount); |
2865 | 0 | break; |
2866 | 0 | case GDT_Int16: |
2867 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2868 | 0 | static_cast<short *>(pDstData), nDstPixelStride, |
2869 | 0 | nWordCount); |
2870 | 0 | break; |
2871 | 0 | case GDT_UInt32: |
2872 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2873 | 0 | static_cast<unsigned int *>(pDstData), |
2874 | 0 | nDstPixelStride, nWordCount); |
2875 | 0 | break; |
2876 | 0 | case GDT_Int32: |
2877 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2878 | 0 | static_cast<int *>(pDstData), nDstPixelStride, |
2879 | 0 | nWordCount); |
2880 | 0 | break; |
2881 | 0 | case GDT_UInt64: |
2882 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2883 | 0 | static_cast<std::uint64_t *>(pDstData), |
2884 | 0 | nDstPixelStride, nWordCount); |
2885 | 0 | break; |
2886 | 0 | case GDT_Int64: |
2887 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2888 | 0 | static_cast<std::int64_t *>(pDstData), |
2889 | 0 | nDstPixelStride, nWordCount); |
2890 | 0 | break; |
2891 | 0 | case GDT_Float16: |
2892 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2893 | 0 | static_cast<GFloat16 *>(pDstData), nDstPixelStride, |
2894 | 0 | nWordCount); |
2895 | 0 | break; |
2896 | 0 | case GDT_Float32: |
2897 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2898 | 0 | static_cast<float *>(pDstData), nDstPixelStride, |
2899 | 0 | nWordCount); |
2900 | 0 | break; |
2901 | 0 | case GDT_Float64: |
2902 | 0 | GDALCopyWordsT(pSrcData, nSrcPixelStride, |
2903 | 0 | static_cast<double *>(pDstData), nDstPixelStride, |
2904 | 0 | nWordCount); |
2905 | 0 | break; |
2906 | 0 | case GDT_CInt16: |
2907 | 0 | if (bInComplex) |
2908 | 0 | { |
2909 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
2910 | 0 | static_cast<short *>(pDstData), |
2911 | 0 | nDstPixelStride, nWordCount); |
2912 | 0 | } |
2913 | 0 | else // input is not complex, so we need to promote to a complex |
2914 | | // buffer |
2915 | 0 | { |
2916 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
2917 | 0 | static_cast<short *>(pDstData), |
2918 | 0 | nDstPixelStride, nWordCount); |
2919 | 0 | } |
2920 | 0 | break; |
2921 | 0 | case GDT_CInt32: |
2922 | 0 | if (bInComplex) |
2923 | 0 | { |
2924 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
2925 | 0 | static_cast<int *>(pDstData), |
2926 | 0 | nDstPixelStride, nWordCount); |
2927 | 0 | } |
2928 | 0 | else // input is not complex, so we need to promote to a complex |
2929 | | // buffer |
2930 | 0 | { |
2931 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
2932 | 0 | static_cast<int *>(pDstData), |
2933 | 0 | nDstPixelStride, nWordCount); |
2934 | 0 | } |
2935 | 0 | break; |
2936 | 0 | case GDT_CFloat16: |
2937 | 0 | if (bInComplex) |
2938 | 0 | { |
2939 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
2940 | 0 | static_cast<GFloat16 *>(pDstData), |
2941 | 0 | nDstPixelStride, nWordCount); |
2942 | 0 | } |
2943 | 0 | else // input is not complex, so we need to promote to a complex |
2944 | | // buffer |
2945 | 0 | { |
2946 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
2947 | 0 | static_cast<GFloat16 *>(pDstData), |
2948 | 0 | nDstPixelStride, nWordCount); |
2949 | 0 | } |
2950 | 0 | break; |
2951 | 0 | case GDT_CFloat32: |
2952 | 0 | if (bInComplex) |
2953 | 0 | { |
2954 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
2955 | 0 | static_cast<float *>(pDstData), |
2956 | 0 | nDstPixelStride, nWordCount); |
2957 | 0 | } |
2958 | 0 | else // input is not complex, so we need to promote to a complex |
2959 | | // buffer |
2960 | 0 | { |
2961 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
2962 | 0 | static_cast<float *>(pDstData), |
2963 | 0 | nDstPixelStride, nWordCount); |
2964 | 0 | } |
2965 | 0 | break; |
2966 | 0 | case GDT_CFloat64: |
2967 | 0 | if (bInComplex) |
2968 | 0 | { |
2969 | 0 | GDALCopyWordsComplexT(pSrcData, nSrcPixelStride, |
2970 | 0 | static_cast<double *>(pDstData), |
2971 | 0 | nDstPixelStride, nWordCount); |
2972 | 0 | } |
2973 | 0 | else // input is not complex, so we need to promote to a complex |
2974 | | // buffer |
2975 | 0 | { |
2976 | 0 | GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride, |
2977 | 0 | static_cast<double *>(pDstData), |
2978 | 0 | nDstPixelStride, nWordCount); |
2979 | 0 | } |
2980 | 0 | break; |
2981 | 0 | case GDT_Unknown: |
2982 | 0 | case GDT_TypeCount: |
2983 | 0 | CPLAssert(false); |
2984 | 0 | } |
2985 | 0 | } Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned char>(unsigned char const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<signed char>(signed char const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned short>(unsigned short const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<short>(short const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned int>(unsigned int const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<int>(int const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<unsigned long>(unsigned long const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<long>(long const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<cpl::Float16>(cpl::Float16 const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<float>(float const*, int, bool, void*, GDALDataType, int, long long) Unexecuted instantiation: rasterio.cpp:void (anonymous namespace)::GDALCopyWordsFromT<double>(double const*, int, bool, void*, GDALDataType, int, long long) |
2986 | | |
2987 | | } // end anonymous namespace |
2988 | | |
2989 | | /************************************************************************/ |
2990 | | /* GDALReplicateWord() */ |
2991 | | /************************************************************************/ |
2992 | | |
2993 | | template <class T> |
2994 | | inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride, |
2995 | | GPtrDiff_t nWordCount) |
2996 | 0 | { |
2997 | 0 | const T valSet = *static_cast<const T *>(pDstData); |
2998 | 0 | if (nDstPixelStride == static_cast<int>(sizeof(T))) |
2999 | 0 | { |
3000 | 0 | T *pDstPtr = static_cast<T *>(pDstData) + 1; |
3001 | 0 | while (nWordCount >= 4) |
3002 | 0 | { |
3003 | 0 | nWordCount -= 4; |
3004 | 0 | pDstPtr[0] = valSet; |
3005 | 0 | pDstPtr[1] = valSet; |
3006 | 0 | pDstPtr[2] = valSet; |
3007 | 0 | pDstPtr[3] = valSet; |
3008 | 0 | pDstPtr += 4; |
3009 | 0 | } |
3010 | 0 | while (nWordCount > 0) |
3011 | 0 | { |
3012 | 0 | --nWordCount; |
3013 | 0 | *pDstPtr = valSet; |
3014 | 0 | pDstPtr++; |
3015 | 0 | } |
3016 | 0 | } |
3017 | 0 | else |
3018 | 0 | { |
3019 | 0 | GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride; |
3020 | 0 | while (nWordCount > 0) |
3021 | 0 | { |
3022 | 0 | --nWordCount; |
3023 | 0 | *reinterpret_cast<T *>(pabyDstPtr) = valSet; |
3024 | 0 | pabyDstPtr += nDstPixelStride; |
3025 | 0 | } |
3026 | 0 | } |
3027 | 0 | } Unexecuted instantiation: void GDALReplicateWordT<unsigned short>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<short>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<unsigned int>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<int>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<unsigned long>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<long>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<cpl::Float16>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<float>(void*, int, long long) Unexecuted instantiation: void GDALReplicateWordT<double>(void*, int, long long) |
3028 | | |
3029 | | static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData, |
3030 | | GDALDataType eSrcType, |
3031 | | void *CPL_RESTRICT pDstData, |
3032 | | GDALDataType eDstType, int nDstPixelStride, |
3033 | | GPtrDiff_t nWordCount) |
3034 | 0 | { |
3035 | | /* ----------------------------------------------------------------------- |
3036 | | */ |
3037 | | /* Special case when the source data is always the same value */ |
3038 | | /* (for VRTSourcedRasterBand::IRasterIO and |
3039 | | * VRTDerivedRasterBand::IRasterIO*/ |
3040 | | /* for example) */ |
3041 | | /* ----------------------------------------------------------------------- |
3042 | | */ |
3043 | | // Let the general translation case do the necessary conversions |
3044 | | // on the first destination element. |
3045 | 0 | GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1); |
3046 | | |
3047 | | // Now copy the first element to the nWordCount - 1 following destination |
3048 | | // elements. |
3049 | 0 | nWordCount--; |
3050 | 0 | GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride; |
3051 | |
|
3052 | 0 | switch (eDstType) |
3053 | 0 | { |
3054 | 0 | case GDT_Byte: |
3055 | 0 | case GDT_Int8: |
3056 | 0 | { |
3057 | 0 | if (nDstPixelStride == 1) |
3058 | 0 | { |
3059 | 0 | if (nWordCount > 0) |
3060 | 0 | memset(pabyDstWord, |
3061 | 0 | *reinterpret_cast<const GByte *>(pDstData), |
3062 | 0 | nWordCount); |
3063 | 0 | } |
3064 | 0 | else |
3065 | 0 | { |
3066 | 0 | GByte valSet = *reinterpret_cast<const GByte *>(pDstData); |
3067 | 0 | while (nWordCount > 0) |
3068 | 0 | { |
3069 | 0 | --nWordCount; |
3070 | 0 | *pabyDstWord = valSet; |
3071 | 0 | pabyDstWord += nDstPixelStride; |
3072 | 0 | } |
3073 | 0 | } |
3074 | 0 | break; |
3075 | 0 | } |
3076 | | |
3077 | 0 | #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \ |
3078 | 0 | case enum_type: \ |
3079 | 0 | { \ |
3080 | 0 | GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \ |
3081 | 0 | break; \ |
3082 | 0 | } |
3083 | | |
3084 | 0 | CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16) |
3085 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16) |
3086 | 0 | CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32) |
3087 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32) |
3088 | 0 | CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t) |
3089 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t) |
3090 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Float16, GFloat16) |
3091 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Float32, float) |
3092 | 0 | CASE_DUPLICATE_SIMPLE(GDT_Float64, double) |
3093 | | |
3094 | 0 | #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \ |
3095 | 0 | case enum_type: \ |
3096 | 0 | { \ |
3097 | 0 | c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0]; \ |
3098 | 0 | c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1]; \ |
3099 | 0 | while (nWordCount > 0) \ |
3100 | 0 | { \ |
3101 | 0 | --nWordCount; \ |
3102 | 0 | reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1; \ |
3103 | 0 | reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2; \ |
3104 | 0 | pabyDstWord += nDstPixelStride; \ |
3105 | 0 | } \ |
3106 | 0 | break; \ |
3107 | 0 | } |
3108 | | |
3109 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16) |
3110 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32) |
3111 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CFloat16, GFloat16) |
3112 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float) |
3113 | 0 | CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double) |
3114 | | |
3115 | 0 | case GDT_Unknown: |
3116 | 0 | case GDT_TypeCount: |
3117 | 0 | CPLAssert(false); |
3118 | 0 | } |
3119 | 0 | } |
3120 | | |
3121 | | /************************************************************************/ |
3122 | | /* GDALUnrolledCopy() */ |
3123 | | /************************************************************************/ |
3124 | | |
3125 | | template <class T, int srcStride, int dstStride> |
3126 | | static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest, |
3127 | | const T *CPL_RESTRICT pSrc, |
3128 | | GPtrDiff_t nIters) |
3129 | 0 | { |
3130 | 0 | if (nIters >= 16) |
3131 | 0 | { |
3132 | 0 | for (GPtrDiff_t i = nIters / 16; i != 0; i--) |
3133 | 0 | { |
3134 | 0 | pDest[0 * dstStride] = pSrc[0 * srcStride]; |
3135 | 0 | pDest[1 * dstStride] = pSrc[1 * srcStride]; |
3136 | 0 | pDest[2 * dstStride] = pSrc[2 * srcStride]; |
3137 | 0 | pDest[3 * dstStride] = pSrc[3 * srcStride]; |
3138 | 0 | pDest[4 * dstStride] = pSrc[4 * srcStride]; |
3139 | 0 | pDest[5 * dstStride] = pSrc[5 * srcStride]; |
3140 | 0 | pDest[6 * dstStride] = pSrc[6 * srcStride]; |
3141 | 0 | pDest[7 * dstStride] = pSrc[7 * srcStride]; |
3142 | 0 | pDest[8 * dstStride] = pSrc[8 * srcStride]; |
3143 | 0 | pDest[9 * dstStride] = pSrc[9 * srcStride]; |
3144 | 0 | pDest[10 * dstStride] = pSrc[10 * srcStride]; |
3145 | 0 | pDest[11 * dstStride] = pSrc[11 * srcStride]; |
3146 | 0 | pDest[12 * dstStride] = pSrc[12 * srcStride]; |
3147 | 0 | pDest[13 * dstStride] = pSrc[13 * srcStride]; |
3148 | 0 | pDest[14 * dstStride] = pSrc[14 * srcStride]; |
3149 | 0 | pDest[15 * dstStride] = pSrc[15 * srcStride]; |
3150 | 0 | pDest += 16 * dstStride; |
3151 | 0 | pSrc += 16 * srcStride; |
3152 | 0 | } |
3153 | 0 | nIters = nIters % 16; |
3154 | 0 | } |
3155 | 0 | for (GPtrDiff_t i = 0; i < nIters; i++) |
3156 | 0 | { |
3157 | 0 | pDest[i * dstStride] = *pSrc; |
3158 | 0 | pSrc += srcStride; |
3159 | 0 | } |
3160 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 3, 1>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 2, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 3, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 4, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 2>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 3>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopyGeneric<short, 1, 4>(short*, short const*, long long) |
3161 | | |
3162 | | template <class T, int srcStride, int dstStride> |
3163 | | static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest, |
3164 | | const T *CPL_RESTRICT pSrc, |
3165 | | GPtrDiff_t nIters) |
3166 | 0 | { |
3167 | 0 | GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters); |
3168 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 2>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 3>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<unsigned char, 1, 4>(unsigned char*, unsigned char const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 2, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 3, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 4, 1>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 2>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 3>(short*, short const*, long long) Unexecuted instantiation: rasterio.cpp:void GDALUnrolledCopy<short, 1, 4>(short*, short const*, long long) |
3169 | | |
3170 | | #ifdef HAVE_SSE2 |
3171 | | |
3172 | | template <> |
3173 | | void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest, |
3174 | | const GByte *CPL_RESTRICT pSrc, |
3175 | | GPtrDiff_t nIters) |
3176 | 0 | { |
3177 | 0 | decltype(nIters) i = 0; |
3178 | 0 | if (nIters > 16) |
3179 | 0 | { |
3180 | 0 | const __m128i xmm_mask = _mm_set1_epi16(0xff); |
3181 | | // If we were sure that there would always be 1 trailing byte, we could |
3182 | | // check against nIters - 15 |
3183 | 0 | for (; i < nIters - 16; i += 16) |
3184 | 0 | { |
3185 | 0 | __m128i xmm0 = |
3186 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0)); |
3187 | 0 | __m128i xmm1 = |
3188 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16)); |
3189 | | // Set higher 8bit of each int16 packed word to 0 |
3190 | 0 | xmm0 = _mm_and_si128(xmm0, xmm_mask); |
3191 | 0 | xmm1 = _mm_and_si128(xmm1, xmm_mask); |
3192 | | // Pack int16 to uint8 and merge back both vector |
3193 | 0 | xmm0 = _mm_packus_epi16(xmm0, xmm1); |
3194 | | |
3195 | | // Store result |
3196 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0); |
3197 | |
|
3198 | 0 | pSrc += 2 * 16; |
3199 | 0 | } |
3200 | 0 | } |
3201 | 0 | for (; i < nIters; i++) |
3202 | 0 | { |
3203 | 0 | pDest[i] = *pSrc; |
3204 | 0 | pSrc += 2; |
3205 | 0 | } |
3206 | 0 | } |
3207 | | |
3208 | | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
3209 | | |
3210 | | template <> |
3211 | | void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest, |
3212 | | const GByte *CPL_RESTRICT pSrc, |
3213 | | GPtrDiff_t nIters) |
3214 | 0 | { |
3215 | 0 | if (nIters > 16 && CPLHaveRuntimeSSSE3()) |
3216 | 0 | { |
3217 | 0 | GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters); |
3218 | 0 | } |
3219 | 0 | else |
3220 | 0 | { |
3221 | 0 | GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters); |
3222 | 0 | } |
3223 | 0 | } |
3224 | | |
3225 | | #endif |
3226 | | |
3227 | | template <> |
3228 | | void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest, |
3229 | | const GByte *CPL_RESTRICT pSrc, |
3230 | | GPtrDiff_t nIters) |
3231 | 0 | { |
3232 | 0 | decltype(nIters) i = 0; |
3233 | 0 | if (nIters > 16) |
3234 | 0 | { |
3235 | 0 | const __m128i xmm_mask = _mm_set1_epi32(0xff); |
3236 | | // If we were sure that there would always be 3 trailing bytes, we could |
3237 | | // check against nIters - 15 |
3238 | 0 | for (; i < nIters - 16; i += 16) |
3239 | 0 | { |
3240 | 0 | __m128i xmm0 = |
3241 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0)); |
3242 | 0 | __m128i xmm1 = |
3243 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16)); |
3244 | 0 | __m128i xmm2 = |
3245 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32)); |
3246 | 0 | __m128i xmm3 = |
3247 | 0 | _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48)); |
3248 | | // Set higher 24bit of each int32 packed word to 0 |
3249 | 0 | xmm0 = _mm_and_si128(xmm0, xmm_mask); |
3250 | 0 | xmm1 = _mm_and_si128(xmm1, xmm_mask); |
3251 | 0 | xmm2 = _mm_and_si128(xmm2, xmm_mask); |
3252 | 0 | xmm3 = _mm_and_si128(xmm3, xmm_mask); |
3253 | | // Pack int32 to int16 |
3254 | 0 | xmm0 = _mm_packs_epi32(xmm0, xmm1); |
3255 | 0 | xmm2 = _mm_packs_epi32(xmm2, xmm3); |
3256 | | // Pack int16 to uint8 |
3257 | 0 | xmm0 = _mm_packus_epi16(xmm0, xmm2); |
3258 | | |
3259 | | // Store result |
3260 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0); |
3261 | |
|
3262 | 0 | pSrc += 4 * 16; |
3263 | 0 | } |
3264 | 0 | } |
3265 | 0 | for (; i < nIters; i++) |
3266 | 0 | { |
3267 | 0 | pDest[i] = *pSrc; |
3268 | 0 | pSrc += 4; |
3269 | 0 | } |
3270 | 0 | } |
3271 | | #endif // HAVE_SSE2 |
3272 | | |
3273 | | /************************************************************************/ |
3274 | | /* GDALFastCopy() */ |
3275 | | /************************************************************************/ |
3276 | | |
3277 | | template <class T> |
3278 | | static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride, |
3279 | | const T *CPL_RESTRICT pSrc, int nSrcStride, |
3280 | | GPtrDiff_t nIters) |
3281 | 0 | { |
3282 | 0 | constexpr int sizeofT = static_cast<int>(sizeof(T)); |
3283 | 0 | if (nIters == 1) |
3284 | 0 | { |
3285 | 0 | *pDest = *pSrc; |
3286 | 0 | } |
3287 | 0 | else if (nDestStride == sizeofT) |
3288 | 0 | { |
3289 | 0 | if (nSrcStride == sizeofT) |
3290 | 0 | { |
3291 | 0 | memcpy(pDest, pSrc, nIters * sizeof(T)); |
3292 | 0 | } |
3293 | 0 | else if (nSrcStride == 2 * sizeofT) |
3294 | 0 | { |
3295 | 0 | GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters); |
3296 | 0 | } |
3297 | 0 | else if (nSrcStride == 3 * sizeofT) |
3298 | 0 | { |
3299 | 0 | GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters); |
3300 | 0 | } |
3301 | 0 | else if (nSrcStride == 4 * sizeofT) |
3302 | 0 | { |
3303 | 0 | GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters); |
3304 | 0 | } |
3305 | 0 | else |
3306 | 0 | { |
3307 | 0 | while (nIters-- > 0) |
3308 | 0 | { |
3309 | 0 | *pDest = *pSrc; |
3310 | 0 | pSrc += nSrcStride / sizeofT; |
3311 | 0 | pDest++; |
3312 | 0 | } |
3313 | 0 | } |
3314 | 0 | } |
3315 | 0 | else if (nSrcStride == sizeofT) |
3316 | 0 | { |
3317 | 0 | if (nDestStride == 2 * sizeofT) |
3318 | 0 | { |
3319 | 0 | GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters); |
3320 | 0 | } |
3321 | 0 | else if (nDestStride == 3 * sizeofT) |
3322 | 0 | { |
3323 | 0 | GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters); |
3324 | 0 | } |
3325 | 0 | else if (nDestStride == 4 * sizeofT) |
3326 | 0 | { |
3327 | 0 | GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters); |
3328 | 0 | } |
3329 | 0 | else |
3330 | 0 | { |
3331 | 0 | while (nIters-- > 0) |
3332 | 0 | { |
3333 | 0 | *pDest = *pSrc; |
3334 | 0 | pSrc++; |
3335 | 0 | pDest += nDestStride / sizeofT; |
3336 | 0 | } |
3337 | 0 | } |
3338 | 0 | } |
3339 | 0 | else |
3340 | 0 | { |
3341 | 0 | while (nIters-- > 0) |
3342 | 0 | { |
3343 | 0 | *pDest = *pSrc; |
3344 | 0 | pSrc += nSrcStride / sizeofT; |
3345 | 0 | pDest += nDestStride / sizeofT; |
3346 | 0 | } |
3347 | 0 | } |
3348 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<unsigned char>(unsigned char*, int, unsigned char const*, int, long long) Unexecuted instantiation: rasterio.cpp:void GDALFastCopy<short>(short*, int, short const*, int, long long) |
3349 | | |
3350 | | /************************************************************************/ |
3351 | | /* GDALFastCopyByte() */ |
3352 | | /************************************************************************/ |
3353 | | |
3354 | | static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData, |
3355 | | int nSrcPixelStride, GByte *CPL_RESTRICT pDstData, |
3356 | | int nDstPixelStride, GPtrDiff_t nWordCount) |
3357 | 0 | { |
3358 | 0 | GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride, |
3359 | 0 | nWordCount); |
3360 | 0 | } |
3361 | | |
3362 | | /************************************************************************/ |
3363 | | /* GDALCopyWords() */ |
3364 | | /************************************************************************/ |
3365 | | |
3366 | | /** |
3367 | | * Copy pixel words from buffer to buffer. |
3368 | | * |
3369 | | * @see GDALCopyWords64() |
3370 | | */ |
3371 | | void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData, |
3372 | | GDALDataType eSrcType, int nSrcPixelStride, |
3373 | | void *CPL_RESTRICT pDstData, |
3374 | | GDALDataType eDstType, int nDstPixelStride, |
3375 | | int nWordCount) |
3376 | 0 | { |
3377 | 0 | GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType, |
3378 | 0 | nDstPixelStride, nWordCount); |
3379 | 0 | } |
3380 | | |
3381 | | /************************************************************************/ |
3382 | | /* GDALCopyWords64() */ |
3383 | | /************************************************************************/ |
3384 | | |
3385 | | /** |
3386 | | * Copy pixel words from buffer to buffer. |
3387 | | * |
3388 | | * This function is used to copy pixel word values from one memory buffer |
3389 | | * to another, with support for conversion between data types, and differing |
3390 | | * step factors. The data type conversion is done using the following |
3391 | | * rules: |
3392 | | * <ul> |
3393 | | * <li>Values assigned to a lower range integer type are clipped. For |
3394 | | * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values |
3395 | | * less than 0 to be set to 0, and values larger than 255 to be set to 255. |
3396 | | * </li> |
3397 | | * <li> |
3398 | | * Assignment from floating point to integer rounds to closest integer. |
3399 | | * +Infinity is mapped to the largest integer. -Infinity is mapped to the |
3400 | | * smallest integer. NaN is mapped to 0. |
3401 | | * </li> |
3402 | | * <li> |
3403 | | * Assignment from non-complex to complex will result in the imaginary part |
3404 | | * being set to zero on output. |
3405 | | * </li> |
3406 | | * <li> Assignment from complex to |
3407 | | * non-complex will result in the complex portion being lost and the real |
3408 | | * component being preserved (<i>not magnitude!</i>). |
3409 | | * </li> |
3410 | | * </ul> |
3411 | | * |
3412 | | * No assumptions are made about the source or destination words occurring |
3413 | | * on word boundaries. It is assumed that all values are in native machine |
3414 | | * byte order. |
3415 | | * |
3416 | | * @param pSrcData Pointer to source data to be converted. |
3417 | | * @param eSrcType the source data type (see GDALDataType enum) |
3418 | | * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words), |
3419 | | * in bytes |
3420 | | * @param pDstData Pointer to buffer where destination data should go |
3421 | | * @param eDstType the destination data type (see GDALDataType enum) |
3422 | | * @param nDstPixelStride Destination pixel stride (i.e. distance between 2 |
3423 | | * words), in bytes |
3424 | | * @param nWordCount number of words to be copied |
3425 | | * |
3426 | | * @note |
3427 | | * When adding a new data type to GDAL, you must do the following to |
3428 | | * support it properly within the GDALCopyWords function: |
3429 | | * 1. Add the data type to the switch on eSrcType in GDALCopyWords. |
3430 | | * This should invoke the appropriate GDALCopyWordsFromT wrapper. |
3431 | | * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT. |
3432 | | * This should call the appropriate GDALCopyWordsT template. |
3433 | | * 3. If appropriate, overload the appropriate CopyWord template in the |
3434 | | * above namespace. This will ensure that any conversion issues are |
3435 | | * handled (cases like the float -> int32 case, where the min/max) |
3436 | | * values are subject to roundoff error. |
3437 | | */ |
3438 | | |
3439 | | void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData, |
3440 | | GDALDataType eSrcType, int nSrcPixelStride, |
3441 | | void *CPL_RESTRICT pDstData, |
3442 | | GDALDataType eDstType, int nDstPixelStride, |
3443 | | GPtrDiff_t nWordCount) |
3444 | | |
3445 | 0 | { |
3446 | | // On platforms where alignment matters, be careful |
3447 | 0 | const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType); |
3448 | 0 | const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType); |
3449 | 0 | if (CPL_UNLIKELY(nSrcDataTypeSize == 0 || nDstDataTypeSize == 0)) |
3450 | 0 | { |
3451 | 0 | CPLError(CE_Failure, CPLE_NotSupported, |
3452 | 0 | "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount " |
3453 | 0 | "argument"); |
3454 | 0 | return; |
3455 | 0 | } |
3456 | 0 | if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) && |
3457 | 0 | ((reinterpret_cast<uintptr_t>(pSrcData) % nSrcDataTypeSize) != 0 || |
3458 | 0 | (reinterpret_cast<uintptr_t>(pDstData) % nDstDataTypeSize) != 0 || |
3459 | 0 | (nSrcPixelStride % nSrcDataTypeSize) != 0 || |
3460 | 0 | (nDstPixelStride % nDstDataTypeSize) != 0)) |
3461 | 0 | { |
3462 | 0 | if (eSrcType == eDstType) |
3463 | 0 | { |
3464 | 0 | for (decltype(nWordCount) i = 0; i < nWordCount; i++) |
3465 | 0 | { |
3466 | 0 | memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i, |
3467 | 0 | static_cast<const GByte *>(pSrcData) + |
3468 | 0 | nSrcPixelStride * i, |
3469 | 0 | nDstDataTypeSize); |
3470 | 0 | } |
3471 | 0 | } |
3472 | 0 | else |
3473 | 0 | { |
3474 | 0 | const auto getAlignedPtr = [](GByte *ptr, int align) |
3475 | 0 | { |
3476 | 0 | return ptr + |
3477 | 0 | ((align - (reinterpret_cast<uintptr_t>(ptr) % align)) % |
3478 | 0 | align); |
3479 | 0 | }; |
3480 | | |
3481 | | // The largest we need is for CFloat64 (16 bytes), so 32 bytes to |
3482 | | // be sure to get correctly aligned pointer. |
3483 | 0 | constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double); |
3484 | 0 | GByte abySrcBuffer[2 * SIZEOF_CFLOAT64]; |
3485 | 0 | GByte abyDstBuffer[2 * SIZEOF_CFLOAT64]; |
3486 | 0 | GByte *pabySrcBuffer = |
3487 | 0 | getAlignedPtr(abySrcBuffer, nSrcDataTypeSize); |
3488 | 0 | GByte *pabyDstBuffer = |
3489 | 0 | getAlignedPtr(abyDstBuffer, nDstDataTypeSize); |
3490 | 0 | for (decltype(nWordCount) i = 0; i < nWordCount; i++) |
3491 | 0 | { |
3492 | 0 | memcpy(pabySrcBuffer, |
3493 | 0 | static_cast<const GByte *>(pSrcData) + |
3494 | 0 | nSrcPixelStride * i, |
3495 | 0 | nSrcDataTypeSize); |
3496 | 0 | GDALCopyWords64(pabySrcBuffer, eSrcType, 0, pabyDstBuffer, |
3497 | 0 | eDstType, 0, 1); |
3498 | 0 | memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i, |
3499 | 0 | pabyDstBuffer, nDstDataTypeSize); |
3500 | 0 | } |
3501 | 0 | } |
3502 | 0 | return; |
3503 | 0 | } |
3504 | | |
3505 | | // Deal with the case where we're replicating a single word into the |
3506 | | // provided buffer |
3507 | 0 | if (nSrcPixelStride == 0 && nWordCount > 1) |
3508 | 0 | { |
3509 | 0 | GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType, |
3510 | 0 | nDstPixelStride, nWordCount); |
3511 | 0 | return; |
3512 | 0 | } |
3513 | | |
3514 | 0 | if (eSrcType == eDstType) |
3515 | 0 | { |
3516 | 0 | if (eSrcType == GDT_Byte || eSrcType == GDT_Int8) |
3517 | 0 | { |
3518 | 0 | GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride, |
3519 | 0 | static_cast<const GByte *>(pSrcData), nSrcPixelStride, |
3520 | 0 | nWordCount); |
3521 | 0 | return; |
3522 | 0 | } |
3523 | | |
3524 | 0 | if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 && |
3525 | 0 | (nDstPixelStride % 2) == 0) |
3526 | 0 | { |
3527 | 0 | GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride, |
3528 | 0 | static_cast<const short *>(pSrcData), nSrcPixelStride, |
3529 | 0 | nWordCount); |
3530 | 0 | return; |
3531 | 0 | } |
3532 | | |
3533 | 0 | if (nWordCount == 1) |
3534 | 0 | { |
3535 | | #if defined(CSA_BUILD) || defined(__COVERITY__) |
3536 | | // Avoid false positives... |
3537 | | memcpy(pDstData, pSrcData, nSrcDataTypeSize); |
3538 | | #else |
3539 | 0 | if (nSrcDataTypeSize == 2) |
3540 | 0 | memcpy(pDstData, pSrcData, 2); |
3541 | 0 | else if (nSrcDataTypeSize == 4) |
3542 | 0 | memcpy(pDstData, pSrcData, 4); |
3543 | 0 | else if (nSrcDataTypeSize == 8) |
3544 | 0 | memcpy(pDstData, pSrcData, 8); |
3545 | 0 | else /* if( eSrcType == GDT_CFloat64 ) */ |
3546 | 0 | memcpy(pDstData, pSrcData, 16); |
3547 | 0 | #endif |
3548 | 0 | return; |
3549 | 0 | } |
3550 | | |
3551 | | // Let memcpy() handle the case where we're copying a packed buffer |
3552 | | // of pixels. |
3553 | 0 | if (nSrcPixelStride == nDstPixelStride) |
3554 | 0 | { |
3555 | 0 | if (nSrcPixelStride == nSrcDataTypeSize) |
3556 | 0 | { |
3557 | 0 | memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize); |
3558 | 0 | return; |
3559 | 0 | } |
3560 | 0 | } |
3561 | 0 | } |
3562 | | |
3563 | | // Handle the more general case -- deals with conversion of data types |
3564 | | // directly. |
3565 | 0 | switch (eSrcType) |
3566 | 0 | { |
3567 | 0 | case GDT_Byte: |
3568 | 0 | GDALCopyWordsFromT<unsigned char>( |
3569 | 0 | static_cast<const unsigned char *>(pSrcData), nSrcPixelStride, |
3570 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3571 | 0 | break; |
3572 | 0 | case GDT_Int8: |
3573 | 0 | GDALCopyWordsFromT<signed char>( |
3574 | 0 | static_cast<const signed char *>(pSrcData), nSrcPixelStride, |
3575 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3576 | 0 | break; |
3577 | 0 | case GDT_UInt16: |
3578 | 0 | GDALCopyWordsFromT<unsigned short>( |
3579 | 0 | static_cast<const unsigned short *>(pSrcData), nSrcPixelStride, |
3580 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3581 | 0 | break; |
3582 | 0 | case GDT_Int16: |
3583 | 0 | GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData), |
3584 | 0 | nSrcPixelStride, false, pDstData, |
3585 | 0 | eDstType, nDstPixelStride, nWordCount); |
3586 | 0 | break; |
3587 | 0 | case GDT_UInt32: |
3588 | 0 | GDALCopyWordsFromT<unsigned int>( |
3589 | 0 | static_cast<const unsigned int *>(pSrcData), nSrcPixelStride, |
3590 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3591 | 0 | break; |
3592 | 0 | case GDT_Int32: |
3593 | 0 | GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData), |
3594 | 0 | nSrcPixelStride, false, pDstData, eDstType, |
3595 | 0 | nDstPixelStride, nWordCount); |
3596 | 0 | break; |
3597 | 0 | case GDT_UInt64: |
3598 | 0 | GDALCopyWordsFromT<std::uint64_t>( |
3599 | 0 | static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride, |
3600 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3601 | 0 | break; |
3602 | 0 | case GDT_Int64: |
3603 | 0 | GDALCopyWordsFromT<std::int64_t>( |
3604 | 0 | static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride, |
3605 | 0 | false, pDstData, eDstType, nDstPixelStride, nWordCount); |
3606 | 0 | break; |
3607 | 0 | case GDT_Float16: |
3608 | 0 | GDALCopyWordsFromT<GFloat16>( |
3609 | 0 | static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, false, |
3610 | 0 | pDstData, eDstType, nDstPixelStride, nWordCount); |
3611 | 0 | break; |
3612 | 0 | case GDT_Float32: |
3613 | 0 | GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData), |
3614 | 0 | nSrcPixelStride, false, pDstData, |
3615 | 0 | eDstType, nDstPixelStride, nWordCount); |
3616 | 0 | break; |
3617 | 0 | case GDT_Float64: |
3618 | 0 | GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData), |
3619 | 0 | nSrcPixelStride, false, pDstData, |
3620 | 0 | eDstType, nDstPixelStride, nWordCount); |
3621 | 0 | break; |
3622 | 0 | case GDT_CInt16: |
3623 | 0 | GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData), |
3624 | 0 | nSrcPixelStride, true, pDstData, eDstType, |
3625 | 0 | nDstPixelStride, nWordCount); |
3626 | 0 | break; |
3627 | 0 | case GDT_CInt32: |
3628 | 0 | GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData), |
3629 | 0 | nSrcPixelStride, true, pDstData, eDstType, |
3630 | 0 | nDstPixelStride, nWordCount); |
3631 | 0 | break; |
3632 | 0 | case GDT_CFloat16: |
3633 | 0 | GDALCopyWordsFromT<GFloat16>( |
3634 | 0 | static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, true, |
3635 | 0 | pDstData, eDstType, nDstPixelStride, nWordCount); |
3636 | 0 | break; |
3637 | 0 | case GDT_CFloat32: |
3638 | 0 | GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData), |
3639 | 0 | nSrcPixelStride, true, pDstData, eDstType, |
3640 | 0 | nDstPixelStride, nWordCount); |
3641 | 0 | break; |
3642 | 0 | case GDT_CFloat64: |
3643 | 0 | GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData), |
3644 | 0 | nSrcPixelStride, true, pDstData, |
3645 | 0 | eDstType, nDstPixelStride, nWordCount); |
3646 | 0 | break; |
3647 | 0 | case GDT_Unknown: |
3648 | 0 | case GDT_TypeCount: |
3649 | 0 | CPLAssert(false); |
3650 | 0 | } |
3651 | 0 | } |
3652 | | |
3653 | | /************************************************************************/ |
3654 | | /* GDALCopyBits() */ |
3655 | | /************************************************************************/ |
3656 | | |
3657 | | /** |
3658 | | * Bitwise word copying. |
3659 | | * |
3660 | | * A function for moving sets of partial bytes around. Loosely |
3661 | | * speaking this is a bitwise analog to GDALCopyWords(). |
3662 | | * |
3663 | | * It copies nStepCount "words" where each word is nBitCount bits long. |
3664 | | * The nSrcStep and nDstStep are the number of bits from the start of one |
3665 | | * word to the next (same as nBitCount if they are packed). The nSrcOffset |
3666 | | * and nDstOffset are the offset into the source and destination buffers |
3667 | | * to start at, also measured in bits. |
3668 | | * |
3669 | | * All bit offsets are assumed to start from the high order bit in a byte |
3670 | | * (i.e. most significant bit first). Currently this function is not very |
3671 | | * optimized, but it may be improved for some common cases in the future |
3672 | | * as needed. |
3673 | | * |
3674 | | * @param pabySrcData the source data buffer. |
3675 | | * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the |
3676 | | * first word to copy. |
3677 | | * @param nSrcStep the offset in bits from the start one source word to the |
3678 | | * start of the next. |
3679 | | * @param pabyDstData the destination data buffer. |
3680 | | * @param nDstOffset the offset (in bits) in pabyDstData to the start of the |
3681 | | * first word to copy over. |
3682 | | * @param nDstStep the offset in bits from the start one word to the |
3683 | | * start of the next. |
3684 | | * @param nBitCount the number of bits in a word to be copied. |
3685 | | * @param nStepCount the number of words to copy. |
3686 | | */ |
3687 | | |
3688 | | void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep, |
3689 | | GByte *pabyDstData, int nDstOffset, int nDstStep, |
3690 | | int nBitCount, int nStepCount) |
3691 | | |
3692 | 0 | { |
3693 | 0 | VALIDATE_POINTER0(pabySrcData, "GDALCopyBits"); |
3694 | | |
3695 | 0 | for (int iStep = 0; iStep < nStepCount; iStep++) |
3696 | 0 | { |
3697 | 0 | for (int iBit = 0; iBit < nBitCount; iBit++) |
3698 | 0 | { |
3699 | 0 | if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7))) |
3700 | 0 | pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7)); |
3701 | 0 | else |
3702 | 0 | pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7)); |
3703 | |
|
3704 | 0 | nSrcOffset++; |
3705 | 0 | nDstOffset++; |
3706 | 0 | } |
3707 | |
|
3708 | 0 | nSrcOffset += (nSrcStep - nBitCount); |
3709 | 0 | nDstOffset += (nDstStep - nBitCount); |
3710 | 0 | } |
3711 | 0 | } |
3712 | | |
3713 | | /************************************************************************/ |
3714 | | /* GDALGetBestOverviewLevel() */ |
3715 | | /* */ |
3716 | | /* Returns the best overview level to satisfy the query or -1 if none */ |
3717 | | /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when */ |
3718 | | /* returning a valid overview level */ |
3719 | | /************************************************************************/ |
3720 | | |
3721 | | int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff, |
3722 | | int &nXSize, int &nYSize, int nBufXSize, |
3723 | | int nBufYSize) |
3724 | 0 | { |
3725 | 0 | return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize, |
3726 | 0 | nBufXSize, nBufYSize, nullptr); |
3727 | 0 | } |
3728 | | |
3729 | | int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff, |
3730 | | int &nYOff, int &nXSize, int &nYSize, |
3731 | | int nBufXSize, int nBufYSize, |
3732 | | GDALRasterIOExtraArg *psExtraArg) |
3733 | 0 | { |
3734 | 0 | if (psExtraArg != nullptr && psExtraArg->nVersion > 1 && |
3735 | 0 | psExtraArg->bUseOnlyThisScale) |
3736 | 0 | return -1; |
3737 | | /* -------------------------------------------------------------------- */ |
3738 | | /* Compute the desired downsampling factor. It is */ |
3739 | | /* based on the least reduced axis, and represents the number */ |
3740 | | /* of source pixels to one destination pixel. */ |
3741 | | /* -------------------------------------------------------------------- */ |
3742 | 0 | const double dfDesiredDownsamplingFactor = |
3743 | 0 | ((nXSize / static_cast<double>(nBufXSize)) < |
3744 | 0 | (nYSize / static_cast<double>(nBufYSize)) || |
3745 | 0 | nBufYSize == 1) |
3746 | 0 | ? nXSize / static_cast<double>(nBufXSize) |
3747 | 0 | : nYSize / static_cast<double>(nBufYSize); |
3748 | | |
3749 | | /* -------------------------------------------------------------------- */ |
3750 | | /* Find the overview level that largest downsampling factor (most */ |
3751 | | /* downsampled) that is still less than (or only a little more) */ |
3752 | | /* downsampled than the request. */ |
3753 | | /* -------------------------------------------------------------------- */ |
3754 | 0 | const int nOverviewCount = poBand->GetOverviewCount(); |
3755 | 0 | GDALRasterBand *poBestOverview = nullptr; |
3756 | 0 | double dfBestDownsamplingFactor = 0; |
3757 | 0 | int nBestOverviewLevel = -1; |
3758 | |
|
3759 | 0 | const char *pszOversampligThreshold = |
3760 | 0 | CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr); |
3761 | | |
3762 | | // Note: keep this logic for overview selection in sync between |
3763 | | // gdalwarp_lib.cpp and rasterio.cpp |
3764 | | // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693 |
3765 | 0 | const double dfOversamplingThreshold = |
3766 | 0 | pszOversampligThreshold ? CPLAtof(pszOversampligThreshold) |
3767 | 0 | : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour |
3768 | 0 | ? 1.0 |
3769 | 0 | : 1.2; |
3770 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; iOverview++) |
3771 | 0 | { |
3772 | 0 | GDALRasterBand *poOverview = poBand->GetOverview(iOverview); |
3773 | 0 | if (poOverview == nullptr || |
3774 | 0 | poOverview->GetXSize() > poBand->GetXSize() || |
3775 | 0 | poOverview->GetYSize() > poBand->GetYSize()) |
3776 | 0 | { |
3777 | 0 | continue; |
3778 | 0 | } |
3779 | | |
3780 | | // Compute downsampling factor of this overview |
3781 | 0 | const double dfDownsamplingFactor = std::min( |
3782 | 0 | poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()), |
3783 | 0 | poBand->GetYSize() / static_cast<double>(poOverview->GetYSize())); |
3784 | | |
3785 | | // Is it nearly the requested factor and better (lower) than |
3786 | | // the current best factor? |
3787 | | // Use an epsilon because of numerical instability. |
3788 | 0 | constexpr double EPSILON = 1e-1; |
3789 | 0 | if (dfDownsamplingFactor >= |
3790 | 0 | dfDesiredDownsamplingFactor * dfOversamplingThreshold + |
3791 | 0 | EPSILON || |
3792 | 0 | dfDownsamplingFactor <= dfBestDownsamplingFactor) |
3793 | 0 | { |
3794 | 0 | continue; |
3795 | 0 | } |
3796 | | |
3797 | | // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes. |
3798 | 0 | const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING"); |
3799 | |
|
3800 | 0 | if (pszResampling != nullptr && |
3801 | 0 | STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2")) |
3802 | 0 | continue; |
3803 | | |
3804 | | // OK, this is our new best overview. |
3805 | 0 | poBestOverview = poOverview; |
3806 | 0 | nBestOverviewLevel = iOverview; |
3807 | 0 | dfBestDownsamplingFactor = dfDownsamplingFactor; |
3808 | |
|
3809 | 0 | if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) < |
3810 | 0 | EPSILON) |
3811 | 0 | { |
3812 | 0 | break; |
3813 | 0 | } |
3814 | 0 | } |
3815 | | |
3816 | | /* -------------------------------------------------------------------- */ |
3817 | | /* If we didn't find an overview that helps us, just return */ |
3818 | | /* indicating failure and the full resolution image will be used. */ |
3819 | | /* -------------------------------------------------------------------- */ |
3820 | 0 | if (nBestOverviewLevel < 0) |
3821 | 0 | return -1; |
3822 | | |
3823 | | /* -------------------------------------------------------------------- */ |
3824 | | /* Recompute the source window in terms of the selected */ |
3825 | | /* overview. */ |
3826 | | /* -------------------------------------------------------------------- */ |
3827 | 0 | const double dfXFactor = |
3828 | 0 | poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize()); |
3829 | 0 | const double dfYFactor = |
3830 | 0 | poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize()); |
3831 | 0 | CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(), |
3832 | 0 | poBestOverview->GetYSize()); |
3833 | |
|
3834 | 0 | const int nOXOff = std::min(poBestOverview->GetXSize() - 1, |
3835 | 0 | static_cast<int>(nXOff / dfXFactor + 0.5)); |
3836 | 0 | const int nOYOff = std::min(poBestOverview->GetYSize() - 1, |
3837 | 0 | static_cast<int>(nYOff / dfYFactor + 0.5)); |
3838 | 0 | int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5)); |
3839 | 0 | int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5)); |
3840 | 0 | if (nOXOff + nOXSize > poBestOverview->GetXSize()) |
3841 | 0 | nOXSize = poBestOverview->GetXSize() - nOXOff; |
3842 | 0 | if (nOYOff + nOYSize > poBestOverview->GetYSize()) |
3843 | 0 | nOYSize = poBestOverview->GetYSize() - nOYOff; |
3844 | |
|
3845 | 0 | if (psExtraArg) |
3846 | 0 | { |
3847 | 0 | if (psExtraArg->bFloatingPointWindowValidity) |
3848 | 0 | { |
3849 | 0 | psExtraArg->dfXOff /= dfXFactor; |
3850 | 0 | psExtraArg->dfXSize /= dfXFactor; |
3851 | 0 | psExtraArg->dfYOff /= dfYFactor; |
3852 | 0 | psExtraArg->dfYSize /= dfYFactor; |
3853 | 0 | } |
3854 | 0 | else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour) |
3855 | 0 | { |
3856 | 0 | psExtraArg->bFloatingPointWindowValidity = true; |
3857 | 0 | psExtraArg->dfXOff = nXOff / dfXFactor; |
3858 | 0 | psExtraArg->dfXSize = nXSize / dfXFactor; |
3859 | 0 | psExtraArg->dfYOff = nYOff / dfYFactor; |
3860 | 0 | psExtraArg->dfYSize = nYSize / dfYFactor; |
3861 | 0 | } |
3862 | 0 | } |
3863 | |
|
3864 | 0 | nXOff = nOXOff; |
3865 | 0 | nYOff = nOYOff; |
3866 | 0 | nXSize = nOXSize; |
3867 | 0 | nYSize = nOYSize; |
3868 | |
|
3869 | 0 | return nBestOverviewLevel; |
3870 | 0 | } |
3871 | | |
3872 | | /************************************************************************/ |
3873 | | /* OverviewRasterIO() */ |
3874 | | /* */ |
3875 | | /* Special work function to utilize available overviews to */ |
3876 | | /* more efficiently satisfy downsampled requests. It will */ |
3877 | | /* return CE_Failure if there are no appropriate overviews */ |
3878 | | /* available but it doesn't emit any error messages. */ |
3879 | | /************************************************************************/ |
3880 | | |
3881 | | //! @cond Doxygen_Suppress |
3882 | | CPLErr GDALRasterBand::OverviewRasterIO( |
3883 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
3884 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
3885 | | GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg) |
3886 | | |
3887 | 0 | { |
3888 | 0 | GDALRasterIOExtraArg sExtraArg; |
3889 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
3890 | |
|
3891 | 0 | const int nOverview = GDALBandGetBestOverviewLevel2( |
3892 | 0 | this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, &sExtraArg); |
3893 | 0 | if (nOverview < 0) |
3894 | 0 | return CE_Failure; |
3895 | | |
3896 | | /* -------------------------------------------------------------------- */ |
3897 | | /* Recast the call in terms of the new raster layer. */ |
3898 | | /* -------------------------------------------------------------------- */ |
3899 | 0 | GDALRasterBand *poOverviewBand = GetOverview(nOverview); |
3900 | 0 | if (poOverviewBand == nullptr) |
3901 | 0 | return CE_Failure; |
3902 | | |
3903 | 0 | return poOverviewBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
3904 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
3905 | 0 | nPixelSpace, nLineSpace, &sExtraArg); |
3906 | 0 | } |
3907 | | |
3908 | | /************************************************************************/ |
3909 | | /* TryOverviewRasterIO() */ |
3910 | | /************************************************************************/ |
3911 | | |
3912 | | CPLErr GDALRasterBand::TryOverviewRasterIO( |
3913 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
3914 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
3915 | | GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg, |
3916 | | int *pbTried) |
3917 | 0 | { |
3918 | 0 | int nXOffMod = nXOff; |
3919 | 0 | int nYOffMod = nYOff; |
3920 | 0 | int nXSizeMod = nXSize; |
3921 | 0 | int nYSizeMod = nYSize; |
3922 | 0 | GDALRasterIOExtraArg sExtraArg; |
3923 | |
|
3924 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
3925 | |
|
3926 | 0 | int iOvrLevel = GDALBandGetBestOverviewLevel2( |
3927 | 0 | this, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, nBufYSize, |
3928 | 0 | &sExtraArg); |
3929 | |
|
3930 | 0 | if (iOvrLevel >= 0) |
3931 | 0 | { |
3932 | 0 | GDALRasterBand *poOverviewBand = GetOverview(iOvrLevel); |
3933 | 0 | if (poOverviewBand) |
3934 | 0 | { |
3935 | 0 | *pbTried = TRUE; |
3936 | 0 | return poOverviewBand->RasterIO( |
3937 | 0 | eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, |
3938 | 0 | nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace, |
3939 | 0 | &sExtraArg); |
3940 | 0 | } |
3941 | 0 | } |
3942 | | |
3943 | 0 | *pbTried = FALSE; |
3944 | 0 | return CE_None; |
3945 | 0 | } |
3946 | | |
3947 | | /************************************************************************/ |
3948 | | /* TryOverviewRasterIO() */ |
3949 | | /************************************************************************/ |
3950 | | |
3951 | | CPLErr GDALDataset::TryOverviewRasterIO( |
3952 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
3953 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
3954 | | int nBandCount, const int *panBandMap, GSpacing nPixelSpace, |
3955 | | GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg, |
3956 | | int *pbTried) |
3957 | 0 | { |
3958 | 0 | int nXOffMod = nXOff; |
3959 | 0 | int nYOffMod = nYOff; |
3960 | 0 | int nXSizeMod = nXSize; |
3961 | 0 | int nYSizeMod = nYSize; |
3962 | 0 | GDALRasterIOExtraArg sExtraArg; |
3963 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
3964 | |
|
3965 | 0 | int iOvrLevel = GDALBandGetBestOverviewLevel2( |
3966 | 0 | papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, |
3967 | 0 | nBufYSize, &sExtraArg); |
3968 | |
|
3969 | 0 | if (iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr && |
3970 | 0 | papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr) |
3971 | 0 | { |
3972 | 0 | *pbTried = TRUE; |
3973 | 0 | return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO( |
3974 | 0 | eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, nBufXSize, |
3975 | 0 | nBufYSize, eBufType, nBandCount, panBandMap, nPixelSpace, |
3976 | 0 | nLineSpace, nBandSpace, &sExtraArg); |
3977 | 0 | } |
3978 | 0 | else |
3979 | 0 | { |
3980 | 0 | *pbTried = FALSE; |
3981 | 0 | return CE_None; |
3982 | 0 | } |
3983 | 0 | } |
3984 | | |
3985 | | /************************************************************************/ |
3986 | | /* GetBestOverviewLevel() */ |
3987 | | /* */ |
3988 | | /* Returns the best overview level to satisfy the query or -1 if none */ |
3989 | | /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid */ |
3990 | | /* overview level */ |
3991 | | /************************************************************************/ |
3992 | | |
3993 | | static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff, |
3994 | | int &nYOff, int &nXSize, int &nYSize, |
3995 | | int nBufXSize, int nBufYSize, |
3996 | | int nBandCount, |
3997 | | const int *panBandMap, |
3998 | | GDALRasterIOExtraArg *psExtraArg) |
3999 | 0 | { |
4000 | 0 | int nOverviewCount = 0; |
4001 | 0 | GDALRasterBand *poFirstBand = nullptr; |
4002 | | |
4003 | | /* -------------------------------------------------------------------- */ |
4004 | | /* Check that all bands have the same number of overviews and */ |
4005 | | /* that they have all the same size and block dimensions */ |
4006 | | /* -------------------------------------------------------------------- */ |
4007 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4008 | 0 | { |
4009 | 0 | GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]); |
4010 | 0 | if (poBand == nullptr) |
4011 | 0 | return -1; |
4012 | 0 | if (iBand == 0) |
4013 | 0 | { |
4014 | 0 | poFirstBand = poBand; |
4015 | 0 | nOverviewCount = poBand->GetOverviewCount(); |
4016 | 0 | } |
4017 | 0 | else if (nOverviewCount != poBand->GetOverviewCount()) |
4018 | 0 | { |
4019 | 0 | CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... " |
4020 | 0 | "mismatched overview count, use std method."); |
4021 | 0 | return -1; |
4022 | 0 | } |
4023 | 0 | else |
4024 | 0 | { |
4025 | 0 | for (int iOverview = 0; iOverview < nOverviewCount; iOverview++) |
4026 | 0 | { |
4027 | 0 | GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview); |
4028 | 0 | GDALRasterBand *poOvrFirstBand = |
4029 | 0 | poFirstBand->GetOverview(iOverview); |
4030 | 0 | if (poOvrBand == nullptr || poOvrFirstBand == nullptr) |
4031 | 0 | continue; |
4032 | | |
4033 | 0 | if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() || |
4034 | 0 | poOvrFirstBand->GetYSize() != poOvrBand->GetYSize()) |
4035 | 0 | { |
4036 | 0 | CPLDebug("GDAL", |
4037 | 0 | "GDALDataset::GetBestOverviewLevel() ... " |
4038 | 0 | "mismatched overview sizes, use std method."); |
4039 | 0 | return -1; |
4040 | 0 | } |
4041 | 0 | int nBlockXSizeFirst = 0; |
4042 | 0 | int nBlockYSizeFirst = 0; |
4043 | 0 | poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst, |
4044 | 0 | &nBlockYSizeFirst); |
4045 | |
|
4046 | 0 | int nBlockXSizeCurrent = 0; |
4047 | 0 | int nBlockYSizeCurrent = 0; |
4048 | 0 | poOvrBand->GetBlockSize(&nBlockXSizeCurrent, |
4049 | 0 | &nBlockYSizeCurrent); |
4050 | |
|
4051 | 0 | if (nBlockXSizeFirst != nBlockXSizeCurrent || |
4052 | 0 | nBlockYSizeFirst != nBlockYSizeCurrent) |
4053 | 0 | { |
4054 | 0 | CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... " |
4055 | 0 | "mismatched block sizes, use std method."); |
4056 | 0 | return -1; |
4057 | 0 | } |
4058 | 0 | } |
4059 | 0 | } |
4060 | 0 | } |
4061 | 0 | if (poFirstBand == nullptr) |
4062 | 0 | return -1; |
4063 | | |
4064 | 0 | return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize, |
4065 | 0 | nYSize, nBufXSize, nBufYSize, |
4066 | 0 | psExtraArg); |
4067 | 0 | } |
4068 | | |
4069 | | /************************************************************************/ |
4070 | | /* BlockBasedRasterIO() */ |
4071 | | /* */ |
4072 | | /* This convenience function implements a dataset level */ |
4073 | | /* RasterIO() interface based on calling down to fetch blocks, */ |
4074 | | /* much like the GDALRasterBand::IRasterIO(), but it handles */ |
4075 | | /* all bands at once, so that a format driver that handles a */ |
4076 | | /* request for different bands of the same block efficiently */ |
4077 | | /* (i.e. without re-reading interleaved data) will efficiently. */ |
4078 | | /* */ |
4079 | | /* This method is intended to be called by an overridden */ |
4080 | | /* IRasterIO() method in the driver specific GDALDataset */ |
4081 | | /* derived class. */ |
4082 | | /* */ |
4083 | | /* Default internal implementation of RasterIO() ... utilizes */ |
4084 | | /* the Block access methods to satisfy the request. This would */ |
4085 | | /* normally only be overridden by formats with overviews. */ |
4086 | | /* */ |
4087 | | /* To keep things relatively simple, this method does not */ |
4088 | | /* currently take advantage of some special cases addressed in */ |
4089 | | /* GDALRasterBand::IRasterIO(), so it is likely best to only */ |
4090 | | /* call it when you know it will help. That is in cases where */ |
4091 | | /* data is at 1:1 to the buffer, and you know the driver is */ |
4092 | | /* implementing interleaved IO efficiently on a block by block */ |
4093 | | /* basis. Overviews will be used when possible. */ |
4094 | | /************************************************************************/ |
4095 | | |
4096 | | CPLErr GDALDataset::BlockBasedRasterIO( |
4097 | | GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize, |
4098 | | void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType, |
4099 | | int nBandCount, const int *panBandMap, GSpacing nPixelSpace, |
4100 | | GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg) |
4101 | | /* May hand the request over to BandBasedRasterIO(); otherwise returns CE_None or the first error met. */ |
4102 | 0 | { |
4103 | 0 | CPLAssert(nullptr != pData); |
4104 | |
4105 | 0 | GByte **papabySrcBlock = nullptr; |
4106 | 0 | GDALRasterBlock *poBlock = nullptr; |
4107 | 0 | GDALRasterBlock **papoBlocks = nullptr; |
4108 | 0 | int nLBlockX = -1; |
4109 | 0 | int nLBlockY = -1; |
4110 | 0 | int iBufYOff; |
4111 | 0 | int iBufXOff; |
4112 | 0 | int nBlockXSize = 1; |
4113 | 0 | int nBlockYSize = 1; |
4114 | 0 | CPLErr eErr = CE_None; |
4115 | 0 | GDALDataType eDataType = GDT_Byte; |
4116 |

4117 | 0 | const bool bUseIntegerRequestCoords = |
4118 | 0 | (!psExtraArg->bFloatingPointWindowValidity || |
4119 | 0 | (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff && |
4120 | 0 | nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize)); |
4121 | |
4122 | | /* -------------------------------------------------------------------- */ |
4123 | | /* Bands must share a block size (and a data type when resampling), else fall back to BandBasedRasterIO(). */ |
4124 | | /* -------------------------------------------------------------------- */ |
4125 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4126 | 0 | { |
4127 | 0 | GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]); |
4128 |

4129 | 0 | if (iBand == 0) |
4130 | 0 | { |
4131 | 0 | poBand->GetBlockSize(&nBlockXSize, &nBlockYSize); |
4132 | 0 | eDataType = poBand->GetRasterDataType(); |
4133 | 0 | } |
4134 | 0 | else |
4135 | 0 | { |
4136 | 0 | int nThisBlockXSize = 0; |
4137 | 0 | int nThisBlockYSize = 0; |
4138 | 0 | poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize); |
4139 | 0 | if (nThisBlockXSize != nBlockXSize || |
4140 | 0 | nThisBlockYSize != nBlockYSize) |
4141 | 0 | { |
4142 | 0 | CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... " |
4143 | 0 | "mismatched block sizes, use std method."); |
4144 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
4145 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
4146 | 0 | nBandCount, panBandMap, nPixelSpace, |
4147 | 0 | nLineSpace, nBandSpace, psExtraArg); |
4148 | 0 | } |
4149 | | /* NOTE(review): the general loop at the end assumes a single eDataType, yet a mismatch is only rejected here when the request is resampled. */ |
4150 | 0 | if (eDataType != poBand->GetRasterDataType() && |
4151 | 0 | (nXSize != nBufXSize || nYSize != nBufYSize)) |
4152 | 0 | { |
4153 | 0 | CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... " |
4154 | 0 | "mismatched band data types, use std method."); |
4155 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, |
4156 | 0 | pData, nBufXSize, nBufYSize, eBufType, |
4157 | 0 | nBandCount, panBandMap, nPixelSpace, |
4158 | 0 | nLineSpace, nBandSpace, psExtraArg); |
4159 | 0 | } |
4160 | 0 | } |
4161 | 0 | } |
4162 | |
4163 | | /* ==================================================================== */ |
4164 | | /* In this special case at full resolution we step through in */ |
4165 | | /* blocks, turning the request over to the per-band */ |
4166 | | /* IRasterIO(), but ensuring that all bands of one block are */ |
4167 | | /* called before proceeding to the next. */ |
4168 | | /* ==================================================================== */ |
4169 | |
4170 | 0 | if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords) |
4171 | 0 | { |
4172 | 0 | GDALRasterIOExtraArg sDummyExtraArg; |
4173 | 0 | INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg); |
4174 |

4175 | 0 | int nChunkYSize = 0; |
4176 | 0 | int nChunkXSize = 0; |
4177 |

4178 | 0 | for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize) |
4179 | 0 | { |
4180 | 0 | const int nChunkYOff = iBufYOff + nYOff; |
4181 | 0 | nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize); |
4182 | 0 | if (nChunkYOff + nChunkYSize > nYOff + nYSize) |
4183 | 0 | nChunkYSize = (nYOff + nYSize) - nChunkYOff; |
4184 |

4185 | 0 | for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize) |
4186 | 0 | { |
4187 | 0 | const int nChunkXOff = iBufXOff + nXOff; |
4188 | 0 | nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize); |
4189 | 0 | if (nChunkXOff + nChunkXSize > nXOff + nXSize) |
4190 | 0 | nChunkXSize = (nXOff + nXSize) - nChunkXOff; |
4191 |

4192 | 0 | GByte *pabyChunkData = |
4193 | 0 | static_cast<GByte *>(pData) + iBufXOff * nPixelSpace + |
4194 | 0 | static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace; |
4195 |

4196 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4197 | 0 | { |
4198 | 0 | GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]); |
4199 |

4200 | 0 | eErr = poBand->IRasterIO( |
4201 | 0 | eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize, |
4202 | 0 | nChunkYSize, |
4203 | 0 | pabyChunkData + |
4204 | 0 | static_cast<GPtrDiff_t>(iBand) * nBandSpace, |
4205 | 0 | nChunkXSize, nChunkYSize, eBufType, nPixelSpace, |
4206 | 0 | nLineSpace, &sDummyExtraArg); |
4207 | 0 | if (eErr != CE_None) |
4208 | 0 | return eErr; |
4209 | 0 | } |
4210 | 0 | } |
4211 | |
4212 | 0 | if (psExtraArg->pfnProgress != nullptr && |
4213 | 0 | !psExtraArg->pfnProgress( |
4214 | 0 | 1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) / |
4215 | 0 | nBufYSize, |
4216 | 0 | "", psExtraArg->pProgressData)) |
4217 | 0 | { |
4218 | 0 | return CE_Failure; |
4219 | 0 | } |
4220 | 0 | } |
4221 | |
4222 | 0 | return CE_None; |
4223 | 0 | } |
4224 | |
4225 | | /* The code below cannot write from a buffer smaller than the window; that */ |
4226 | | /* would need separate code, as done in GDALRasterBand::IRasterIO. */ |
4227 | 0 | if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize)) |
4228 | 0 | { |
4229 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, |
4230 | 0 | nBufXSize, nBufYSize, eBufType, nBandCount, |
4231 | 0 | panBandMap, nPixelSpace, nLineSpace, |
4232 | 0 | nBandSpace, psExtraArg); |
4233 | 0 | } |
4234 | |
4235 | | /* We could have a smarter implementation, but that will do for now */ |
4236 | 0 | if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour && |
4237 | 0 | (nBufXSize != nXSize || nBufYSize != nYSize)) |
4238 | 0 | { |
4239 | 0 | return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, |
4240 | 0 | nBufXSize, nBufYSize, eBufType, nBandCount, |
4241 | 0 | panBandMap, nPixelSpace, nLineSpace, |
4242 | 0 | nBandSpace, psExtraArg); |
4243 | 0 | } |
4244 | |
4245 | | /* ==================================================================== */ |
4246 | | /* Loop reading required source blocks to satisfy output */ |
4247 | | /* request. This is the most general implementation. */ |
4248 | | /* ==================================================================== */ |
4249 | |
4250 | 0 | const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType); |
4251 |

4252 | 0 | papabySrcBlock = |
4253 | 0 | static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount)); |
4254 | 0 | papoBlocks = |
4255 | 0 | static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount)); |
4256 | |
4257 | | /* -------------------------------------------------------------------- */ |
4258 | | /* Select an overview level if appropriate. */ |
4259 | | /* -------------------------------------------------------------------- */ |
4260 |

4261 | 0 | GDALRasterIOExtraArg sExtraArg; |
4262 | 0 | GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg); |
4263 | 0 | const int nOverviewLevel = GDALDatasetGetBestOverviewLevel( |
4264 | 0 | this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount, |
4265 | 0 | panBandMap, &sExtraArg); |
4266 | 0 | if (nOverviewLevel >= 0) |
4267 | 0 | { |
4268 | 0 | GetRasterBand(panBandMap[0]) |
4269 | 0 | ->GetOverview(nOverviewLevel) |
4270 | 0 | ->GetBlockSize(&nBlockXSize, &nBlockYSize); |
4271 | 0 | } |
4272 |

4273 | 0 | double dfXOff = nXOff; |
4274 | 0 | double dfYOff = nYOff; |
4275 | 0 | double dfXSize = nXSize; |
4276 | 0 | double dfYSize = nYSize; |
4277 | 0 | if (sExtraArg.bFloatingPointWindowValidity) |
4278 | 0 | { |
4279 | 0 | dfXOff = sExtraArg.dfXOff; |
4280 | 0 | dfYOff = sExtraArg.dfYOff; |
4281 | 0 | dfXSize = sExtraArg.dfXSize; |
4282 | 0 | dfYSize = sExtraArg.dfYSize; |
4283 | 0 | } |
4284 | |
4285 | | /* -------------------------------------------------------------------- */ |
4286 | | /* Compute stepping increment. */ |
4287 | | /* -------------------------------------------------------------------- */ |
4288 | 0 | const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize); |
4289 | 0 | const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize); |
4290 |

4291 | 0 | constexpr double EPS = 1e-10; |
4292 | | /* -------------------------------------------------------------------- */ |
4293 | | /* Loop over buffer computing source locations. */ |
4294 | | /* -------------------------------------------------------------------- */ |
4295 | 0 | for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++) |
4296 | 0 | { |
4297 | 0 | GPtrDiff_t iSrcOffset; |
4298 | | /* Sample the source pixel under the centre of each buffer pixel, clamped to the raster extent. */ |
4299 | | // Add small epsilon to avoid some numeric precision issues. |
4300 | 0 | const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS; |
4301 | 0 | const int iSrcY = static_cast<int>(std::min( |
4302 | 0 | std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1))); |
4303 |

4304 | 0 | GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) * |
4305 | 0 | static_cast<GPtrDiff_t>(nLineSpace); |
4306 |

4307 | 0 | for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++) |
4308 | 0 | { |
4309 | 0 | const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS; |
4310 | 0 | const int iSrcX = static_cast<int>(std::min( |
4311 | 0 | std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1))); |
4312 | |
4313 | | // FIXME: this code likely doesn't work if the dirty block gets |
4314 | | // flushed to disk before being completely written. In the meantime, |
4315 | | // bJustInitialize should probably be set to FALSE even if it is not |
4316 | | // ideal performance wise, and for lossy compression |
4317 | |
4318 | | /* -------------------------------------------------------------------- |
4319 | | */ |
4320 | | /* Ensure we have the appropriate block loaded. */ |
4321 | | /* -------------------------------------------------------------------- |
4322 | | */ |
4323 | 0 | if (iSrcX < nLBlockX * nBlockXSize || |
4324 | 0 | iSrcX - nBlockXSize >= nLBlockX * nBlockXSize || |
4325 | 0 | iSrcY < nLBlockY * nBlockYSize || |
4326 | 0 | iSrcY - nBlockYSize >= nLBlockY * nBlockYSize) |
4327 | 0 | { |
4328 | 0 | nLBlockX = iSrcX / nBlockXSize; |
4329 | 0 | nLBlockY = iSrcY / nBlockYSize; |
4330 |

4331 | 0 | const bool bJustInitialize = |
4332 | 0 | eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize && |
4333 | 0 | nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize && |
4334 | 0 | nXOff <= nLBlockX * nBlockXSize && |
4335 | 0 | nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize; |
4336 | | /*bool bMemZeroBuffer = FALSE; |
4337 | | if( eRWFlag == GF_Write && !bJustInitialize && |
4338 | | nXOff <= nLBlockX * nBlockXSize && |
4339 | | nYOff <= nLBlockY * nBlockYSize && |
4340 | | (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize || |
4341 | | (nXOff + nXSize == GetRasterXSize() && |
4342 | | (nLBlockX+1) * nBlockXSize > GetRasterXSize())) && |
4343 | | (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize || |
4344 | | (nYOff + nYSize == GetRasterYSize() && |
4345 | | (nLBlockY+1) * nBlockYSize > GetRasterYSize())) ) |
4346 | | { |
4347 | | bJustInitialize = TRUE; |
4348 | | bMemZeroBuffer = TRUE; |
4349 | | }*/ |
4350 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4351 | 0 | { |
4352 | 0 | GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]); |
4353 | 0 | if (nOverviewLevel >= 0) |
4354 | 0 | poBand = poBand->GetOverview(nOverviewLevel); |
4355 | 0 | poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, |
4356 | 0 | bJustInitialize); |
4357 | 0 | if (poBlock == nullptr) |
4358 | 0 | { |
4359 | 0 | eErr = CE_Failure; |
4360 | 0 | goto CleanupAndReturn; |
4361 | 0 | } |
4362 | | /* The new block is locked before the block previously held for this band is released below. */ |
4363 | 0 | if (eRWFlag == GF_Write) |
4364 | 0 | poBlock->MarkDirty(); |
4365 |

4366 | 0 | if (papoBlocks[iBand] != nullptr) |
4367 | 0 | papoBlocks[iBand]->DropLock(); |
4368 |

4369 | 0 | papoBlocks[iBand] = poBlock; |
4370 |

4371 | 0 | papabySrcBlock[iBand] = |
4372 | 0 | static_cast<GByte *>(poBlock->GetDataRef()); |
4373 | | /*if( bMemZeroBuffer ) |
4374 | | { |
4375 | | memset(papabySrcBlock[iBand], 0, |
4376 | | static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize |
4377 | | * nBlockYSize); |
4378 | | }*/ |
4379 | 0 | } |
4380 | 0 | } |
4381 | |
4382 | | /* -------------------------------------------------------------------- |
4383 | | */ |
4384 | | /* Copy over this pixel of data. */ |
4385 | | /* -------------------------------------------------------------------- |
4386 | | */ |
4387 | 0 | iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) - |
4388 | 0 | static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize + |
4389 | 0 | (static_cast<GPtrDiff_t>(iSrcY) - |
4390 | 0 | static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) * |
4391 | 0 | nBlockXSize) * |
4392 | 0 | nBandDataSize; |
4393 |

4394 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4395 | 0 | { |
4396 | 0 | GByte *pabySrcBlock = papabySrcBlock[iBand]; |
4397 | 0 | GPtrDiff_t iBandBufOffset = |
4398 | 0 | iBufOffset + static_cast<GPtrDiff_t>(iBand) * |
4399 | 0 | static_cast<GPtrDiff_t>(nBandSpace); |
4400 |

4401 | 0 | if (eDataType == eBufType) |
4402 | 0 | { |
4403 | 0 | if (eRWFlag == GF_Read) |
4404 | 0 | memcpy(static_cast<GByte *>(pData) + iBandBufOffset, |
4405 | 0 | pabySrcBlock + iSrcOffset, nBandDataSize); |
4406 | 0 | else |
4407 | 0 | memcpy(pabySrcBlock + iSrcOffset, |
4408 | 0 | static_cast<const GByte *>(pData) + |
4409 | 0 | iBandBufOffset, |
4410 | 0 | nBandDataSize); |
4411 | 0 | } |
4412 | 0 | else |
4413 | 0 | { |
4414 | | /* type to type conversion ... ouch, this is expensive way |
4415 | | of handling single words */ |
4416 |

4417 | 0 | if (eRWFlag == GF_Read) |
4418 | 0 | GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0, |
4419 | 0 | static_cast<GByte *>(pData) + |
4420 | 0 | iBandBufOffset, |
4421 | 0 | eBufType, 0, 1); |
4422 | 0 | else |
4423 | 0 | GDALCopyWords64(static_cast<const GByte *>(pData) + |
4424 | 0 | iBandBufOffset, |
4425 | 0 | eBufType, 0, pabySrcBlock + iSrcOffset, |
4426 | 0 | eDataType, 0, 1); |
4427 | 0 | } |
4428 | 0 | } |
4429 |

4430 | 0 | iBufOffset += static_cast<GPtrDiff_t>(nPixelSpace); /* same cast as nLineSpace above; do not truncate to int */ |
4431 | 0 | } |
4432 | 0 | } |
4433 | |
4434 | | /* -------------------------------------------------------------------- */ |
4435 | | /* CleanupAndReturn. */ |
4436 | | /* -------------------------------------------------------------------- */ |
4437 | 0 | CleanupAndReturn: |
4438 | 0 | CPLFree(papabySrcBlock); |
4439 | 0 | if (papoBlocks != nullptr) |
4440 | 0 | { |
4441 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4442 | 0 | { |
4443 | 0 | if (papoBlocks[iBand] != nullptr) |
4444 | 0 | papoBlocks[iBand]->DropLock(); |
4445 | 0 | } |
4446 | 0 | CPLFree(papoBlocks); |
4447 | 0 | } |
4448 |

4449 | 0 | return eErr; |
4450 | 0 | } |
4451 | | |
4452 | | //! @endcond |
4453 | | |
4454 | | /************************************************************************/ |
4455 | | /* GDALCopyWholeRasterGetSwathSize() */ |
4456 | | /************************************************************************/ |
4457 | | /* Computes the swath width (*pnSwathCols) and height (*pnSwathLines), in pixels, from the block sizes of both prototype bands and a target buffer size in bytes. With bInterleave set, the per-pixel size covers all nBandCount bands. */ |
4458 | | static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand, |
4459 | | GDALRasterBand *poDstPrototypeBand, |
4460 | | int nBandCount, |
4461 | | int bDstIsCompressed, |
4462 | | int bInterleave, int *pnSwathCols, |
4463 | | int *pnSwathLines) |
4464 | 0 | { |
4465 | 0 | GDALDataType eDT = poDstPrototypeBand->GetRasterDataType(); |
4466 | 0 | int nSrcBlockXSize = 0; |
4467 | 0 | int nSrcBlockYSize = 0; |
4468 | 0 | int nBlockXSize = 0; |
4469 | 0 | int nBlockYSize = 0; |
4470 |

4471 | 0 | int nXSize = poSrcPrototypeBand->GetXSize(); |
4472 | 0 | int nYSize = poSrcPrototypeBand->GetYSize(); |
4473 |

4474 | 0 | poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize); |
4475 | 0 | poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize); |
4476 |

4477 | 0 | const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize); |
4478 | 0 | const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize); |
4479 |

4480 | 0 | int nPixelSize = GDALGetDataTypeSizeBytes(eDT); |
4481 | 0 | if (bInterleave) |
4482 | 0 | nPixelSize *= nBandCount; |
4483 | |
4484 | | // aim for one row of blocks. Do not settle for less. |
4485 | 0 | int nSwathCols = nXSize; |
4486 | 0 | int nSwathLines = nMaxBlockYSize; |
4487 |

4488 | 0 | const char *pszSrcCompression = |
4489 | 0 | poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE"); |
4490 | 0 | if (pszSrcCompression == nullptr) |
4491 | 0 | { |
4492 | 0 | auto poSrcDS = poSrcPrototypeBand->GetDataset(); |
4493 | 0 | if (poSrcDS) |
4494 | 0 | pszSrcCompression = |
4495 | 0 | poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE"); |
4496 | 0 | } |
4497 | |
4498 | | /* -------------------------------------------------------------------- */ |
4499 | | /* What will our swath size be? */ |
4500 | | /* -------------------------------------------------------------------- */ |
4501 | | // When writing interleaved data in a compressed format, we want to be sure |
4502 | | // that each block will only be written once, so the swath size must not be |
4503 | | // greater than the block cache. |
4504 | 0 | const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr); |
4505 | 0 | int nTargetSwathSize; |
4506 | 0 | if (pszSwathSize != nullptr) |
4507 | 0 | nTargetSwathSize = static_cast<int>( |
4508 | 0 | std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize))); |
4509 | 0 | else |
4510 | 0 | { |
4511 | | // As a default, take one 1/4 of the cache size. |
4512 | 0 | nTargetSwathSize = static_cast<int>( |
4513 | 0 | std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4)); |
4514 | |
4515 | | // but if the minimum ideal swath buf size is less, then go for it to |
4516 | | // avoid unnecessarily abusing RAM usage. |
4517 | | // but try to use 10 MB at least. |
4518 | 0 | GIntBig nIdealSwathBufSize = |
4519 | 0 | static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize; |
4520 | 0 | int nMinTargetSwathSize = 10 * 1000 * 1000; |
4521 |

4522 | 0 | if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() & |
4523 | 0 | GSBAP_LARGEST_CHUNK_POSSIBLE) != 0) |
4524 | 0 | { |
4525 | 0 | nMinTargetSwathSize = nTargetSwathSize; |
4526 | 0 | } |
4527 |

4528 | 0 | if (nIdealSwathBufSize < nTargetSwathSize && |
4529 | 0 | nIdealSwathBufSize < nMinTargetSwathSize) |
4530 | 0 | { |
4531 | 0 | nIdealSwathBufSize = nMinTargetSwathSize; |
4532 | 0 | } |
4533 |

4534 | 0 | if (pszSrcCompression != nullptr && |
4535 | 0 | EQUAL(pszSrcCompression, "JPEG2000") && |
4536 | 0 | (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 && |
4537 | 0 | (nSrcBlockYSize % nBlockYSize) == 0))) |
4538 | 0 | { |
4539 | 0 | nIdealSwathBufSize = |
4540 | 0 | std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) * |
4541 | 0 | nSrcBlockYSize * nPixelSize); |
4542 | 0 | } |
4543 | 0 | if (nTargetSwathSize > nIdealSwathBufSize) |
4544 | 0 | nTargetSwathSize = static_cast<int>( |
4545 | 0 | std::min(GIntBig(INT_MAX), nIdealSwathBufSize)); |
4546 | 0 | } |
4547 |

4548 | 0 | if (nTargetSwathSize < 1000000) |
4549 | 0 | nTargetSwathSize = 1000000; |
4550 | |
4551 | | /* Warn when the swath is larger than the block cache. */ |
4552 | 0 | if (bDstIsCompressed && bInterleave && |
4553 | 0 | nTargetSwathSize > GDALGetCacheMax64()) |
4554 | 0 | { |
4555 | 0 | CPLError(CE_Warning, CPLE_AppDefined, |
4556 | 0 | "When translating into a compressed interleave format, " |
4557 | 0 | "the block cache size (" CPL_FRMT_GIB ") " |
4558 | 0 | "should be at least the size of the swath (%d) " |
4559 | 0 | "(GDAL_SWATH_SIZE config. option)", |
4560 | 0 | GDALGetCacheMax64(), nTargetSwathSize); |
4561 | 0 | } |
4562 |

4563 | 0 | #define IS_DIVIDER_OF(x, y) ((y) % (x) == 0) |
4564 | 0 | #define ROUND_TO(x, y) (((x) / (y)) * (y)) |
4565 | |
4566 | | // if both input and output datasets are tiled, that the tile dimensions |
4567 | | // are "compatible", try to stick to a swath dimension that is a multiple |
4568 | | // of input and output block dimensions. |
4569 | 0 | if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize && |
4570 | 0 | IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) && |
4571 | 0 | IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) && |
4572 | 0 | IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) && |
4573 | 0 | IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize)) |
4574 | 0 | { |
4575 | 0 | if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize * |
4576 | 0 | nPixelSize <= |
4577 | 0 | static_cast<GIntBig>(nTargetSwathSize)) |
4578 | 0 | { |
4579 | 0 | nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize); |
4580 | 0 | nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize); |
4581 | 0 | if (nSwathCols == 0) |
4582 | 0 | nSwathCols = nMaxBlockXSize; |
4583 | 0 | if (nSwathCols > nXSize) |
4584 | 0 | nSwathCols = nXSize; |
4585 | 0 | nSwathLines = nMaxBlockYSize; |
4586 |

4587 | 0 | if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize > |
4588 | 0 | static_cast<GIntBig>(nTargetSwathSize)) |
4589 | 0 | { |
4590 | 0 | nSwathCols = nXSize; |
4591 | 0 | nSwathLines = nBlockYSize; |
4592 | 0 | } |
4593 | 0 | } |
4594 | 0 | } |
4595 |

4596 | 0 | const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize; |
4597 | 0 | const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines; |
4598 | 0 | if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize)) |
4599 | 0 | { |
4600 | 0 | nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol); |
4601 | 0 | if (nSwathLines == 0) |
4602 | 0 | nSwathLines = 1; |
4603 |

4604 | 0 | CPLDebug( |
4605 | 0 | "GDAL", |
4606 | 0 | "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath " |
4607 | 0 | "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath " |
4608 | 0 | "size (%d bytes) (GDAL_SWATH_SIZE config. option)", |
4609 | 0 | nSwathLines, nSwathBufSize, nTargetSwathSize); |
4610 | 0 | } |
4611 | | // If we are processing single scans, try to handle several at once. |
4612 | | // If we are handling swaths already, only grow the swath if a row |
4613 | | // of blocks is substantially less than our target buffer size. |
4614 | 0 | else if (nSwathLines == 1 || |
4615 | 0 | nMemoryPerCol * nSwathLines < |
4616 | 0 | static_cast<GIntBig>(nTargetSwathSize) / 10) |
4617 | 0 | { |
4618 | 0 | nSwathLines = std::min( |
4619 | 0 | nYSize, |
4620 | 0 | std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol))); |
4621 | |
4622 | | /* If possible try to align to source and target block height */ |
4623 | 0 | if ((nSwathLines % nMaxBlockYSize) != 0 && |
4624 | 0 | nSwathLines > nMaxBlockYSize && |
4625 | 0 | IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) && |
4626 | 0 | IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize)) |
4627 | 0 | nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize); |
4628 | 0 | } |
4629 |

4630 | 0 | if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") && |
4631 | 0 | (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) && |
4632 | 0 | IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize)))) |
4633 | 0 | { |
4634 | | // Typical use case: converting from Pleiades that is 2048x2048 tiled. |
4635 | 0 | if (nSwathLines < nSrcBlockYSize) |
4636 | 0 | { |
4637 | 0 | nSwathLines = nSrcBlockYSize; |
4638 | |
4639 | | // Number of pixels that can be read/write simultaneously. |
4640 | 0 | nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize); /* divide by the swath height, as the bDstIsCompressed branch below does */ |
4641 | 0 | nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize); |
4642 | 0 | if (nSwathCols == 0) |
4643 | 0 | nSwathCols = nSrcBlockXSize; |
4644 | 0 | if (nSwathCols > nXSize) |
4645 | 0 | nSwathCols = nXSize; |
4646 |

4647 | 0 | CPLDebug( |
4648 | 0 | "GDAL", |
4649 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression and " |
4650 | 0 | "too high block, " |
4651 | 0 | "use partial width at one time"); |
4652 | 0 | } |
4653 | 0 | else if ((nSwathLines % nSrcBlockYSize) != 0) |
4654 | 0 | { |
4655 | | /* Round on a multiple of nSrcBlockYSize */ |
4656 | 0 | nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize); |
4657 | 0 | CPLDebug( |
4658 | 0 | "GDAL", |
4659 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression, " |
4660 | 0 | "round nSwathLines to block height : %d", |
4661 | 0 | nSwathLines); |
4662 | 0 | } |
4663 | 0 | } |
4664 | 0 | else if (bDstIsCompressed) |
4665 | 0 | { |
4666 | 0 | if (nSwathLines < nBlockYSize) |
4667 | 0 | { |
4668 | 0 | nSwathLines = nBlockYSize; |
4669 | |
4670 | | // Number of pixels that can be read/write simultaneously. |
4671 | 0 | nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize); |
4672 | 0 | nSwathCols = ROUND_TO(nSwathCols, nBlockXSize); |
4673 | 0 | if (nSwathCols == 0) |
4674 | 0 | nSwathCols = nBlockXSize; |
4675 | 0 | if (nSwathCols > nXSize) |
4676 | 0 | nSwathCols = nXSize; |
4677 |

4678 | 0 | CPLDebug( |
4679 | 0 | "GDAL", |
4680 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression and " |
4681 | 0 | "too high block, " |
4682 | 0 | "use partial width at one time"); |
4683 | 0 | } |
4684 | 0 | else if ((nSwathLines % nBlockYSize) != 0) |
4685 | 0 | { |
4686 | | // Round on a multiple of nBlockYSize. |
4687 | 0 | nSwathLines = ROUND_TO(nSwathLines, nBlockYSize); |
4688 | 0 | CPLDebug( |
4689 | 0 | "GDAL", |
4690 | 0 | "GDALCopyWholeRasterGetSwathSize(): because of compression, " |
4691 | 0 | "round nSwathLines to block height : %d", |
4692 | 0 | nSwathLines); |
4693 | 0 | } |
4694 | 0 | } |
4695 |

4696 | 0 | *pnSwathCols = nSwathCols; |
4697 | 0 | *pnSwathLines = nSwathLines; |
4698 | 0 | } |
4699 | | |
4700 | | /************************************************************************/ |
4701 | | /* GDALDatasetCopyWholeRaster() */ |
4702 | | /************************************************************************/ |
4703 | | |
4704 | | /** |
4705 | | * \brief Copy all dataset raster data. |
4706 | | * |
4707 | | * This function copies the complete raster contents of one dataset to |
4708 | | * another similarly configured dataset. The source and destination |
4709 | | * dataset must have the same number of bands, and the same width |
4710 | | * and height. The bands do not have to have the same data type. |
4711 | | * |
4712 | | * This function is primarily intended to support implementation of |
4713 | | * driver specific CreateCopy() functions. It implements efficient copying, |
4714 | | * in particular "chunking" the copy in substantial blocks and, if appropriate, |
4715 | | * performing the transfer in a pixel interleaved fashion. |
4716 | | * |
4717 | | * Currently the only papszOptions values supported are: |
4718 | | * <ul> |
4719 | | * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and |
4720 | | * write access pattern (this does not modify the layout of the destination |
4721 | | * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block |
4722 | | * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks |
4723 | | * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY |
4724 | | * (GDAL >= 2.2)</li> |
4725 | | * </ul> |
4726 | | * More options may be supported in the future. |
4727 | | * |
4728 | | * @param hSrcDS the source dataset |
4729 | | * @param hDstDS the destination dataset |
4730 | | * @param papszOptions transfer hints in "StringList" Name=Value format. |
4731 | | * @param pfnProgress progress reporting function. |
4732 | | * @param pProgressData callback data for progress function. |
4733 | | * |
4734 | | * @return CE_None on success, or CE_Failure on failure. |
4735 | | */ |
4736 | | |
4737 | | CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS, |
4738 | | GDALDatasetH hDstDS, |
4739 | | CSLConstList papszOptions, |
4740 | | GDALProgressFunc pfnProgress, |
4741 | | void *pProgressData) |
4742 | | |
4743 | 0 | { |
4744 | 0 | VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure); |
4745 | 0 | VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure); |
4746 | | |
4747 | 0 | GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS); |
4748 | 0 | GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS); |
4749 | |
|
4750 | 0 | if (pfnProgress == nullptr) |
4751 | 0 | pfnProgress = GDALDummyProgress; |
4752 | | |
4753 | | /* -------------------------------------------------------------------- */ |
4754 | | /* Confirm the datasets match in size and band counts. */ |
4755 | | /* -------------------------------------------------------------------- */ |
4756 | 0 | const int nXSize = poDstDS->GetRasterXSize(); |
4757 | 0 | const int nYSize = poDstDS->GetRasterYSize(); |
4758 | 0 | const int nBandCount = poDstDS->GetRasterCount(); |
4759 | |
|
4760 | 0 | if (poSrcDS->GetRasterXSize() != nXSize || |
4761 | 0 | poSrcDS->GetRasterYSize() != nYSize || |
4762 | 0 | poSrcDS->GetRasterCount() != nBandCount) |
4763 | 0 | { |
4764 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
4765 | 0 | "Input and output dataset sizes or band counts do not\n" |
4766 | 0 | "match in GDALDatasetCopyWholeRaster()"); |
4767 | 0 | return CE_Failure; |
4768 | 0 | } |
4769 | | |
4770 | | /* -------------------------------------------------------------------- */ |
4771 | | /* Report preliminary (0) progress. */ |
4772 | | /* -------------------------------------------------------------------- */ |
4773 | 0 | if (!pfnProgress(0.0, nullptr, pProgressData)) |
4774 | 0 | { |
4775 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
4776 | 0 | "User terminated CreateCopy()"); |
4777 | 0 | return CE_Failure; |
4778 | 0 | } |
4779 | | |
4780 | | /* -------------------------------------------------------------------- */ |
4781 | | /* Get our prototype band, and assume the others are similarly */ |
4782 | | /* configured. */ |
4783 | | /* -------------------------------------------------------------------- */ |
4784 | 0 | if (nBandCount == 0) |
4785 | 0 | return CE_None; |
4786 | | |
4787 | 0 | GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1); |
4788 | 0 | GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1); |
4789 | 0 | GDALDataType eDT = poDstPrototypeBand->GetRasterDataType(); |
4790 | | |
4791 | | /* -------------------------------------------------------------------- */ |
4792 | | /* Do we want to try and do the operation in a pixel */ |
4793 | | /* interleaved fashion? */ |
4794 | | /* -------------------------------------------------------------------- */ |
4795 | 0 | bool bInterleave = false; |
4796 | 0 | const char *pszInterleave = |
4797 | 0 | poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE"); |
4798 | 0 | if (pszInterleave != nullptr && |
4799 | 0 | (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE"))) |
4800 | 0 | bInterleave = true; |
4801 | |
|
4802 | 0 | pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE"); |
4803 | 0 | if (pszInterleave != nullptr && |
4804 | 0 | (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE"))) |
4805 | 0 | bInterleave = true; |
4806 | |
|
4807 | 0 | pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE"); |
4808 | 0 | if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL")) |
4809 | 0 | bInterleave = true; |
4810 | 0 | else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND")) |
4811 | 0 | bInterleave = false; |
4812 | | // attributes is specific to the TileDB driver |
4813 | 0 | else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES")) |
4814 | 0 | bInterleave = true; |
4815 | 0 | else if (pszInterleave != nullptr) |
4816 | 0 | { |
4817 | 0 | CPLError(CE_Warning, CPLE_NotSupported, |
4818 | 0 | "Unsupported value for option INTERLEAVE"); |
4819 | 0 | } |
4820 | | |
4821 | | // If the destination is compressed, we must try to write blocks just once, |
4822 | | // to save disk space (GTiff case for example), and to avoid data loss |
4823 | | // (JPEG compression for example). |
4824 | 0 | bool bDstIsCompressed = false; |
4825 | 0 | const char *pszDstCompressed = |
4826 | 0 | CSLFetchNameValue(papszOptions, "COMPRESSED"); |
4827 | 0 | if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed)) |
4828 | 0 | bDstIsCompressed = true; |
4829 | | |
4830 | | /* -------------------------------------------------------------------- */ |
4831 | | /* What will our swath size be? */ |
4832 | | /* -------------------------------------------------------------------- */ |
4833 | |
|
4834 | 0 | int nSwathCols = 0; |
4835 | 0 | int nSwathLines = 0; |
4836 | 0 | GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand, |
4837 | 0 | nBandCount, bDstIsCompressed, bInterleave, |
4838 | 0 | &nSwathCols, &nSwathLines); |
4839 | |
|
4840 | 0 | int nPixelSize = GDALGetDataTypeSizeBytes(eDT); |
4841 | 0 | if (bInterleave) |
4842 | 0 | nPixelSize *= nBandCount; |
4843 | |
|
4844 | 0 | void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize); |
4845 | 0 | if (pSwathBuf == nullptr) |
4846 | 0 | { |
4847 | 0 | return CE_Failure; |
4848 | 0 | } |
4849 | | |
4850 | 0 | CPLDebug("GDAL", |
4851 | 0 | "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d", |
4852 | 0 | nSwathCols, nSwathLines, static_cast<int>(bInterleave)); |
4853 | | |
4854 | | // Advise the source raster that we are going to read it completely |
4855 | | // Note: this might already have been done by GDALCreateCopy() in the |
4856 | | // likely case this function is indirectly called by it |
4857 | 0 | poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount, |
4858 | 0 | nullptr, nullptr); |
4859 | | |
4860 | | /* ==================================================================== */ |
4861 | | /* Band oriented (uninterleaved) case. */ |
4862 | | /* ==================================================================== */ |
4863 | 0 | CPLErr eErr = CE_None; |
4864 | 0 | const bool bCheckHoles = |
4865 | 0 | CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO")); |
4866 | |
|
4867 | 0 | if (!bInterleave) |
4868 | 0 | { |
4869 | 0 | GDALRasterIOExtraArg sExtraArg; |
4870 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
4871 | 0 | CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy |
4872 | |
|
4873 | 0 | const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) * |
4874 | 0 | DIV_ROUND_UP(nYSize, nSwathLines) * |
4875 | 0 | DIV_ROUND_UP(nXSize, nSwathCols); |
4876 | 0 | GIntBig nBlocksDone = 0; |
4877 | |
|
4878 | 0 | for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++) |
4879 | 0 | { |
4880 | 0 | int nBand = iBand + 1; |
4881 | |
|
4882 | 0 | for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines) |
4883 | 0 | { |
4884 | 0 | int nThisLines = nSwathLines; |
4885 | |
|
4886 | 0 | if (iY + nThisLines > nYSize) |
4887 | 0 | nThisLines = nYSize - iY; |
4888 | |
|
4889 | 0 | for (int iX = 0; iX < nXSize && eErr == CE_None; |
4890 | 0 | iX += nSwathCols) |
4891 | 0 | { |
4892 | 0 | int nThisCols = nSwathCols; |
4893 | |
|
4894 | 0 | if (iX + nThisCols > nXSize) |
4895 | 0 | nThisCols = nXSize - iX; |
4896 | |
|
4897 | 0 | int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA; |
4898 | 0 | if (bCheckHoles) |
4899 | 0 | { |
4900 | 0 | nStatus = poSrcDS->GetRasterBand(nBand) |
4901 | 0 | ->GetDataCoverageStatus( |
4902 | 0 | iX, iY, nThisCols, nThisLines, |
4903 | 0 | GDAL_DATA_COVERAGE_STATUS_DATA); |
4904 | 0 | } |
4905 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
4906 | 0 | { |
4907 | 0 | sExtraArg.pfnProgress = GDALScaledProgress; |
4908 | 0 | sExtraArg.pProgressData = GDALCreateScaledProgress( |
4909 | 0 | nBlocksDone / static_cast<double>(nTotalBlocks), |
4910 | 0 | (nBlocksDone + 0.5) / |
4911 | 0 | static_cast<double>(nTotalBlocks), |
4912 | 0 | pfnProgress, pProgressData); |
4913 | 0 | if (sExtraArg.pProgressData == nullptr) |
4914 | 0 | sExtraArg.pfnProgress = nullptr; |
4915 | |
|
4916 | 0 | eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols, |
4917 | 0 | nThisLines, pSwathBuf, |
4918 | 0 | nThisCols, nThisLines, eDT, 1, |
4919 | 0 | &nBand, 0, 0, 0, &sExtraArg); |
4920 | |
|
4921 | 0 | GDALDestroyScaledProgress(sExtraArg.pProgressData); |
4922 | |
|
4923 | 0 | if (eErr == CE_None) |
4924 | 0 | eErr = poDstDS->RasterIO( |
4925 | 0 | GF_Write, iX, iY, nThisCols, nThisLines, |
4926 | 0 | pSwathBuf, nThisCols, nThisLines, eDT, 1, |
4927 | 0 | &nBand, 0, 0, 0, nullptr); |
4928 | 0 | } |
4929 | |
|
4930 | 0 | nBlocksDone++; |
4931 | 0 | if (eErr == CE_None && |
4932 | 0 | !pfnProgress(nBlocksDone / |
4933 | 0 | static_cast<double>(nTotalBlocks), |
4934 | 0 | nullptr, pProgressData)) |
4935 | 0 | { |
4936 | 0 | eErr = CE_Failure; |
4937 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
4938 | 0 | "User terminated CreateCopy()"); |
4939 | 0 | } |
4940 | 0 | } |
4941 | 0 | } |
4942 | 0 | } |
4943 | 0 | } |
4944 | | |
4945 | | /* ==================================================================== */ |
4946 | | /* Pixel interleaved case. */ |
4947 | | /* ==================================================================== */ |
4948 | 0 | else /* if( bInterleave ) */ |
4949 | 0 | { |
4950 | 0 | GDALRasterIOExtraArg sExtraArg; |
4951 | 0 | INIT_RASTERIO_EXTRA_ARG(sExtraArg); |
4952 | 0 | CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy |
4953 | |
|
4954 | 0 | const GIntBig nTotalBlocks = |
4955 | 0 | static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) * |
4956 | 0 | DIV_ROUND_UP(nXSize, nSwathCols); |
4957 | 0 | GIntBig nBlocksDone = 0; |
4958 | |
|
4959 | 0 | for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines) |
4960 | 0 | { |
4961 | 0 | int nThisLines = nSwathLines; |
4962 | |
|
4963 | 0 | if (iY + nThisLines > nYSize) |
4964 | 0 | nThisLines = nYSize - iY; |
4965 | |
|
4966 | 0 | for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols) |
4967 | 0 | { |
4968 | 0 | int nThisCols = nSwathCols; |
4969 | |
|
4970 | 0 | if (iX + nThisCols > nXSize) |
4971 | 0 | nThisCols = nXSize - iX; |
4972 | |
|
4973 | 0 | int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA; |
4974 | 0 | if (bCheckHoles) |
4975 | 0 | { |
4976 | 0 | nStatus = 0; |
4977 | 0 | for (int iBand = 0; iBand < nBandCount; iBand++) |
4978 | 0 | { |
4979 | 0 | nStatus |= poSrcDS->GetRasterBand(iBand + 1) |
4980 | 0 | ->GetDataCoverageStatus( |
4981 | 0 | iX, iY, nThisCols, nThisLines, |
4982 | 0 | GDAL_DATA_COVERAGE_STATUS_DATA); |
4983 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
4984 | 0 | break; |
4985 | 0 | } |
4986 | 0 | } |
4987 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
4988 | 0 | { |
4989 | 0 | sExtraArg.pfnProgress = GDALScaledProgress; |
4990 | 0 | sExtraArg.pProgressData = GDALCreateScaledProgress( |
4991 | 0 | nBlocksDone / static_cast<double>(nTotalBlocks), |
4992 | 0 | (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks), |
4993 | 0 | pfnProgress, pProgressData); |
4994 | 0 | if (sExtraArg.pProgressData == nullptr) |
4995 | 0 | sExtraArg.pfnProgress = nullptr; |
4996 | |
|
4997 | 0 | eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols, |
4998 | 0 | nThisLines, pSwathBuf, nThisCols, |
4999 | 0 | nThisLines, eDT, nBandCount, |
5000 | 0 | nullptr, 0, 0, 0, &sExtraArg); |
5001 | |
|
5002 | 0 | GDALDestroyScaledProgress(sExtraArg.pProgressData); |
5003 | |
|
5004 | 0 | if (eErr == CE_None) |
5005 | 0 | eErr = poDstDS->RasterIO( |
5006 | 0 | GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf, |
5007 | 0 | nThisCols, nThisLines, eDT, nBandCount, nullptr, 0, |
5008 | 0 | 0, 0, nullptr); |
5009 | 0 | } |
5010 | |
|
5011 | 0 | nBlocksDone++; |
5012 | 0 | if (eErr == CE_None && |
5013 | 0 | !pfnProgress(nBlocksDone / |
5014 | 0 | static_cast<double>(nTotalBlocks), |
5015 | 0 | nullptr, pProgressData)) |
5016 | 0 | { |
5017 | 0 | eErr = CE_Failure; |
5018 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5019 | 0 | "User terminated CreateCopy()"); |
5020 | 0 | } |
5021 | 0 | } |
5022 | 0 | } |
5023 | 0 | } |
5024 | | |
5025 | | /* -------------------------------------------------------------------- */ |
5026 | | /* Cleanup */ |
5027 | | /* -------------------------------------------------------------------- */ |
5028 | 0 | CPLFree(pSwathBuf); |
5029 | |
|
5030 | 0 | return eErr; |
5031 | 0 | } |
5032 | | |
5033 | | /************************************************************************/ |
5034 | | /* GDALRasterBandCopyWholeRaster() */ |
5035 | | /************************************************************************/ |
5036 | | |
5037 | | /** |
5038 | | * \brief Copy a whole raster band |
5039 | | * |
5040 | | * This function copies the complete raster contents of one band to |
5041 | | * another similarly configured band. The source and destination |
5042 | | * bands must have the same width and height. The bands do not have |
5043 | | * to have the same data type. |
5044 | | * |
5045 | | * It implements efficient copying, in particular "chunking" the copy in |
5046 | | * substantial blocks. |
5047 | | * |
5048 | | * Currently the only papszOptions value supported are : |
5049 | | * <ul> |
5050 | | * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to |
5051 | | * achieve best compression.</li> |
5052 | | * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus() |
5053 | | * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li> |
5054 | | * </ul> |
5055 | | * |
5056 | | * @param hSrcBand the source band |
5057 | | * @param hDstBand the destination band |
5058 | | * @param papszOptions transfer hints in "StringList" Name=Value format. |
5059 | | * @param pfnProgress progress reporting function. |
5060 | | * @param pProgressData callback data for progress function. |
5061 | | * |
5062 | | * @return CE_None on success, or CE_Failure on failure. |
5063 | | */ |
5064 | | |
5065 | | CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster( |
5066 | | GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand, |
5067 | | const char *const *const papszOptions, GDALProgressFunc pfnProgress, |
5068 | | void *pProgressData) |
5069 | | |
5070 | 0 | { |
5071 | 0 | VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure); |
5072 | 0 | VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure); |
5073 | | |
5074 | 0 | GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand); |
5075 | 0 | GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand); |
5076 | 0 | CPLErr eErr = CE_None; |
5077 | |
|
5078 | 0 | if (pfnProgress == nullptr) |
5079 | 0 | pfnProgress = GDALDummyProgress; |
5080 | | |
5081 | | /* -------------------------------------------------------------------- */ |
5082 | | /* Confirm the datasets match in size and band counts. */ |
5083 | | /* -------------------------------------------------------------------- */ |
5084 | 0 | int nXSize = poSrcBand->GetXSize(); |
5085 | 0 | int nYSize = poSrcBand->GetYSize(); |
5086 | |
|
5087 | 0 | if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize) |
5088 | 0 | { |
5089 | 0 | CPLError(CE_Failure, CPLE_AppDefined, |
5090 | 0 | "Input and output band sizes do not\n" |
5091 | 0 | "match in GDALRasterBandCopyWholeRaster()"); |
5092 | 0 | return CE_Failure; |
5093 | 0 | } |
5094 | | |
5095 | | /* -------------------------------------------------------------------- */ |
5096 | | /* Report preliminary (0) progress. */ |
5097 | | /* -------------------------------------------------------------------- */ |
5098 | 0 | if (!pfnProgress(0.0, nullptr, pProgressData)) |
5099 | 0 | { |
5100 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5101 | 0 | "User terminated CreateCopy()"); |
5102 | 0 | return CE_Failure; |
5103 | 0 | } |
5104 | | |
5105 | 0 | GDALDataType eDT = poDstBand->GetRasterDataType(); |
5106 | | |
5107 | | // If the destination is compressed, we must try to write blocks just once, |
5108 | | // to save disk space (GTiff case for example), and to avoid data loss |
5109 | | // (JPEG compression for example). |
5110 | 0 | bool bDstIsCompressed = false; |
5111 | 0 | const char *pszDstCompressed = |
5112 | 0 | CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED"); |
5113 | 0 | if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed)) |
5114 | 0 | bDstIsCompressed = true; |
5115 | | |
5116 | | /* -------------------------------------------------------------------- */ |
5117 | | /* What will our swath size be? */ |
5118 | | /* -------------------------------------------------------------------- */ |
5119 | |
|
5120 | 0 | int nSwathCols = 0; |
5121 | 0 | int nSwathLines = 0; |
5122 | 0 | GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed, |
5123 | 0 | FALSE, &nSwathCols, &nSwathLines); |
5124 | |
|
5125 | 0 | const int nPixelSize = GDALGetDataTypeSizeBytes(eDT); |
5126 | |
|
5127 | 0 | void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize); |
5128 | 0 | if (pSwathBuf == nullptr) |
5129 | 0 | { |
5130 | 0 | return CE_Failure; |
5131 | 0 | } |
5132 | | |
5133 | 0 | CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths", |
5134 | 0 | nSwathCols, nSwathLines); |
5135 | |
|
5136 | 0 | const bool bCheckHoles = |
5137 | 0 | CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO")); |
5138 | | |
5139 | | // Advise the source raster that we are going to read it completely |
5140 | 0 | poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr); |
5141 | | |
5142 | | /* ==================================================================== */ |
5143 | | /* Band oriented (uninterleaved) case. */ |
5144 | | /* ==================================================================== */ |
5145 | |
|
5146 | 0 | for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines) |
5147 | 0 | { |
5148 | 0 | int nThisLines = nSwathLines; |
5149 | |
|
5150 | 0 | if (iY + nThisLines > nYSize) |
5151 | 0 | nThisLines = nYSize - iY; |
5152 | |
|
5153 | 0 | for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols) |
5154 | 0 | { |
5155 | 0 | int nThisCols = nSwathCols; |
5156 | |
|
5157 | 0 | if (iX + nThisCols > nXSize) |
5158 | 0 | nThisCols = nXSize - iX; |
5159 | |
|
5160 | 0 | int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA; |
5161 | 0 | if (bCheckHoles) |
5162 | 0 | { |
5163 | 0 | nStatus = poSrcBand->GetDataCoverageStatus( |
5164 | 0 | iX, iY, nThisCols, nThisLines, |
5165 | 0 | GDAL_DATA_COVERAGE_STATUS_DATA); |
5166 | 0 | } |
5167 | 0 | if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA) |
5168 | 0 | { |
5169 | 0 | eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols, |
5170 | 0 | nThisLines, pSwathBuf, nThisCols, |
5171 | 0 | nThisLines, eDT, 0, 0, nullptr); |
5172 | |
|
5173 | 0 | if (eErr == CE_None) |
5174 | 0 | eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols, |
5175 | 0 | nThisLines, pSwathBuf, nThisCols, |
5176 | 0 | nThisLines, eDT, 0, 0, nullptr); |
5177 | 0 | } |
5178 | |
|
5179 | 0 | if (eErr == CE_None && |
5180 | 0 | !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize), |
5181 | 0 | nullptr, pProgressData)) |
5182 | 0 | { |
5183 | 0 | eErr = CE_Failure; |
5184 | 0 | CPLError(CE_Failure, CPLE_UserInterrupt, |
5185 | 0 | "User terminated CreateCopy()"); |
5186 | 0 | } |
5187 | 0 | } |
5188 | 0 | } |
5189 | | |
5190 | | /* -------------------------------------------------------------------- */ |
5191 | | /* Cleanup */ |
5192 | | /* -------------------------------------------------------------------- */ |
5193 | 0 | CPLFree(pSwathBuf); |
5194 | |
|
5195 | 0 | return eErr; |
5196 | 0 | } |
5197 | | |
5198 | | /************************************************************************/ |
5199 | | /* GDALCopyRasterIOExtraArg () */ |
5200 | | /************************************************************************/ |
5201 | | |
5202 | | void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg, |
5203 | | GDALRasterIOExtraArg *psSrcArg) |
5204 | 0 | { |
5205 | 0 | INIT_RASTERIO_EXTRA_ARG(*psDestArg); |
5206 | 0 | if (psSrcArg) |
5207 | 0 | { |
5208 | 0 | psDestArg->eResampleAlg = psSrcArg->eResampleAlg; |
5209 | 0 | psDestArg->pfnProgress = psSrcArg->pfnProgress; |
5210 | 0 | psDestArg->pProgressData = psSrcArg->pProgressData; |
5211 | 0 | psDestArg->bFloatingPointWindowValidity = |
5212 | 0 | psSrcArg->bFloatingPointWindowValidity; |
5213 | 0 | if (psSrcArg->bFloatingPointWindowValidity) |
5214 | 0 | { |
5215 | 0 | psDestArg->dfXOff = psSrcArg->dfXOff; |
5216 | 0 | psDestArg->dfYOff = psSrcArg->dfYOff; |
5217 | 0 | psDestArg->dfXSize = psSrcArg->dfXSize; |
5218 | 0 | psDestArg->dfYSize = psSrcArg->dfYSize; |
5219 | 0 | } |
5220 | 0 | if (psSrcArg->nVersion >= 2) |
5221 | 0 | { |
5222 | 0 | psDestArg->bUseOnlyThisScale = psSrcArg->bUseOnlyThisScale; |
5223 | 0 | } |
5224 | 0 | } |
5225 | 0 | } |
5226 | | |
5227 | | /************************************************************************/ |
5228 | | /* HasOnlyNoData() */ |
5229 | | /************************************************************************/ |
5230 | | |
/** Generic nodata test: a sample matches when it compares equal to the
 * nodata value. Floating-point types use dedicated specializations.
 */
template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
{
    const bool bMatches = (value == noDataValue);
    return bMatches;
}
5235 | | |
5236 | | template <> bool IsEqualToNoData<GFloat16>(GFloat16 value, GFloat16 noDataValue) |
5237 | 0 | { |
5238 | 0 | using std::isnan; |
5239 | 0 | return isnan(noDataValue) ? isnan(value) : value == noDataValue; |
5240 | 0 | } |
5241 | | |
5242 | | template <> bool IsEqualToNoData<float>(float value, float noDataValue) |
5243 | 0 | { |
5244 | 0 | return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue; |
5245 | 0 | } |
5246 | | |
5247 | | template <> bool IsEqualToNoData<double>(double value, double noDataValue) |
5248 | 0 | { |
5249 | 0 | return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue; |
5250 | 0 | } |
5251 | | |
/** Return whether every sample of a window equals the nodata value.
 *
 * The buffer is addressed as pBuffer[(iY * nLineStride + iX) * nComponents +
 * iBand], i.e. nLineStride is counted in pixels and each pixel holds
 * nComponents consecutive samples.
 *
 * @param pBuffer first sample of the window.
 * @param noDataValue value compared against, through IsEqualToNoData().
 * @param nWidth window width in pixels.
 * @param nHeight window height in lines.
 * @param nLineStride distance between two consecutive lines, in pixels.
 * @param nComponents number of samples per pixel.
 * @return true if all samples match the nodata value. An empty window
 * (nWidth == 0 or nHeight == 0) yields true.
 */
template <class T>
static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
                           size_t nHeight, size_t nLineStride,
                           size_t nComponents)
{
    // An empty window has no sample that could differ from nodata. Return
    // before the corner probes: they compute nWidth - 1 and nHeight - 1,
    // which would wrap around for a zero dimension and read out of bounds.
    if (nWidth == 0 || nHeight == 0)
        return true;

    // Fast test: check the 4 corners and the middle pixel.
    const size_t nLastCol = nWidth - 1;
    const size_t nLastRow = nHeight - 1;
    const size_t anProbePixels[] = {
        0,                                            // top-left
        nLastCol,                                     // top-right
        nLastRow / 2 * nLineStride + nLastCol / 2,    // middle
        nLastRow * nLineStride,                       // bottom-left
        nLastRow * nLineStride + nLastCol             // bottom-right
    };
    for (size_t iBand = 0; iBand < nComponents; iBand++)
    {
        for (const size_t nPixel : anProbePixels)
        {
            if (!IsEqualToNoData(pBuffer[nPixel * nComponents + iBand],
                                 noDataValue))
            {
                return false;
            }
        }
    }

    // Test all pixels.
    const size_t nSamplesPerLine = nWidth * nComponents;
    for (size_t iY = 0; iY < nHeight; iY++)
    {
        const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
        for (size_t iX = 0; iX < nSamplesPerLine; iX++)
        {
            if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
            {
                return false;
            }
        }
    }
    return true;
}
5295 | | |
5296 | | /************************************************************************/ |
5297 | | /* GDALBufferHasOnlyNoData() */ |
5298 | | /************************************************************************/ |
5299 | | |
5300 | | bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue, |
5301 | | size_t nWidth, size_t nHeight, size_t nLineStride, |
5302 | | size_t nComponents, int nBitsPerSample, |
5303 | | GDALBufferSampleFormat nSampleFormat) |
5304 | 0 | { |
5305 | | // In the case where the nodata is 0, we can compare several bytes at |
5306 | | // once. Select the largest natural integer type for the architecture. |
5307 | 0 | #if SIZEOF_VOIDP >= 8 || defined(__x86_64__) |
5308 | | // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4 |
5309 | 0 | typedef std::uint64_t WordType; |
5310 | | #else |
5311 | | typedef std::uint32_t WordType; |
5312 | | #endif |
5313 | 0 | if (dfNoDataValue == 0.0 && nWidth == nLineStride && |
5314 | | // Do not use this optimized code path for floating point numbers, |
5315 | | // as it can't detect negative zero. |
5316 | 0 | nSampleFormat != GSF_FLOATING_POINT) |
5317 | 0 | { |
5318 | 0 | const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer); |
5319 | 0 | const size_t nSize = |
5320 | 0 | (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8; |
5321 | 0 | size_t i = 0; |
5322 | 0 | const size_t nInitialIters = |
5323 | 0 | std::min(sizeof(WordType) - |
5324 | 0 | static_cast<size_t>( |
5325 | 0 | reinterpret_cast<std::uintptr_t>(pabyBuffer) % |
5326 | 0 | sizeof(WordType)), |
5327 | 0 | nSize); |
5328 | 0 | for (; i < nInitialIters; i++) |
5329 | 0 | { |
5330 | 0 | if (pabyBuffer[i]) |
5331 | 0 | return false; |
5332 | 0 | } |
5333 | 0 | for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType)) |
5334 | 0 | { |
5335 | 0 | if (*(reinterpret_cast<const WordType *>(pabyBuffer + i))) |
5336 | 0 | return false; |
5337 | 0 | } |
5338 | 0 | for (; i < nSize; i++) |
5339 | 0 | { |
5340 | 0 | if (pabyBuffer[i]) |
5341 | 0 | return false; |
5342 | 0 | } |
5343 | 0 | return true; |
5344 | 0 | } |
5345 | | |
5346 | 0 | if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT) |
5347 | 0 | { |
5348 | 0 | return GDALIsValueInRange<uint8_t>(dfNoDataValue) && |
5349 | 0 | HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer), |
5350 | 0 | static_cast<uint8_t>(dfNoDataValue), nWidth, |
5351 | 0 | nHeight, nLineStride, nComponents); |
5352 | 0 | } |
5353 | 0 | if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT) |
5354 | 0 | { |
5355 | | // Use unsigned implementation by converting the nodatavalue to |
5356 | | // unsigned |
5357 | 0 | return GDALIsValueInRange<int8_t>(dfNoDataValue) && |
5358 | 0 | HasOnlyNoDataT( |
5359 | 0 | static_cast<const uint8_t *>(pBuffer), |
5360 | 0 | static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)), |
5361 | 0 | nWidth, nHeight, nLineStride, nComponents); |
5362 | 0 | } |
5363 | 0 | if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT) |
5364 | 0 | { |
5365 | 0 | return GDALIsValueInRange<uint16_t>(dfNoDataValue) && |
5366 | 0 | HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer), |
5367 | 0 | static_cast<uint16_t>(dfNoDataValue), nWidth, |
5368 | 0 | nHeight, nLineStride, nComponents); |
5369 | 0 | } |
5370 | 0 | if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT) |
5371 | 0 | { |
5372 | | // Use unsigned implementation by converting the nodatavalue to |
5373 | | // unsigned |
5374 | 0 | return GDALIsValueInRange<int16_t>(dfNoDataValue) && |
5375 | 0 | HasOnlyNoDataT( |
5376 | 0 | static_cast<const uint16_t *>(pBuffer), |
5377 | 0 | static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)), |
5378 | 0 | nWidth, nHeight, nLineStride, nComponents); |
5379 | 0 | } |
5380 | 0 | if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT) |
5381 | 0 | { |
5382 | 0 | return GDALIsValueInRange<uint32_t>(dfNoDataValue) && |
5383 | 0 | HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer), |
5384 | 0 | static_cast<uint32_t>(dfNoDataValue), nWidth, |
5385 | 0 | nHeight, nLineStride, nComponents); |
5386 | 0 | } |
5387 | 0 | if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT) |
5388 | 0 | { |
5389 | | // Use unsigned implementation by converting the nodatavalue to |
5390 | | // unsigned |
5391 | 0 | return GDALIsValueInRange<int32_t>(dfNoDataValue) && |
5392 | 0 | HasOnlyNoDataT( |
5393 | 0 | static_cast<const uint32_t *>(pBuffer), |
5394 | 0 | static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)), |
5395 | 0 | nWidth, nHeight, nLineStride, nComponents); |
5396 | 0 | } |
5397 | 0 | if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT) |
5398 | 0 | { |
5399 | 0 | return GDALIsValueInRange<uint64_t>(dfNoDataValue) && |
5400 | 0 | HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer), |
5401 | 0 | static_cast<uint64_t>(dfNoDataValue), nWidth, |
5402 | 0 | nHeight, nLineStride, nComponents); |
5403 | 0 | } |
5404 | 0 | if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT) |
5405 | 0 | { |
5406 | | // Use unsigned implementation by converting the nodatavalue to |
5407 | | // unsigned |
5408 | 0 | return GDALIsValueInRange<int64_t>(dfNoDataValue) && |
5409 | 0 | HasOnlyNoDataT( |
5410 | 0 | static_cast<const uint64_t *>(pBuffer), |
5411 | 0 | static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)), |
5412 | 0 | nWidth, nHeight, nLineStride, nComponents); |
5413 | 0 | } |
5414 | 0 | if (nBitsPerSample == 16 && nSampleFormat == GSF_FLOATING_POINT) |
5415 | 0 | { |
5416 | 0 | return (std::isnan(dfNoDataValue) || |
5417 | 0 | GDALIsValueInRange<GFloat16>(dfNoDataValue)) && |
5418 | 0 | HasOnlyNoDataT(static_cast<const GFloat16 *>(pBuffer), |
5419 | 0 | static_cast<GFloat16>(dfNoDataValue), nWidth, |
5420 | 0 | nHeight, nLineStride, nComponents); |
5421 | 0 | } |
5422 | 0 | if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT) |
5423 | 0 | { |
5424 | 0 | return (std::isnan(dfNoDataValue) || |
5425 | 0 | GDALIsValueInRange<float>(dfNoDataValue)) && |
5426 | 0 | HasOnlyNoDataT(static_cast<const float *>(pBuffer), |
5427 | 0 | static_cast<float>(dfNoDataValue), nWidth, |
5428 | 0 | nHeight, nLineStride, nComponents); |
5429 | 0 | } |
5430 | 0 | if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT) |
5431 | 0 | { |
5432 | 0 | return HasOnlyNoDataT(static_cast<const double *>(pBuffer), |
5433 | 0 | dfNoDataValue, nWidth, nHeight, nLineStride, |
5434 | 0 | nComponents); |
5435 | 0 | } |
5436 | 0 | return false; |
5437 | 0 | } |
5438 | | |
5439 | | #ifdef HAVE_SSE2 |
5440 | | |
5441 | | /************************************************************************/ |
5442 | | /* GDALDeinterleave3Byte() */ |
5443 | | /************************************************************************/ |
5444 | | |
5445 | | #if defined(__GNUC__) && !defined(__clang__) |
5446 | | __attribute__((optimize("no-tree-vectorize"))) |
5447 | | #endif |
5448 | | static void |
5449 | | GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc, |
5450 | | GByte *CPL_RESTRICT pabyDest0, |
5451 | | GByte *CPL_RESTRICT pabyDest1, |
5452 | | GByte *CPL_RESTRICT pabyDest2, size_t nIters) |
5453 | | #ifdef USE_NEON_OPTIMIZATIONS |
5454 | | { |
5455 | | return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
5456 | | nIters); |
5457 | | } |
5458 | | #else |
5459 | 0 | { |
5460 | 0 | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
5461 | 0 | if (CPLHaveRuntimeSSSE3()) |
5462 | 0 | { |
5463 | 0 | return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, |
5464 | 0 | pabyDest2, nIters); |
5465 | 0 | } |
5466 | 0 | #endif |
5467 | | |
5468 | 0 | size_t i = 0; |
5469 | 0 | if (((reinterpret_cast<uintptr_t>(pabySrc) | |
5470 | 0 | reinterpret_cast<uintptr_t>(pabyDest0) | |
5471 | 0 | reinterpret_cast<uintptr_t>(pabyDest1) | |
5472 | 0 | reinterpret_cast<uintptr_t>(pabyDest2)) % |
5473 | 0 | sizeof(unsigned int)) == 0) |
5474 | 0 | { |
5475 | | // Slightly better than GCC autovectorizer |
5476 | 0 | for (size_t j = 0; i + 3 < nIters; i += 4, ++j) |
5477 | 0 | { |
5478 | 0 | unsigned int word0 = |
5479 | 0 | *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i); |
5480 | 0 | unsigned int word1 = |
5481 | 0 | *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4); |
5482 | 0 | unsigned int word2 = |
5483 | 0 | *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8); |
5484 | 0 | reinterpret_cast<unsigned int *>(pabyDest0)[j] = |
5485 | 0 | (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) | |
5486 | 0 | ((word2 >> 8) << 24); |
5487 | 0 | reinterpret_cast<unsigned int *>(pabyDest1)[j] = |
5488 | 0 | ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) | |
5489 | 0 | (((word1 >> 24)) << 16) | ((word2 >> 16) << 24); |
5490 | 0 | pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16); |
5491 | 0 | pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8); |
5492 | 0 | pabyDest2[j * 4 + 2] = static_cast<GByte>(word2); |
5493 | 0 | pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24); |
5494 | 0 | } |
5495 | 0 | } |
5496 | 0 | #if defined(__clang__) |
5497 | 0 | #pragma clang loop vectorize(disable) |
5498 | 0 | #endif |
5499 | 0 | for (; i < nIters; ++i) |
5500 | 0 | { |
5501 | 0 | pabyDest0[i] = pabySrc[3 * i + 0]; |
5502 | 0 | pabyDest1[i] = pabySrc[3 * i + 1]; |
5503 | 0 | pabyDest2[i] = pabySrc[3 * i + 2]; |
5504 | 0 | } |
5505 | 0 | } |
5506 | | #endif |
5507 | | |
5508 | | /************************************************************************/ |
5509 | | /* GDALDeinterleave4Byte() */ |
5510 | | /************************************************************************/ |
5511 | | |
5512 | | #if !defined(__GNUC__) || defined(__clang__) |
5513 | | |
5514 | | /************************************************************************/ |
5515 | | /* deinterleave() */ |
5516 | | /************************************************************************/ |
5517 | | |
/** Gather the low byte of each of the 16 int32 lanes held in four SSE
 * registers into a single register of 16 bytes.
 *
 * @tparam SHIFT if true, each input register is first shifted right by 8 bits
 * per int32 lane. The shift is applied in place, so the caller's registers
 * are modified and successive calls walk through the bytes of each lane.
 * @tparam MASK if true, only the low byte of each lane is kept before
 * packing. When false, the lanes are packed as they are.
 * @return the 16 packed bytes, lanes of xmm0_ori first and xmm3_ori last.
 */
template <bool SHIFT, bool MASK>
inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
                            __m128i &xmm2_ori, __m128i &xmm3_ori)
{
    if (SHIFT)
    {
        // Bring the next byte of each int32 lane into the low position.
        xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
        xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
        xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
        xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
    }

    __m128i lanes0 = xmm0_ori;
    __m128i lanes1 = xmm1_ori;
    __m128i lanes2 = xmm2_ori;
    __m128i lanes3 = xmm3_ori;
    if (MASK)
    {
        // Set higher 24bit of each int32 packed word to 0
        const __m128i lowByteMask = _mm_set1_epi32(0xff);
        lanes0 = _mm_and_si128(lanes0, lowByteMask);
        lanes1 = _mm_and_si128(lanes1, lowByteMask);
        lanes2 = _mm_and_si128(lanes2, lowByteMask);
        lanes3 = _mm_and_si128(lanes3, lowByteMask);
    }

    // Pack int32 to int16
    const __m128i packedLow = _mm_packs_epi32(lanes0, lanes1);
    const __m128i packedHigh = _mm_packs_epi32(lanes2, lanes3);
    // Pack int16 to uint8
    return _mm_packus_epi16(packedLow, packedHigh);
}
5556 | | |
5557 | | static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc, |
5558 | | GByte *CPL_RESTRICT pabyDest0, |
5559 | | GByte *CPL_RESTRICT pabyDest1, |
5560 | | GByte *CPL_RESTRICT pabyDest2, |
5561 | | GByte *CPL_RESTRICT pabyDest3, size_t nIters) |
5562 | | #ifdef USE_NEON_OPTIMIZATIONS |
5563 | | { |
5564 | | return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
5565 | | pabyDest3, nIters); |
5566 | | } |
5567 | | #else |
5568 | 0 | { |
5569 | 0 | #ifdef HAVE_SSSE3_AT_COMPILE_TIME |
5570 | 0 | if (CPLHaveRuntimeSSSE3()) |
5571 | 0 | { |
5572 | 0 | return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, |
5573 | 0 | pabyDest2, pabyDest3, nIters); |
5574 | 0 | } |
5575 | 0 | #endif |
5576 | | |
5577 | | // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to |
5578 | | // do something slightly better. |
5579 | 0 | size_t i = 0; |
5580 | 0 | for (; i + 15 < nIters; i += 16) |
5581 | 0 | { |
5582 | 0 | __m128i xmm0_ori = _mm_loadu_si128( |
5583 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0)); |
5584 | 0 | __m128i xmm1_ori = _mm_loadu_si128( |
5585 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16)); |
5586 | 0 | __m128i xmm2_ori = _mm_loadu_si128( |
5587 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32)); |
5588 | 0 | __m128i xmm3_ori = _mm_loadu_si128( |
5589 | 0 | reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48)); |
5590 | |
|
5591 | 0 | _mm_storeu_si128( |
5592 | 0 | reinterpret_cast<__m128i *>(pabyDest0 + i), |
5593 | 0 | deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
5594 | 0 | _mm_storeu_si128( |
5595 | 0 | reinterpret_cast<__m128i *>(pabyDest1 + i), |
5596 | 0 | deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
5597 | 0 | _mm_storeu_si128( |
5598 | 0 | reinterpret_cast<__m128i *>(pabyDest2 + i), |
5599 | 0 | deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
5600 | 0 | _mm_storeu_si128( |
5601 | 0 | reinterpret_cast<__m128i *>(pabyDest3 + i), |
5602 | 0 | deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori)); |
5603 | 0 | } |
5604 | |
|
5605 | 0 | #if defined(__clang__) |
5606 | 0 | #pragma clang loop vectorize(disable) |
5607 | 0 | #endif |
5608 | 0 | for (; i < nIters; ++i) |
5609 | 0 | { |
5610 | 0 | pabyDest0[i] = pabySrc[4 * i + 0]; |
5611 | 0 | pabyDest1[i] = pabySrc[4 * i + 1]; |
5612 | 0 | pabyDest2[i] = pabySrc[4 * i + 2]; |
5613 | 0 | pabyDest3[i] = pabySrc[4 * i + 3]; |
5614 | 0 | } |
5615 | 0 | } |
5616 | | #endif |
5617 | | #else |
5618 | | // GCC autovectorizer does an excellent job |
5619 | | __attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte( |
5620 | | const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0, |
5621 | | GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2, |
5622 | | GByte *CPL_RESTRICT pabyDest3, size_t nIters) |
5623 | | { |
5624 | | for (size_t i = 0; i < nIters; ++i) |
5625 | | { |
5626 | | pabyDest0[i] = pabySrc[4 * i + 0]; |
5627 | | pabyDest1[i] = pabySrc[4 * i + 1]; |
5628 | | pabyDest2[i] = pabySrc[4 * i + 2]; |
5629 | | pabyDest3[i] = pabySrc[4 * i + 3]; |
5630 | | } |
5631 | | } |
5632 | | #endif |
5633 | | |
5634 | | #else |
5635 | | |
5636 | | /************************************************************************/ |
5637 | | /* GDALDeinterleave3Byte() */ |
5638 | | /************************************************************************/ |
5639 | | |
5640 | | // TODO: Enabling below could help on non-Intel architectures where GCC knows |
5641 | | // how to auto-vectorize |
5642 | | // #if defined(__GNUC__) |
5643 | | //__attribute__((optimize("tree-vectorize"))) |
5644 | | // #endif |
5645 | | static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc, |
5646 | | GByte *CPL_RESTRICT pabyDest0, |
5647 | | GByte *CPL_RESTRICT pabyDest1, |
5648 | | GByte *CPL_RESTRICT pabyDest2, size_t nIters) |
5649 | | { |
5650 | | for (size_t i = 0; i < nIters; ++i) |
5651 | | { |
5652 | | pabyDest0[i] = pabySrc[3 * i + 0]; |
5653 | | pabyDest1[i] = pabySrc[3 * i + 1]; |
5654 | | pabyDest2[i] = pabySrc[3 * i + 2]; |
5655 | | } |
5656 | | } |
5657 | | |
5658 | | /************************************************************************/ |
5659 | | /* GDALDeinterleave4Byte() */ |
5660 | | /************************************************************************/ |
5661 | | |
5662 | | // TODO: Enabling below could help on non-Intel architectures where gcc knows |
5663 | | // how to auto-vectorize |
5664 | | // #if defined(__GNUC__) |
5665 | | //__attribute__((optimize("tree-vectorize"))) |
5666 | | // #endif |
5667 | | static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc, |
5668 | | GByte *CPL_RESTRICT pabyDest0, |
5669 | | GByte *CPL_RESTRICT pabyDest1, |
5670 | | GByte *CPL_RESTRICT pabyDest2, |
5671 | | GByte *CPL_RESTRICT pabyDest3, size_t nIters) |
5672 | | { |
5673 | | for (size_t i = 0; i < nIters; ++i) |
5674 | | { |
5675 | | pabyDest0[i] = pabySrc[4 * i + 0]; |
5676 | | pabyDest1[i] = pabySrc[4 * i + 1]; |
5677 | | pabyDest2[i] = pabySrc[4 * i + 2]; |
5678 | | pabyDest3[i] = pabySrc[4 * i + 3]; |
5679 | | } |
5680 | | } |
5681 | | |
5682 | | #endif |
5683 | | |
5684 | | /************************************************************************/ |
5685 | | /* GDALDeinterleave() */ |
5686 | | /************************************************************************/ |
5687 | | |
5688 | | /*! Copy values from a pixel-interleave buffer to multiple per-component |
5689 | | buffers. |
5690 | | |
5691 | | In pseudo-code |
5692 | | \verbatim |
5693 | | for(size_t i = 0; i < nIters; ++i) |
5694 | | for(int iComp = 0; iComp < nComponents; iComp++ ) |
5695 | | ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp] |
5696 | | \endverbatim |
5697 | | |
5698 | | The implementation is optimized for a few cases, like de-interleaving |
5699 | | of 3 or 4-components Byte buffers. |
5700 | | |
5701 | | \since GDAL 3.6 |
5702 | | */ |
5703 | | void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT, |
5704 | | int nComponents, void **ppDestBuffer, |
5705 | | GDALDataType eDestDT, size_t nIters) |
5706 | 0 | { |
5707 | 0 | if (eSourceDT == eDestDT) |
5708 | 0 | { |
5709 | 0 | if (eSourceDT == GDT_Byte || eSourceDT == GDT_Int8) |
5710 | 0 | { |
5711 | 0 | if (nComponents == 3) |
5712 | 0 | { |
5713 | 0 | const GByte *CPL_RESTRICT pabySrc = |
5714 | 0 | static_cast<const GByte *>(pSourceBuffer); |
5715 | 0 | GByte *CPL_RESTRICT pabyDest0 = |
5716 | 0 | static_cast<GByte *>(ppDestBuffer[0]); |
5717 | 0 | GByte *CPL_RESTRICT pabyDest1 = |
5718 | 0 | static_cast<GByte *>(ppDestBuffer[1]); |
5719 | 0 | GByte *CPL_RESTRICT pabyDest2 = |
5720 | 0 | static_cast<GByte *>(ppDestBuffer[2]); |
5721 | 0 | GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
5722 | 0 | nIters); |
5723 | 0 | return; |
5724 | 0 | } |
5725 | 0 | else if (nComponents == 4) |
5726 | 0 | { |
5727 | 0 | const GByte *CPL_RESTRICT pabySrc = |
5728 | 0 | static_cast<const GByte *>(pSourceBuffer); |
5729 | 0 | GByte *CPL_RESTRICT pabyDest0 = |
5730 | 0 | static_cast<GByte *>(ppDestBuffer[0]); |
5731 | 0 | GByte *CPL_RESTRICT pabyDest1 = |
5732 | 0 | static_cast<GByte *>(ppDestBuffer[1]); |
5733 | 0 | GByte *CPL_RESTRICT pabyDest2 = |
5734 | 0 | static_cast<GByte *>(ppDestBuffer[2]); |
5735 | 0 | GByte *CPL_RESTRICT pabyDest3 = |
5736 | 0 | static_cast<GByte *>(ppDestBuffer[3]); |
5737 | 0 | GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2, |
5738 | 0 | pabyDest3, nIters); |
5739 | 0 | return; |
5740 | 0 | } |
5741 | 0 | } |
5742 | | #if ((defined(__GNUC__) && !defined(__clang__)) || \ |
5743 | | defined(__INTEL_CLANG_COMPILER)) && \ |
5744 | | defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME) |
5745 | | else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) && |
5746 | | CPLHaveRuntimeSSSE3()) |
5747 | | { |
5748 | | if (nComponents == 3) |
5749 | | { |
5750 | | const GUInt16 *CPL_RESTRICT panSrc = |
5751 | | static_cast<const GUInt16 *>(pSourceBuffer); |
5752 | | GUInt16 *CPL_RESTRICT panDest0 = |
5753 | | static_cast<GUInt16 *>(ppDestBuffer[0]); |
5754 | | GUInt16 *CPL_RESTRICT panDest1 = |
5755 | | static_cast<GUInt16 *>(ppDestBuffer[1]); |
5756 | | GUInt16 *CPL_RESTRICT panDest2 = |
5757 | | static_cast<GUInt16 *>(ppDestBuffer[2]); |
5758 | | GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1, |
5759 | | panDest2, nIters); |
5760 | | return; |
5761 | | } |
5762 | | #if !defined(__INTEL_CLANG_COMPILER) |
5763 | | // ICC autovectorizer doesn't do a good job, at least with icx |
5764 | | // 2022.1.0.20220316 |
5765 | | else if (nComponents == 4) |
5766 | | { |
5767 | | const GUInt16 *CPL_RESTRICT panSrc = |
5768 | | static_cast<const GUInt16 *>(pSourceBuffer); |
5769 | | GUInt16 *CPL_RESTRICT panDest0 = |
5770 | | static_cast<GUInt16 *>(ppDestBuffer[0]); |
5771 | | GUInt16 *CPL_RESTRICT panDest1 = |
5772 | | static_cast<GUInt16 *>(ppDestBuffer[1]); |
5773 | | GUInt16 *CPL_RESTRICT panDest2 = |
5774 | | static_cast<GUInt16 *>(ppDestBuffer[2]); |
5775 | | GUInt16 *CPL_RESTRICT panDest3 = |
5776 | | static_cast<GUInt16 *>(ppDestBuffer[3]); |
5777 | | GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1, |
5778 | | panDest2, panDest3, nIters); |
5779 | | return; |
5780 | | } |
5781 | | #endif |
5782 | | } |
5783 | | #endif |
5784 | 0 | } |
5785 | | |
5786 | 0 | const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT); |
5787 | 0 | const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT); |
5788 | 0 | for (int iComp = 0; iComp < nComponents; iComp++) |
5789 | 0 | { |
5790 | 0 | GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) + |
5791 | 0 | iComp * nSourceDTSize, |
5792 | 0 | eSourceDT, nComponents * nSourceDTSize, |
5793 | 0 | ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters); |
5794 | 0 | } |
5795 | 0 | } |
5796 | | |
5797 | | /************************************************************************/ |
5798 | | /* GDALTranspose2DSingleToSingle() */ |
5799 | | /************************************************************************/ |
5800 | | /** |
5801 | | * Transpose a 2D array of non-complex values, in a efficient (cache-oblivious) way. |
5802 | | * |
5803 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
5804 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
5805 | | * @param nSrcWidth Width of pSrc array. |
5806 | | * @param nSrcHeight Height of pSrc array. |
5807 | | */ |
5808 | | |
5809 | | template <class DST, class SRC> |
5810 | | void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc, |
5811 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
5812 | | size_t nSrcHeight) |
5813 | 0 | { |
5814 | 0 | constexpr size_t blocksize = 32; |
5815 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
5816 | 0 | { |
5817 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
5818 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
5819 | 0 | { |
5820 | | // transpose the block beginning at [i,j] |
5821 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
5822 | 0 | for (size_t k = i; k < max_k; ++k) |
5823 | 0 | { |
5824 | 0 | for (size_t l = j; l < max_l; ++l) |
5825 | 0 | { |
5826 | 0 | GDALCopyWord(pSrc[l + k * nSrcWidth], |
5827 | 0 | pDst[k + l * nSrcHeight]); |
5828 | 0 | } |
5829 | 0 | } |
5830 | 0 | } |
5831 | 0 | } |
5832 | 0 | } Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned char>(unsigned char const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, signed char>(signed char const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned short>(unsigned short const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned int>(unsigned int const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, unsigned long>(unsigned long const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, long>(long const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned char>(unsigned char const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, signed char>(signed char const*, signed char*, unsigned long, unsigned long) Unexecuted 
instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned short>(unsigned short const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned int>(unsigned int const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, unsigned long>(unsigned long const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, long>(long const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned char>(unsigned char const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, signed char>(signed char const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned short>(unsigned short const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned 
short, unsigned int>(unsigned int const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, unsigned long>(unsigned long const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, long>(long const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, float>(float const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, signed char>(signed char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void 
GDALTranspose2DSingleToSingle<short, long>(long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned char>(unsigned char const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, signed char>(signed char const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned short>(unsigned short const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned int>(unsigned int const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, unsigned long>(unsigned long const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, long>(long const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long) Unexecuted 
instantiation: void GDALTranspose2DSingleToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, signed char>(signed char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, long>(long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned char>(unsigned char const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, signed char>(signed char const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned short>(unsigned short const*, 
unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned int>(unsigned int const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, unsigned long>(unsigned long const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, long>(long const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned char>(unsigned char const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, signed char>(signed char const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned short>(unsigned short const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, short>(short const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned int>(unsigned int const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, int>(int const*, 
long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, unsigned long>(unsigned long const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, long>(long const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, float>(float const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<long, double>(double const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, unsigned long>(unsigned long const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, 
cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, signed char>(signed char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, long>(long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, float>(float const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void 
GDALTranspose2DSingleToSingle<double, signed char>(signed char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, long>(long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToSingle<double, double>(double const*, double*, unsigned long, unsigned long) |
5833 | | |
5834 | | /************************************************************************/ |
5835 | | /* GDALTranspose2DComplexToComplex() */ |
5836 | | /************************************************************************/ |
5837 | | /** |
5838 | | * Transpose a 2D array of complex values into an array of complex values, |
5839 | | * in a efficient (cache-oblivious) way. |
5840 | | * |
5841 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
5842 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
5843 | | * @param nSrcWidth Width of pSrc array. |
5844 | | * @param nSrcHeight Height of pSrc array. |
5845 | | */ |
5846 | | template <class DST, class SRC> |
5847 | | void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc, |
5848 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
5849 | | size_t nSrcHeight) |
5850 | 0 | { |
5851 | 0 | constexpr size_t blocksize = 32; |
5852 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
5853 | 0 | { |
5854 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
5855 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
5856 | 0 | { |
5857 | | // transpose the block beginning at [i,j] |
5858 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
5859 | 0 | for (size_t k = i; k < max_k; ++k) |
5860 | 0 | { |
5861 | 0 | for (size_t l = j; l < max_l; ++l) |
5862 | 0 | { |
5863 | 0 | GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0], |
5864 | 0 | pDst[2 * (k + l * nSrcHeight) + 0]); |
5865 | 0 | GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1], |
5866 | 0 | pDst[2 * (k + l * nSrcHeight) + 1]); |
5867 | 0 | } |
5868 | 0 | } |
5869 | 0 | } |
5870 | 0 | } |
5871 | 0 | } Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<cpl::Float16, double>(double const*, 
cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, float>(float const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToComplex<double, double>(double const*, double*, unsigned long, unsigned long) |
5872 | | |
5873 | | /************************************************************************/ |
5874 | | /* GDALTranspose2DComplexToSingle() */ |
5875 | | /************************************************************************/ |
5876 | | /** |
5877 | | * Transpose a 2D array of complex values into an array of non-complex values, |
5878 | | * in a efficient (cache-oblivious) way. |
5879 | | * |
5880 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
5881 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
5882 | | * @param nSrcWidth Width of pSrc array. |
5883 | | * @param nSrcHeight Height of pSrc array. |
5884 | | */ |
5885 | | template <class DST, class SRC> |
5886 | | void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc, |
5887 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
5888 | | size_t nSrcHeight) |
5889 | 0 | { |
5890 | 0 | constexpr size_t blocksize = 32; |
5891 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
5892 | 0 | { |
5893 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
5894 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
5895 | 0 | { |
5896 | | // transpose the block beginning at [i,j] |
5897 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
5898 | 0 | for (size_t k = i; k < max_k; ++k) |
5899 | 0 | { |
5900 | 0 | for (size_t l = j; l < max_l; ++l) |
5901 | 0 | { |
5902 | 0 | GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0], |
5903 | 0 | pDst[k + l * nSrcHeight]); |
5904 | 0 | } |
5905 | 0 | } |
5906 | 0 | } |
5907 | 0 | } |
5908 | 0 | } Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, short>(short const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, int>(int const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, cpl::Float16>(cpl::Float16 const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, float>(float const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned char, double>(double const*, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, short>(short const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, int>(int const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, cpl::Float16>(cpl::Float16 const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, float>(float const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<signed char, double>(double const*, signed char*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, short>(short const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, int>(int const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, cpl::Float16>(cpl::Float16 const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, float>(float 
const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned short, double>(double const*, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, short>(short const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, int>(int const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, cpl::Float16>(cpl::Float16 const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, float>(float const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned int, double>(double const*, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void 
GDALTranspose2DComplexToSingle<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, short>(short const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, int>(int const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, cpl::Float16>(cpl::Float16 const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, float>(float const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<unsigned long, double>(double const*, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, short>(short const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, int>(int const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, cpl::Float16>(cpl::Float16 const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, float>(float const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<long, double>(double const*, long*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, 
unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, float>(float const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DComplexToSingle<double, double>(double const*, double*, unsigned long, unsigned long) |
5909 | | |
5910 | | /************************************************************************/ |
5911 | | /* GDALTranspose2DSingleToComplex() */ |
5912 | | /************************************************************************/ |
5913 | | /** |
5914 | | * Transpose a 2D array of non-complex values into an array of complex values, |
5915 | | * in a efficient (cache-oblivious) way. |
5916 | | * |
5917 | | * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth. |
5918 | | * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight. |
5919 | | * @param nSrcWidth Width of pSrc array. |
5920 | | * @param nSrcHeight Height of pSrc array. |
5921 | | */ |
5922 | | template <class DST, class SRC> |
5923 | | void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc, |
5924 | | DST *CPL_RESTRICT pDst, size_t nSrcWidth, |
5925 | | size_t nSrcHeight) |
5926 | 0 | { |
5927 | 0 | constexpr size_t blocksize = 32; |
5928 | 0 | for (size_t i = 0; i < nSrcHeight; i += blocksize) |
5929 | 0 | { |
5930 | 0 | const size_t max_k = std::min(i + blocksize, nSrcHeight); |
5931 | 0 | for (size_t j = 0; j < nSrcWidth; j += blocksize) |
5932 | 0 | { |
5933 | | // transpose the block beginning at [i,j] |
5934 | 0 | const size_t max_l = std::min(j + blocksize, nSrcWidth); |
5935 | 0 | for (size_t k = i; k < max_k; ++k) |
5936 | 0 | { |
5937 | 0 | for (size_t l = j; l < max_l; ++l) |
5938 | 0 | { |
5939 | 0 | GDALCopyWord(pSrc[l + k * nSrcWidth], |
5940 | 0 | pDst[2 * (k + l * nSrcHeight) + 0]); |
5941 | 0 | pDst[2 * (k + l * nSrcHeight) + 1] = 0; |
5942 | 0 | } |
5943 | 0 | } |
5944 | 0 | } |
5945 | 0 | } |
5946 | 0 | } Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned char>(unsigned char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, signed char>(signed char const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned short>(unsigned short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, short>(short const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned int>(unsigned int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, int>(int const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, unsigned long>(unsigned long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, long>(long const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, cpl::Float16>(cpl::Float16 const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, float>(float const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<short, double>(double const*, short*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned char>(unsigned char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, signed char>(signed char const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned short>(unsigned short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, 
short>(short const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned int>(unsigned int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, int>(int const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, unsigned long>(unsigned long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, long>(long const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, cpl::Float16>(cpl::Float16 const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, float>(float const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<int, double>(double const*, int*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned char>(unsigned char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, signed char>(signed char const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned short>(unsigned short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, short>(short const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned int>(unsigned int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, int>(int const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, unsigned long>(unsigned long const*, 
cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, long>(long const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, cpl::Float16>(cpl::Float16 const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, float>(float const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<cpl::Float16, double>(double const*, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned char>(unsigned char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, signed char>(signed char const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned short>(unsigned short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, short>(short const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned int>(unsigned int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, int>(int const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, unsigned long>(unsigned long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, long>(long const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, cpl::Float16>(cpl::Float16 const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, float>(float const*, float*, unsigned long, unsigned long) 
Unexecuted instantiation: void GDALTranspose2DSingleToComplex<float, double>(double const*, float*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned char>(unsigned char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, signed char>(signed char const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned short>(unsigned short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, short>(short const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned int>(unsigned int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, int>(int const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, unsigned long>(unsigned long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, long>(long const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, cpl::Float16>(cpl::Float16 const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, float>(float const*, double*, unsigned long, unsigned long) Unexecuted instantiation: void GDALTranspose2DSingleToComplex<double, double>(double const*, double*, unsigned long, unsigned long) |
5947 | | |
5948 | | /************************************************************************/ |
5949 | | /* GDALTranspose2D() */ |
5950 | | /************************************************************************/ |
5951 | | |
5952 | | template <class DST, bool DST_IS_COMPLEX> |
5953 | | static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst, |
5954 | | size_t nSrcWidth, size_t nSrcHeight) |
5955 | 0 | { |
5956 | 0 | #define CALL_GDALTranspose2D_internal(SRC_TYPE) \ |
5957 | 0 | do \ |
5958 | 0 | { \ |
5959 | 0 | if constexpr (DST_IS_COMPLEX) \ |
5960 | 0 | { \ |
5961 | 0 | GDALTranspose2DSingleToComplex( \ |
5962 | 0 | static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \ |
5963 | 0 | nSrcHeight); \ |
5964 | 0 | } \ |
5965 | 0 | else \ |
5966 | 0 | { \ |
5967 | 0 | GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \ |
5968 | 0 | pDst, nSrcWidth, nSrcHeight); \ |
5969 | 0 | } \ |
5970 | 0 | } while (0) |
5971 | |
|
5972 | 0 | #define CALL_GDALTranspose2DComplex_internal(SRC_TYPE) \ |
5973 | 0 | do \ |
5974 | 0 | { \ |
5975 | 0 | if constexpr (DST_IS_COMPLEX) \ |
5976 | 0 | { \ |
5977 | 0 | GDALTranspose2DComplexToComplex( \ |
5978 | 0 | static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \ |
5979 | 0 | nSrcHeight); \ |
5980 | 0 | } \ |
5981 | 0 | else \ |
5982 | 0 | { \ |
5983 | 0 | GDALTranspose2DComplexToSingle( \ |
5984 | 0 | static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \ |
5985 | 0 | nSrcHeight); \ |
5986 | 0 | } \ |
5987 | 0 | } while (0) |
5988 | | |
5989 | | // clang-format off |
5990 | 0 | switch (eSrcType) |
5991 | 0 | { |
5992 | 0 | case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t); break; |
5993 | 0 | case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t); break; |
5994 | 0 | case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t); break; |
5995 | 0 | case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t); break; |
5996 | 0 | case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t); break; |
5997 | 0 | case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t); break; |
5998 | 0 | case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t); break; |
5999 | 0 | case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t); break; |
6000 | 0 | case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16); break; |
6001 | 0 | case GDT_Float32: CALL_GDALTranspose2D_internal(float); break; |
6002 | 0 | case GDT_Float64: CALL_GDALTranspose2D_internal(double); break; |
6003 | 0 | case GDT_CInt16: CALL_GDALTranspose2DComplex_internal(int16_t); break; |
6004 | 0 | case GDT_CInt32: CALL_GDALTranspose2DComplex_internal(int32_t); break; |
6005 | 0 | case GDT_CFloat16: CALL_GDALTranspose2DComplex_internal(GFloat16); break; |
6006 | 0 | case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break; |
6007 | 0 | case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break; |
6008 | 0 | case GDT_Unknown: |
6009 | 0 | case GDT_TypeCount: |
6010 | 0 | break; |
6011 | 0 | } |
6012 | | // clang-format on |
6013 | |
|
6014 | 0 | #undef CALL_GDALTranspose2D_internal |
6015 | 0 | #undef CALL_GDALTranspose2DComplex_internal |
6016 | 0 | } Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned char, false>(void const*, GDALDataType, unsigned char*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<signed char, false>(void const*, GDALDataType, signed char*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned short, false>(void const*, GDALDataType, unsigned short*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, false>(void const*, GDALDataType, short*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned int, false>(void const*, GDALDataType, unsigned int*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, false>(void const*, GDALDataType, int*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<unsigned long, false>(void const*, GDALDataType, unsigned long*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<long, false>(void const*, GDALDataType, long*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, false>(void const*, GDALDataType, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, false>(void const*, GDALDataType, float*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, false>(void const*, GDALDataType, double*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<short, true>(void const*, GDALDataType, short*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<int, true>(void const*, GDALDataType, int*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<cpl::Float16, true>(void const*, 
GDALDataType, cpl::Float16*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<float, true>(void const*, GDALDataType, float*, unsigned long, unsigned long) Unexecuted instantiation: rasterio.cpp:void GDALTranspose2D<double, true>(void const*, GDALDataType, double*, unsigned long, unsigned long) |
6017 | | |
6018 | | /************************************************************************/ |
6019 | | /* GDALInterleave2Byte() */ |
6020 | | /************************************************************************/ |
6021 | | |
// GDALInterleave2Byte(): interleave two byte planes stored back to back.
//
// pSrc is read as two consecutive runs of nIters bytes each. For every i in
// [0, nIters), pDst[2 * i + 0] receives pSrc[i] and pDst[2 * i + 1] receives
// pSrc[i + nIters]; 2 * nIters bytes are thus read and written.
//
// Two implementations are provided. The explicit SSE2 one is selected when
// HAVE_SSE2 is defined and the compiler either does not define __GNUC__ or is
// the Intel clang-based compiler. Otherwise a plain loop is used, together
// with attributes/pragmas asking the compiler to vectorize it.
#if defined(HAVE_SSE2) &&                                                      \
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))

// ICC autovectorizer doesn't do a good job at generating good SSE code,
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
#if defined(__GNUC__)
__attribute__((noinline))
#endif
static void
GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
{
    size_t i = 0;
    // Number of bytes taken from each plane per vector iteration (one
    // 128-bit register).
    constexpr size_t VALS_PER_ITER = 16;
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
    {
        // Unaligned loads of 16 bytes from the first and second plane.
        __m128i xmm0 =
            _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i));
        __m128i xmm1 = _mm_loadu_si128(
            reinterpret_cast<__m128i const *>(pSrc + i + nIters));
        // Low halves interleaved: output pairs for elements i .. i + 7.
        _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i),
                         _mm_unpacklo_epi8(xmm0, xmm1));
        // High halves interleaved: output pairs for elements i + 8 .. i + 15.
        _mm_storeu_si128(
            reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER),
            _mm_unpackhi_epi8(xmm0, xmm1));
    }
    // Scalar loop for the remaining (fewer than VALS_PER_ITER) elements.
    // NOTE(review): vectorization is explicitly disabled here, presumably
    // because so few iterations remain; the source does not state the reason.
#if defined(__clang__)
#pragma clang loop vectorize(disable)
#endif
    for (; i < nIters; ++i)
    {
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
    }
}

#else

// Generic implementation relying on the compiler's autovectorizer: GCC is
// asked for it through the optimize("tree-vectorize") attribute, clang through
// the loop pragma placed just before the loop.
#if defined(__GNUC__) && !defined(__clang__)
__attribute__((optimize("tree-vectorize")))
#endif
#if defined(__GNUC__)
__attribute__((noinline))
#endif
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
// clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif
static void
GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
{
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
#pragma clang loop vectorize(enable)
#endif
    for (size_t i = 0; i < nIters; ++i)
    {
        pDst[2 * i + 0] = pSrc[i + 0 * nIters];
        pDst[2 * i + 1] = pSrc[i + 1 * nIters];
    }
}
// Restore the diagnostic state saved before the function definition.
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
#pragma clang diagnostic pop
#endif

#endif
6089 | | |
6090 | | /************************************************************************/ |
6091 | | /* GDALInterleave4Byte() */ |
6092 | | /************************************************************************/ |
6093 | | |
// GDALInterleave4Byte(): interleave four byte planes stored back to back.
//
// pSrc is read as four consecutive runs of nIters bytes each. For every i in
// [0, nIters) and p in [0, 4), pDst[4 * i + p] receives pSrc[i + p * nIters];
// 4 * nIters bytes are thus read and written.
//
// Two implementations are provided. The explicit SSE2 one is selected when
// HAVE_SSE2 is defined and the compiler either does not define __GNUC__ or is
// the Intel clang-based compiler. Otherwise a plain loop is used, together
// with attributes/pragmas asking the compiler to vectorize it.
#if defined(HAVE_SSE2) &&                                                      \
    (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))

// ICC autovectorizer doesn't do a good job at generating good SSE code,
// at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
#if defined(__GNUC__)
__attribute__((noinline))
#endif
static void
GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
{
    size_t i = 0;
    // Number of bytes taken from each plane per vector iteration (one
    // 128-bit register).
    constexpr size_t VALS_PER_ITER = 16;
    for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
    {
        // Unaligned loads of 16 bytes from each of the four planes.
        __m128i xmm0 = _mm_loadu_si128(
            reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters));
        __m128i xmm1 = _mm_loadu_si128(
            reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters));
        __m128i xmm2 = _mm_loadu_si128(
            reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters));
        __m128i xmm3 = _mm_loadu_si128(
            reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters));
        // Stage 1: byte-wise interleave of planes (0, 1) and of planes (2, 3).
        auto tmp0 = _mm_unpacklo_epi8(
            xmm0,
            xmm1);  // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...)
        auto tmp1 = _mm_unpackhi_epi8(
            xmm0,
            xmm1);  // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...)
        auto tmp2 = _mm_unpacklo_epi8(
            xmm2,
            xmm3);  // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...)
        auto tmp3 = _mm_unpackhi_epi8(
            xmm2,
            xmm3);  // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...)
        // Stage 2: 16-bit interleave of the (0, 1) pairs with the (2, 3)
        // pairs, giving complete 4-byte groups.
        auto tmp2_0 = _mm_unpacklo_epi16(
            tmp0,
            tmp2);  // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...)
        auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2);  // elements 4 to 7
        auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3);  // elements 8 to 11
        auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3);  // elements 12 to 15
        // Unaligned stores of the 4 x 16 output bytes, in element order.
        _mm_storeu_si128(
            reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER),
            tmp2_0);
        _mm_storeu_si128(
            reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER),
            tmp2_1);
        _mm_storeu_si128(
            reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER),
            tmp2_2);
        _mm_storeu_si128(
            reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER),
            tmp2_3);
    }
    // Scalar loop for the remaining (fewer than VALS_PER_ITER) elements.
    // NOTE(review): vectorization is explicitly disabled here, presumably
    // because so few iterations remain; the source does not state the reason.
#if defined(__clang__)
#pragma clang loop vectorize(disable)
#endif
    for (; i < nIters; ++i)
    {
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
    }
}

#else

// Generic implementation relying on the compiler's autovectorizer: GCC is
// asked for it through the optimize("tree-vectorize") attribute, clang through
// the loop pragma placed just before the loop.
#if defined(__GNUC__) && !defined(__clang__)
__attribute__((optimize("tree-vectorize")))
#endif
#if defined(__GNUC__)
__attribute__((noinline))
#endif
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
// clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif
static void
GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
                    uint8_t *CPL_RESTRICT pDst, size_t nIters)
{
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
#pragma clang loop vectorize(enable)
#endif
    for (size_t i = 0; i < nIters; ++i)
    {
        pDst[4 * i + 0] = pSrc[i + 0 * nIters];
        pDst[4 * i + 1] = pSrc[i + 1 * nIters];
        pDst[4 * i + 2] = pSrc[i + 2 * nIters];
        pDst[4 * i + 3] = pSrc[i + 3 * nIters];
    }
}
// Restore the diagnostic state saved before the function definition.
#if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
#pragma clang diagnostic pop
#endif

#endif
6194 | | |
6195 | | /************************************************************************/ |
6196 | | /* GDALTranspose2D() */ |
6197 | | /************************************************************************/ |
6198 | | |
6199 | | /** |
6200 | | * Transpose a 2D array in a efficient (cache-oblivious) way. |
6201 | | * |
6202 | | * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight. |
6203 | | * @param eSrcType Data type of pSrc. |
6204 | | * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth. |
6205 | | * @param eDstType Data type of pDst. |
6206 | | * @param nSrcWidth Width of pSrc array. |
6207 | | * @param nSrcHeight Height of pSrc array. |
6208 | | * @since GDAL 3.11 |
6209 | | */ |
6210 | | |
6211 | | void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst, |
6212 | | GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight) |
6213 | 0 | { |
6214 | 0 | if (eSrcType == eDstType && (eSrcType == GDT_Byte || eSrcType == GDT_Int8)) |
6215 | 0 | { |
6216 | 0 | if (nSrcHeight == 2) |
6217 | 0 | { |
6218 | 0 | GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc), |
6219 | 0 | static_cast<uint8_t *>(pDst), nSrcWidth); |
6220 | 0 | return; |
6221 | 0 | } |
6222 | 0 | if (nSrcHeight == 4) |
6223 | 0 | { |
6224 | 0 | GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc), |
6225 | 0 | static_cast<uint8_t *>(pDst), nSrcWidth); |
6226 | 0 | return; |
6227 | 0 | } |
6228 | 0 | #if (defined(HAVE_SSSE3_AT_COMPILE_TIME) && \ |
6229 | 0 | (defined(__x86_64) || defined(_M_X64))) |
6230 | 0 | if (CPLHaveRuntimeSSSE3()) |
6231 | 0 | { |
6232 | 0 | GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc), |
6233 | 0 | static_cast<uint8_t *>(pDst), nSrcWidth, |
6234 | 0 | nSrcHeight); |
6235 | 0 | return; |
6236 | 0 | } |
6237 | | #elif defined(USE_NEON_OPTIMIZATIONS) |
6238 | | { |
6239 | | GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc), |
6240 | | static_cast<uint8_t *>(pDst), nSrcWidth, |
6241 | | nSrcHeight); |
6242 | | return; |
6243 | | } |
6244 | | #endif |
6245 | 0 | } |
6246 | | |
6247 | 0 | #define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX) \ |
6248 | 0 | GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>( \ |
6249 | 0 | pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight) |
6250 | | |
6251 | | // clang-format off |
6252 | 0 | switch (eDstType) |
6253 | 0 | { |
6254 | 0 | case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t, false); break; |
6255 | 0 | case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t, false); break; |
6256 | 0 | case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t, false); break; |
6257 | 0 | case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t, false); break; |
6258 | 0 | case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t, false); break; |
6259 | 0 | case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t, false); break; |
6260 | 0 | case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t, false); break; |
6261 | 0 | case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t, false); break; |
6262 | 0 | case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16, false); break; |
6263 | 0 | case GDT_Float32: CALL_GDALTranspose2D_internal(float, false); break; |
6264 | 0 | case GDT_Float64: CALL_GDALTranspose2D_internal(double, false); break; |
6265 | 0 | case GDT_CInt16: CALL_GDALTranspose2D_internal(int16_t, true); break; |
6266 | 0 | case GDT_CInt32: CALL_GDALTranspose2D_internal(int32_t, true); break; |
6267 | 0 | case GDT_CFloat16: CALL_GDALTranspose2D_internal(GFloat16, true); break; |
6268 | 0 | case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break; |
6269 | 0 | case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break; |
6270 | 0 | case GDT_Unknown: |
6271 | 0 | case GDT_TypeCount: |
6272 | 0 | break; |
6273 | 0 | } |
6274 | | // clang-format on |
6275 | |
|
6276 | 0 | #undef CALL_GDALTranspose2D_internal |
6277 | 0 | } |
6278 | | |
6279 | | /************************************************************************/ |
6280 | | /* ExtractBitAndConvertTo255() */ |
6281 | | /************************************************************************/ |
6282 | | |
6283 | | #if defined(__GNUC__) || defined(_MSC_VER) |
6284 | | // Signedness of char implementation dependent, so be explicit. |
6285 | | // Assumes 2-complement integer types and sign extension of right shifting |
6286 | | // GCC guarantees such: |
6287 | | // https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation |
6288 | | static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit) |
6289 | 0 | { |
6290 | 0 | return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >> |
6291 | 0 | 7); |
6292 | 0 | } |
6293 | | #else |
6294 | | // Portable way |
6295 | | static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit) |
6296 | | { |
6297 | | return (byVal & (1 << nBit)) ? 255 : 0; |
6298 | | } |
6299 | | #endif |
6300 | | |
6301 | | /************************************************************************/ |
6302 | | /* ExpandEightPackedBitsToByteAt255() */ |
6303 | | /************************************************************************/ |
6304 | | |
6305 | | static inline void ExpandEightPackedBitsToByteAt255(GByte byVal, |
6306 | | GByte abyOutput[8]) |
6307 | 0 | { |
6308 | 0 | abyOutput[0] = ExtractBitAndConvertTo255(byVal, 7); |
6309 | 0 | abyOutput[1] = ExtractBitAndConvertTo255(byVal, 6); |
6310 | 0 | abyOutput[2] = ExtractBitAndConvertTo255(byVal, 5); |
6311 | 0 | abyOutput[3] = ExtractBitAndConvertTo255(byVal, 4); |
6312 | 0 | abyOutput[4] = ExtractBitAndConvertTo255(byVal, 3); |
6313 | 0 | abyOutput[5] = ExtractBitAndConvertTo255(byVal, 2); |
6314 | 0 | abyOutput[6] = ExtractBitAndConvertTo255(byVal, 1); |
6315 | 0 | abyOutput[7] = ExtractBitAndConvertTo255(byVal, 0); |
6316 | 0 | } |
6317 | | |
6318 | | /************************************************************************/ |
6319 | | /* GDALExpandPackedBitsToByteAt0Or255() */ |
6320 | | /************************************************************************/ |
6321 | | |
6322 | | /** Expand packed-bits (ordered from most-significant bit to least one) |
6323 | | into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit |
6324 | | at 1 to a byte at 255. |
6325 | | |
6326 | | The function does (in a possibly more optimized way) the following: |
6327 | | \code{.cpp} |
6328 | | for (size_t i = 0; i < nInputBits; ++i ) |
6329 | | { |
6330 | | pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0; |
6331 | | } |
6332 | | \endcode |
6333 | | |
6334 | | @param pabyInput Input array of (nInputBits + 7) / 8 bytes. |
6335 | | @param pabyOutput Output array of nInputBits bytes. |
6336 | | @param nInputBits Number of valid bits in pabyInput. |
6337 | | |
6338 | | @since 3.11 |
6339 | | */ |
6340 | | |
6341 | | void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput, |
6342 | | GByte *CPL_RESTRICT pabyOutput, |
6343 | | size_t nInputBits) |
6344 | 0 | { |
6345 | 0 | const size_t nInputWholeBytes = nInputBits / 8; |
6346 | 0 | size_t iByte = 0; |
6347 | |
|
6348 | 0 | #ifdef HAVE_SSE2 |
6349 | | // Mask to isolate each bit |
6350 | 0 | const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4, |
6351 | 0 | 8, 16, 32, 64, -128); |
6352 | 0 | const __m128i zero = _mm_setzero_si128(); |
6353 | 0 | const __m128i all_ones = _mm_set1_epi8(-1); |
6354 | | #ifdef __SSSE3__ |
6355 | | const __m128i dispatch_two_bytes = |
6356 | | _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); |
6357 | | #endif |
6358 | 0 | constexpr size_t SSE_REG_SIZE = sizeof(bit_mask); |
6359 | 0 | for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE) |
6360 | 0 | { |
6361 | 0 | __m128i reg_ori = _mm_loadu_si128( |
6362 | 0 | reinterpret_cast<const __m128i *>(pabyInput + iByte)); |
6363 | |
|
6364 | 0 | constexpr int NUM_PROCESSED_BYTES_PER_REG = 2; |
6365 | 0 | for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k) |
6366 | 0 | { |
6367 | | // Given reg_ori = (A, B, ... 14 other bytes ...), |
6368 | | // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B) |
6369 | | #ifdef __SSSE3__ |
6370 | | __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes); |
6371 | | #else |
6372 | 0 | __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori); |
6373 | 0 | reg = _mm_unpacklo_epi16(reg, reg); |
6374 | 0 | reg = _mm_unpacklo_epi32(reg, reg); |
6375 | 0 | #endif |
6376 | | |
6377 | | // Test if bits of interest are set |
6378 | 0 | reg = _mm_and_si128(reg, bit_mask); |
6379 | | |
6380 | | // Now test if those bits are set, by comparing to zero. So the |
6381 | | // result will be that bytes where bits are set will be at 0, and |
6382 | | // ones where they are cleared will be at 0xFF. So the inverse of |
6383 | | // the end result we want! |
6384 | 0 | reg = _mm_cmpeq_epi8(reg, zero); |
6385 | | |
6386 | | // Invert the result |
6387 | 0 | reg = _mm_andnot_si128(reg, all_ones); |
6388 | |
|
6389 | 0 | _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg); |
6390 | |
|
6391 | 0 | pabyOutput += SSE_REG_SIZE; |
6392 | | |
6393 | | // Right-shift of 2 bytes |
6394 | 0 | reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG); |
6395 | 0 | } |
6396 | 0 | } |
6397 | |
|
6398 | 0 | #endif // HAVE_SSE2 |
6399 | |
|
6400 | 0 | for (; iByte < nInputWholeBytes; ++iByte) |
6401 | 0 | { |
6402 | 0 | ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput); |
6403 | 0 | pabyOutput += 8; |
6404 | 0 | } |
6405 | 0 | for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit) |
6406 | 0 | { |
6407 | 0 | *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit); |
6408 | 0 | ++pabyOutput; |
6409 | 0 | } |
6410 | 0 | } |
6411 | | |
6412 | | /************************************************************************/ |
6413 | | /* ExpandEightPackedBitsToByteAt1() */ |
6414 | | /************************************************************************/ |
6415 | | |
6416 | | static inline void ExpandEightPackedBitsToByteAt1(GByte byVal, |
6417 | | GByte abyOutput[8]) |
6418 | 0 | { |
6419 | 0 | abyOutput[0] = (byVal >> 7) & 0x1; |
6420 | 0 | abyOutput[1] = (byVal >> 6) & 0x1; |
6421 | 0 | abyOutput[2] = (byVal >> 5) & 0x1; |
6422 | 0 | abyOutput[3] = (byVal >> 4) & 0x1; |
6423 | 0 | abyOutput[4] = (byVal >> 3) & 0x1; |
6424 | 0 | abyOutput[5] = (byVal >> 2) & 0x1; |
6425 | 0 | abyOutput[6] = (byVal >> 1) & 0x1; |
6426 | 0 | abyOutput[7] = (byVal >> 0) & 0x1; |
6427 | 0 | } |
6428 | | |
6429 | | /************************************************************************/ |
6430 | | /* GDALExpandPackedBitsToByteAt0Or1() */ |
6431 | | /************************************************************************/ |
6432 | | |
6433 | | /** Expand packed-bits (ordered from most-significant bit to least one) |
6434 | | into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit |
6435 | | at 1 to a byte at 1. |
6436 | | |
6437 | | The function does (in a possibly more optimized way) the following: |
6438 | | \code{.cpp} |
6439 | | for (size_t i = 0; i < nInputBits; ++i ) |
6440 | | { |
6441 | | pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0; |
6442 | | } |
6443 | | \endcode |
6444 | | |
6445 | | @param pabyInput Input array of (nInputBits + 7) / 8 bytes. |
6446 | | @param pabyOutput Output array of nInputBits bytes. |
6447 | | @param nInputBits Number of valid bits in pabyInput. |
6448 | | |
6449 | | @since 3.11 |
6450 | | */ |
6451 | | |
6452 | | void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput, |
6453 | | GByte *CPL_RESTRICT pabyOutput, |
6454 | | size_t nInputBits) |
6455 | 0 | { |
6456 | 0 | const size_t nInputWholeBytes = nInputBits / 8; |
6457 | 0 | size_t iByte = 0; |
6458 | 0 | for (; iByte < nInputWholeBytes; ++iByte) |
6459 | 0 | { |
6460 | 0 | ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput); |
6461 | 0 | pabyOutput += 8; |
6462 | 0 | } |
6463 | 0 | for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit) |
6464 | 0 | { |
6465 | 0 | *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1; |
6466 | 0 | ++pabyOutput; |
6467 | 0 | } |
6468 | 0 | } |